##// END OF EJS Templates
revlog: use proper version comparison during verify...
Gregory Szorc -
r39881:e6d3d39c default
parent child Browse files
Show More
@@ -1,2609 +1,2609
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import os
21 21 import re
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullhex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVIDX_DEFAULT_FLAGS,
42 42 REVIDX_ELLIPSIS,
43 43 REVIDX_EXTSTORED,
44 44 REVIDX_FLAGS_ORDER,
45 45 REVIDX_ISCENSORED,
46 46 REVIDX_KNOWN_FLAGS,
47 47 REVIDX_RAWTEXT_CHANGING_FLAGS,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 )
57 57 from .thirdparty import (
58 58 attr,
59 59 )
60 60 from . import (
61 61 ancestor,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 stringutil,
76 76 )
77 77
78 78 # blanked usage of all the name to prevent pyflakes constraints
79 79 # We need these name available in the module for extensions.
80 80 REVLOGV0
81 81 REVLOGV1
82 82 REVLOGV2
83 83 FLAG_INLINE_DATA
84 84 FLAG_GENERALDELTA
85 85 REVLOG_DEFAULT_FLAGS
86 86 REVLOG_DEFAULT_FORMAT
87 87 REVLOG_DEFAULT_VERSION
88 88 REVLOGV1_FLAGS
89 89 REVLOGV2_FLAGS
90 90 REVIDX_ISCENSORED
91 91 REVIDX_ELLIPSIS
92 92 REVIDX_EXTSTORED
93 93 REVIDX_DEFAULT_FLAGS
94 94 REVIDX_FLAGS_ORDER
95 95 REVIDX_KNOWN_FLAGS
96 96 REVIDX_RAWTEXT_CHANGING_FLAGS
97 97
98 98 parsers = policy.importmod(r'parsers')
99 99
100 100 # Aliased for performance.
101 101 _zlibdecompress = zlib.decompress
102 102
103 103 # max size of revlog with inline data
104 104 _maxinline = 131072
105 105 _chunksize = 1048576
106 106
107 107 # Store flag processors (cf. 'addflagprocessor()' to register)
108 108 _flagprocessors = {
109 109 REVIDX_ISCENSORED: None,
110 110 }
111 111
112 112 # Flag processors for REVIDX_ELLIPSIS.
113 113 def ellipsisreadprocessor(rl, text):
114 114 return text, False
115 115
116 116 def ellipsiswriteprocessor(rl, text):
117 117 return text, False
118 118
119 119 def ellipsisrawprocessor(rl, text):
120 120 return False
121 121
122 122 ellipsisprocessor = (
123 123 ellipsisreadprocessor,
124 124 ellipsiswriteprocessor,
125 125 ellipsisrawprocessor,
126 126 )
127 127
128 128 _mdre = re.compile('\1\n')
129 129 def parsemeta(text):
130 130 """return (metadatadict, metadatasize)"""
131 131 # text can be buffer, so we can't use .startswith or .index
132 132 if text[:2] != '\1\n':
133 133 return None, None
134 134 s = _mdre.search(text, 2).start()
135 135 mtext = text[2:s]
136 136 meta = {}
137 137 for l in mtext.splitlines():
138 138 k, v = l.split(": ", 1)
139 139 meta[k] = v
140 140 return meta, (s + 2)
141 141
142 142 def packmeta(meta, text):
143 143 keys = sorted(meta)
144 144 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
145 145 return "\1\n%s\1\n%s" % (metatext, text)
146 146
147 147 def _censoredtext(text):
148 148 m, offs = parsemeta(text)
149 149 return m and "censored" in m
150 150
151 151 def addflagprocessor(flag, processor):
152 152 """Register a flag processor on a revision data flag.
153 153
154 154 Invariant:
155 155 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
156 156 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
157 157 - Only one flag processor can be registered on a specific flag.
158 158 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
159 159 following signatures:
160 160 - (read) f(self, rawtext) -> text, bool
161 161 - (write) f(self, text) -> rawtext, bool
162 162 - (raw) f(self, rawtext) -> bool
163 163 "text" is presented to the user. "rawtext" is stored in revlog data, not
164 164 directly visible to the user.
165 165 The boolean returned by these transforms is used to determine whether
166 166 the returned text can be used for hash integrity checking. For example,
167 167 if "write" returns False, then "text" is used to generate hash. If
168 168 "write" returns True, that basically means "rawtext" returned by "write"
169 169 should be used to generate hash. Usually, "write" and "read" return
170 170 different booleans. And "raw" returns a same boolean as "write".
171 171
172 172 Note: The 'raw' transform is used for changegroup generation and in some
173 173 debug commands. In this case the transform only indicates whether the
174 174 contents can be used for hash integrity checks.
175 175 """
176 176 if not flag & REVIDX_KNOWN_FLAGS:
177 177 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
178 178 raise error.ProgrammingError(msg)
179 179 if flag not in REVIDX_FLAGS_ORDER:
180 180 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
181 181 raise error.ProgrammingError(msg)
182 182 if flag in _flagprocessors:
183 183 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
184 184 raise error.Abort(msg)
185 185 _flagprocessors[flag] = processor
186 186
187 187 def getoffset(q):
188 188 return int(q >> 16)
189 189
190 190 def gettype(q):
191 191 return int(q & 0xFFFF)
192 192
193 193 def offset_type(offset, type):
194 194 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
195 195 raise ValueError('unknown revlog index flags')
196 196 return int(int(offset) << 16 | type)
197 197
198 198 _nullhash = hashlib.sha1(nullid)
199 199
200 200 def hash(text, p1, p2):
201 201 """generate a hash from the given text and its parent hashes
202 202
203 203 This hash combines both the current file contents and its history
204 204 in a manner that makes it easy to distinguish nodes with the same
205 205 content in the revision graph.
206 206 """
207 207 # As of now, if one of the parent node is null, p2 is null
208 208 if p2 == nullid:
209 209 # deep copy of a hash is faster than creating one
210 210 s = _nullhash.copy()
211 211 s.update(p1)
212 212 else:
213 213 # none of the parent nodes are nullid
214 214 if p1 < p2:
215 215 a = p1
216 216 b = p2
217 217 else:
218 218 a = p2
219 219 b = p1
220 220 s = hashlib.sha1(a)
221 221 s.update(b)
222 222 s.update(text)
223 223 return s.digest()
224 224
225 225 @attr.s(slots=True, frozen=True)
226 226 class _revisioninfo(object):
227 227 """Information about a revision that allows building its fulltext
228 228 node: expected hash of the revision
229 229 p1, p2: parent revs of the revision
230 230 btext: built text cache consisting of a one-element list
231 231 cachedelta: (baserev, uncompressed_delta) or None
232 232 flags: flags associated to the revision storage
233 233
234 234 One of btext[0] or cachedelta must be set.
235 235 """
236 236 node = attr.ib()
237 237 p1 = attr.ib()
238 238 p2 = attr.ib()
239 239 btext = attr.ib()
240 240 textlen = attr.ib()
241 241 cachedelta = attr.ib()
242 242 flags = attr.ib()
243 243
244 244 @interfaceutil.implementer(repository.irevisiondelta)
245 245 @attr.s(slots=True, frozen=True)
246 246 class revlogrevisiondelta(object):
247 247 node = attr.ib()
248 248 p1node = attr.ib()
249 249 p2node = attr.ib()
250 250 basenode = attr.ib()
251 251 linknode = attr.ib()
252 252 flags = attr.ib()
253 253 baserevisionsize = attr.ib()
254 254 revision = attr.ib()
255 255 delta = attr.ib()
256 256
257 257 @interfaceutil.implementer(repository.iverifyproblem)
258 258 @attr.s(frozen=True)
259 259 class revlogproblem(object):
260 260 warning = attr.ib(default=None)
261 261 error = attr.ib(default=None)
262 262
263 263 # index v0:
264 264 # 4 bytes: offset
265 265 # 4 bytes: compressed length
266 266 # 4 bytes: base rev
267 267 # 4 bytes: link rev
268 268 # 20 bytes: parent 1 nodeid
269 269 # 20 bytes: parent 2 nodeid
270 270 # 20 bytes: nodeid
271 271 indexformatv0 = struct.Struct(">4l20s20s20s")
272 272 indexformatv0_pack = indexformatv0.pack
273 273 indexformatv0_unpack = indexformatv0.unpack
274 274
275 275 class revlogoldindex(list):
276 276 def __getitem__(self, i):
277 277 if i == -1:
278 278 return (0, 0, 0, -1, -1, -1, -1, nullid)
279 279 return list.__getitem__(self, i)
280 280
281 281 class revlogoldio(object):
282 282 def __init__(self):
283 283 self.size = indexformatv0.size
284 284
285 285 def parseindex(self, data, inline):
286 286 s = self.size
287 287 index = []
288 288 nodemap = {nullid: nullrev}
289 289 n = off = 0
290 290 l = len(data)
291 291 while off + s <= l:
292 292 cur = data[off:off + s]
293 293 off += s
294 294 e = indexformatv0_unpack(cur)
295 295 # transform to revlogv1 format
296 296 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
297 297 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
298 298 index.append(e2)
299 299 nodemap[e[6]] = n
300 300 n += 1
301 301
302 302 return revlogoldindex(index), nodemap, None
303 303
304 304 def packentry(self, entry, node, version, rev):
305 305 if gettype(entry[0]):
306 306 raise error.RevlogError(_('index entry flags need revlog '
307 307 'version 1'))
308 308 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
309 309 node(entry[5]), node(entry[6]), entry[7])
310 310 return indexformatv0_pack(*e2)
311 311
312 312 # index ng:
313 313 # 6 bytes: offset
314 314 # 2 bytes: flags
315 315 # 4 bytes: compressed length
316 316 # 4 bytes: uncompressed length
317 317 # 4 bytes: base rev
318 318 # 4 bytes: link rev
319 319 # 4 bytes: parent 1 rev
320 320 # 4 bytes: parent 2 rev
321 321 # 32 bytes: nodeid
322 322 indexformatng = struct.Struct(">Qiiiiii20s12x")
323 323 indexformatng_pack = indexformatng.pack
324 324 versionformat = struct.Struct(">I")
325 325 versionformat_pack = versionformat.pack
326 326 versionformat_unpack = versionformat.unpack
327 327
328 328 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
329 329 # signed integer)
330 330 _maxentrysize = 0x7fffffff
331 331
332 332 class revlogio(object):
333 333 def __init__(self):
334 334 self.size = indexformatng.size
335 335
336 336 def parseindex(self, data, inline):
337 337 # call the C implementation to parse the index data
338 338 index, cache = parsers.parse_index2(data, inline)
339 339 return index, getattr(index, 'nodemap', None), cache
340 340
341 341 def packentry(self, entry, node, version, rev):
342 342 p = indexformatng_pack(*entry)
343 343 if rev == 0:
344 344 p = versionformat_pack(version) + p[4:]
345 345 return p
346 346
347 347 class revlog(object):
348 348 """
349 349 the underlying revision storage object
350 350
351 351 A revlog consists of two parts, an index and the revision data.
352 352
353 353 The index is a file with a fixed record size containing
354 354 information on each revision, including its nodeid (hash), the
355 355 nodeids of its parents, the position and offset of its data within
356 356 the data file, and the revision it's based on. Finally, each entry
357 357 contains a linkrev entry that can serve as a pointer to external
358 358 data.
359 359
360 360 The revision data itself is a linear collection of data chunks.
361 361 Each chunk represents a revision and is usually represented as a
362 362 delta against the previous chunk. To bound lookup time, runs of
363 363 deltas are limited to about 2 times the length of the original
364 364 version data. This makes retrieval of a version proportional to
365 365 its size, or O(1) relative to the number of revisions.
366 366
367 367 Both pieces of the revlog are written to in an append-only
368 368 fashion, which means we never need to rewrite a file to insert or
369 369 remove data, and can use some simple techniques to avoid the need
370 370 for locking while reading.
371 371
372 372 If checkambig, indexfile is opened with checkambig=True at
373 373 writing, to avoid file stat ambiguity.
374 374
375 375 If mmaplargeindex is True, and an mmapindexthreshold is set, the
376 376 index will be mmapped rather than read if it is larger than the
377 377 configured threshold.
378 378
379 379 If censorable is True, the revlog can have censored revisions.
380 380 """
381 381 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
382 382 mmaplargeindex=False, censorable=False):
383 383 """
384 384 create a revlog object
385 385
386 386 opener is a function that abstracts the file opening operation
387 387 and can be used to implement COW semantics or the like.
388 388 """
389 389 self.indexfile = indexfile
390 390 self.datafile = datafile or (indexfile[:-2] + ".d")
391 391 self.opener = opener
392 392 # When True, indexfile is opened with checkambig=True at writing, to
393 393 # avoid file stat ambiguity.
394 394 self._checkambig = checkambig
395 395 self._censorable = censorable
396 396 # 3-tuple of (node, rev, text) for a raw revision.
397 397 self._cache = None
398 398 # Maps rev to chain base rev.
399 399 self._chainbasecache = util.lrucachedict(100)
400 400 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
401 401 self._chunkcache = (0, '')
402 402 # How much data to read and cache into the raw revlog data cache.
403 403 self._chunkcachesize = 65536
404 404 self._maxchainlen = None
405 405 self._deltabothparents = True
406 406 self.index = []
407 407 # Mapping of partial identifiers to full nodes.
408 408 self._pcache = {}
409 409 # Mapping of revision integer to full node.
410 410 self._nodecache = {nullid: nullrev}
411 411 self._nodepos = None
412 412 self._compengine = 'zlib'
413 413 self._maxdeltachainspan = -1
414 414 self._withsparseread = False
415 415 self._sparserevlog = False
416 416 self._srdensitythreshold = 0.50
417 417 self._srmingapsize = 262144
418 418
419 419 # Make copy of flag processors so each revlog instance can support
420 420 # custom flags.
421 421 self._flagprocessors = dict(_flagprocessors)
422 422
423 423 mmapindexthreshold = None
424 424 v = REVLOG_DEFAULT_VERSION
425 425 opts = getattr(opener, 'options', None)
426 426 if opts is not None:
427 427 if 'revlogv2' in opts:
428 428 # version 2 revlogs always use generaldelta.
429 429 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
430 430 elif 'revlogv1' in opts:
431 431 if 'generaldelta' in opts:
432 432 v |= FLAG_GENERALDELTA
433 433 else:
434 434 v = 0
435 435 if 'chunkcachesize' in opts:
436 436 self._chunkcachesize = opts['chunkcachesize']
437 437 if 'maxchainlen' in opts:
438 438 self._maxchainlen = opts['maxchainlen']
439 439 if 'deltabothparents' in opts:
440 440 self._deltabothparents = opts['deltabothparents']
441 441 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
442 442 if 'compengine' in opts:
443 443 self._compengine = opts['compengine']
444 444 if 'maxdeltachainspan' in opts:
445 445 self._maxdeltachainspan = opts['maxdeltachainspan']
446 446 if mmaplargeindex and 'mmapindexthreshold' in opts:
447 447 mmapindexthreshold = opts['mmapindexthreshold']
448 448 self._sparserevlog = bool(opts.get('sparse-revlog', False))
449 449 withsparseread = bool(opts.get('with-sparse-read', False))
450 450 # sparse-revlog forces sparse-read
451 451 self._withsparseread = self._sparserevlog or withsparseread
452 452 if 'sparse-read-density-threshold' in opts:
453 453 self._srdensitythreshold = opts['sparse-read-density-threshold']
454 454 if 'sparse-read-min-gap-size' in opts:
455 455 self._srmingapsize = opts['sparse-read-min-gap-size']
456 456 if opts.get('enableellipsis'):
457 457 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
458 458
459 459 if self._chunkcachesize <= 0:
460 460 raise error.RevlogError(_('revlog chunk cache size %r is not '
461 461 'greater than 0') % self._chunkcachesize)
462 462 elif self._chunkcachesize & (self._chunkcachesize - 1):
463 463 raise error.RevlogError(_('revlog chunk cache size %r is not a '
464 464 'power of 2') % self._chunkcachesize)
465 465
466 466 indexdata = ''
467 467 self._initempty = True
468 468 try:
469 469 with self._indexfp() as f:
470 470 if (mmapindexthreshold is not None and
471 471 self.opener.fstat(f).st_size >= mmapindexthreshold):
472 472 indexdata = util.buffer(util.mmapread(f))
473 473 else:
474 474 indexdata = f.read()
475 475 if len(indexdata) > 0:
476 476 v = versionformat_unpack(indexdata[:4])[0]
477 477 self._initempty = False
478 478 except IOError as inst:
479 479 if inst.errno != errno.ENOENT:
480 480 raise
481 481
482 482 self.version = v
483 483 self._inline = v & FLAG_INLINE_DATA
484 484 self._generaldelta = v & FLAG_GENERALDELTA
485 485 flags = v & ~0xFFFF
486 486 fmt = v & 0xFFFF
487 487 if fmt == REVLOGV0:
488 488 if flags:
489 489 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
490 490 'revlog %s') %
491 491 (flags >> 16, fmt, self.indexfile))
492 492 elif fmt == REVLOGV1:
493 493 if flags & ~REVLOGV1_FLAGS:
494 494 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
495 495 'revlog %s') %
496 496 (flags >> 16, fmt, self.indexfile))
497 497 elif fmt == REVLOGV2:
498 498 if flags & ~REVLOGV2_FLAGS:
499 499 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
500 500 'revlog %s') %
501 501 (flags >> 16, fmt, self.indexfile))
502 502 else:
503 503 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
504 504 (fmt, self.indexfile))
505 505
506 506 self._storedeltachains = True
507 507
508 508 self._io = revlogio()
509 509 if self.version == REVLOGV0:
510 510 self._io = revlogoldio()
511 511 try:
512 512 d = self._io.parseindex(indexdata, self._inline)
513 513 except (ValueError, IndexError):
514 514 raise error.RevlogError(_("index %s is corrupted") %
515 515 self.indexfile)
516 516 self.index, nodemap, self._chunkcache = d
517 517 if nodemap is not None:
518 518 self.nodemap = self._nodecache = nodemap
519 519 if not self._chunkcache:
520 520 self._chunkclear()
521 521 # revnum -> (chain-length, sum-delta-length)
522 522 self._chaininfocache = {}
523 523 # revlog header -> revlog compressor
524 524 self._decompressors = {}
525 525
526 526 @util.propertycache
527 527 def _compressor(self):
528 528 return util.compengines[self._compengine].revlogcompressor()
529 529
530 530 def _indexfp(self, mode='r'):
531 531 """file object for the revlog's index file"""
532 532 args = {r'mode': mode}
533 533 if mode != 'r':
534 534 args[r'checkambig'] = self._checkambig
535 535 if mode == 'w':
536 536 args[r'atomictemp'] = True
537 537 return self.opener(self.indexfile, **args)
538 538
539 539 def _datafp(self, mode='r'):
540 540 """file object for the revlog's data file"""
541 541 return self.opener(self.datafile, mode=mode)
542 542
543 543 @contextlib.contextmanager
544 544 def _datareadfp(self, existingfp=None):
545 545 """file object suitable to read data"""
546 546 if existingfp is not None:
547 547 yield existingfp
548 548 else:
549 549 if self._inline:
550 550 func = self._indexfp
551 551 else:
552 552 func = self._datafp
553 553 with func() as fp:
554 554 yield fp
555 555
556 556 def tip(self):
557 557 return self.node(len(self.index) - 1)
558 558 def __contains__(self, rev):
559 559 return 0 <= rev < len(self)
560 560 def __len__(self):
561 561 return len(self.index)
562 562 def __iter__(self):
563 563 return iter(pycompat.xrange(len(self)))
564 564 def revs(self, start=0, stop=None):
565 565 """iterate over all rev in this revlog (from start to stop)"""
566 566 step = 1
567 567 length = len(self)
568 568 if stop is not None:
569 569 if start > stop:
570 570 step = -1
571 571 stop += step
572 572 if stop > length:
573 573 stop = length
574 574 else:
575 575 stop = length
576 576 return pycompat.xrange(start, stop, step)
577 577
578 578 @util.propertycache
579 579 def nodemap(self):
580 580 if self.index:
581 581 # populate mapping down to the initial node
582 582 node0 = self.index[0][7] # get around changelog filtering
583 583 self.rev(node0)
584 584 return self._nodecache
585 585
586 586 def hasnode(self, node):
587 587 try:
588 588 self.rev(node)
589 589 return True
590 590 except KeyError:
591 591 return False
592 592
593 593 def candelta(self, baserev, rev):
594 594 """whether two revisions (baserev, rev) can be delta-ed or not"""
595 595 # Disable delta if either rev requires a content-changing flag
596 596 # processor (ex. LFS). This is because such flag processor can alter
597 597 # the rawtext content that the delta will be based on, and two clients
598 598 # could have a same revlog node with different flags (i.e. different
599 599 # rawtext contents) and the delta could be incompatible.
600 600 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
601 601 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
602 602 return False
603 603 return True
604 604
605 605 def clearcaches(self):
606 606 self._cache = None
607 607 self._chainbasecache.clear()
608 608 self._chunkcache = (0, '')
609 609 self._pcache = {}
610 610
611 611 try:
612 612 self._nodecache.clearcaches()
613 613 except AttributeError:
614 614 self._nodecache = {nullid: nullrev}
615 615 self._nodepos = None
616 616
617 617 def rev(self, node):
618 618 try:
619 619 return self._nodecache[node]
620 620 except TypeError:
621 621 raise
622 622 except error.RevlogError:
623 623 # parsers.c radix tree lookup failed
624 624 if node == wdirid or node in wdirfilenodeids:
625 625 raise error.WdirUnsupported
626 626 raise error.LookupError(node, self.indexfile, _('no node'))
627 627 except KeyError:
628 628 # pure python cache lookup failed
629 629 n = self._nodecache
630 630 i = self.index
631 631 p = self._nodepos
632 632 if p is None:
633 633 p = len(i) - 1
634 634 else:
635 635 assert p < len(i)
636 636 for r in pycompat.xrange(p, -1, -1):
637 637 v = i[r][7]
638 638 n[v] = r
639 639 if v == node:
640 640 self._nodepos = r - 1
641 641 return r
642 642 if node == wdirid or node in wdirfilenodeids:
643 643 raise error.WdirUnsupported
644 644 raise error.LookupError(node, self.indexfile, _('no node'))
645 645
646 646 # Accessors for index entries.
647 647
648 648 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
649 649 # are flags.
650 650 def start(self, rev):
651 651 return int(self.index[rev][0] >> 16)
652 652
653 653 def flags(self, rev):
654 654 return self.index[rev][0] & 0xFFFF
655 655
656 656 def length(self, rev):
657 657 return self.index[rev][1]
658 658
659 659 def rawsize(self, rev):
660 660 """return the length of the uncompressed text for a given revision"""
661 661 l = self.index[rev][2]
662 662 if l >= 0:
663 663 return l
664 664
665 665 t = self.revision(rev, raw=True)
666 666 return len(t)
667 667
668 668 def size(self, rev):
669 669 """length of non-raw text (processed by a "read" flag processor)"""
670 670 # fast path: if no "read" flag processor could change the content,
671 671 # size is rawsize. note: ELLIPSIS is known to not change the content.
672 672 flags = self.flags(rev)
673 673 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
674 674 return self.rawsize(rev)
675 675
676 676 return len(self.revision(rev, raw=False))
677 677
678 678 def chainbase(self, rev):
679 679 base = self._chainbasecache.get(rev)
680 680 if base is not None:
681 681 return base
682 682
683 683 index = self.index
684 684 iterrev = rev
685 685 base = index[iterrev][3]
686 686 while base != iterrev:
687 687 iterrev = base
688 688 base = index[iterrev][3]
689 689
690 690 self._chainbasecache[rev] = base
691 691 return base
692 692
693 693 def linkrev(self, rev):
694 694 return self.index[rev][4]
695 695
696 696 def parentrevs(self, rev):
697 697 try:
698 698 entry = self.index[rev]
699 699 except IndexError:
700 700 if rev == wdirrev:
701 701 raise error.WdirUnsupported
702 702 raise
703 703
704 704 return entry[5], entry[6]
705 705
706 706 def node(self, rev):
707 707 try:
708 708 return self.index[rev][7]
709 709 except IndexError:
710 710 if rev == wdirrev:
711 711 raise error.WdirUnsupported
712 712 raise
713 713
714 714 # Derived from index values.
715 715
716 716 def end(self, rev):
717 717 return self.start(rev) + self.length(rev)
718 718
719 719 def parents(self, node):
720 720 i = self.index
721 721 d = i[self.rev(node)]
722 722 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
723 723
724 724 def chainlen(self, rev):
725 725 return self._chaininfo(rev)[0]
726 726
727 727 def _chaininfo(self, rev):
728 728 chaininfocache = self._chaininfocache
729 729 if rev in chaininfocache:
730 730 return chaininfocache[rev]
731 731 index = self.index
732 732 generaldelta = self._generaldelta
733 733 iterrev = rev
734 734 e = index[iterrev]
735 735 clen = 0
736 736 compresseddeltalen = 0
737 737 while iterrev != e[3]:
738 738 clen += 1
739 739 compresseddeltalen += e[1]
740 740 if generaldelta:
741 741 iterrev = e[3]
742 742 else:
743 743 iterrev -= 1
744 744 if iterrev in chaininfocache:
745 745 t = chaininfocache[iterrev]
746 746 clen += t[0]
747 747 compresseddeltalen += t[1]
748 748 break
749 749 e = index[iterrev]
750 750 else:
751 751 # Add text length of base since decompressing that also takes
752 752 # work. For cache hits the length is already included.
753 753 compresseddeltalen += e[1]
754 754 r = (clen, compresseddeltalen)
755 755 chaininfocache[rev] = r
756 756 return r
757 757
758 758 def _deltachain(self, rev, stoprev=None):
759 759 """Obtain the delta chain for a revision.
760 760
761 761 ``stoprev`` specifies a revision to stop at. If not specified, we
762 762 stop at the base of the chain.
763 763
764 764 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
765 765 revs in ascending order and ``stopped`` is a bool indicating whether
766 766 ``stoprev`` was hit.
767 767 """
768 768 # Try C implementation.
769 769 try:
770 770 return self.index.deltachain(rev, stoprev, self._generaldelta)
771 771 except AttributeError:
772 772 pass
773 773
774 774 chain = []
775 775
776 776 # Alias to prevent attribute lookup in tight loop.
777 777 index = self.index
778 778 generaldelta = self._generaldelta
779 779
780 780 iterrev = rev
781 781 e = index[iterrev]
782 782 while iterrev != e[3] and iterrev != stoprev:
783 783 chain.append(iterrev)
784 784 if generaldelta:
785 785 iterrev = e[3]
786 786 else:
787 787 iterrev -= 1
788 788 e = index[iterrev]
789 789
790 790 if iterrev == stoprev:
791 791 stopped = True
792 792 else:
793 793 chain.append(iterrev)
794 794 stopped = False
795 795
796 796 chain.reverse()
797 797 return chain, stopped
798 798
799 799 def ancestors(self, revs, stoprev=0, inclusive=False):
800 800 """Generate the ancestors of 'revs' in reverse topological order.
801 801 Does not generate revs lower than stoprev.
802 802
803 803 See the documentation for ancestor.lazyancestors for more details."""
804 804
805 805 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
806 806 inclusive=inclusive)
807 807
808 808 def descendants(self, revs):
809 809 """Generate the descendants of 'revs' in revision order.
810 810
811 811 Yield a sequence of revision numbers starting with a child of
812 812 some rev in revs, i.e., each revision is *not* considered a
813 813 descendant of itself. Results are ordered by revision number (a
814 814 topological sort)."""
815 815 first = min(revs)
816 816 if first == nullrev:
817 817 for i in self:
818 818 yield i
819 819 return
820 820
821 821 seen = set(revs)
822 822 for i in self.revs(start=first + 1):
823 823 for x in self.parentrevs(i):
824 824 if x != nullrev and x in seen:
825 825 seen.add(i)
826 826 yield i
827 827 break
828 828
829 829 def findcommonmissing(self, common=None, heads=None):
830 830 """Return a tuple of the ancestors of common and the ancestors of heads
831 831 that are not ancestors of common. In revset terminology, we return the
832 832 tuple:
833 833
834 834 ::common, (::heads) - (::common)
835 835
836 836 The list is sorted by revision number, meaning it is
837 837 topologically sorted.
838 838
839 839 'heads' and 'common' are both lists of node IDs. If heads is
840 840 not supplied, uses all of the revlog's heads. If common is not
841 841 supplied, uses nullid."""
842 842 if common is None:
843 843 common = [nullid]
844 844 if heads is None:
845 845 heads = self.heads()
846 846
847 847 common = [self.rev(n) for n in common]
848 848 heads = [self.rev(n) for n in heads]
849 849
850 850 # we want the ancestors, but inclusive
851 851 class lazyset(object):
852 852 def __init__(self, lazyvalues):
853 853 self.addedvalues = set()
854 854 self.lazyvalues = lazyvalues
855 855
856 856 def __contains__(self, value):
857 857 return value in self.addedvalues or value in self.lazyvalues
858 858
859 859 def __iter__(self):
860 860 added = self.addedvalues
861 861 for r in added:
862 862 yield r
863 863 for r in self.lazyvalues:
864 864 if not r in added:
865 865 yield r
866 866
867 867 def add(self, value):
868 868 self.addedvalues.add(value)
869 869
870 870 def update(self, values):
871 871 self.addedvalues.update(values)
872 872
873 873 has = lazyset(self.ancestors(common))
874 874 has.add(nullrev)
875 875 has.update(common)
876 876
877 877 # take all ancestors from heads that aren't in has
878 878 missing = set()
879 879 visit = collections.deque(r for r in heads if r not in has)
880 880 while visit:
881 881 r = visit.popleft()
882 882 if r in missing:
883 883 continue
884 884 else:
885 885 missing.add(r)
886 886 for p in self.parentrevs(r):
887 887 if p not in has:
888 888 visit.append(p)
889 889 missing = list(missing)
890 890 missing.sort()
891 891 return has, [self.node(miss) for miss in missing]
892 892
893 893 def incrementalmissingrevs(self, common=None):
894 894 """Return an object that can be used to incrementally compute the
895 895 revision numbers of the ancestors of arbitrary sets that are not
896 896 ancestors of common. This is an ancestor.incrementalmissingancestors
897 897 object.
898 898
899 899 'common' is a list of revision numbers. If common is not supplied, uses
900 900 nullrev.
901 901 """
902 902 if common is None:
903 903 common = [nullrev]
904 904
905 905 return ancestor.incrementalmissingancestors(self.parentrevs, common)
906 906
907 907 def findmissingrevs(self, common=None, heads=None):
908 908 """Return the revision numbers of the ancestors of heads that
909 909 are not ancestors of common.
910 910
911 911 More specifically, return a list of revision numbers corresponding to
912 912 nodes N such that every N satisfies the following constraints:
913 913
914 914 1. N is an ancestor of some node in 'heads'
915 915 2. N is not an ancestor of any node in 'common'
916 916
917 917 The list is sorted by revision number, meaning it is
918 918 topologically sorted.
919 919
920 920 'heads' and 'common' are both lists of revision numbers. If heads is
921 921 not supplied, uses all of the revlog's heads. If common is not
922 922 supplied, uses nullid."""
923 923 if common is None:
924 924 common = [nullrev]
925 925 if heads is None:
926 926 heads = self.headrevs()
927 927
928 928 inc = self.incrementalmissingrevs(common=common)
929 929 return inc.missingancestors(heads)
930 930
931 931 def findmissing(self, common=None, heads=None):
932 932 """Return the ancestors of heads that are not ancestors of common.
933 933
934 934 More specifically, return a list of nodes N such that every N
935 935 satisfies the following constraints:
936 936
937 937 1. N is an ancestor of some node in 'heads'
938 938 2. N is not an ancestor of any node in 'common'
939 939
940 940 The list is sorted by revision number, meaning it is
941 941 topologically sorted.
942 942
943 943 'heads' and 'common' are both lists of node IDs. If heads is
944 944 not supplied, uses all of the revlog's heads. If common is not
945 945 supplied, uses nullid."""
946 946 if common is None:
947 947 common = [nullid]
948 948 if heads is None:
949 949 heads = self.heads()
950 950
951 951 common = [self.rev(n) for n in common]
952 952 heads = [self.rev(n) for n in heads]
953 953
954 954 inc = self.incrementalmissingrevs(common=common)
955 955 return [self.node(r) for r in inc.missingancestors(heads)]
956 956
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        # returned for every "nothing reachable" early exit
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # only heads actually reached from the roots survive
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
1111 1111
1112 1112 def headrevs(self):
1113 1113 try:
1114 1114 return self.index.headrevs()
1115 1115 except AttributeError:
1116 1116 return self._headrevs()
1117 1117
1118 1118 def computephases(self, roots):
1119 1119 return self.index.computephasesmapsets(roots)
1120 1120
1121 1121 def _headrevs(self):
1122 1122 count = len(self)
1123 1123 if not count:
1124 1124 return [nullrev]
1125 1125 # we won't iter over filtered rev so nobody is a head at start
1126 1126 ishead = [0] * (count + 1)
1127 1127 index = self.index
1128 1128 for r in self:
1129 1129 ishead[r] = 1 # I may be an head
1130 1130 e = index[r]
1131 1131 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1132 1132 return [r for r, val in enumerate(ishead) if val]
1133 1133
1134 1134 def heads(self, start=None, stop=None):
1135 1135 """return the list of all nodes that have no children
1136 1136
1137 1137 if start is specified, only heads that are descendants of
1138 1138 start will be returned
1139 1139 if stop is specified, it will consider all the revs from stop
1140 1140 as if they had no children
1141 1141 """
1142 1142 if start is None and stop is None:
1143 1143 if not len(self):
1144 1144 return [nullid]
1145 1145 return [self.node(r) for r in self.headrevs()]
1146 1146
1147 1147 if start is None:
1148 1148 start = nullid
1149 1149 if stop is None:
1150 1150 stop = []
1151 1151 stoprevs = set([self.rev(n) for n in stop])
1152 1152 startrev = self.rev(start)
1153 1153 reachable = {startrev}
1154 1154 heads = {startrev}
1155 1155
1156 1156 parentrevs = self.parentrevs
1157 1157 for r in self.revs(start=startrev + 1):
1158 1158 for p in parentrevs(r):
1159 1159 if p in reachable:
1160 1160 if r not in stoprevs:
1161 1161 reachable.add(r)
1162 1162 heads.add(r)
1163 1163 if p in heads and p not in stoprevs:
1164 1164 heads.remove(p)
1165 1165
1166 1166 return [self.node(r) for r in heads]
1167 1167
1168 1168 def children(self, node):
1169 1169 """find the children of a given node"""
1170 1170 c = []
1171 1171 p = self.rev(node)
1172 1172 for r in self.revs(start=p + 1):
1173 1173 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1174 1174 if prevs:
1175 1175 for pr in prevs:
1176 1176 if pr == p:
1177 1177 c.append(self.node(r))
1178 1178 elif p == nullrev:
1179 1179 c.append(self.node(r))
1180 1180 return c
1181 1181
1182 1182 def commonancestorsheads(self, a, b):
1183 1183 """calculate all the heads of the common ancestors of nodes a and b"""
1184 1184 a, b = self.rev(a), self.rev(b)
1185 1185 ancs = self._commonancestorsheads(a, b)
1186 1186 return pycompat.maplist(self.node, ancs)
1187 1187
1188 1188 def _commonancestorsheads(self, *revs):
1189 1189 """calculate all the heads of the common ancestors of revs"""
1190 1190 try:
1191 1191 ancs = self.index.commonancestorsheads(*revs)
1192 1192 except (AttributeError, OverflowError): # C implementation failed
1193 1193 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1194 1194 return ancs
1195 1195
1196 1196 def isancestor(self, a, b):
1197 1197 """return True if node a is an ancestor of node b
1198 1198
1199 1199 A revision is considered an ancestor of itself."""
1200 1200 a, b = self.rev(a), self.rev(b)
1201 1201 return self.isancestorrev(a, b)
1202 1202
1203 1203 def isancestorrev(self, a, b):
1204 1204 """return True if revision a is an ancestor of revision b
1205 1205
1206 1206 A revision is considered an ancestor of itself.
1207 1207
1208 1208 The implementation of this is trivial but the use of
1209 1209 commonancestorsheads is not."""
1210 1210 if a == nullrev:
1211 1211 return True
1212 1212 elif a == b:
1213 1213 return True
1214 1214 elif a > b:
1215 1215 return False
1216 1216 return a in self._commonancestorsheads(a, b)
1217 1217
1218 1218 def ancestor(self, a, b):
1219 1219 """calculate the "best" common ancestor of nodes a and b"""
1220 1220
1221 1221 a, b = self.rev(a), self.rev(b)
1222 1222 try:
1223 1223 ancs = self.index.ancestors(a, b)
1224 1224 except (AttributeError, OverflowError):
1225 1225 ancs = ancestor.ancestors(self.parentrevs, a, b)
1226 1226 if ancs:
1227 1227 # choose a consistent winner when there's a tie
1228 1228 return min(map(self.node, ancs))
1229 1229 return nullid
1230 1230
    def _match(self, id):
        """Resolve an exact identifier to a node, or return None.

        ``id`` may be an integer revision, a 20-byte binary node, a
        decimal revision string, or a 40-character hex node. Anything
        that fails every interpretation falls through and returns None
        (prefix resolution is handled by _partialmatch).
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            # reject strings like '01' or '1x' that int() would accept
            # or truncate
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                # negative revs count from the end, as in Python indexing
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
1264 1264
    def _partialmatch(self, id):
        """Resolve a hex nodeid prefix to a full binary node.

        Returns the node on a unique match, None when nothing matches,
        and raises AmbiguousPrefixLookupError on multiple matches or
        WdirUnsupported when the prefix identifies the working
        directory pseudo-node.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = wdirhex.startswith(id)
        try:
            # fast path: the index's radix tree (C implementation)
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        # slow path below; consult the prefix cache first
        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                # scan every index entry for nodes with that binary prefix...
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                # ...then narrow by the full (possibly odd-length) hex prefix
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1314 1314
1315 1315 def lookup(self, id):
1316 1316 """locate a node based on:
1317 1317 - revision number or str(revision number)
1318 1318 - nodeid or subset of hex nodeid
1319 1319 """
1320 1320 n = self._match(id)
1321 1321 if n is not None:
1322 1322 return n
1323 1323 n = self._partialmatch(id)
1324 1324 if n:
1325 1325 return n
1326 1326
1327 1327 raise error.LookupError(id, self.indexfile, _('no match found'))
1328 1328
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            # a prefix is valid when it resolves to exactly one node
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            # all-'f' prefixes could also identify the wdir pseudo-node
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            # fast path: let the index compute the shortest prefix
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            # wdir has no index entry; probe prefixes directly
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
1376 1376
1377 1377 def cmp(self, node, text):
1378 1378 """compare text with a given file revision
1379 1379
1380 1380 returns True if text is different than what is stored.
1381 1381 """
1382 1382 p1, p2 = self.parents(node)
1383 1383 return hash(text, p1, p2) != node
1384 1384
1385 1385 def _cachesegment(self, offset, data):
1386 1386 """Add a segment to the revlog cache.
1387 1387
1388 1388 Accepts an absolute offset and the data that is at that location.
1389 1389 """
1390 1390 o, d = self._chunkcache
1391 1391 # try to add to existing cache
1392 1392 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1393 1393 self._chunkcache = o, d + data
1394 1394 else:
1395 1395 self._chunkcache = offset, data
1396 1396
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        # NOTE(review): the masking assumes _chunkcachesize is a power of
        # two; round the window down/up to cache-size boundaries
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # hand back only the originally requested slice of the window
            return util.buffer(d, offset - realoffset, length)
        return d
1422 1422
1423 1423 def _getsegment(self, offset, length, df=None):
1424 1424 """Obtain a segment of raw data from the revlog.
1425 1425
1426 1426 Accepts an absolute offset, length of bytes to obtain, and an
1427 1427 optional file handle to the already-opened revlog. If the file
1428 1428 handle is used, it's original seek position will not be preserved.
1429 1429
1430 1430 Requests for data may be returned from a cache.
1431 1431
1432 1432 Returns a str or a buffer instance of raw byte data.
1433 1433 """
1434 1434 o, d = self._chunkcache
1435 1435 l = len(d)
1436 1436
1437 1437 # is it in the cache?
1438 1438 cachestart = offset - o
1439 1439 cacheend = cachestart + length
1440 1440 if cachestart >= 0 and cacheend <= l:
1441 1441 if cachestart == 0 and cacheend == l:
1442 1442 return d # avoid a copy
1443 1443 return util.buffer(d, cachestart, cacheend - cachestart)
1444 1444
1445 1445 return self._readsegment(offset, length, df=df)
1446 1446
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        # index entry field 0 packs offset in its high bits, flags in the low
        # 16; field 1 is the compressed length
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            # inline revlogs interleave index entries with data, so skip
            # over the index records preceding each revision
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
1480 1480
1481 1481 def _chunk(self, rev, df=None):
1482 1482 """Obtain a single decompressed chunk for a revision.
1483 1483
1484 1484 Accepts an integer revision and an optional already-open file handle
1485 1485 to be used for reading. If used, the seek position of the file will not
1486 1486 be preserved.
1487 1487
1488 1488 Returns a str holding uncompressed data for the requested revision.
1489 1489 """
1490 1490 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1491 1491
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # bind hot attributes to locals for the loops below
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            # read everything in one segment
            slicedchunks = (revs,)
        else:
            # split the revs into dense slices to limit read amplification
            slicedchunks = deltautil.slicechunk(self, revs,
                                               targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    # account for the interleaved index entries
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1545 1545
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        # reset to an empty buffer anchored at offset 0
        self._chunkcache = (0, '')
1549 1549
1550 1550 def deltaparent(self, rev):
1551 1551 """return deltaparent of the given revision"""
1552 1552 base = self.index[rev][3]
1553 1553 if base == rev:
1554 1554 return nullrev
1555 1555 elif self._generaldelta:
1556 1556 return base
1557 1557 else:
1558 1558 return rev - 1
1559 1559
1560 1560 def issnapshot(self, rev):
1561 1561 """tells whether rev is a snapshot
1562 1562 """
1563 1563 if rev == nullrev:
1564 1564 return True
1565 1565 deltap = self.deltaparent(rev)
1566 1566 if deltap == nullrev:
1567 1567 return True
1568 1568 p1, p2 = self.parentrevs(rev)
1569 1569 if deltap in (p1, p2):
1570 1570 return False
1571 1571 return self.issnapshot(deltap)
1572 1572
1573 1573 def snapshotdepth(self, rev):
1574 1574 """number of snapshot in the chain before this one"""
1575 1575 if not self.issnapshot(rev):
1576 1576 raise error.ProgrammingError('revision %d not a snapshot')
1577 1577 return len(self._deltachain(rev)[0]) - 1
1578 1578
1579 1579 def revdiff(self, rev1, rev2):
1580 1580 """return or calculate a delta between two revisions
1581 1581
1582 1582 The delta calculated is in binary form and is intended to be written to
1583 1583 revlog data directly. So this function needs raw revision data.
1584 1584 """
1585 1585 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1586 1586 return bytes(self._chunk(rev2))
1587 1587
1588 1588 return mdiff.textdiff(self.revision(rev1, raw=True),
1589 1589 self.revision(rev2, raw=True))
1590 1590
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            # the delta chain walk can stop early at the cached revision
            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            # cap how much _chunks may read at once, relative to the
            # uncompressed size recorded in the index (field 2)
            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                # no cached base: the first chunk is the full base text
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
1665 1665
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        # delegates to the module-level hash() (not the builtin)
        return hash(text, p1, p2)
1673 1673
1674 1674 def _processflags(self, text, flags, operation, raw=False):
1675 1675 """Inspect revision data flags and applies transforms defined by
1676 1676 registered flag processors.
1677 1677
1678 1678 ``text`` - the revision data to process
1679 1679 ``flags`` - the revision flags
1680 1680 ``operation`` - the operation being performed (read or write)
1681 1681 ``raw`` - an optional argument describing if the raw transform should be
1682 1682 applied.
1683 1683
1684 1684 This method processes the flags in the order (or reverse order if
1685 1685 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1686 1686 flag processors registered for present flags. The order of flags defined
1687 1687 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1688 1688
1689 1689 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1690 1690 processed text and ``validatehash`` is a bool indicating whether the
1691 1691 returned text should be checked for hash integrity.
1692 1692
1693 1693 Note: If the ``raw`` argument is set, it has precedence over the
1694 1694 operation and will only update the value of ``validatehash``.
1695 1695 """
1696 1696 # fast path: no flag processors will run
1697 1697 if flags == 0:
1698 1698 return text, True
1699 1699 if not operation in ('read', 'write'):
1700 1700 raise error.ProgrammingError(_("invalid '%s' operation") %
1701 1701 operation)
1702 1702 # Check all flags are known.
1703 1703 if flags & ~REVIDX_KNOWN_FLAGS:
1704 1704 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1705 1705 (flags & ~REVIDX_KNOWN_FLAGS))
1706 1706 validatehash = True
1707 1707 # Depending on the operation (read or write), the order might be
1708 1708 # reversed due to non-commutative transforms.
1709 1709 orderedflags = REVIDX_FLAGS_ORDER
1710 1710 if operation == 'write':
1711 1711 orderedflags = reversed(orderedflags)
1712 1712
1713 1713 for flag in orderedflags:
1714 1714 # If a flagprocessor has been registered for a known flag, apply the
1715 1715 # related operation transform and update result tuple.
1716 1716 if flag & flags:
1717 1717 vhash = True
1718 1718
1719 1719 if flag not in self._flagprocessors:
1720 1720 message = _("missing processor for flag '%#x'") % (flag)
1721 1721 raise error.RevlogError(message)
1722 1722
1723 1723 processor = self._flagprocessors[flag]
1724 1724 if processor is not None:
1725 1725 readtransform, writetransform, rawtransform = processor
1726 1726
1727 1727 if raw:
1728 1728 vhash = rawtransform(self, text)
1729 1729 elif operation == 'read':
1730 1730 text, vhash = readtransform(self, text)
1731 1731 else: # write operation
1732 1732 text, vhash = writetransform(self, text)
1733 1733 validatehash = validatehash and vhash
1734 1734
1735 1735 return text, validatehash
1736 1736
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.

        Raises RevlogError on mismatch, or CensoredNodeError when the
        mismatching text is recognized as censored content.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    # fall back to a short hex form for the error message
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            # a censored revision intentionally fails the hash check;
            # report it as such instead of as corruption
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise
1756 1756
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        # copy every revision's data into the new standalone data file
        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        # rewrite the index without the inline flag or interleaved data
        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        # cached segments were read from the inline layout; invalidate them
        self._chunkclear()
1805 1805
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    """
    if link == nullrev:
        raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                % self.indexfile)

    if flags:
        # Flag processors may transform the stored text, so the node must
        # be computed from the original (pre-transform) text.
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = self._processflags(text, flags, 'write')

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    if len(rawtext) > _maxentrysize:
        raise error.RevlogError(
            _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.indexfile, len(rawtext)))

    node = node or self.hash(rawtext, p1, p2)
    if node in self.nodemap:
        # Duplicate of an existing revision; nothing new to store.
        return node

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                               flags, cachedelta=cachedelta,
                               deltacomputer=deltacomputer)
1851 1851
def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                   cachedelta=None, deltacomputer=None):
    """add a raw revision with known flags, node and parents

    useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).
    """
    # Inline revlogs have no separate data file to append to.
    dfh = None if self._inline else self._datafp("a+")
    ifh = self._indexfp("a+")
    try:
        return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                 flags, cachedelta, ifh, dfh,
                                 deltacomputer=deltacomputer)
    finally:
        # Always release the handles, even when _addrevision raises.
        if dfh:
            dfh.close()
        ifh.close()
1870 1870
def compress(self, data):
    """Generate a possibly-compressed representation of data."""
    # The empty chunk is stored as-is with an empty header.
    if not data:
        return '', data

    compressed = self._compressor.compress(data)
    if compressed:
        # The revlog compressor added the header in the returned data.
        return '', compressed

    # The engine declined to compress; store the chunk literally.  A
    # leading NUL byte is self-identifying, anything else needs the 'u'
    # (uncompressed) marker.
    marker = '' if data[0:1] == '\0' else 'u'
    return marker, data
1885 1885
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely by at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for "decompress compressed data
    # when encoded with common and officially supported compression engines"
    # case over "raw data" and "data encoded by less common or non-official
    # compression engines." That is why we have the inline lookup first
    # followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    t = data[0:1]

    if t == 'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(_('revlog decompress error: %s') %
                                    stringutil.forcebytestr(e))
    # '\0' is more common than 'u' so it goes first.
    elif t == '\0':
        return data
    elif t == 'u':
        return util.buffer(data, 1)

    # Unknown one-byte header: fall back to a registered compression
    # engine, lazily instantiating and caching its decompressor.
    try:
        compressor = self._decompressors[t]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(t)
            compressor = engine.revlogcompressor()
            self._decompressors[t] = compressor
        except KeyError:
            raise error.RevlogError(_('unknown compression type %r') % t)

    return compressor.decompress(data)
1941 1941
def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                 cachedelta, ifh, dfh, alwayscache=False,
                 deltacomputer=None):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.
    """
    if node == nullid:
        raise error.RevlogError(_("%s: attempt to add null revision") %
                                self.indexfile)
    if node == wdirid or node in wdirfilenodeids:
        raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                self.indexfile)

    # Delta bases are read from wherever revision data lives: the index
    # file for inline revlogs, the data file otherwise.
    if self._inline:
        fh = ifh
    else:
        fh = dfh

    # Single-element list so the delta computer can publish the resolved
    # fulltext back to us (read below via btext[0]).
    btext = [rawtext]

    curr = len(self)
    prev = curr - 1
    offset = self.end(prev)
    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is None:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                    cachedelta[1])
    else:
        textlen = len(rawtext)

    if deltacomputer is None:
        deltacomputer = deltautil.deltacomputer(self)

    revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

    deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

    # Append the new entry to the in-memory index before writing it out.
    e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
         deltainfo.base, link, p1r, p2r, node)
    self.index.append(e)
    self.nodemap[node] = curr

    entry = self._io.packentry(e, self.node, self.version, curr)
    self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                     link, offset)

    rawtext = btext[0]

    if alwayscache and rawtext is None:
        rawtext = deltacomputer.buildtext(revinfo, fh)

    if type(rawtext) == bytes: # only accept immutable objects
        self._cache = (node, curr, rawtext)
    self._chainbasecache[curr] = deltainfo.chainbase
    return node
2013 2013
def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
    """Write one packed index entry plus its revision data to disk.

    ``entry`` is the packed index record; ``data`` is a (header, body)
    pair of chunk bytes.  ``offset`` is the data offset for non-inline
    revlogs.  The transaction is told the pre-write file sizes so it can
    truncate on rollback.
    """
    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms, Python or the platform itself can be buggy. Some versions
    # of Solaris have been observed to not append at the end of the file
    # if the file was seeked to before the end. See issue4943 for more.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3.
    ifh.seek(0, os.SEEK_END)
    if dfh:
        dfh.seek(0, os.SEEK_END)

    curr = len(self) - 1
    if not self._inline:
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        ifh.write(entry)
    else:
        # Inline: entry and data interleave in the index file itself.
        offset += curr * self._io.size
        transaction.add(self.indexfile, offset, curr)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        self._enforceinlinesize(transaction, ifh)
2044 2044
def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the node that was added.

    Returns the list of nodes seen in ``deltas`` (including ones already
    present and therefore skipped).
    """

    nodes = []

    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self._indexfp("a+")
    isize = r * self._io.size
    # Record pre-write sizes with the transaction so a rollback truncates
    # back to the current state.
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self._datafp("a+")
    def flush():
        # Make pending writes visible (used by _peek_iscensored, which may
        # need to read back freshly written data).
        if dfh:
            dfh.flush()
        ifh.flush()
    try:
        deltacomputer = deltautil.deltacomputer(self)
        # loop through our set of deltas
        for data in deltas:
            node, p1, p2, linknode, deltabase, delta, flags = data
            link = linkmapper(linknode)
            flags = flags or REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self.nodemap:
                # this can happen if two branches make the same change
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise error.LookupError(p, self.indexfile,
                                            _('unknown parent'))

            if deltabase not in self.nodemap:
                raise error.LookupError(deltabase, self.indexfile,
                                        _('unknown delta base'))

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # if base is censored, delta must be full replacement in a
                # single patch operation
                hlen = struct.calcsize(">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self.indexfile,
                                                  self.node(baserev))

            if not flags and self._peek_iscensored(baserev, delta, flush):
                flags |= REVIDX_ISCENSORED

            # We assume consumers of addrevisioncb will want to retrieve
            # the added revision, which will require a call to
            # revision(). revision() will fast path if there is a cache
            # hit. So, we tell _addrevision() to always cache in this case.
            # We're only using addgroup() in the context of changegroup
            # generation so the revision data can always be handled as raw
            # by the flagprocessor.
            self._addrevision(node, None, transaction, link,
                              p1, p2, flags, (baserev, delta),
                              ifh, dfh,
                              alwayscache=bool(addrevisioncb),
                              deltacomputer=deltacomputer)

            if addrevisioncb:
                addrevisioncb(self, node)

            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self._datafp("a+")
                ifh = self._indexfp("a+")
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return nodes
2142 2142
def iscensored(self, rev):
    """Check if a file revision is censored."""
    # Only censorable revlogs (those created with _censorable set) can
    # carry censored revisions; everything else answers False outright.
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False
2149 2149
def _peek_iscensored(self, baserev, delta, flush):
    """Quickly check if a delta produces a censored revision.

    Returns True only when ``delta`` is a full-replacement patch whose
    output starts with the censored-metadata marker.  ``flush`` is
    accepted for interface compatibility with callers; it is not used
    here.
    """
    if not self._censorable:
        return False

    # Fragile heuristic: unless new file meta keys are added alphabetically
    # preceding "censored", all censored revisions are prefixed by
    # "\1\ncensored:". A delta producing such a censored revision must be a
    # full-replacement delta, so we inspect the first and only patch in the
    # delta for this prefix.
    hlen = struct.calcsize(">lll")  # size of one patch-hunk header
    if len(delta) <= hlen:
        return False

    oldlen = self.rawsize(baserev)
    newlen = len(delta) - hlen
    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
        return False

    add = "\1\ncensored:"
    addlen = len(add)
    return newlen >= addlen and delta[hlen:hlen + addlen] == add
2172 2172
def getstrippoint(self, minlink):
    """find the minimum rev that must be stripped to strip the linkrev

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    brokenrevs = set()
    strippoint = len(self)

    # Map of head rev -> its linkrev, maintained as we walk downward.
    heads = {}
    futurelargelinkrevs = set()
    for head in self.headrevs():
        headlinkrev = self.linkrev(head)
        heads[head] = headlinkrev
        if headlinkrev >= minlink:
            futurelargelinkrevs.add(headlinkrev)

    # This algorithm involves walking down the rev graph, starting at the
    # heads. Since the revs are topologically sorted according to linkrev,
    # once all head linkrevs are below the minlink, we know there are
    # no more revs that could have a linkrev greater than minlink.
    # So we can stop walking.
    while futurelargelinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)

        if linkrev < minlink:
            # This rev survives the strip but points at a stripped
            # changeset: its linkrev will be broken.
            brokenrevs.add(strippoint)
        else:
            futurelargelinkrevs.remove(linkrev)

        # The parents of the rev we just consumed become candidate heads.
        for p in self.parentrevs(strippoint):
            if p != nullrev:
                plinkrev = self.linkrev(p)
                heads[p] = plinkrev
                if plinkrev >= minlink:
                    futurelargelinkrevs.add(plinkrev)

    return strippoint, brokenrevs
2212 2212
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    if len(self) == 0:
        return

    rev, _ = self.getstrippoint(minlink)
    if rev == len(self):
        # Nothing to strip.
        return

    # first truncate the files on disk
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        # Index truncation point is measured in fixed-size entries.
        end = rev * self._io.size
    else:
        # Inline: index interleaves data, so add the entry bytes on top of
        # the data offset.
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chaininfocache = {}
    self._chunkclear()
    for x in pycompat.xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    del self.index[rev:-1]
    self._nodepos = None
2253 2253
def checksize(self):
    """Compare on-disk file sizes against what the index implies.

    Returns a pair ``(dd, di)``: ``dd`` is the data file's actual size
    minus the size the index expects, and ``di`` is the number of surplus
    bytes at the end of the index file.  Missing files contribute 0.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, 2)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        # A missing data file is normal (inline revlog); anything else is
        # a real error.
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        f = self.opener(self.indexfile)
        f.seek(0, 2)
        actual = f.tell()
        f.close()
        s = self._io.size
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # For inline revlogs the index interleaves revision data, so
            # recompute the expected size from per-revision chunk lengths.
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2289 2289
def files(self):
    """Return the list of files backing this revlog on disk."""
    paths = [self.indexfile]
    # Inline revlogs keep revision data inside the index file itself, so
    # there is no separate data file to report.
    if not self._inline:
        paths.append(self.datafile)
    return paths
2295 2295
def emitrevisiondeltas(self, requests):
    """Yield ``revlogrevisiondelta`` instances for ``requests``.

    Each request names a node and optionally a base node to delta
    against; when the request leaves the choice open, a base is picked
    heuristically (delta parent, or the previously emitted revision).
    Censored revisions are emitted as full revision text rather than
    deltas.
    """
    frev = self.rev

    prevrev = None
    for request in requests:
        node = request.node
        rev = frev(node)

        if prevrev is None:
            # First request: seed "previous" with this rev's p1 from the
            # raw index entry.
            prevrev = self.index[rev][5]

        # Requesting a full revision.
        if request.basenode == nullid:
            baserev = nullrev
        # Requesting an explicit revision.
        elif request.basenode is not None:
            baserev = frev(request.basenode)
        # Allowing us to choose.
        else:
            p1rev, p2rev = self.parentrevs(rev)
            deltaparentrev = self.deltaparent(rev)

            # Avoid sending full revisions when delta parent is null. Pick
            # prev in that case. It's tempting to pick p1 in this case, as
            # p1 will be smaller in the common case. However, computing a
            # delta against p1 may require resolving the raw text of p1,
            # which could be expensive. The revlog caches should have prev
            # cached, meaning less CPU for delta generation. There is
            # likely room to add a flag and/or config option to control this
            # behavior.
            if deltaparentrev == nullrev and self._storedeltachains:
                baserev = prevrev

            # Revlog is configured to use full snapshot for a reason.
            # Stick to full snapshot.
            elif deltaparentrev == nullrev:
                baserev = nullrev

            # Pick previous when we can't be sure the base is available
            # on consumer.
            elif deltaparentrev not in (p1rev, p2rev, prevrev):
                baserev = prevrev
            else:
                baserev = deltaparentrev

            if baserev != nullrev and not self.candelta(baserev, rev):
                baserev = nullrev

        revision = None
        delta = None
        baserevisionsize = None

        if self.iscensored(baserev) or self.iscensored(rev):
            # Censored content must be shipped as (tombstone) fulltext.
            try:
                revision = self.revision(node, raw=True)
            except error.CensoredNodeError as e:
                revision = e.tombstone

            if baserev != nullrev:
                baserevisionsize = self.rawsize(baserev)

        elif baserev == nullrev:
            revision = self.revision(node, raw=True)
        else:
            delta = self.revdiff(baserev, rev)

        extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0

        yield revlogrevisiondelta(
            node=node,
            p1node=request.p1node,
            p2node=request.p2node,
            linknode=request.linknode,
            basenode=self.node(baserev),
            flags=self.flags(rev) | extraflags,
            baserevisionsize=baserevisionsize,
            revision=revision,
            delta=delta)

        prevrev = rev
2376 2376
# Policy constants for the ``deltareuse`` argument of ``clone()``; see
# that method's docstring for the exact semantics of each mode.
DELTAREUSEALWAYS = 'always'
DELTAREUSESAMEREVS = 'samerevs'
DELTAREUSENEVER = 'never'

DELTAREUSEFULLADD = 'fulladd'

# The set of all accepted policy values, used for argument validation.
DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2384 2384
def clone(self, tr, destrevlog, addrevisioncb=None,
          deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument control how deltas from the existing revlog
    are preserved in the destination revlog. The argument can have the
    following values:

    DELTAREUSEALWAYS
       Deltas will always be reused (if possible), even if the destination
       revlog would not select the same revisions for the delta. This is the
       fastest mode of operation.
    DELTAREUSESAMEREVS
       Deltas will be reused if the destination revlog would pick the same
       revisions for the delta. This mode strikes a balance between speed
       and optimization.
    DELTAREUSENEVER
       Deltas will never be reused. This is the slowest mode of execution.
       This mode can be used to recompute deltas (e.g. if the diff/delta
       algorithm changes).

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``deltabothparents`` argument
    controls whether to compute deltas against both parents for merges.
    By default, the current default is used.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

    if len(destrevlog):
        raise ValueError(_('destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_('source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_('destination revlog has filtered revisions'))

    # lazydeltabase controls whether to reuse a cached delta, if possible.
    # Save the destination's settings so they can be restored afterwards.
    oldlazydeltabase = destrevlog._lazydeltabase
    oldamd = destrevlog._deltabothparents

    try:
        if deltareuse == self.DELTAREUSEALWAYS:
            destrevlog._lazydeltabase = True
        elif deltareuse == self.DELTAREUSESAMEREVS:
            destrevlog._lazydeltabase = False

        destrevlog._deltabothparents = deltabothparents or oldamd

        populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                            self.DELTAREUSESAMEREVS)

        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xffff
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if populatecachedelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            if not cachedelta:
                rawtext = self.revision(rev, raw=True)


            if deltareuse == self.DELTAREUSEFULLADD:
                destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                       cachedelta=cachedelta,
                                       node=node, flags=flags,
                                       deltacomputer=deltacomputer)
            else:
                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                            p2, flags, cachedelta, ifh, dfh,
                                            deltacomputer=deltacomputer)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)
    finally:
        # Restore the destination revlog's original delta settings.
        destrevlog._lazydeltabase = oldlazydeltabase
        destrevlog._deltabothparents = oldamd
2501 2501
def censorrevision(self, node, tombstone=b''):
    """Replace the content of ``node`` with ``tombstone`` in place.

    Rewrites the revlog file(s) entry-by-entry: data before the censored
    revision is copied verbatim, the censored revision is replaced by
    (padded) tombstone metadata, and every later revision is rewritten
    with fixed-up offsets (children of the censored revision are
    re-stored as fulltext).  Not supported for version 0 revlogs.
    """
    if (self.version & 0xFFFF) == REVLOGV0:
        raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                self.version)

    rev = self.rev(node)
    tombstone = packmeta({b'censored': tombstone}, b'')

    # The tombstone is padded to the censored data's size below, so it
    # cannot be longer than that data.
    if len(tombstone) > self.rawsize(rev):
        raise error.Abort(_('censor tombstone must be no longer than '
                            'censored data'))

    # Using two files instead of one makes it easy to rewrite entry-by-entry
    idxread = self.opener(self.indexfile, 'r')
    idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
    if self.version & FLAG_INLINE_DATA:
        dataread, datawrite = idxread, idxwrite
    else:
        dataread = self.opener(self.datafile, 'r')
        datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

    # Copy all revlog data up to the entry to be censored.
    offset = self.start(rev)

    for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
        idxwrite.write(chunk)
    for chunk in util.filechunkiter(dataread, limit=offset):
        datawrite.write(chunk)

    def rewriteindex(r, newoffs, newdata=None):
        """Rewrite the index entry with a new data offset and new data.

        The newdata argument, if given, is a tuple of three positive
        integers: (new compressed, new uncompressed, added flag bits).
        """
        offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
        flags = gettype(offlags)
        if newdata:
            comp, uncomp, nflags = newdata
            flags |= nflags
        offlags = offset_type(newoffs, flags)
        e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
        idxwrite.write(self._io.packentry(e, None, self.version, r))
        # Skip past the old entry in the read stream.
        idxread.seek(self._io.size, 1)

    def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
        """Write the given fulltext with the given data offset.

        Returns:
            The integer number of data bytes written, for tracking data
            offsets.
        """
        flag, compdata = self.compress(data)
        newcomp = len(flag) + len(compdata)
        rewriteindex(r, offs, (newcomp, len(data), nflags))
        datawrite.write(flag)
        datawrite.write(compdata)
        # Skip past the old chunk in the read stream.
        dataread.seek(self.length(r), 1)
        return newcomp

    # Rewrite censored entry with (padded) tombstone data.
    pad = ' ' * (self.rawsize(rev) - len(tombstone))
    offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

    # Rewrite all following filelog revisions fixing up offsets and deltas.
    for srev in pycompat.xrange(rev + 1, len(self)):
        if rev in self.parentrevs(srev):
            # Immediate children of censored node must be re-added as
            # fulltext.
            try:
                revdata = self.revision(srev)
            except error.CensoredNodeError as e:
                revdata = e.tombstone
            dlen = rewrite(srev, offset, revdata)
        else:
            # Copy any other revision data verbatim after fixing up the
            # offset.
            rewriteindex(srev, offset)
            dlen = self.length(srev)
            for chunk in util.filechunkiter(dataread, limit=dlen):
                datawrite.write(chunk)
        offset += dlen

    idxread.close()
    idxwrite.close()
    if dataread is not idxread:
        dataread.close()
        datawrite.close()
2590 2590
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.

    ``state`` is the verifier's shared state dict; this method reads
    ``state['expectedversion']`` to compare against the on-disk format.
    """
    dd, di = self.checksize()
    if dd:
        yield revlogproblem(error=_('data length off by %d bytes') % dd)
    if di:
        yield revlogproblem(error=_('index contains %d extra bytes') % di)

    # self.version also carries feature flag bits in its high bytes; mask
    # them off so only the actual format number is compared.
    version = self.version & 0xFFFF

    # The verifier tells us what version revlog we should be.
    if version != state['expectedversion']:
        yield revlogproblem(
            warning=_("warning: '%s' uses revlog format %d; expected %d") %
                    (self.indexfile, version, state['expectedversion']))
@@ -1,505 +1,506
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 scmutil,
23 23 util,
24 24 )
25 25
def verify(repo):
    """Check *repo* for integrity problems and return the exit status."""
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
29 29
30 30 def _normpath(f):
31 31 # under hg < 2.4, convert didn't sanitize paths properly, so a
32 32 # converted repo may contain repeated slashes
33 33 while '//' in f:
34 34 f = f.replace('//', '/')
35 35 return f
36 36
class verifier(object):
    # The match argument is always None in hg core, but e.g. the narrowhg
    # extension will pass in a matcher here.
    def __init__(self, repo, match=None):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = match or scmutil.matchall(repo)
        # linkrevs of changesets found damaged during the run
        self.badrevs = set()
        # running error/warning counters, reported in verify()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        # True when the changelog is not a version-0 revlog
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        # small cache of changectx lookups used by checkentry()
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        # set to True once a changeset referring to a manifest is seen
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True
56 56
57 57 def warn(self, msg):
58 58 self.ui.warn(msg + "\n")
59 59 self.warnings += 1
60 60
61 61 def err(self, linkrev, msg, filename=None):
62 62 if linkrev is not None:
63 63 self.badrevs.add(linkrev)
64 64 linkrev = "%d" % linkrev
65 65 else:
66 66 linkrev = '?'
67 67 msg = "%s: %s" % (linkrev, msg)
68 68 if filename:
69 69 msg = "%s@%s" % (filename, msg)
70 70 self.ui.warn(" " + msg + "\n")
71 71 self.errors += 1
72 72
73 73 def exc(self, linkrev, msg, inst, filename=None):
74 74 fmsg = pycompat.bytestr(inst)
75 75 if not fmsg:
76 76 fmsg = pycompat.byterepr(inst)
77 77 self.err(linkrev, "%s: %s" % (msg, fmsg), filename)
78 78
79 79 def checklog(self, obj, name, linkrev):
80 80 if not len(obj) and (self.havecl or self.havemf):
81 81 self.err(linkrev, _("empty or missing %s") % name)
82 82 return
83 83
84 84 d = obj.checksize()
85 85 if d[0]:
86 86 self.err(None, _("data length off by %d bytes") % d[0], name)
87 87 if d[1]:
88 88 self.err(None, _("index contains %d extra bytes") % d[1], name)
89 89
90 90 if obj.version != revlog.REVLOGV0:
91 91 if not self.revlogv1:
92 92 self.warn(_("warning: `%s' uses revlog format 1") % name)
93 93 elif self.revlogv1:
94 94 self.warn(_("warning: `%s' uses revlog format 0") % name)
95 95
    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Sanity-check revision *i* (*node*) of revlog *obj*.

        ``seen`` maps already-visited nodes to their revision numbers and
        is updated here; ``linkrevs`` lists the changelog revisions
        expected to link to this entry; *f* names the revlog for messages.
        Returns the entry's linkrev, or None when it cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        # Out-of-range linkrev, or (when the changelog is available) a
        # linkrev outside the expected set, means this entry points at the
        # wrong changeset.
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        pass
                self.warn(_(" (expected %s)") % " ".join
                          (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            # Both parents must either be null or already verified.
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr
131 131
    def verify(self):
        """Run the full verification.

        Returns 1 when integrity errors were found, None otherwise.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                           (self.revlogv1 and 1 or 0))

        # Each stage feeds the next: changelog -> manifests -> filelogs.
        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # Summarize results accumulated by err()/warn() during the run.
        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1
169 169
    def _verifychangelog(self):
        """Walk the changelog, verifying every entry.

        Returns ``(mflinkrevs, filelinkrevs)``: a map of manifest node ->
        list of linkrevs, and a map of filename -> list of linkrevs, which
        the manifest and filelog stages consume.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                # changes[0] is the manifest node, changes[3] the file list.
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # even an unreadable changeset may refer to a manifest, so
                # make sure the manifest stage still runs
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs
201 201
    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """Verify manifest revisions, recursing into tree-manifest
        subdirectories.

        ``mflinkrevs`` maps expected manifest nodes to their linkrevs and
        is consumed (entries are deleted as they are found).  *dir* is
        empty for the root manifest.  Returns filename -> {filenode:
        linkrev} for the filelog stage.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            # consume the expected-nodes map; leftovers are reported below
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        # 't'-flagged entries name subdirectory manifests;
                        # queue them for the recursive pass below
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            # anything still in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            # set up orphan tracking and progress for the subdirectory pass
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self.warn(_("warning: orphan data file '%s'") % f)

        return filenodes
301 301
302 302 def _crosscheckfiles(self, filelinkrevs, filenodes):
303 303 repo = self.repo
304 304 ui = self.ui
305 305 ui.status(_("crosschecking files in changesets and manifests\n"))
306 306
307 307 total = len(filelinkrevs) + len(filenodes)
308 308 progress = ui.makeprogress(_('crosschecking'), total=total)
309 309 if self.havemf:
310 310 for f in sorted(filelinkrevs):
311 311 progress.increment()
312 312 if f not in filenodes:
313 313 lr = filelinkrevs[f][0]
314 314 self.err(lr, _("in changeset but not in manifest"), f)
315 315
316 316 if self.havecl:
317 317 for f in sorted(filenodes):
318 318 progress.increment()
319 319 if f not in filelinkrevs:
320 320 try:
321 321 fl = repo.file(f)
322 322 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
323 323 except Exception:
324 324 lr = None
325 325 self.err(lr, _("in manifest but not in changeset"), f)
326 326
327 327 progress.complete()
328 328
329 329 def _verifyfiles(self, filenodes, filelinkrevs):
330 330 repo = self.repo
331 331 ui = self.ui
332 332 lrugetctx = self.lrugetctx
333 333 revlogv1 = self.revlogv1
334 334 havemf = self.havemf
335 335 ui.status(_("checking files\n"))
336 336
337 337 storefiles = set()
338 338 for f, f2, size in repo.store.datafiles():
339 339 if not f:
340 340 self.err(None, _("cannot decode filename '%s'") % f2)
341 341 elif (size > 0 or not revlogv1) and f.startswith('data/'):
342 342 storefiles.add(_normpath(f))
343 343
344 344 state = {
345 'revlogv1': self.revlogv1,
345 # TODO this assumes revlog storage for changelog.
346 'expectedversion': self.repo.changelog.version & 0xFFFF
346 347 }
347 348
348 349 files = sorted(set(filenodes) | set(filelinkrevs))
349 350 revisions = 0
350 351 progress = ui.makeprogress(_('checking'), unit=_('files'),
351 352 total=len(files))
352 353 for i, f in enumerate(files):
353 354 progress.update(i, item=f)
354 355 try:
355 356 linkrevs = filelinkrevs[f]
356 357 except KeyError:
357 358 # in manifest but not in changelog
358 359 linkrevs = []
359 360
360 361 if linkrevs:
361 362 lr = linkrevs[0]
362 363 else:
363 364 lr = None
364 365
365 366 try:
366 367 fl = repo.file(f)
367 368 except error.StorageError as e:
368 369 self.err(lr, _("broken revlog! (%s)") % e, f)
369 370 continue
370 371
371 372 for ff in fl.files():
372 373 try:
373 374 storefiles.remove(ff)
374 375 except KeyError:
375 376 if self.warnorphanstorefiles:
376 377 self.warn(_(" warning: revlog '%s' not in fncache!") %
377 378 ff)
378 379 self.fncachewarned = True
379 380
380 381 if not len(fl) and (self.havecl or self.havemf):
381 382 self.err(lr, _("empty or missing %s") % f)
382 383 else:
383 384 for problem in fl.verifyintegrity(state):
384 385 if problem.warning:
385 386 self.warn(problem.warning)
386 387 elif problem.error:
387 388 self.err(lr, problem.error, f)
388 389 else:
389 390 raise error.ProgrammingError(
390 391 'problem instance does not set warning or error '
391 392 'attribute: %s' % problem.msg)
392 393
393 394 seen = {}
394 395 rp = None
395 396 for i in fl:
396 397 revisions += 1
397 398 n = fl.node(i)
398 399 lr = self.checkentry(fl, i, n, seen, linkrevs, f)
399 400 if f in filenodes:
400 401 if havemf and n not in filenodes[f]:
401 402 self.err(lr, _("%s not in manifests") % (short(n)), f)
402 403 else:
403 404 del filenodes[f][n]
404 405
405 406 # Verify contents. 4 cases to care about:
406 407 #
407 408 # common: the most common case
408 409 # rename: with a rename
409 410 # meta: file content starts with b'\1\n', the metadata
410 411 # header defined in filelog.py, but without a rename
411 412 # ext: content stored externally
412 413 #
413 414 # More formally, their differences are shown below:
414 415 #
415 416 # | common | rename | meta | ext
416 417 # -------------------------------------------------------
417 418 # flags() | 0 | 0 | 0 | not 0
418 419 # renamed() | False | True | False | ?
419 420 # rawtext[0:2]=='\1\n'| False | True | True | ?
420 421 #
421 422 # "rawtext" means the raw text stored in revlog data, which
422 423 # could be retrieved by "revision(rev, raw=True)". "text"
423 424 # mentioned below is "revision(rev, raw=False)".
424 425 #
425 426 # There are 3 different lengths stored physically:
426 427 # 1. L1: rawsize, stored in revlog index
427 428 # 2. L2: len(rawtext), stored in revlog data
428 429 # 3. L3: len(text), stored in revlog data if flags==0, or
429 430 # possibly somewhere else if flags!=0
430 431 #
431 432 # L1 should be equal to L2. L3 could be different from them.
432 433 # "text" may or may not affect commit hash depending on flag
433 434 # processors (see revlog.addflagprocessor).
434 435 #
435 436 # | common | rename | meta | ext
436 437 # -------------------------------------------------
437 438 # rawsize() | L1 | L1 | L1 | L1
438 439 # size() | L1 | L2-LM | L1(*) | L1 (?)
439 440 # len(rawtext) | L2 | L2 | L2 | L2
440 441 # len(text) | L2 | L2 | L2 | L3
441 442 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
442 443 #
443 444 # LM: length of metadata, depending on rawtext
444 445 # (*): not ideal, see comment in filelog.size
445 446 # (?): could be "- len(meta)" if the resolved content has
446 447 # rename metadata
447 448 #
448 449 # Checks needed to be done:
449 450 # 1. length check: L1 == L2, in all cases.
450 451 # 2. hash check: depending on flag processor, we may need to
451 452 # use either "text" (external), or "rawtext" (in revlog).
452 453 try:
453 454 skipflags = self.skipflags
454 455 if skipflags:
455 456 skipflags &= fl.flags(i)
456 457 if not skipflags:
457 458 fl.read(n) # side effect: read content and do checkhash
458 459 rp = fl.renamed(n)
459 460 # the "L1 == L2" check
460 461 l1 = fl.rawsize(i)
461 462 l2 = len(fl.revision(n, raw=True))
462 463 if l1 != l2:
463 464 self.err(lr, _("unpacked size is %s, %s expected") %
464 465 (l2, l1), f)
465 466 except error.CensoredNodeError:
466 467 # experimental config: censor.policy
467 468 if ui.config("censor", "policy") == "abort":
468 469 self.err(lr, _("censored file data"), f)
469 470 except Exception as inst:
470 471 self.exc(lr, _("unpacking %s") % short(n), inst, f)
471 472
472 473 # check renames
473 474 try:
474 475 if rp:
475 476 if lr is not None and ui.verbose:
476 477 ctx = lrugetctx(lr)
477 478 if not any(rp[0] in pctx for pctx in ctx.parents()):
478 479 self.warn(_("warning: copy source of '%s' not"
479 480 " in parents of %s") % (f, ctx))
480 481 fl2 = repo.file(rp[0])
481 482 if not len(fl2):
482 483 self.err(lr, _("empty or missing copy source "
483 484 "revlog %s:%s") % (rp[0], short(rp[1])), f)
484 485 elif rp[1] == nullid:
485 486 ui.note(_("warning: %s@%s: copy source"
486 487 " revision is nullid %s:%s\n")
487 488 % (f, lr, rp[0], short(rp[1])))
488 489 else:
489 490 fl2.rev(rp[1])
490 491 except Exception as inst:
491 492 self.exc(lr, _("checking rename of %s") % short(n), inst, f)
492 493
493 494 # cross-check
494 495 if f in filenodes:
495 496 fns = [(v, k) for k, v in filenodes[f].iteritems()]
496 497 for lr, node in sorted(fns):
497 498 self.err(lr, _("manifest refers to unknown revision %s") %
498 499 short(node), f)
499 500 progress.complete()
500 501
501 502 if self.warnorphanstorefiles:
502 503 for f in sorted(storefiles):
503 504 self.warn(_("warning: orphan data file '%s'") % f)
504 505
505 506 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now