upgrade: clarify "aggressivemergedelta" handling...
Boris Feld
r40872:64051af1 default
@@ -1,2597 +1,2597 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to prevent pyflakes complaints
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100
101 101 # Aliased for performance.
102 102 _zlibdecompress = zlib.decompress
103 103
104 104 # max size of revlog with inline data
105 105 _maxinline = 131072
106 106 _chunksize = 1048576
107 107
108 108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 109 _flagprocessors = {
110 110 REVIDX_ISCENSORED: None,
111 111 }
112 112
113 113 # Flag processors for REVIDX_ELLIPSIS.
114 114 def ellipsisreadprocessor(rl, text):
115 115 return text, False
116 116
117 117 def ellipsiswriteprocessor(rl, text):
118 118 return text, False
119 119
120 120 def ellipsisrawprocessor(rl, text):
121 121 return False
122 122
123 123 ellipsisprocessor = (
124 124 ellipsisreadprocessor,
125 125 ellipsiswriteprocessor,
126 126 ellipsisrawprocessor,
127 127 )
128 128
129 129 def addflagprocessor(flag, processor):
130 130 """Register a flag processor on a revision data flag.
131 131
132 132 Invariant:
133 133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 135 - Only one flag processor can be registered on a specific flag.
136 136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 137 following signatures:
138 138 - (read) f(self, rawtext) -> text, bool
139 139 - (write) f(self, text) -> rawtext, bool
140 140 - (raw) f(self, rawtext) -> bool
141 141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 142 directly visible to the user.
143 143 The boolean returned by these transforms is used to determine whether
144 144 the returned text can be used for hash integrity checking. For example,
145 145 if "write" returns False, then "text" is used to generate hash. If
146 146 "write" returns True, that basically means "rawtext" returned by "write"
147 147 should be used to generate hash. Usually, "write" and "read" return
148 148 different booleans. And "raw" returns a same boolean as "write".
149 149
150 150 Note: The 'raw' transform is used for changegroup generation and in some
151 151 debug commands. In this case the transform only indicates whether the
152 152 contents can be used for hash integrity checks.
153 153 """
154 154 _insertflagprocessor(flag, processor, _flagprocessors)
155 155
156 156 def _insertflagprocessor(flag, processor, flagprocessors):
157 157 if not flag & REVIDX_KNOWN_FLAGS:
158 158 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
159 159 raise error.ProgrammingError(msg)
160 160 if flag not in REVIDX_FLAGS_ORDER:
161 161 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
162 162 raise error.ProgrammingError(msg)
163 163 if flag in flagprocessors:
164 164 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
165 165 raise error.Abort(msg)
166 166 flagprocessors[flag] = processor
167 167
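# A minimal sketch of registering a flag processor (illustrative only; the
# no-op transforms below are hypothetical and not part of this module, and
# REVIDX_EXTSTORED is assumed to be unclaimed in this process):
def _exampleflagprocessor():
    def readfn(rl, rawtext):
        return rawtext, True   # returned text usable for hash checking
    def writefn(rl, text):
        return text, True      # returned rawtext usable for hash checking
    def rawfn(rl, rawtext):
        return True            # rawtext usable for hash checking as-is
    addflagprocessor(REVIDX_EXTSTORED, (readfn, writefn, rawfn))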
168 168 def getoffset(q):
169 169 return int(q >> 16)
170 170
171 171 def gettype(q):
172 172 return int(q & 0xFFFF)
173 173
174 174 def offset_type(offset, type):
175 175 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
176 176 raise ValueError('unknown revlog index flags')
177 177 return int(int(offset) << 16 | type)
178 178
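# Sketch of the offset/flags packing round-trip above: the upper bits hold a
# file offset, the low 16 bits hold flags (illustrative values):
def _exampleoffsettype():
    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED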
179 179 @attr.s(slots=True, frozen=True)
180 180 class _revisioninfo(object):
181 181 """Information about a revision that allows building its fulltext
182 182 node: expected hash of the revision
183 183 p1, p2: parent revs of the revision
184 184 btext: built text cache consisting of a one-element list
185 185 cachedelta: (baserev, uncompressed_delta) or None
186 186 flags: flags associated to the revision storage
187 187
188 188 One of btext[0] or cachedelta must be set.
189 189 """
190 190 node = attr.ib()
191 191 p1 = attr.ib()
192 192 p2 = attr.ib()
193 193 btext = attr.ib()
194 194 textlen = attr.ib()
195 195 cachedelta = attr.ib()
196 196 flags = attr.ib()
197 197
198 198 @interfaceutil.implementer(repository.irevisiondelta)
199 199 @attr.s(slots=True)
200 200 class revlogrevisiondelta(object):
201 201 node = attr.ib()
202 202 p1node = attr.ib()
203 203 p2node = attr.ib()
204 204 basenode = attr.ib()
205 205 flags = attr.ib()
206 206 baserevisionsize = attr.ib()
207 207 revision = attr.ib()
208 208 delta = attr.ib()
209 209 linknode = attr.ib(default=None)
210 210
211 211 @interfaceutil.implementer(repository.iverifyproblem)
212 212 @attr.s(frozen=True)
213 213 class revlogproblem(object):
214 214 warning = attr.ib(default=None)
215 215 error = attr.ib(default=None)
216 216 node = attr.ib(default=None)
217 217
218 218 # index v0:
219 219 # 4 bytes: offset
220 220 # 4 bytes: compressed length
221 221 # 4 bytes: base rev
222 222 # 4 bytes: link rev
223 223 # 20 bytes: parent 1 nodeid
224 224 # 20 bytes: parent 2 nodeid
225 225 # 20 bytes: nodeid
226 226 indexformatv0 = struct.Struct(">4l20s20s20s")
227 227 indexformatv0_pack = indexformatv0.pack
228 228 indexformatv0_unpack = indexformatv0.unpack
229 229
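# Sketch of a v0 record: ">4l20s20s20s" is four signed 32-bit ints plus three
# 20-byte nodeids, 76 bytes in all (illustrative values, nullid standing in
# for absent parents):
def _examplev0record():
    assert indexformatv0.size == 76
    record = indexformatv0_pack(0, 0, 0, -1, nullid, nullid, nullid)
    assert len(record) == 76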
230 230 class revlogoldindex(list):
231 231 def __getitem__(self, i):
232 232 if i == -1:
233 233 return (0, 0, 0, -1, -1, -1, -1, nullid)
234 234 return list.__getitem__(self, i)
235 235
236 236 class revlogoldio(object):
237 237 def __init__(self):
238 238 self.size = indexformatv0.size
239 239
240 240 def parseindex(self, data, inline):
241 241 s = self.size
242 242 index = []
243 243 nodemap = {nullid: nullrev}
244 244 n = off = 0
245 245 l = len(data)
246 246 while off + s <= l:
247 247 cur = data[off:off + s]
248 248 off += s
249 249 e = indexformatv0_unpack(cur)
250 250 # transform to revlogv1 format
251 251 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
252 252 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
253 253 index.append(e2)
254 254 nodemap[e[6]] = n
255 255 n += 1
256 256
257 257 return revlogoldindex(index), nodemap, None
258 258
259 259 def packentry(self, entry, node, version, rev):
260 260 if gettype(entry[0]):
261 261 raise error.RevlogError(_('index entry flags need revlog '
262 262 'version 1'))
263 263 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
264 264 node(entry[5]), node(entry[6]), entry[7])
265 265 return indexformatv0_pack(*e2)
266 266
267 267 # index ng:
268 268 # 6 bytes: offset
269 269 # 2 bytes: flags
270 270 # 4 bytes: compressed length
271 271 # 4 bytes: uncompressed length
272 272 # 4 bytes: base rev
273 273 # 4 bytes: link rev
274 274 # 4 bytes: parent 1 rev
275 275 # 4 bytes: parent 2 rev
276 276 # 32 bytes: nodeid
277 277 indexformatng = struct.Struct(">Qiiiiii20s12x")
278 278 indexformatng_pack = indexformatng.pack
279 279 versionformat = struct.Struct(">I")
280 280 versionformat_pack = versionformat.pack
281 281 versionformat_unpack = versionformat.unpack
282 282
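# Sketch of a v1 ("ng") record: ">Qiiiiii20s12x" is 8 + 6*4 + 20 + 12 = 64
# bytes, where the leading quad packs offset and flags as offset_type() does
# (illustrative values):
def _examplengrecord():
    assert indexformatng.size == 64
    entry = (offset_type(0, 0), 11, 17, 0, 0, -1, -1, nullid)
    assert len(indexformatng_pack(*entry)) == 64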
283 283 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
284 284 # signed integer)
285 285 _maxentrysize = 0x7fffffff
286 286
287 287 class revlogio(object):
288 288 def __init__(self):
289 289 self.size = indexformatng.size
290 290
291 291 def parseindex(self, data, inline):
292 292 # call the C implementation to parse the index data
293 293 index, cache = parsers.parse_index2(data, inline)
294 294 return index, getattr(index, 'nodemap', None), cache
295 295
296 296 def packentry(self, entry, node, version, rev):
297 297 p = indexformatng_pack(*entry)
298 298 if rev == 0:
299 299 p = versionformat_pack(version) + p[4:]
300 300 return p
301 301
302 302 class revlog(object):
303 303 """
304 304 the underlying revision storage object
305 305
306 306 A revlog consists of two parts, an index and the revision data.
307 307
308 308 The index is a file with a fixed record size containing
309 309 information on each revision, including its nodeid (hash), the
310 310 nodeids of its parents, the position and offset of its data within
311 311 the data file, and the revision it's based on. Finally, each entry
312 312 contains a linkrev entry that can serve as a pointer to external
313 313 data.
314 314
315 315 The revision data itself is a linear collection of data chunks.
316 316 Each chunk represents a revision and is usually represented as a
317 317 delta against the previous chunk. To bound lookup time, runs of
318 318 deltas are limited to about 2 times the length of the original
319 319 version data. This makes retrieval of a version proportional to
320 320 its size, or O(1) relative to the number of revisions.
321 321
322 322 Both pieces of the revlog are written to in an append-only
323 323 fashion, which means we never need to rewrite a file to insert or
324 324 remove data, and can use some simple techniques to avoid the need
325 325 for locking while reading.
326 326
327 327 If checkambig, indexfile is opened with checkambig=True at
328 328 writing, to avoid file stat ambiguity.
329 329
330 330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
331 331 index will be mmapped rather than read if it is larger than the
332 332 configured threshold.
333 333
334 334 If censorable is True, the revlog can have censored revisions.
335 335 """
336 336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
337 337 mmaplargeindex=False, censorable=False):
338 338 """
339 339 create a revlog object
340 340
341 341 opener is a function that abstracts the file opening operation
342 342 and can be used to implement COW semantics or the like.
343 343 """
344 344 self.indexfile = indexfile
345 345 self.datafile = datafile or (indexfile[:-2] + ".d")
346 346 self.opener = opener
347 347 # When True, indexfile is opened with checkambig=True at writing, to
348 348 # avoid file stat ambiguity.
349 349 self._checkambig = checkambig
350 350 self._censorable = censorable
351 351 # 3-tuple of (node, rev, text) for a raw revision.
352 352 self._revisioncache = None
353 353 # Maps rev to chain base rev.
354 354 self._chainbasecache = util.lrucachedict(100)
355 355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
356 356 self._chunkcache = (0, '')
357 357 # How much data to read and cache into the raw revlog data cache.
358 358 self._chunkcachesize = 65536
359 359 self._maxchainlen = None
360 360 self._deltabothparents = True
361 361 self.index = []
362 362 # Mapping of partial identifiers to full nodes.
363 363 self._pcache = {}
364 364 # Mapping of revision integer to full node.
365 365 self._nodecache = {nullid: nullrev}
366 366 self._nodepos = None
367 367 self._compengine = 'zlib'
368 368 self._maxdeltachainspan = -1
369 369 self._withsparseread = False
370 370 self._sparserevlog = False
371 371 self._srdensitythreshold = 0.50
372 372 self._srmingapsize = 262144
373 373
374 374 # Make copy of flag processors so each revlog instance can support
375 375 # custom flags.
376 376 self._flagprocessors = dict(_flagprocessors)
377 377
378 378 # 2-tuple of file handles being used for active writing.
379 379 self._writinghandles = None
380 380
381 381 mmapindexthreshold = None
382 382 v = REVLOG_DEFAULT_VERSION
383 383 opts = getattr(opener, 'options', None)
384 384 if opts is not None:
385 385 if 'revlogv2' in opts:
386 386 # version 2 revlogs always use generaldelta.
387 387 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
388 388 elif 'revlogv1' in opts:
389 389 if 'generaldelta' in opts:
390 390 v |= FLAG_GENERALDELTA
391 391 else:
392 392 v = 0
393 393 if 'chunkcachesize' in opts:
394 394 self._chunkcachesize = opts['chunkcachesize']
395 395 if 'maxchainlen' in opts:
396 396 self._maxchainlen = opts['maxchainlen']
397 397 if 'deltabothparents' in opts:
398 398 self._deltabothparents = opts['deltabothparents']
399 399 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
400 400 if 'compengine' in opts:
401 401 self._compengine = opts['compengine']
402 402 if 'maxdeltachainspan' in opts:
403 403 self._maxdeltachainspan = opts['maxdeltachainspan']
404 404 if mmaplargeindex and 'mmapindexthreshold' in opts:
405 405 mmapindexthreshold = opts['mmapindexthreshold']
406 406 self._sparserevlog = bool(opts.get('sparse-revlog', False))
407 407 withsparseread = bool(opts.get('with-sparse-read', False))
408 408 # sparse-revlog forces sparse-read
409 409 self._withsparseread = self._sparserevlog or withsparseread
410 410 if 'sparse-read-density-threshold' in opts:
411 411 self._srdensitythreshold = opts['sparse-read-density-threshold']
412 412 if 'sparse-read-min-gap-size' in opts:
413 413 self._srmingapsize = opts['sparse-read-min-gap-size']
414 414 if opts.get('enableellipsis'):
415 415 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
416 416
417 417 # revlog v0 doesn't have flag processors
418 418 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
419 419 _insertflagprocessor(flag, processor, self._flagprocessors)
420 420
421 421 if self._chunkcachesize <= 0:
422 422 raise error.RevlogError(_('revlog chunk cache size %r is not '
423 423 'greater than 0') % self._chunkcachesize)
424 424 elif self._chunkcachesize & (self._chunkcachesize - 1):
425 425 raise error.RevlogError(_('revlog chunk cache size %r is not a '
426 426 'power of 2') % self._chunkcachesize)
427 427
428 428 self._loadindex(v, mmapindexthreshold)
429 429
430 430 def _loadindex(self, v, mmapindexthreshold):
431 431 indexdata = ''
432 432 self._initempty = True
433 433 try:
434 434 with self._indexfp() as f:
435 435 if (mmapindexthreshold is not None and
436 436 self.opener.fstat(f).st_size >= mmapindexthreshold):
437 437 indexdata = util.buffer(util.mmapread(f))
438 438 else:
439 439 indexdata = f.read()
440 440 if len(indexdata) > 0:
441 441 v = versionformat_unpack(indexdata[:4])[0]
442 442 self._initempty = False
443 443 except IOError as inst:
444 444 if inst.errno != errno.ENOENT:
445 445 raise
446 446
447 447 self.version = v
448 448 self._inline = v & FLAG_INLINE_DATA
449 449 self._generaldelta = v & FLAG_GENERALDELTA
450 450 flags = v & ~0xFFFF
451 451 fmt = v & 0xFFFF
452 452 if fmt == REVLOGV0:
453 453 if flags:
454 454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
455 455 'revlog %s') %
456 456 (flags >> 16, fmt, self.indexfile))
457 457 elif fmt == REVLOGV1:
458 458 if flags & ~REVLOGV1_FLAGS:
459 459 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
460 460 'revlog %s') %
461 461 (flags >> 16, fmt, self.indexfile))
462 462 elif fmt == REVLOGV2:
463 463 if flags & ~REVLOGV2_FLAGS:
464 464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 465 'revlog %s') %
466 466 (flags >> 16, fmt, self.indexfile))
467 467 else:
468 468 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
469 469 (fmt, self.indexfile))
470 470
471 471 self._storedeltachains = True
472 472
473 473 self._io = revlogio()
474 474 if self.version == REVLOGV0:
475 475 self._io = revlogoldio()
476 476 try:
477 477 d = self._io.parseindex(indexdata, self._inline)
478 478 except (ValueError, IndexError):
479 479 raise error.RevlogError(_("index %s is corrupted") %
480 480 self.indexfile)
481 481 self.index, nodemap, self._chunkcache = d
482 482 if nodemap is not None:
483 483 self.nodemap = self._nodecache = nodemap
484 484 if not self._chunkcache:
485 485 self._chunkclear()
486 486 # revnum -> (chain-length, sum-delta-length)
487 487 self._chaininfocache = {}
488 488 # revlog header -> revlog compressor
489 489 self._decompressors = {}
490 490
491 491 @util.propertycache
492 492 def _compressor(self):
493 493 return util.compengines[self._compengine].revlogcompressor()
494 494
495 495 def _indexfp(self, mode='r'):
496 496 """file object for the revlog's index file"""
497 497 args = {r'mode': mode}
498 498 if mode != 'r':
499 499 args[r'checkambig'] = self._checkambig
500 500 if mode == 'w':
501 501 args[r'atomictemp'] = True
502 502 return self.opener(self.indexfile, **args)
503 503
504 504 def _datafp(self, mode='r'):
505 505 """file object for the revlog's data file"""
506 506 return self.opener(self.datafile, mode=mode)
507 507
508 508 @contextlib.contextmanager
509 509 def _datareadfp(self, existingfp=None):
510 510 """file object suitable to read data"""
511 511 # Use explicit file handle, if given.
512 512 if existingfp is not None:
513 513 yield existingfp
514 514
515 515 # Use a file handle being actively used for writes, if available.
516 516 # There is some danger to doing this because reads will seek the
517 517 # file. However, _writeentry() performs a SEEK_END before all writes,
518 518 # so we should be safe.
519 519 elif self._writinghandles:
520 520 if self._inline:
521 521 yield self._writinghandles[0]
522 522 else:
523 523 yield self._writinghandles[1]
524 524
525 525 # Otherwise open a new file handle.
526 526 else:
527 527 if self._inline:
528 528 func = self._indexfp
529 529 else:
530 530 func = self._datafp
531 531 with func() as fp:
532 532 yield fp
533 533
534 534 def tip(self):
535 535 return self.node(len(self.index) - 1)
536 536 def __contains__(self, rev):
537 537 return 0 <= rev < len(self)
538 538 def __len__(self):
539 539 return len(self.index)
540 540 def __iter__(self):
541 541 return iter(pycompat.xrange(len(self)))
542 542 def revs(self, start=0, stop=None):
543 543 """iterate over all rev in this revlog (from start to stop)"""
544 544 return storageutil.iterrevs(len(self), start=start, stop=stop)
545 545
546 546 @util.propertycache
547 547 def nodemap(self):
548 548 if self.index:
549 549 # populate mapping down to the initial node
550 550 node0 = self.index[0][7] # get around changelog filtering
551 551 self.rev(node0)
552 552 return self._nodecache
553 553
554 554 def hasnode(self, node):
555 555 try:
556 556 self.rev(node)
557 557 return True
558 558 except KeyError:
559 559 return False
560 560
561 561 def candelta(self, baserev, rev):
562 562 """whether two revisions (baserev, rev) can be delta-ed or not"""
563 563 # Disable delta if either rev requires a content-changing flag
564 564 # processor (ex. LFS). This is because such flag processor can alter
565 565 # the rawtext content that the delta will be based on, and two clients
566 566 # could have a same revlog node with different flags (i.e. different
567 567 # rawtext contents) and the delta could be incompatible.
568 568 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
569 569 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
570 570 return False
571 571 return True
572 572
573 573 def clearcaches(self):
574 574 self._revisioncache = None
575 575 self._chainbasecache.clear()
576 576 self._chunkcache = (0, '')
577 577 self._pcache = {}
578 578
579 579 try:
580 580 self._nodecache.clearcaches()
581 581 except AttributeError:
582 582 self._nodecache = {nullid: nullrev}
583 583 self._nodepos = None
584 584
585 585 def rev(self, node):
586 586 try:
587 587 return self._nodecache[node]
588 588 except TypeError:
589 589 raise
590 590 except error.RevlogError:
591 591 # parsers.c radix tree lookup failed
592 592 if node == wdirid or node in wdirfilenodeids:
593 593 raise error.WdirUnsupported
594 594 raise error.LookupError(node, self.indexfile, _('no node'))
595 595 except KeyError:
596 596 # pure python cache lookup failed
597 597 n = self._nodecache
598 598 i = self.index
599 599 p = self._nodepos
600 600 if p is None:
601 601 p = len(i) - 1
602 602 else:
603 603 assert p < len(i)
604 604 for r in pycompat.xrange(p, -1, -1):
605 605 v = i[r][7]
606 606 n[v] = r
607 607 if v == node:
608 608 self._nodepos = r - 1
609 609 return r
610 610 if node == wdirid or node in wdirfilenodeids:
611 611 raise error.WdirUnsupported
612 612 raise error.LookupError(node, self.indexfile, _('no node'))
613 613
614 614 # Accessors for index entries.
615 615
616 616 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
617 617 # are flags.
618 618 def start(self, rev):
619 619 return int(self.index[rev][0] >> 16)
620 620
621 621 def flags(self, rev):
622 622 return self.index[rev][0] & 0xFFFF
623 623
624 624 def length(self, rev):
625 625 return self.index[rev][1]
626 626
627 627 def rawsize(self, rev):
628 628 """return the length of the uncompressed text for a given revision"""
629 629 l = self.index[rev][2]
630 630 if l >= 0:
631 631 return l
632 632
633 633 t = self.revision(rev, raw=True)
634 634 return len(t)
635 635
636 636 def size(self, rev):
637 637 """length of non-raw text (processed by a "read" flag processor)"""
638 638 # fast path: if no "read" flag processor could change the content,
639 639 # size is rawsize. note: ELLIPSIS is known to not change the content.
640 640 flags = self.flags(rev)
641 641 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
642 642 return self.rawsize(rev)
643 643
644 644 return len(self.revision(rev, raw=False))
645 645
646 646 def chainbase(self, rev):
647 647 base = self._chainbasecache.get(rev)
648 648 if base is not None:
649 649 return base
650 650
651 651 index = self.index
652 652 iterrev = rev
653 653 base = index[iterrev][3]
654 654 while base != iterrev:
655 655 iterrev = base
656 656 base = index[iterrev][3]
657 657
658 658 self._chainbasecache[rev] = base
659 659 return base
660 660
661 661 def linkrev(self, rev):
662 662 return self.index[rev][4]
663 663
664 664 def parentrevs(self, rev):
665 665 try:
666 666 entry = self.index[rev]
667 667 except IndexError:
668 668 if rev == wdirrev:
669 669 raise error.WdirUnsupported
670 670 raise
671 671
672 672 return entry[5], entry[6]
673 673
674 674 # fast parentrevs(rev) where rev isn't filtered
675 675 _uncheckedparentrevs = parentrevs
676 676
677 677 def node(self, rev):
678 678 try:
679 679 return self.index[rev][7]
680 680 except IndexError:
681 681 if rev == wdirrev:
682 682 raise error.WdirUnsupported
683 683 raise
684 684
685 685 # Derived from index values.
686 686
687 687 def end(self, rev):
688 688 return self.start(rev) + self.length(rev)
689 689
690 690 def parents(self, node):
691 691 i = self.index
692 692 d = i[self.rev(node)]
693 693 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
694 694
695 695 def chainlen(self, rev):
696 696 return self._chaininfo(rev)[0]
697 697
698 698 def _chaininfo(self, rev):
699 699 chaininfocache = self._chaininfocache
700 700 if rev in chaininfocache:
701 701 return chaininfocache[rev]
702 702 index = self.index
703 703 generaldelta = self._generaldelta
704 704 iterrev = rev
705 705 e = index[iterrev]
706 706 clen = 0
707 707 compresseddeltalen = 0
708 708 while iterrev != e[3]:
709 709 clen += 1
710 710 compresseddeltalen += e[1]
711 711 if generaldelta:
712 712 iterrev = e[3]
713 713 else:
714 714 iterrev -= 1
715 715 if iterrev in chaininfocache:
716 716 t = chaininfocache[iterrev]
717 717 clen += t[0]
718 718 compresseddeltalen += t[1]
719 719 break
720 720 e = index[iterrev]
721 721 else:
722 722 # Add text length of base since decompressing that also takes
723 723 # work. For cache hits the length is already included.
724 724 compresseddeltalen += e[1]
725 725 r = (clen, compresseddeltalen)
726 726 chaininfocache[rev] = r
727 727 return r
728 728
729 729 def _deltachain(self, rev, stoprev=None):
730 730 """Obtain the delta chain for a revision.
731 731
732 732 ``stoprev`` specifies a revision to stop at. If not specified, we
733 733 stop at the base of the chain.
734 734
735 735 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
736 736 revs in ascending order and ``stopped`` is a bool indicating whether
737 737 ``stoprev`` was hit.
738 738 """
739 739 # Try C implementation.
740 740 try:
741 741 return self.index.deltachain(rev, stoprev, self._generaldelta)
742 742 except AttributeError:
743 743 pass
744 744
745 745 chain = []
746 746
747 747 # Alias to prevent attribute lookup in tight loop.
748 748 index = self.index
749 749 generaldelta = self._generaldelta
750 750
751 751 iterrev = rev
752 752 e = index[iterrev]
753 753 while iterrev != e[3] and iterrev != stoprev:
754 754 chain.append(iterrev)
755 755 if generaldelta:
756 756 iterrev = e[3]
757 757 else:
758 758 iterrev -= 1
759 759 e = index[iterrev]
760 760
761 761 if iterrev == stoprev:
762 762 stopped = True
763 763 else:
764 764 chain.append(iterrev)
765 765 stopped = False
766 766
767 767 chain.reverse()
768 768 return chain, stopped
769 769
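# Illustrative sketch of the fallback above (hypothetical index contents):
# with generaldelta, the chain follows base pointers, so if revision 3 is a
# full snapshot and bases run 3 <- 4 <- 5, then _deltachain(5) returns
# ([3, 4, 5], False), while _deltachain(5, stoprev=4) returns ([5], True).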
770 770 def ancestors(self, revs, stoprev=0, inclusive=False):
771 771 """Generate the ancestors of 'revs' in reverse revision order.
772 772 Does not generate revs lower than stoprev.
773 773
774 774 See the documentation for ancestor.lazyancestors for more details."""
775 775
776 776 # first, make sure start revisions aren't filtered
777 777 revs = list(revs)
778 778 checkrev = self.node
779 779 for r in revs:
780 780 checkrev(r)
781 781 # and we're sure ancestors aren't filtered as well
782 782 if util.safehasattr(parsers, 'rustlazyancestors'):
783 783 return ancestor.rustlazyancestors(
784 784 self.index, revs,
785 785 stoprev=stoprev, inclusive=inclusive)
786 786 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
787 787 stoprev=stoprev, inclusive=inclusive)
788 788
789 789 def descendants(self, revs):
790 790 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
791 791
792 792 def findcommonmissing(self, common=None, heads=None):
793 793 """Return a tuple of the ancestors of common and the ancestors of heads
794 794 that are not ancestors of common. In revset terminology, we return the
795 795 tuple:
796 796
797 797 ::common, (::heads) - (::common)
798 798
799 799 The list is sorted by revision number, meaning it is
800 800 topologically sorted.
801 801
802 802 'heads' and 'common' are both lists of node IDs. If heads is
803 803 not supplied, uses all of the revlog's heads. If common is not
804 804 supplied, uses nullid."""
805 805 if common is None:
806 806 common = [nullid]
807 807 if heads is None:
808 808 heads = self.heads()
809 809
810 810 common = [self.rev(n) for n in common]
811 811 heads = [self.rev(n) for n in heads]
812 812
813 813 # we want the ancestors, but inclusive
814 814 class lazyset(object):
815 815 def __init__(self, lazyvalues):
816 816 self.addedvalues = set()
817 817 self.lazyvalues = lazyvalues
818 818
819 819 def __contains__(self, value):
820 820 return value in self.addedvalues or value in self.lazyvalues
821 821
822 822 def __iter__(self):
823 823 added = self.addedvalues
824 824 for r in added:
825 825 yield r
826 826 for r in self.lazyvalues:
827 827 if not r in added:
828 828 yield r
829 829
830 830 def add(self, value):
831 831 self.addedvalues.add(value)
832 832
833 833 def update(self, values):
834 834 self.addedvalues.update(values)
835 835
836 836 has = lazyset(self.ancestors(common))
837 837 has.add(nullrev)
838 838 has.update(common)
839 839
840 840 # take all ancestors from heads that aren't in has
841 841 missing = set()
842 842 visit = collections.deque(r for r in heads if r not in has)
843 843 while visit:
844 844 r = visit.popleft()
845 845 if r in missing:
846 846 continue
847 847 else:
848 848 missing.add(r)
849 849 for p in self.parentrevs(r):
850 850 if p not in has:
851 851 visit.append(p)
852 852 missing = list(missing)
853 853 missing.sort()
854 854 return has, [self.node(miss) for miss in missing]
855 855
856 856 def incrementalmissingrevs(self, common=None):
857 857 """Return an object that can be used to incrementally compute the
858 858 revision numbers of the ancestors of arbitrary sets that are not
859 859 ancestors of common. This is an ancestor.incrementalmissingancestors
860 860 object.
861 861
862 862 'common' is a list of revision numbers. If common is not supplied, uses
863 863 nullrev.
864 864 """
865 865 if common is None:
866 866 common = [nullrev]
867 867
868 868 return ancestor.incrementalmissingancestors(self.parentrevs, common)
869 869
870 870 def findmissingrevs(self, common=None, heads=None):
871 871 """Return the revision numbers of the ancestors of heads that
872 872 are not ancestors of common.
873 873
874 874 More specifically, return a list of revision numbers corresponding to
875 875 nodes N such that every N satisfies the following constraints:
876 876
877 877 1. N is an ancestor of some node in 'heads'
878 878 2. N is not an ancestor of any node in 'common'
879 879
880 880 The list is sorted by revision number, meaning it is
881 881 topologically sorted.
882 882
883 883 'heads' and 'common' are both lists of revision numbers. If heads is
884 884 not supplied, uses all of the revlog's heads. If common is not
885 885 supplied, uses nullid."""
886 886 if common is None:
887 887 common = [nullrev]
888 888 if heads is None:
889 889 heads = self.headrevs()
890 890
891 891 inc = self.incrementalmissingrevs(common=common)
892 892 return inc.missingancestors(heads)
893 893
894 894 def findmissing(self, common=None, heads=None):
895 895 """Return the ancestors of heads that are not ancestors of common.
896 896
897 897 More specifically, return a list of nodes N such that every N
898 898 satisfies the following constraints:
899 899
900 900 1. N is an ancestor of some node in 'heads'
901 901 2. N is not an ancestor of any node in 'common'
902 902
903 903 The list is sorted by revision number, meaning it is
904 904 topologically sorted.
905 905
906 906 'heads' and 'common' are both lists of node IDs. If heads is
907 907 not supplied, uses all of the revlog's heads. If common is not
908 908 supplied, uses nullid."""
909 909 if common is None:
910 910 common = [nullid]
911 911 if heads is None:
912 912 heads = self.heads()
913 913
914 914 common = [self.rev(n) for n in common]
915 915 heads = [self.rev(n) for n in heads]
916 916
917 917 inc = self.incrementalmissingrevs(common=common)
918 918 return [self.node(r) for r in inc.missingancestors(heads)]
919 919
920 920 def nodesbetween(self, roots=None, heads=None):
921 921 """Return a topological path from 'roots' to 'heads'.
922 922
923 923 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
924 924 topologically sorted list of all nodes N that satisfy both of
925 925 these constraints:
926 926
927 927 1. N is a descendant of some node in 'roots'
928 928 2. N is an ancestor of some node in 'heads'
929 929
930 930 Every node is considered to be both a descendant and an ancestor
931 931 of itself, so every reachable node in 'roots' and 'heads' will be
932 932 included in 'nodes'.
933 933
934 934 'outroots' is the list of reachable nodes in 'roots', i.e., the
935 935 subset of 'roots' that is returned in 'nodes'. Likewise,
936 936 'outheads' is the subset of 'heads' that is also in 'nodes'.
937 937
938 938 'roots' and 'heads' are both lists of node IDs. If 'roots' is
939 939 unspecified, uses nullid as the only root. If 'heads' is
940 940 unspecified, uses list of all of the revlog's heads."""
941 941 nonodes = ([], [], [])
942 942 if roots is not None:
943 943 roots = list(roots)
944 944 if not roots:
945 945 return nonodes
946 946 lowestrev = min([self.rev(n) for n in roots])
947 947 else:
948 948 roots = [nullid] # Everybody's a descendant of nullid
949 949 lowestrev = nullrev
950 950 if (lowestrev == nullrev) and (heads is None):
951 951 # We want _all_ the nodes!
952 952 return ([self.node(r) for r in self], [nullid], list(self.heads()))
953 953 if heads is None:
954 954 # All nodes are ancestors, so the latest ancestor is the last
955 955 # node.
956 956 highestrev = len(self) - 1
957 957 # Set ancestors to None to signal that every node is an ancestor.
958 958 ancestors = None
959 959 # Set heads to an empty dictionary for later discovery of heads
960 960 heads = {}
961 961 else:
962 962 heads = list(heads)
963 963 if not heads:
964 964 return nonodes
965 965 ancestors = set()
966 966 # Turn heads into a dictionary so we can remove 'fake' heads.
967 967 # Also, later we will be using it to filter out the heads we can't
968 968 # find from roots.
969 969 heads = dict.fromkeys(heads, False)
970 970 # Start at the top and keep marking parents until we're done.
971 971 nodestotag = set(heads)
972 972 # Remember where the top was so we can use it as a limit later.
973 973 highestrev = max([self.rev(n) for n in nodestotag])
974 974 while nodestotag:
975 975 # grab a node to tag
976 976 n = nodestotag.pop()
977 977 # Never tag nullid
978 978 if n == nullid:
979 979 continue
980 980 # A node's revision number represents its place in a
981 981 # topologically sorted list of nodes.
982 982 r = self.rev(n)
983 983 if r >= lowestrev:
984 984 if n not in ancestors:
985 985 # If we are possibly a descendant of one of the roots
986 986 # and we haven't already been marked as an ancestor
987 987 ancestors.add(n) # Mark as ancestor
988 988 # Add non-nullid parents to list of nodes to tag.
989 989 nodestotag.update([p for p in self.parents(n) if
990 990 p != nullid])
991 991 elif n in heads: # We've seen it before, is it a fake head?
992 992 # So it is, real heads should not be the ancestors of
993 993 # any other heads.
994 994 heads.pop(n)
995 995 if not ancestors:
996 996 return nonodes
997 997 # Now that we have our set of ancestors, we want to remove any
998 998 # roots that are not ancestors.
999 999
1000 1000 # If one of the roots was nullid, everything is included anyway.
1001 1001 if lowestrev > nullrev:
1002 1002 # But, since we weren't, let's recompute the lowest rev to not
1003 1003 # include roots that aren't ancestors.
1004 1004
1005 1005 # Filter out roots that aren't ancestors of heads
1006 1006 roots = [root for root in roots if root in ancestors]
1007 1007 # Recompute the lowest revision
1008 1008 if roots:
1009 1009 lowestrev = min([self.rev(root) for root in roots])
1010 1010 else:
1011 1011 # No more roots? Return empty list
1012 1012 return nonodes
1013 1013 else:
1014 1014 # We are descending from nullid, and don't need to care about
1015 1015 # any other roots.
1016 1016 lowestrev = nullrev
1017 1017 roots = [nullid]
1018 1018 # Transform our roots list into a set.
1019 1019 descendants = set(roots)
1020 1020 # Also, keep the original roots so we can filter out roots that aren't
1021 1021 # 'real' roots (i.e. are descended from other roots).
1022 1022 roots = descendants.copy()
1023 1023 # Our topologically sorted list of output nodes.
1024 1024 orderedout = []
1025 1025 # Don't start at nullid since we don't want nullid in our output list,
1026 1026 # and if nullid shows up in descendants, empty parents will look like
1027 1027 # they're descendants.
1028 1028 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1029 1029 n = self.node(r)
1030 1030 isdescendant = False
1031 1031 if lowestrev == nullrev: # Everybody is a descendant of nullid
1032 1032 isdescendant = True
1033 1033 elif n in descendants:
1034 1034 # n is already a descendant
1035 1035 isdescendant = True
1036 1036 # This check only needs to be done here because all the roots
1037 1037 # will start being marked as descendants before the loop.
1038 1038 if n in roots:
1039 1039 # If n was a root, check if it's a 'real' root.
1040 1040 p = tuple(self.parents(n))
1041 1041 # If any of its parents are descendants, it's not a root.
1042 1042 if (p[0] in descendants) or (p[1] in descendants):
1043 1043 roots.remove(n)
1044 1044 else:
1045 1045 p = tuple(self.parents(n))
1046 1046 # A node is a descendant if either of its parents are
1047 1047 # descendants. (We seeded the descendants set with the roots
1048 1048 # up there, remember?)
1049 1049 if (p[0] in descendants) or (p[1] in descendants):
1050 1050 descendants.add(n)
1051 1051 isdescendant = True
1052 1052 if isdescendant and ((ancestors is None) or (n in ancestors)):
1053 1053 # Only include nodes that are both descendants and ancestors.
1054 1054 orderedout.append(n)
1055 1055 if (ancestors is not None) and (n in heads):
1056 1056 # We're trying to figure out which heads are reachable
1057 1057 # from roots.
1058 1058 # Mark this head as having been reached
1059 1059 heads[n] = True
1060 1060 elif ancestors is None:
1061 1061 # Otherwise, we're trying to discover the heads.
1062 1062 # Assume this is a head because if it isn't, the next step
1063 1063 # will eventually remove it.
1064 1064 heads[n] = True
1065 1065 # But, obviously its parents aren't.
1066 1066 for p in self.parents(n):
1067 1067 heads.pop(p, None)
1068 1068 heads = [head for head, flag in heads.iteritems() if flag]
1069 1069 roots = list(roots)
1070 1070 assert orderedout
1071 1071 assert roots
1072 1072 assert heads
1073 1073 return (orderedout, roots, heads)
1074 1074
1075 1075 def headrevs(self):
1076 1076 try:
1077 1077 return self.index.headrevs()
1078 1078 except AttributeError:
1079 1079 return self._headrevs()
1080 1080
1081 1081 def computephases(self, roots):
1082 1082 return self.index.computephasesmapsets(roots)
1083 1083
1084 1084 def _headrevs(self):
1085 1085 count = len(self)
1086 1086 if not count:
1087 1087 return [nullrev]
1088 1088 # we won't iterate over filtered revs, so nobody is a head at start
1089 1089 ishead = [0] * (count + 1)
1090 1090 index = self.index
1091 1091 for r in self:
1092 1092 ishead[r] = 1 # I may be a head
1093 1093 e = index[r]
1094 1094 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1095 1095 return [r for r, val in enumerate(ishead) if val]
1096 1096
1097 1097 def heads(self, start=None, stop=None):
1098 1098 """return the list of all nodes that have no children
1099 1099
1100 1100 if start is specified, only heads that are descendants of
1101 1101 start will be returned
1102 1102 if stop is specified, it will consider all the revs from stop
1103 1103 as if they had no children
1104 1104 """
1105 1105 if start is None and stop is None:
1106 1106 if not len(self):
1107 1107 return [nullid]
1108 1108 return [self.node(r) for r in self.headrevs()]
1109 1109
1110 1110 if start is None:
1111 1111 start = nullrev
1112 1112 else:
1113 1113 start = self.rev(start)
1114 1114
1115 1115 stoprevs = set(self.rev(n) for n in stop or [])
1116 1116
1117 1117 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1118 1118 stoprevs=stoprevs)
1119 1119
1120 1120 return [self.node(rev) for rev in revs]
1121 1121
1122 1122 def children(self, node):
1123 1123 """find the children of a given node"""
1124 1124 c = []
1125 1125 p = self.rev(node)
1126 1126 for r in self.revs(start=p + 1):
1127 1127 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1128 1128 if prevs:
1129 1129 for pr in prevs:
1130 1130 if pr == p:
1131 1131 c.append(self.node(r))
1132 1132 elif p == nullrev:
1133 1133 c.append(self.node(r))
1134 1134 return c
1135 1135
1136 1136 def commonancestorsheads(self, a, b):
1137 1137 """calculate all the heads of the common ancestors of nodes a and b"""
1138 1138 a, b = self.rev(a), self.rev(b)
1139 1139 ancs = self._commonancestorsheads(a, b)
1140 1140 return pycompat.maplist(self.node, ancs)
1141 1141
1142 1142 def _commonancestorsheads(self, *revs):
1143 1143 """calculate all the heads of the common ancestors of revs"""
1144 1144 try:
1145 1145 ancs = self.index.commonancestorsheads(*revs)
1146 1146 except (AttributeError, OverflowError): # C implementation failed
1147 1147 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1148 1148 return ancs
1149 1149
1150 1150 def isancestor(self, a, b):
1151 1151 """return True if node a is an ancestor of node b
1152 1152
1153 1153 A revision is considered an ancestor of itself."""
1154 1154 a, b = self.rev(a), self.rev(b)
1155 1155 return self.isancestorrev(a, b)
1156 1156
1157 1157 def isancestorrev(self, a, b):
1158 1158 """return True if revision a is an ancestor of revision b
1159 1159
1160 1160 A revision is considered an ancestor of itself.
1161 1161
1162 1162 The implementation of this is trivial but the use of
1163 1163 commonancestorsheads is not."""
1164 1164 if a == nullrev:
1165 1165 return True
1166 1166 elif a == b:
1167 1167 return True
1168 1168 elif a > b:
1169 1169 return False
1170 1170 return a in self._commonancestorsheads(a, b)
1171 1171
1172 1172 def ancestor(self, a, b):
1173 1173 """calculate the "best" common ancestor of nodes a and b"""
1174 1174
1175 1175 a, b = self.rev(a), self.rev(b)
1176 1176 try:
1177 1177 ancs = self.index.ancestors(a, b)
1178 1178 except (AttributeError, OverflowError):
1179 1179 ancs = ancestor.ancestors(self.parentrevs, a, b)
1180 1180 if ancs:
1181 1181 # choose a consistent winner when there's a tie
1182 1182 return min(map(self.node, ancs))
1183 1183 return nullid
1184 1184
1185 1185 def _match(self, id):
1186 1186 if isinstance(id, int):
1187 1187 # rev
1188 1188 return self.node(id)
1189 1189 if len(id) == 20:
1190 1190 # possibly a binary node
1191 1191 # odds of a binary node being all hex in ASCII are 1 in 10**25
1192 1192 try:
1193 1193 node = id
1194 1194 self.rev(node) # quick search the index
1195 1195 return node
1196 1196 except error.LookupError:
1197 1197 pass # may be partial hex id
1198 1198 try:
1199 1199 # str(rev)
1200 1200 rev = int(id)
1201 1201 if "%d" % rev != id:
1202 1202 raise ValueError
1203 1203 if rev < 0:
1204 1204 rev = len(self) + rev
1205 1205 if rev < 0 or rev >= len(self):
1206 1206 raise ValueError
1207 1207 return self.node(rev)
1208 1208 except (ValueError, OverflowError):
1209 1209 pass
1210 1210 if len(id) == 40:
1211 1211 try:
1212 1212 # a full hex nodeid?
1213 1213 node = bin(id)
1214 1214 self.rev(node)
1215 1215 return node
1216 1216 except (TypeError, error.LookupError):
1217 1217 pass
1218 1218
1219 1219 def _partialmatch(self, id):
1220 1220 # we don't care about wdirfilenodeids as they should always be full hashes
1221 1221 maybewdir = wdirhex.startswith(id)
1222 1222 try:
1223 1223 partial = self.index.partialmatch(id)
1224 1224 if partial and self.hasnode(partial):
1225 1225 if maybewdir:
1226 1226 # single 'ff...' match in radix tree, ambiguous with wdir
1227 1227 raise error.RevlogError
1228 1228 return partial
1229 1229 if maybewdir:
1230 1230 # no 'ff...' match in radix tree, wdir identified
1231 1231 raise error.WdirUnsupported
1232 1232 return None
1233 1233 except error.RevlogError:
1234 1234 # parsers.c radix tree lookup gave multiple matches
1235 1235 # fast path: for unfiltered changelog, radix tree is accurate
1236 1236 if not getattr(self, 'filteredrevs', None):
1237 1237 raise error.AmbiguousPrefixLookupError(
1238 1238 id, self.indexfile, _('ambiguous identifier'))
1239 1239 # fall through to slow path that filters hidden revisions
1240 1240 except (AttributeError, ValueError):
1241 1241 # we are pure python, or key was too short to search radix tree
1242 1242 pass
1243 1243
1244 1244 if id in self._pcache:
1245 1245 return self._pcache[id]
1246 1246
1247 1247 if len(id) <= 40:
1248 1248 try:
1249 1249 # hex(node)[:...]
1250 1250 l = len(id) // 2 # grab an even number of digits
1251 1251 prefix = bin(id[:l * 2])
1252 1252 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1253 1253 nl = [n for n in nl if hex(n).startswith(id) and
1254 1254 self.hasnode(n)]
1255 1255 if nullhex.startswith(id):
1256 1256 nl.append(nullid)
1257 1257 if len(nl) > 0:
1258 1258 if len(nl) == 1 and not maybewdir:
1259 1259 self._pcache[id] = nl[0]
1260 1260 return nl[0]
1261 1261 raise error.AmbiguousPrefixLookupError(
1262 1262 id, self.indexfile, _('ambiguous identifier'))
1263 1263 if maybewdir:
1264 1264 raise error.WdirUnsupported
1265 1265 return None
1266 1266 except TypeError:
1267 1267 pass
1268 1268
1269 1269 def lookup(self, id):
1270 1270 """locate a node based on:
1271 1271 - revision number or str(revision number)
1272 1272 - nodeid or subset of hex nodeid
1273 1273 """
1274 1274 n = self._match(id)
1275 1275 if n is not None:
1276 1276 return n
1277 1277 n = self._partialmatch(id)
1278 1278 if n:
1279 1279 return n
1280 1280
1281 1281 raise error.LookupError(id, self.indexfile, _('no match found'))
1282 1282
1283 1283 def shortest(self, node, minlength=1):
1284 1284 """Find the shortest unambiguous prefix that matches node."""
1285 1285 def isvalid(prefix):
1286 1286 try:
1287 1287 node = self._partialmatch(prefix)
1288 1288 except error.AmbiguousPrefixLookupError:
1289 1289 return False
1290 1290 except error.WdirUnsupported:
1291 1291 # single 'ff...' match
1292 1292 return True
1293 1293 if node is None:
1294 1294 raise error.LookupError(node, self.indexfile, _('no node'))
1295 1295 return True
1296 1296
1297 1297 def maybewdir(prefix):
1298 1298 return all(c == 'f' for c in prefix)
1299 1299
1300 1300 hexnode = hex(node)
1301 1301
1302 1302 def disambiguate(hexnode, minlength):
1303 1303 """Disambiguate against wdirid."""
1304 1304 for length in range(minlength, 41):
1305 1305 prefix = hexnode[:length]
1306 1306 if not maybewdir(prefix):
1307 1307 return prefix
1308 1308
1309 1309 if not getattr(self, 'filteredrevs', None):
1310 1310 try:
1311 1311 length = max(self.index.shortest(node), minlength)
1312 1312 return disambiguate(hexnode, length)
1313 1313 except error.RevlogError:
1314 1314 if node != wdirid:
1315 1315 raise error.LookupError(node, self.indexfile, _('no node'))
1316 1316 except AttributeError:
1317 1317 # Fall through to pure code
1318 1318 pass
1319 1319
1320 1320 if node == wdirid:
1321 1321 for length in range(minlength, 41):
1322 1322 prefix = hexnode[:length]
1323 1323 if isvalid(prefix):
1324 1324 return prefix
1325 1325
1326 1326 for length in range(minlength, 41):
1327 1327 prefix = hexnode[:length]
1328 1328 if isvalid(prefix):
1329 1329 return disambiguate(hexnode, length)
1330 1330
1331 1331 def cmp(self, node, text):
1332 1332 """compare text with a given file revision
1333 1333
1334 1334 returns True if text is different from what is stored.
1335 1335 """
1336 1336 p1, p2 = self.parents(node)
1337 1337 return storageutil.hashrevisionsha1(text, p1, p2) != node
1338 1338
1339 1339 def _cachesegment(self, offset, data):
1340 1340 """Add a segment to the revlog cache.
1341 1341
1342 1342 Accepts an absolute offset and the data that is at that location.
1343 1343 """
1344 1344 o, d = self._chunkcache
1345 1345 # try to add to existing cache
1346 1346 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1347 1347 self._chunkcache = o, d + data
1348 1348 else:
1349 1349 self._chunkcache = offset, data
1350 1350
1351 1351 def _readsegment(self, offset, length, df=None):
1352 1352 """Load a segment of raw data from the revlog.
1353 1353
1354 1354 Accepts an absolute offset, length to read, and an optional existing
1355 1355 file handle to read from.
1356 1356
1357 1357 If an existing file handle is passed, it will be seeked and the
1358 1358 original seek position will NOT be restored.
1359 1359
1360 1360 Returns a str or buffer of raw byte data.
1361 1361
1362 1362 Raises if the requested number of bytes could not be read.
1363 1363 """
1364 1364 # Cache data both forward and backward around the requested
1365 1365 # data, in a fixed size window. This helps speed up operations
1366 1366 # involving reading the revlog backwards.
1367 1367 cachesize = self._chunkcachesize
1368 1368 realoffset = offset & ~(cachesize - 1)
1369 1369 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1370 1370 - realoffset)
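# With the default 64 KiB cache window (illustrative numbers): a request
# for 100 bytes at offset 70000 is widened to 65536 bytes at offset 65536.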
1371 1371 with self._datareadfp(df) as df:
1372 1372 df.seek(realoffset)
1373 1373 d = df.read(reallength)
1374 1374
1375 1375 self._cachesegment(realoffset, d)
1376 1376 if offset != realoffset or reallength != length:
1377 1377 startoffset = offset - realoffset
1378 1378 if len(d) - startoffset < length:
1379 1379 raise error.RevlogError(
1380 1380 _('partial read of revlog %s; expected %d bytes from '
1381 1381 'offset %d, got %d') %
1382 1382 (self.indexfile if self._inline else self.datafile,
1383 1383 length, realoffset, len(d) - startoffset))
1384 1384
1385 1385 return util.buffer(d, startoffset, length)
1386 1386
1387 1387 if len(d) < length:
1388 1388 raise error.RevlogError(
1389 1389 _('partial read of revlog %s; expected %d bytes from offset '
1390 1390 '%d, got %d') %
1391 1391 (self.indexfile if self._inline else self.datafile,
1392 1392 length, offset, len(d)))
1393 1393
1394 1394 return d
1395 1395
1396 1396 def _getsegment(self, offset, length, df=None):
1397 1397 """Obtain a segment of raw data from the revlog.
1398 1398
1399 1399 Accepts an absolute offset, length of bytes to obtain, and an
1400 1400 optional file handle to the already-opened revlog. If the file
1401 1401 handle is used, its original seek position will not be preserved.
1402 1402
1403 1403 Requests for data may be returned from a cache.
1404 1404
1405 1405 Returns a str or a buffer instance of raw byte data.
1406 1406 """
1407 1407 o, d = self._chunkcache
1408 1408 l = len(d)
1409 1409
1410 1410 # is it in the cache?
1411 1411 cachestart = offset - o
1412 1412 cacheend = cachestart + length
1413 1413 if cachestart >= 0 and cacheend <= l:
1414 1414 if cachestart == 0 and cacheend == l:
1415 1415 return d # avoid a copy
1416 1416 return util.buffer(d, cachestart, cacheend - cachestart)
1417 1417
1418 1418 return self._readsegment(offset, length, df=df)
1419 1419
1420 1420 def _getsegmentforrevs(self, startrev, endrev, df=None):
1421 1421 """Obtain a segment of raw data corresponding to a range of revisions.
1422 1422
1423 1423 Accepts the start and end revisions and an optional already-open
1424 1424 file handle to be used for reading. If the file handle is used, its
1425 1425 seek position will not be preserved.
1426 1426
1427 1427 Requests for data may be satisfied by a cache.
1428 1428
1429 1429 Returns a 2-tuple of (offset, data) for the requested range of
1430 1430 revisions. Offset is the integer offset from the beginning of the
1431 1431 revlog and data is a str or buffer of the raw byte data.
1432 1432
1433 1433 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1434 1434 to determine where each revision's data begins and ends.
1435 1435 """
1436 1436 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1437 1437 # (functions are expensive).
1438 1438 index = self.index
1439 1439 istart = index[startrev]
1440 1440 start = int(istart[0] >> 16)
1441 1441 if startrev == endrev:
1442 1442 end = start + istart[1]
1443 1443 else:
1444 1444 iend = index[endrev]
1445 1445 end = int(iend[0] >> 16) + iend[1]
1446 1446
1447 1447 if self._inline:
1448 1448 start += (startrev + 1) * self._io.size
1449 1449 end += (endrev + 1) * self._io.size
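# e.g. with 64-byte index entries, revision 0 of an inline revlog has
# its data 64 bytes past the recorded offset (illustrative numbers).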
1450 1450 length = end - start
1451 1451
1452 1452 return start, self._getsegment(start, length, df=df)
1453 1453
1454 1454 def _chunk(self, rev, df=None):
1455 1455 """Obtain a single decompressed chunk for a revision.
1456 1456
1457 1457 Accepts an integer revision and an optional already-open file handle
1458 1458 to be used for reading. If used, the seek position of the file will not
1459 1459 be preserved.
1460 1460
1461 1461 Returns a str holding uncompressed data for the requested revision.
1462 1462 """
1463 1463 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1464 1464
1465 1465 def _chunks(self, revs, df=None, targetsize=None):
1466 1466 """Obtain decompressed chunks for the specified revisions.
1467 1467
1468 1468 Accepts an iterable of numeric revisions that are assumed to be in
1469 1469 ascending order. Also accepts an optional already-open file handle
1470 1470 to be used for reading. If used, the seek position of the file will
1471 1471 not be preserved.
1472 1472
1473 1473 This function is similar to calling ``self._chunk()`` multiple times,
1474 1474 but is faster.
1475 1475
1476 1476 Returns a list with decompressed data for each requested revision.
1477 1477 """
1478 1478 if not revs:
1479 1479 return []
1480 1480 start = self.start
1481 1481 length = self.length
1482 1482 inline = self._inline
1483 1483 iosize = self._io.size
1484 1484 buffer = util.buffer
1485 1485
1486 1486 l = []
1487 1487 ladd = l.append
1488 1488
1489 1489 if not self._withsparseread:
1490 1490 slicedchunks = (revs,)
1491 1491 else:
1492 1492 slicedchunks = deltautil.slicechunk(self, revs,
1493 1493 targetsize=targetsize)
1494 1494
1495 1495 for revschunk in slicedchunks:
1496 1496 firstrev = revschunk[0]
1497 1497 # Skip trailing revisions with empty diff
1498 1498 for lastrev in revschunk[::-1]:
1499 1499 if length(lastrev) != 0:
1500 1500 break
1501 1501
1502 1502 try:
1503 1503 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1504 1504 except OverflowError:
1505 1505 # issue4215 - we can't cache a run of chunks greater than
1506 1506 # 2G on Windows
1507 1507 return [self._chunk(rev, df=df) for rev in revschunk]
1508 1508
1509 1509 decomp = self.decompress
1510 1510 for rev in revschunk:
1511 1511 chunkstart = start(rev)
1512 1512 if inline:
1513 1513 chunkstart += (rev + 1) * iosize
1514 1514 chunklength = length(rev)
1515 1515 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1516 1516
1517 1517 return l
1518 1518
1519 1519 def _chunkclear(self):
1520 1520 """Clear the raw chunk cache."""
1521 1521 self._chunkcache = (0, '')
1522 1522
1523 1523 def deltaparent(self, rev):
1524 1524 """return deltaparent of the given revision"""
1525 1525 base = self.index[rev][3]
1526 1526 if base == rev:
1527 1527 return nullrev
1528 1528 elif self._generaldelta:
1529 1529 return base
1530 1530 else:
1531 1531 return rev - 1
1532 1532
1533 1533 def issnapshot(self, rev):
1534 1534 """tells whether rev is a snapshot
1535 1535 """
1536 1536 if rev == nullrev:
1537 1537 return True
1538 1538 deltap = self.deltaparent(rev)
1539 1539 if deltap == nullrev:
1540 1540 return True
1541 1541 p1, p2 = self.parentrevs(rev)
1542 1542 if deltap in (p1, p2):
1543 1543 return False
1544 1544 return self.issnapshot(deltap)
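# Sketch of the rules above: a full text (delta parent nullrev) is a
# snapshot; a delta against one of the revision's own parents is not;
# any other delta is a snapshot iff its delta parent is one.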
1545 1545
1546 1546 def snapshotdepth(self, rev):
1547 1547 """number of snapshot in the chain before this one"""
1548 1548 if not self.issnapshot(rev):
1549 1549 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1550 1550 return len(self._deltachain(rev)[0]) - 1
1551 1551
1552 1552 def revdiff(self, rev1, rev2):
1553 1553 """return or calculate a delta between two revisions
1554 1554
1555 1555 The delta calculated is in binary form and is intended to be written to
1556 1556 revlog data directly. So this function needs raw revision data.
1557 1557 """
1558 1558 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1559 1559 return bytes(self._chunk(rev2))
1560 1560
1561 1561 return mdiff.textdiff(self.revision(rev1, raw=True),
1562 1562 self.revision(rev2, raw=True))
1563 1563
1564 1564 def revision(self, nodeorrev, _df=None, raw=False):
1565 1565 """return an uncompressed revision of a given node or revision
1566 1566 number.
1567 1567
1568 1568 _df - an existing file handle to read from. (internal-only)
1569 1569 raw - an optional argument specifying if the revision data is to be
1570 1570 treated as raw data when applying flag transforms. 'raw' should be set
1571 1571 to True when generating changegroups or in debug commands.
1572 1572 """
1573 1573 if isinstance(nodeorrev, int):
1574 1574 rev = nodeorrev
1575 1575 node = self.node(rev)
1576 1576 else:
1577 1577 node = nodeorrev
1578 1578 rev = None
1579 1579
1580 1580 cachedrev = None
1581 1581 flags = None
1582 1582 rawtext = None
1583 1583 if node == nullid:
1584 1584 return ""
1585 1585 if self._revisioncache:
1586 1586 if self._revisioncache[0] == node:
1587 1587 # _cache only stores rawtext
1588 1588 if raw:
1589 1589 return self._revisioncache[2]
1590 1590 # duplicated, but good for perf
1591 1591 if rev is None:
1592 1592 rev = self.rev(node)
1593 1593 if flags is None:
1594 1594 flags = self.flags(rev)
1595 1595 # no extra flags set, no flag processor runs, text = rawtext
1596 1596 if flags == REVIDX_DEFAULT_FLAGS:
1597 1597 return self._revisioncache[2]
1598 1598 # rawtext is reusable. need to run flag processor
1599 1599 rawtext = self._revisioncache[2]
1600 1600
1601 1601 cachedrev = self._revisioncache[1]
1602 1602
1603 1603 # look up what we need to read
1604 1604 if rawtext is None:
1605 1605 if rev is None:
1606 1606 rev = self.rev(node)
1607 1607
1608 1608 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1609 1609 if stopped:
1610 1610 rawtext = self._revisioncache[2]
1611 1611
1612 1612 # drop cache to save memory
1613 1613 self._revisioncache = None
1614 1614
1615 1615 targetsize = None
1616 1616 rawsize = self.index[rev][2]
1617 1617 if 0 <= rawsize:
1618 1618 targetsize = 4 * rawsize
1619 1619
1620 1620 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1621 1621 if rawtext is None:
1622 1622 rawtext = bytes(bins[0])
1623 1623 bins = bins[1:]
1624 1624
1625 1625 rawtext = mdiff.patches(rawtext, bins)
1626 1626 self._revisioncache = (node, rev, rawtext)
1627 1627
1628 1628 if flags is None:
1629 1629 if rev is None:
1630 1630 rev = self.rev(node)
1631 1631 flags = self.flags(rev)
1632 1632
1633 1633 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1634 1634 if validatehash:
1635 1635 self.checkhash(text, node, rev=rev)
1636 1636
1637 1637 return text
1638 1638
1639 1639 def hash(self, text, p1, p2):
1640 1640 """Compute a node hash.
1641 1641
1642 1642 Available as a function so that subclasses can replace the hash
1643 1643 as needed.
1644 1644 """
1645 1645 return storageutil.hashrevisionsha1(text, p1, p2)
1646 1646
1647 1647 def _processflags(self, text, flags, operation, raw=False):
1648 1648 """Inspect revision data flags and applies transforms defined by
1649 1649 registered flag processors.
1650 1650
1651 1651 ``text`` - the revision data to process
1652 1652 ``flags`` - the revision flags
1653 1653 ``operation`` - the operation being performed (read or write)
1654 1654 ``raw`` - an optional argument describing if the raw transform should be
1655 1655 applied.
1656 1656
1657 1657 This method processes the flags in the order (or reverse order if
1658 1658 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1659 1659 flag processors registered for present flags. The order of flags defined
1660 1660 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not
1660 1660 guaranteed to commute.
1661 1661
1662 1662 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1663 1663 processed text and ``validatehash`` is a bool indicating whether the
1664 1664 returned text should be checked for hash integrity.
1665 1665
1666 1666 Note: If the ``raw`` argument is set, it has precedence over the
1667 1667 operation and will only update the value of ``validatehash``.
1668 1668 """
1669 1669 # fast path: no flag processors will run
1670 1670 if flags == 0:
1671 1671 return text, True
1672 1672 if operation not in ('read', 'write'):
1673 1673 raise error.ProgrammingError(_("invalid '%s' operation") %
1674 1674 operation)
1675 1675 # Check all flags are known.
1676 1676 if flags & ~REVIDX_KNOWN_FLAGS:
1677 1677 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1678 1678 (flags & ~REVIDX_KNOWN_FLAGS))
1679 1679 validatehash = True
1680 1680 # Depending on the operation (read or write), the order might be
1681 1681 # reversed due to non-commutative transforms.
1682 1682 orderedflags = REVIDX_FLAGS_ORDER
1683 1683 if operation == 'write':
1684 1684 orderedflags = reversed(orderedflags)
1685 1685
1686 1686 for flag in orderedflags:
1687 1687 # If a flagprocessor has been registered for a known flag, apply the
1688 1688 # related operation transform and update result tuple.
1689 1689 if flag & flags:
1690 1690 vhash = True
1691 1691
1692 1692 if flag not in self._flagprocessors:
1693 1693 message = _("missing processor for flag '%#x'") % (flag)
1694 1694 raise error.RevlogError(message)
1695 1695
1696 1696 processor = self._flagprocessors[flag]
1697 1697 if processor is not None:
1698 1698 readtransform, writetransform, rawtransform = processor
1699 1699
1700 1700 if raw:
1701 1701 vhash = rawtransform(self, text)
1702 1702 elif operation == 'read':
1703 1703 text, vhash = readtransform(self, text)
1704 1704 else: # write operation
1705 1705 text, vhash = writetransform(self, text)
1706 1706 validatehash = validatehash and vhash
1707 1707
1708 1708 return text, validatehash
1709 1709
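# A minimal sketch of the processor triple consumed above (hypothetical
# transforms; real processors are registered through addflagprocessor()).
# Read/write transforms return (text, validatehash); the raw transform
# only returns the validatehash boolean used on the ``raw=True`` path:
#
#     def _readxform(rl, text):
#         return transformfromstorage(text), True   # hypothetical helper
#     def _writexform(rl, text):
#         return transformtostorage(text), True     # hypothetical helper
#     def _rawxform(rl, text):
#         return False  # rawtext does not hash like the processed text
#
#     processor = (_readxform, _writexform, _rawxform)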
1710 1710 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1711 1711 """Check node hash integrity.
1712 1712
1713 1713 Available as a function so that subclasses can extend hash mismatch
1714 1714 behaviors as needed.
1715 1715 """
1716 1716 try:
1717 1717 if p1 is None and p2 is None:
1718 1718 p1, p2 = self.parents(node)
1719 1719 if node != self.hash(text, p1, p2):
1720 1720 # Clear the revision cache on hash failure. The revision cache
1721 1721 # only stores the raw revision and clearing the cache does have
1722 1722 # the side-effect that we won't have a cache hit when the raw
1723 1723 # revision data is accessed. But this case should be rare and
1724 1724 # it is extra work to teach the cache about the hash
1725 1725 # verification state.
1726 1726 if self._revisioncache and self._revisioncache[0] == node:
1727 1727 self._revisioncache = None
1728 1728
1729 1729 revornode = rev
1730 1730 if revornode is None:
1731 1731 revornode = templatefilters.short(hex(node))
1732 1732 raise error.RevlogError(_("integrity check failed on %s:%s")
1733 1733 % (self.indexfile, pycompat.bytestr(revornode)))
1734 1734 except error.RevlogError:
1735 1735 if self._censorable and storageutil.iscensoredtext(text):
1736 1736 raise error.CensoredNodeError(self.indexfile, node, text)
1737 1737 raise
1738 1738
1739 1739 def _enforceinlinesize(self, tr, fp=None):
1740 1740 """Check if the revlog is too big for inline and convert if so.
1741 1741
1742 1742 This should be called after revisions are added to the revlog. If the
1743 1743 revlog has grown too large to be an inline revlog, it will convert it
1744 1744 to use multiple index and data files.
1745 1745 """
1746 1746 tiprev = len(self) - 1
1747 1747 if (not self._inline or
1748 1748 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1749 1749 return
1750 1750
1751 1751 trinfo = tr.find(self.indexfile)
1752 1752 if trinfo is None:
1753 1753 raise error.RevlogError(_("%s not found in the transaction")
1754 1754 % self.indexfile)
1755 1755
1756 1756 trindex = trinfo[2]
1757 1757 if trindex is not None:
1758 1758 dataoff = self.start(trindex)
1759 1759 else:
1760 1760 # revlog was stripped at start of transaction, use all leftover data
1761 1761 trindex = len(self) - 1
1762 1762 dataoff = self.end(tiprev)
1763 1763
1764 1764 tr.add(self.datafile, dataoff)
1765 1765
1766 1766 if fp:
1767 1767 fp.flush()
1768 1768 fp.close()
1769 1769 # We can't use the cached file handle after close(). So prevent
1770 1770 # its usage.
1771 1771 self._writinghandles = None
1772 1772
1773 1773 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1774 1774 for r in self:
1775 1775 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1776 1776
1777 1777 with self._indexfp('w') as fp:
1778 1778 self.version &= ~FLAG_INLINE_DATA
1779 1779 self._inline = False
1780 1780 io = self._io
1781 1781 for i in self:
1782 1782 e = io.packentry(self.index[i], self.node, self.version, i)
1783 1783 fp.write(e)
1784 1784
1785 1785 # the temp file replaces the real index when we exit the context
1786 1786 # manager
1787 1787
1788 1788 tr.replace(self.indexfile, trindex * self._io.size)
1789 1789 self._chunkclear()
1790 1790
1791 1791 def _nodeduplicatecallback(self, transaction, node):
1792 1792 """called when trying to add a node already stored.
1793 1793 """
1794 1794
1795 1795 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1796 1796 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1797 1797 """add a revision to the log
1798 1798
1799 1799 text - the revision data to add
1800 1800 transaction - the transaction object used for rollback
1801 1801 link - the linkrev data to add
1802 1802 p1, p2 - the parent nodeids of the revision
1803 1803 cachedelta - an optional precomputed delta
1804 1804 node - nodeid of revision; typically node is not specified, and it is
1805 1805 computed by default as hash(text, p1, p2), however subclasses might
1806 1806 use different hashing method (and override checkhash() in such case)
1807 1807 flags - the known flags to set on the revision
1808 1808 deltacomputer - an optional deltacomputer instance shared between
1809 1809 multiple calls
1810 1810 """
1811 1811 if link == nullrev:
1812 1812 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1813 1813 % self.indexfile)
1814 1814
1815 1815 if flags:
1816 1816 node = node or self.hash(text, p1, p2)
1817 1817
1818 1818 rawtext, validatehash = self._processflags(text, flags, 'write')
1819 1819
1820 1820 # If the flag processor modifies the revision data, ignore any provided
1821 1821 # cachedelta.
1822 1822 if rawtext != text:
1823 1823 cachedelta = None
1824 1824
1825 1825 if len(rawtext) > _maxentrysize:
1826 1826 raise error.RevlogError(
1827 1827 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1828 1828 % (self.indexfile, len(rawtext)))
1829 1829
1830 1830 node = node or self.hash(rawtext, p1, p2)
1831 1831 if node in self.nodemap:
1832 1832 return node
1833 1833
1834 1834 if validatehash:
1835 1835 self.checkhash(rawtext, node, p1=p1, p2=p2)
1836 1836
1837 1837 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1838 1838 flags, cachedelta=cachedelta,
1839 1839 deltacomputer=deltacomputer)
1840 1840
1841 1841 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1842 1842 cachedelta=None, deltacomputer=None):
1843 1843 """add a raw revision with known flags, node and parents
1844 1844 useful when reusing a revision not stored in this revlog (e.g. received
1845 1845 over the wire, or read from an external bundle).
1846 1846 """
1847 1847 dfh = None
1848 1848 if not self._inline:
1849 1849 dfh = self._datafp("a+")
1850 1850 ifh = self._indexfp("a+")
1851 1851 try:
1852 1852 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1853 1853 flags, cachedelta, ifh, dfh,
1854 1854 deltacomputer=deltacomputer)
1855 1855 finally:
1856 1856 if dfh:
1857 1857 dfh.close()
1858 1858 ifh.close()
1859 1859
1860 1860 def compress(self, data):
1861 1861 """Generate a possibly-compressed representation of data."""
1862 1862 if not data:
1863 1863 return '', data
1864 1864
1865 1865 compressed = self._compressor.compress(data)
1866 1866
1867 1867 if compressed:
1868 1868 # The revlog compressor added the header in the returned data.
1869 1869 return '', compressed
1870 1870
1871 1871 if data[0:1] == '\0':
1872 1872 return '', data
1873 1873 return 'u', data
1874 1874
1875 1875 def decompress(self, data):
1876 1876 """Decompress a revlog chunk.
1877 1877
1878 1878 The chunk is expected to begin with a header identifying the
1879 1879 format type so it can be routed to an appropriate decompressor.
1880 1880 """
1881 1881 if not data:
1882 1882 return data
1883 1883
1884 1884 # Revlogs are read much more frequently than they are written and many
1885 1885 # chunks only take microseconds to decompress, so performance is
1886 1886 # important here.
1887 1887 #
1888 1888 # We can make a few assumptions about revlogs:
1889 1889 #
1890 1890 # 1) the majority of chunks will be compressed (as opposed to inline
1891 1891 # raw data).
1892 1892 # 2) decompressing *any* data will likely be at least 10x slower than
1893 1893 # returning raw inline data.
1894 1894 # 3) we want to prioritize common and officially supported compression
1895 1895 # engines
1896 1896 #
1897 1897 # It follows that we want to optimize for "decompress compressed data
1898 1898 # when encoded with common and officially supported compression engines"
1899 1899 # case over "raw data" and "data encoded by less common or non-official
1900 1900 # compression engines." That is why we have the inline lookup first
1901 1901 # followed by the compengines lookup.
1902 1902 #
1903 1903 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1904 1904 # compressed chunks. And this matters for changelog and manifest reads.
1905 1905 t = data[0:1]
1906 1906
1907 1907 if t == 'x':
1908 1908 try:
1909 1909 return _zlibdecompress(data)
1910 1910 except zlib.error as e:
1911 1911 raise error.RevlogError(_('revlog decompress error: %s') %
1912 1912 stringutil.forcebytestr(e))
1913 1913 # '\0' is more common than 'u' so it goes first.
1914 1914 elif t == '\0':
1915 1915 return data
1916 1916 elif t == 'u':
1917 1917 return util.buffer(data, 1)
1918 1918
1919 1919 try:
1920 1920 compressor = self._decompressors[t]
1921 1921 except KeyError:
1922 1922 try:
1923 1923 engine = util.compengines.forrevlogheader(t)
1924 1924 compressor = engine.revlogcompressor()
1925 1925 self._decompressors[t] = compressor
1926 1926 except KeyError:
1927 1927 raise error.RevlogError(_('unknown compression type %r') % t)
1928 1928
1929 1929 return compressor.decompress(data)
1930 1930
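# Header-byte routing, summarized (a sketch mirroring compress() above):
#
#     decompress(b'')          -> b''                  (empty chunk)
#     decompress(b'uhello')    -> buffer over b'hello' (stored verbatim)
#     decompress(b'\0...')     -> returned as-is       (raw, NUL-leading)
#     decompress(b'x\x9c...')  -> zlib-inflated        ('x' is zlib magic)
#     anything else            -> routed through
#                                 util.compengines.forrevlogheader(t)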
1931 1931 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1932 1932 cachedelta, ifh, dfh, alwayscache=False,
1933 1933 deltacomputer=None):
1934 1934 """internal function to add revisions to the log
1935 1935
1936 1936 see addrevision for argument descriptions.
1937 1937
1938 1938 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1939 1939
1940 1940 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1941 1941 be used.
1942 1942
1943 1943 invariants:
1944 1944 - rawtext is optional (can be None); if not set, cachedelta must be set.
1945 1945 if both are set, they must correspond to each other.
1946 1946 """
1947 1947 if node == nullid:
1948 1948 raise error.RevlogError(_("%s: attempt to add null revision") %
1949 1949 self.indexfile)
1950 1950 if node == wdirid or node in wdirfilenodeids:
1951 1951 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1952 1952 self.indexfile)
1953 1953
1954 1954 if self._inline:
1955 1955 fh = ifh
1956 1956 else:
1957 1957 fh = dfh
1958 1958
1959 1959 btext = [rawtext]
1960 1960
1961 1961 curr = len(self)
1962 1962 prev = curr - 1
1963 1963 offset = self.end(prev)
1964 1964 p1r, p2r = self.rev(p1), self.rev(p2)
1965 1965
1966 1966 # full versions are inserted when the needed deltas
1967 1967 # become comparable to the uncompressed text
1968 1968 if rawtext is None:
1969 1969 # need rawtext size, before changed by flag processors, which is
1970 1970 # the non-raw size. use revlog explicitly to avoid filelog's extra
1971 1971 # logic that might remove metadata size.
1972 1972 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1973 1973 cachedelta[1])
1974 1974 else:
1975 1975 textlen = len(rawtext)
1976 1976
1977 1977 if deltacomputer is None:
1978 1978 deltacomputer = deltautil.deltacomputer(self)
1979 1979
1980 1980 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1981 1981
1982 1982 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1983 1983
1984 1984 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1985 1985 deltainfo.base, link, p1r, p2r, node)
1986 1986 self.index.append(e)
1987 1987 self.nodemap[node] = curr
1988 1988
1989 1989 # Reset the pure node cache start lookup offset to account for new
1990 1990 # revision.
1991 1991 if self._nodepos is not None:
1992 1992 self._nodepos = curr
1993 1993
1994 1994 entry = self._io.packentry(e, self.node, self.version, curr)
1995 1995 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1996 1996 link, offset)
1997 1997
1998 1998 rawtext = btext[0]
1999 1999
2000 2000 if alwayscache and rawtext is None:
2001 2001 rawtext = deltacomputer.buildtext(revinfo, fh)
2002 2002
2003 2003 if type(rawtext) == bytes: # only accept immutable objects
2004 2004 self._revisioncache = (node, curr, rawtext)
2005 2005 self._chainbasecache[curr] = deltainfo.chainbase
2006 2006 return node
2007 2007
2008 2008 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2009 2009 # Files opened in a+ mode have inconsistent behavior on various
2010 2010 # platforms. Windows requires that a file positioning call be made
2011 2011 # when the file handle transitions between reads and writes. See
2012 2012 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2013 2013 # platforms, Python or the platform itself can be buggy. Some versions
2014 2014 # of Solaris have been observed to not append at the end of the file
2015 2015 # if the file was seeked to before the end. See issue4943 for more.
2016 2016 #
2017 2017 # We work around this issue by inserting a seek() before writing.
2018 2018 # Note: This is likely not necessary on Python 3. However, because
2019 2019 # the file handle is reused for reads and may be seeked there, we need
2020 2020 # to be careful before changing this.
2021 2021 ifh.seek(0, os.SEEK_END)
2022 2022 if dfh:
2023 2023 dfh.seek(0, os.SEEK_END)
2024 2024
2025 2025 curr = len(self) - 1
2026 2026 if not self._inline:
2027 2027 transaction.add(self.datafile, offset)
2028 2028 transaction.add(self.indexfile, curr * len(entry))
2029 2029 if data[0]:
2030 2030 dfh.write(data[0])
2031 2031 dfh.write(data[1])
2032 2032 ifh.write(entry)
2033 2033 else:
2034 2034 offset += curr * self._io.size
2035 2035 transaction.add(self.indexfile, offset, curr)
2036 2036 ifh.write(entry)
2037 2037 ifh.write(data[0])
2038 2038 ifh.write(data[1])
2039 2039 self._enforceinlinesize(transaction, ifh)
2040 2040
2041 2041 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2042 2042 """
2043 2043 add a delta group
2044 2044
2045 2045 given a set of deltas, add them to the revision log. the
2046 2046 first delta is against its parent, which should be in our
2047 2047 log, the rest are against the previous delta.
2048 2048
2049 2049 If ``addrevisioncb`` is defined, it will be called with arguments of
2050 2050 this revlog and the node that was added.
2051 2051 """
2052 2052
2053 2053 if self._writinghandles:
2054 2054 raise error.ProgrammingError('cannot nest addgroup() calls')
2055 2055
2056 2056 nodes = []
2057 2057
2058 2058 r = len(self)
2059 2059 end = 0
2060 2060 if r:
2061 2061 end = self.end(r - 1)
2062 2062 ifh = self._indexfp("a+")
2063 2063 isize = r * self._io.size
2064 2064 if self._inline:
2065 2065 transaction.add(self.indexfile, end + isize, r)
2066 2066 dfh = None
2067 2067 else:
2068 2068 transaction.add(self.indexfile, isize, r)
2069 2069 transaction.add(self.datafile, end)
2070 2070 dfh = self._datafp("a+")
2071 2071 def flush():
2072 2072 if dfh:
2073 2073 dfh.flush()
2074 2074 ifh.flush()
2075 2075
2076 2076 self._writinghandles = (ifh, dfh)
2077 2077
2078 2078 try:
2079 2079 deltacomputer = deltautil.deltacomputer(self)
2080 2080 # loop through our set of deltas
2081 2081 for data in deltas:
2082 2082 node, p1, p2, linknode, deltabase, delta, flags = data
2083 2083 link = linkmapper(linknode)
2084 2084 flags = flags or REVIDX_DEFAULT_FLAGS
2085 2085
2086 2086 nodes.append(node)
2087 2087
2088 2088 if node in self.nodemap:
2089 2089 self._nodeduplicatecallback(transaction, node)
2090 2090 # this can happen if two branches make the same change
2091 2091 continue
2092 2092
2093 2093 for p in (p1, p2):
2094 2094 if p not in self.nodemap:
2095 2095 raise error.LookupError(p, self.indexfile,
2096 2096 _('unknown parent'))
2097 2097
2098 2098 if deltabase not in self.nodemap:
2099 2099 raise error.LookupError(deltabase, self.indexfile,
2100 2100 _('unknown delta base'))
2101 2101
2102 2102 baserev = self.rev(deltabase)
2103 2103
2104 2104 if baserev != nullrev and self.iscensored(baserev):
2105 2105 # if base is censored, delta must be full replacement in a
2106 2106 # single patch operation
2107 2107 hlen = struct.calcsize(">lll")
2108 2108 oldlen = self.rawsize(baserev)
2109 2109 newlen = len(delta) - hlen
2110 2110 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2111 2111 raise error.CensoredBaseError(self.indexfile,
2112 2112 self.node(baserev))
2113 2113
2114 2114 if not flags and self._peek_iscensored(baserev, delta, flush):
2115 2115 flags |= REVIDX_ISCENSORED
2116 2116
2117 2117 # We assume consumers of addrevisioncb will want to retrieve
2118 2118 # the added revision, which will require a call to
2119 2119 # revision(). revision() will fast path if there is a cache
2120 2120 # hit. So, we tell _addrevision() to always cache in this case.
2121 2121 # We're only using addgroup() in the context of changegroup
2122 2122 # generation so the revision data can always be handled as raw
2123 2123 # by the flagprocessor.
2124 2124 self._addrevision(node, None, transaction, link,
2125 2125 p1, p2, flags, (baserev, delta),
2126 2126 ifh, dfh,
2127 2127 alwayscache=bool(addrevisioncb),
2128 2128 deltacomputer=deltacomputer)
2129 2129
2130 2130 if addrevisioncb:
2131 2131 addrevisioncb(self, node)
2132 2132
2133 2133 if not dfh and not self._inline:
2134 2134 # addrevision switched from inline to conventional
2135 2135 # reopen the index
2136 2136 ifh.close()
2137 2137 dfh = self._datafp("a+")
2138 2138 ifh = self._indexfp("a+")
2139 2139 self._writinghandles = (ifh, dfh)
2140 2140 finally:
2141 2141 self._writinghandles = None
2142 2142
2143 2143 if dfh:
2144 2144 dfh.close()
2145 2145 ifh.close()
2146 2146
2147 2147 return nodes
2148 2148
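# Shape of one ``deltas`` entry consumed above (field order matches the
# unpacking at the top of the loop; this is a sketch, not an API spec):
#
#     (node, p1, p2, linknode, deltabase, delta, flags)
#
# ``delta`` is a binary patch against the revision ``deltabase``, and
# ``linkmapper(linknode)`` yields the changelog rev recorded as linkrev.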
2149 2149 def iscensored(self, rev):
2150 2150 """Check if a file revision is censored."""
2151 2151 if not self._censorable:
2152 2152 return False
2153 2153
2154 2154 return self.flags(rev) & REVIDX_ISCENSORED
2155 2155
2156 2156 def _peek_iscensored(self, baserev, delta, flush):
2157 2157 """Quickly check if a delta produces a censored revision."""
2158 2158 if not self._censorable:
2159 2159 return False
2160 2160
2161 2161 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2162 2162
2163 2163 def getstrippoint(self, minlink):
2164 2164 """find the minimum rev that must be stripped to strip the linkrev
2165 2165
2166 2166 Returns a tuple containing the minimum rev and a set of all revs that
2167 2167 have linkrevs that will be broken by this strip.
2168 2168 """
2169 2169 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2170 2170 self.headrevs(),
2171 2171 self.linkrev, self.parentrevs)
2172 2172
2173 2173 def strip(self, minlink, transaction):
2174 2174 """truncate the revlog on the first revision with a linkrev >= minlink
2175 2175
2176 2176 This function is called when we're stripping revision minlink and
2177 2177 its descendants from the repository.
2178 2178
2179 2179 We have to remove all revisions with linkrev >= minlink, because
2180 2180 the equivalent changelog revisions will be renumbered after the
2181 2181 strip.
2182 2182
2183 2183 So we truncate the revlog on the first of these revisions, and
2184 2184 trust that the caller has saved the revisions that shouldn't be
2185 2185 removed and that it'll re-add them after this truncation.
2186 2186 """
2187 2187 if len(self) == 0:
2188 2188 return
2189 2189
2190 2190 rev, _ = self.getstrippoint(minlink)
2191 2191 if rev == len(self):
2192 2192 return
2193 2193
2194 2194 # first truncate the files on disk
2195 2195 end = self.start(rev)
2196 2196 if not self._inline:
2197 2197 transaction.add(self.datafile, end)
2198 2198 end = rev * self._io.size
2199 2199 else:
2200 2200 end += rev * self._io.size
2201 2201
2202 2202 transaction.add(self.indexfile, end)
2203 2203
2204 2204 # then reset internal state in memory to forget those revisions
2205 2205 self._revisioncache = None
2206 2206 self._chaininfocache = {}
2207 2207 self._chunkclear()
2208 2208 for x in pycompat.xrange(rev, len(self)):
2209 2209 del self.nodemap[self.node(x)]
2210 2210
2211 2211 del self.index[rev:-1]
2212 2212 self._nodepos = None
2213 2213
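# Truncation arithmetic, illustrated (a sketch of the branches above): with
# a separate data file, the data file is cut at start(rev) and the index at
# rev * entrysize; for an inline revlog both interleave in the index file,
# so the single cut point is start(rev) + rev * entrysize.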
2214 2214 def checksize(self):
2215 2215 expected = 0
2216 2216 if len(self):
2217 2217 expected = max(0, self.end(len(self) - 1))
2218 2218
2219 2219 try:
2220 2220 with self._datafp() as f:
2221 2221 f.seek(0, 2)
2222 2222 actual = f.tell()
2223 2223 dd = actual - expected
2224 2224 except IOError as inst:
2225 2225 if inst.errno != errno.ENOENT:
2226 2226 raise
2227 2227 dd = 0
2228 2228
2229 2229 try:
2230 2230 f = self.opener(self.indexfile)
2231 2231 f.seek(0, 2)
2232 2232 actual = f.tell()
2233 2233 f.close()
2234 2234 s = self._io.size
2235 2235 i = max(0, actual // s)
2236 2236 di = actual - (i * s)
2237 2237 if self._inline:
2238 2238 databytes = 0
2239 2239 for r in self:
2240 2240 databytes += max(0, self.length(r))
2241 2241 dd = 0
2242 2242 di = actual - len(self) * s - databytes
2243 2243 except IOError as inst:
2244 2244 if inst.errno != errno.ENOENT:
2245 2245 raise
2246 2246 di = 0
2247 2247
2248 2248 return (dd, di)
2249 2249
2250 2250 def files(self):
2251 2251 res = [self.indexfile]
2252 2252 if not self._inline:
2253 2253 res.append(self.datafile)
2254 2254 return res
2255 2255
2256 2256 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2257 2257 assumehaveparentrevisions=False,
2258 2258 deltamode=repository.CG_DELTAMODE_STD):
2259 2259 if nodesorder not in ('nodes', 'storage', 'linear', None):
2260 2260 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2261 2261 nodesorder)
2262 2262
2263 2263 if nodesorder is None and not self._generaldelta:
2264 2264 nodesorder = 'storage'
2265 2265
2266 2266 if (not self._storedeltachains and
2267 2267 deltamode != repository.CG_DELTAMODE_PREV):
2268 2268 deltamode = repository.CG_DELTAMODE_FULL
2269 2269
2270 2270 return storageutil.emitrevisions(
2271 2271 self, nodes, nodesorder, revlogrevisiondelta,
2272 2272 deltaparentfn=self.deltaparent,
2273 2273 candeltafn=self.candelta,
2274 2274 rawsizefn=self.rawsize,
2275 2275 revdifffn=self.revdiff,
2276 2276 flagsfn=self.flags,
2277 2277 deltamode=deltamode,
2278 2278 revisiondata=revisiondata,
2279 2279 assumehaveparentrevisions=assumehaveparentrevisions)
2280 2280
2281 2281 DELTAREUSEALWAYS = 'always'
2282 2282 DELTAREUSESAMEREVS = 'samerevs'
2283 2283 DELTAREUSENEVER = 'never'
2284 2284
2285 2285 DELTAREUSEFULLADD = 'fulladd'
2286 2286
2287 2287 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2288 2288
2289 2289 def clone(self, tr, destrevlog, addrevisioncb=None,
2290 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2290 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2291 2291 """Copy this revlog to another, possibly with format changes.
2292 2292
2293 2293 The destination revlog will contain the same revisions and nodes.
2294 2294 However, it may not be bit-for-bit identical due to e.g. delta encoding
2295 2295 differences.
2296 2296
2297 2297 The ``deltareuse`` argument controls how deltas from the existing revlog
2298 2298 are preserved in the destination revlog. The argument can have the
2299 2299 following values:
2300 2300
2301 2301 DELTAREUSEALWAYS
2302 2302 Deltas will always be reused (if possible), even if the destination
2303 2303 revlog would not select the same revisions for the delta. This is the
2304 2304 fastest mode of operation.
2305 2305 DELTAREUSESAMEREVS
2306 2306 Deltas will be reused if the destination revlog would pick the same
2307 2307 revisions for the delta. This mode strikes a balance between speed
2308 2308 and optimization.
2309 2309 DELTAREUSENEVER
2310 2310 Deltas will never be reused. This is the slowest mode of execution.
2311 2311 This mode can be used to recompute deltas (e.g. if the diff/delta
2312 2312 algorithm changes).
2313 2313
2314 2314 Delta computation can be slow, so the choice of delta reuse policy can
2315 2315 significantly affect run time.
2316 2316
2317 2317 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2318 2318 two extremes. Deltas will be reused if they are appropriate. But if the
2319 2319 delta could choose a better revision, it will do so. This means if you
2320 2320 are converting a non-generaldelta revlog to a generaldelta revlog,
2321 2321 deltas will be recomputed if the delta's parent isn't a parent of the
2322 2322 revision.
2323 2323
2324 In addition to the delta policy, the ``deltabothparents`` argument
2325 controls whether to compute deltas against both parents for merges.
2326 By default, the current default is used.
2324 In addition to the delta policy, the ``forcedeltabothparents``
2325 argument controls whether to force computing deltas against both parents
2326 for merges. When unset, the current default is used.
2327 2327 """
2328 2328 if deltareuse not in self.DELTAREUSEALL:
2329 2329 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2330 2330
2331 2331 if len(destrevlog):
2332 2332 raise ValueError(_('destination revlog is not empty'))
2333 2333
2334 2334 if getattr(self, 'filteredrevs', None):
2335 2335 raise ValueError(_('source revlog has filtered revisions'))
2336 2336 if getattr(destrevlog, 'filteredrevs', None):
2337 2337 raise ValueError(_('destination revlog has filtered revisions'))
2338 2338
2339 2339 # lazydeltabase controls whether to reuse a cached delta, if possible.
2340 2340 oldlazydeltabase = destrevlog._lazydeltabase
2341 2341 oldamd = destrevlog._deltabothparents
2342 2342
2343 2343 try:
2344 2344 if deltareuse == self.DELTAREUSEALWAYS:
2345 2345 destrevlog._lazydeltabase = True
2346 2346 elif deltareuse == self.DELTAREUSESAMEREVS:
2347 2347 destrevlog._lazydeltabase = False
2348 2348
2349 destrevlog._deltabothparents = deltabothparents or oldamd
2349 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2350 2350
2351 2351 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2352 2352 self.DELTAREUSESAMEREVS)
2353 2353
2354 2354 deltacomputer = deltautil.deltacomputer(destrevlog)
2355 2355 index = self.index
2356 2356 for rev in self:
2357 2357 entry = index[rev]
2358 2358
2359 2359 # Some classes override linkrev to take filtered revs into
2360 2360 # account. Use raw entry from index.
2361 2361 flags = entry[0] & 0xffff
2362 2362 linkrev = entry[4]
2363 2363 p1 = index[entry[5]][7]
2364 2364 p2 = index[entry[6]][7]
2365 2365 node = entry[7]
2366 2366
2367 2367 # (Possibly) reuse the delta from the revlog if allowed and
2368 2368 # the revlog chunk is a delta.
2369 2369 cachedelta = None
2370 2370 rawtext = None
2371 2371 if populatecachedelta:
2372 2372 dp = self.deltaparent(rev)
2373 2373 if dp != nullrev:
2374 2374 cachedelta = (dp, bytes(self._chunk(rev)))
2375 2375
2376 2376 if not cachedelta:
2377 2377 rawtext = self.revision(rev, raw=True)
2378 2378
2379 2379
2380 2380 if deltareuse == self.DELTAREUSEFULLADD:
2381 2381 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2382 2382 cachedelta=cachedelta,
2383 2383 node=node, flags=flags,
2384 2384 deltacomputer=deltacomputer)
2385 2385 else:
2386 2386 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2387 2387 checkambig=False)
2388 2388 dfh = None
2389 2389 if not destrevlog._inline:
2390 2390 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2391 2391 try:
2392 2392 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2393 2393 p2, flags, cachedelta, ifh, dfh,
2394 2394 deltacomputer=deltacomputer)
2395 2395 finally:
2396 2396 if dfh:
2397 2397 dfh.close()
2398 2398 ifh.close()
2399 2399
2400 2400 if addrevisioncb:
2401 2401 addrevisioncb(self, rev, node)
2402 2402 finally:
2403 2403 destrevlog._lazydeltabase = oldlazydeltabase
2404 2404 destrevlog._deltabothparents = oldamd
2405 2405
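# Minimal usage sketch (hypothetical transaction and revlog pair; the
# policy is one of the DELTAREUSE* constants documented above):
#
#     with repo.transaction(b'upgrade') as tr:
#         srcrl.clone(tr, dstrl, deltareuse=srcrl.DELTAREUSESAMEREVS,
#                     forcedeltabothparents=True)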
2406 2406 def censorrevision(self, tr, censornode, tombstone=b''):
2407 2407 if (self.version & 0xFFFF) == REVLOGV0:
2408 2408 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2409 2409 self.version)
2410 2410
2411 2411 censorrev = self.rev(censornode)
2412 2412 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2413 2413
2414 2414 if len(tombstone) > self.rawsize(censorrev):
2415 2415 raise error.Abort(_('censor tombstone must be no longer than '
2416 2416 'censored data'))
2417 2417
2418 2418 # Rewriting the revlog in place is hard. Our strategy for censoring is
2419 2419 # to create a new revlog, copy all revisions to it, then replace the
2420 2420 # revlogs on transaction close.
2421 2421
2422 2422 newindexfile = self.indexfile + b'.tmpcensored'
2423 2423 newdatafile = self.datafile + b'.tmpcensored'
2424 2424
2425 2425 # This is a bit dangerous. We could easily have a mismatch of state.
2426 2426 newrl = revlog(self.opener, newindexfile, newdatafile,
2427 2427 censorable=True)
2428 2428 newrl.version = self.version
2429 2429 newrl._generaldelta = self._generaldelta
2430 2430 newrl._io = self._io
2431 2431
2432 2432 for rev in self.revs():
2433 2433 node = self.node(rev)
2434 2434 p1, p2 = self.parents(node)
2435 2435
2436 2436 if rev == censorrev:
2437 2437 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2438 2438 p1, p2, censornode, REVIDX_ISCENSORED)
2439 2439
2440 2440 if newrl.deltaparent(rev) != nullrev:
2441 2441 raise error.Abort(_('censored revision stored as delta; '
2442 2442 'cannot censor'),
2443 2443 hint=_('censoring of revlogs is not '
2444 2444 'fully implemented; please report '
2445 2445 'this bug'))
2446 2446 continue
2447 2447
2448 2448 if self.iscensored(rev):
2449 2449 if self.deltaparent(rev) != nullrev:
2450 2450 raise error.Abort(_('cannot censor due to censored '
2451 2451 'revision having delta stored'))
2452 2452 rawtext = self._chunk(rev)
2453 2453 else:
2454 2454 rawtext = self.revision(rev, raw=True)
2455 2455
2456 2456 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2457 2457 self.flags(rev))
2458 2458
2459 2459 tr.addbackup(self.indexfile, location='store')
2460 2460 if not self._inline:
2461 2461 tr.addbackup(self.datafile, location='store')
2462 2462
2463 2463 self.opener.rename(newrl.indexfile, self.indexfile)
2464 2464 if not self._inline:
2465 2465 self.opener.rename(newrl.datafile, self.datafile)
2466 2466
2467 2467 self.clearcaches()
2468 2468 self._loadindex(self.version, None)
2469 2469
2470 2470 def verifyintegrity(self, state):
2471 2471 """Verifies the integrity of the revlog.
2472 2472
2473 2473 Yields ``revlogproblem`` instances describing problems that are
2474 2474 found.
2475 2475 """
2476 2476 dd, di = self.checksize()
2477 2477 if dd:
2478 2478 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2479 2479 if di:
2480 2480 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2481 2481
2482 2482 version = self.version & 0xFFFF
2483 2483
2484 2484 # The verifier tells us what version revlog we should be.
2485 2485 if version != state['expectedversion']:
2486 2486 yield revlogproblem(
2487 2487 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2488 2488 (self.indexfile, version, state['expectedversion']))
2489 2489
2490 2490 state['skipread'] = set()
2491 2491
2492 2492 for rev in self:
2493 2493 node = self.node(rev)
2494 2494
2495 2495 # Verify contents. 4 cases to care about:
2496 2496 #
2497 2497 # common: the most common case
2498 2498 # rename: with a rename
2499 2499 # meta: file content starts with b'\1\n', the metadata
2500 2500 # header defined in filelog.py, but without a rename
2501 2501 # ext: content stored externally
2502 2502 #
2503 2503 # More formally, their differences are shown below:
2504 2504 #
2505 2505 # | common | rename | meta | ext
2506 2506 # -------------------------------------------------------
2507 2507 # flags() | 0 | 0 | 0 | not 0
2508 2508 # renamed() | False | True | False | ?
2509 2509 # rawtext[0:2]=='\1\n'| False | True | True | ?
2510 2510 #
2511 2511 # "rawtext" means the raw text stored in revlog data, which
2512 2512 # could be retrieved by "revision(rev, raw=True)". "text"
2513 2513 # mentioned below is "revision(rev, raw=False)".
2514 2514 #
2515 2515 # There are 3 different lengths stored physically:
2516 2516 # 1. L1: rawsize, stored in revlog index
2517 2517 # 2. L2: len(rawtext), stored in revlog data
2518 2518 # 3. L3: len(text), stored in revlog data if flags==0, or
2519 2519 # possibly somewhere else if flags!=0
2520 2520 #
2521 2521 # L1 should be equal to L2. L3 could be different from them.
2522 2522 # "text" may or may not affect commit hash depending on flag
2523 2523 # processors (see revlog.addflagprocessor).
2524 2524 #
2525 2525 # | common | rename | meta | ext
2526 2526 # -------------------------------------------------
2527 2527 # rawsize() | L1 | L1 | L1 | L1
2528 2528 # size() | L1 | L2-LM | L1(*) | L1 (?)
2529 2529 # len(rawtext) | L2 | L2 | L2 | L2
2530 2530 # len(text) | L2 | L2 | L2 | L3
2531 2531 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2532 2532 #
2533 2533 # LM: length of metadata, depending on rawtext
2534 2534 # (*): not ideal, see comment in filelog.size
2535 2535 # (?): could be "- len(meta)" if the resolved content has
2536 2536 # rename metadata
2537 2537 #
2538 2538 # Checks needed to be done:
2539 2539 # 1. length check: L1 == L2, in all cases.
2540 2540 # 2. hash check: depending on flag processor, we may need to
2541 2541 # use either "text" (external), or "rawtext" (in revlog).
2542 2542
2543 2543 try:
2544 2544 skipflags = state.get('skipflags', 0)
2545 2545 if skipflags:
2546 2546 skipflags &= self.flags(rev)
2547 2547
2548 2548 if skipflags:
2549 2549 state['skipread'].add(node)
2550 2550 else:
2551 2551 # Side-effect: read content and verify hash.
2552 2552 self.revision(node)
2553 2553
2554 2554 l1 = self.rawsize(rev)
2555 2555 l2 = len(self.revision(node, raw=True))
2556 2556
2557 2557 if l1 != l2:
2558 2558 yield revlogproblem(
2559 2559 error=_('unpacked size is %d, %d expected') % (l2, l1),
2560 2560 node=node)
2561 2561
2562 2562 except error.CensoredNodeError:
2563 2563 if state['erroroncensored']:
2564 2564 yield revlogproblem(error=_('censored file data'),
2565 2565 node=node)
2566 2566 state['skipread'].add(node)
2567 2567 except Exception as e:
2568 2568 yield revlogproblem(
2569 2569 error=_('unpacking %s: %s') % (short(node),
2570 2570 stringutil.forcebytestr(e)),
2571 2571 node=node)
2572 2572 state['skipread'].add(node)
2573 2573
2574 2574 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2575 2575 revisionscount=False, trackedsize=False,
2576 2576 storedsize=False):
2577 2577 d = {}
2578 2578
2579 2579 if exclusivefiles:
2580 2580 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2581 2581 if not self._inline:
2582 2582 d['exclusivefiles'].append((self.opener, self.datafile))
2583 2583
2584 2584 if sharedfiles:
2585 2585 d['sharedfiles'] = []
2586 2586
2587 2587 if revisionscount:
2588 2588 d['revisionscount'] = len(self)
2589 2589
2590 2590 if trackedsize:
2591 2591 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2592 2592
2593 2593 if storedsize:
2594 2594 d['storedsize'] = sum(self.opener.stat(path).st_size
2595 2595 for path in self.files())
2596 2596
2597 2597 return d
@@ -1,897 +1,897 b''
1 1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 2 #
3 3 # Copyright (c) 2016-present, Gregory Szorc
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11
12 12 from .i18n import _
13 13 from . import (
14 14 changelog,
15 15 error,
16 16 filelog,
17 17 hg,
18 18 localrepo,
19 19 manifest,
20 20 pycompat,
21 21 revlog,
22 22 scmutil,
23 23 util,
24 24 vfs as vfsmod,
25 25 )
26 26
27 27 def requiredsourcerequirements(repo):
28 28 """Obtain requirements required to be present to upgrade a repo.
29 29
30 30 An upgrade will not be allowed if the repository doesn't have the
31 31 requirements returned by this function.
32 32 """
33 33 return {
34 34 # Introduced in Mercurial 0.9.2.
35 35 'revlogv1',
36 36 # Introduced in Mercurial 0.9.2.
37 37 'store',
38 38 }
39 39
40 40 def blocksourcerequirements(repo):
41 41 """Obtain requirements that will prevent an upgrade from occurring.
42 42
43 43 An upgrade cannot be performed if the source repository contains any
44 44 requirement in the returned set.
45 45 """
46 46 return {
47 47 # The upgrade code does not yet support these experimental features.
48 48 # This is an artificial limitation.
49 49 'treemanifest',
50 50 # This was a precursor to generaldelta and was never enabled by default.
51 51 # It should (hopefully) not exist in the wild.
52 52 'parentdelta',
53 53 # Upgrade should operate on the actual store, not the shared link.
54 54 'shared',
55 55 }
56 56
57 57 def supportremovedrequirements(repo):
58 58 """Obtain requirements that can be removed during an upgrade.
59 59
60 60 If an upgrade were to create a repository that dropped a requirement,
61 61 the dropped requirement must appear in the returned set for the upgrade
62 62 to be allowed.
63 63 """
64 64 return {
65 65 localrepo.SPARSEREVLOG_REQUIREMENT,
66 66 }
67 67
68 68 def supporteddestrequirements(repo):
69 69 """Obtain requirements that upgrade supports in the destination.
70 70
71 71 If the result of the upgrade would create requirements not in this set,
72 72 the upgrade is disallowed.
73 73
74 74 Extensions should monkeypatch this to add their custom requirements.
75 75 """
76 76 return {
77 77 'dotencode',
78 78 'fncache',
79 79 'generaldelta',
80 80 'revlogv1',
81 81 'store',
82 82 localrepo.SPARSEREVLOG_REQUIREMENT,
83 83 }
84 84
85 85 def allowednewrequirements(repo):
86 86 """Obtain requirements that can be added to a repository during upgrade.
87 87
88 88 This is used to disallow proposed requirements from being added when
89 89 they weren't present before.
90 90
91 91 We use a list of allowed requirement additions instead of a list of known
92 92 bad additions because the whitelist approach is safer and will prevent
93 93 future, unknown requirements from accidentally being added.
94 94 """
95 95 return {
96 96 'dotencode',
97 97 'fncache',
98 98 'generaldelta',
99 99 localrepo.SPARSEREVLOG_REQUIREMENT,
100 100 }
101 101
102 102 def preservedrequirements(repo):
103 103 return set()
104 104
105 105 deficiency = 'deficiency'
106 106 optimisation = 'optimization'
107 107
108 108 class improvement(object):
109 109 """Represents an improvement that can be made as part of an upgrade.
110 110
111 111 The following attributes are defined on each instance:
112 112
113 113 name
114 114 Machine-readable string uniquely identifying this improvement. It
115 115 will be mapped to an action later in the upgrade process.
116 116
117 117 type
118 118 Either ``deficiency`` or ``optimisation``. A deficiency is an obvious
119 119 problem. An optimization is an action (sometimes optional) that
120 120 can be taken to further improve the state of the repository.
121 121
122 122 description
123 123 Message intended for humans explaining the improvement in more detail,
124 124 including the implications of it. For ``deficiency`` types, should be
125 125 worded in the present tense. For ``optimisation`` types, should be
126 126 worded in the future tense.
127 127
128 128 upgrademessage
129 129 Message intended for humans explaining what an upgrade addressing this
130 130 issue will do. Should be worded in the future tense.
131 131 """
132 132 def __init__(self, name, type, description, upgrademessage):
133 133 self.name = name
134 134 self.type = type
135 135 self.description = description
136 136 self.upgrademessage = upgrademessage
137 137
138 138 def __eq__(self, other):
139 139 if not isinstance(other, improvement):
140 140 # This is what Python tells us to do
141 141 return NotImplemented
142 142 return self.name == other.name
143 143
144 144 def __ne__(self, other):
145 145 return not self == other
146 146
147 147 def __hash__(self):
148 148 return hash(self.name)
149 149
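# Construction sketch (hypothetical name and messages; the real instances
# are created by the formatvariant subclasses and findoptimizations() below):
#
#     improvement(name='example-action', type=optimisation,
#                 description=_('what is suboptimal today'),
#                 upgrademessage=_('what the upgrade will change'))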
150 150 allformatvariant = []
151 151
152 152 def registerformatvariant(cls):
153 153 allformatvariant.append(cls)
154 154 return cls
155 155
156 156 class formatvariant(improvement):
157 157 """an improvement subclass dedicated to repository format"""
158 158 type = deficiency
159 159 ### The following attributes should be defined for each class:
160 160
161 161 # machine-readable string uniquely identifying this improvement. it will be
162 162 # mapped to an action later in the upgrade process.
163 163 name = None
164 164
165 165 # message intended for humans explaining the improvement in more detail,
166 166 # including the implications of it. For ``deficiency`` types, it should be
167 167 # worded in the present tense.
168 168 description = None
169 169
170 170 # message intended for humans explaining what an upgrade addressing this
171 171 # issue will do. should be worded in the future tense.
172 172 upgrademessage = None
173 173
174 174 # value of current Mercurial default for new repository
175 175 default = None
176 176
177 177 def __init__(self):
178 178 raise NotImplementedError()
179 179
180 180 @staticmethod
181 181 def fromrepo(repo):
182 182 """current value of the variant in the repository"""
183 183 raise NotImplementedError()
184 184
185 185 @staticmethod
186 186 def fromconfig(repo):
187 187 """current value of the variant in the configuration"""
188 188 raise NotImplementedError()
189 189
190 190 class requirementformatvariant(formatvariant):
191 191 """formatvariant based on a 'requirement' name.
192 192
193 193 Many format variants are controlled by a 'requirement'. We define a small
194 194 subclass to factor out the common code.
195 195 """
196 196
197 197 # the requirement that controls this format variant
198 198 _requirement = None
199 199
200 200 @staticmethod
201 201 def _newreporequirements(ui):
202 202 return localrepo.newreporequirements(
203 203 ui, localrepo.defaultcreateopts(ui))
204 204
205 205 @classmethod
206 206 def fromrepo(cls, repo):
207 207 assert cls._requirement is not None
208 208 return cls._requirement in repo.requirements
209 209
210 210 @classmethod
211 211 def fromconfig(cls, repo):
212 212 assert cls._requirement is not None
213 213 return cls._requirement in cls._newreporequirements(repo.ui)
214 214
215 215 @registerformatvariant
216 216 class fncache(requirementformatvariant):
217 217 name = 'fncache'
218 218
219 219 _requirement = 'fncache'
220 220
221 221 default = True
222 222
223 223 description = _('long and reserved filenames may not work correctly; '
224 224 'repository performance is sub-optimal')
225 225
226 226 upgrademessage = _('repository will be more resilient to storing '
227 227 'certain paths and performance of certain '
228 228 'operations should be improved')
229 229
230 230 @registerformatvariant
231 231 class dotencode(requirementformatvariant):
232 232 name = 'dotencode'
233 233
234 234 _requirement = 'dotencode'
235 235
236 236 default = True
237 237
238 238 description = _('storage of filenames beginning with a period or '
239 239 'space may not work correctly')
240 240
241 241 upgrademessage = _('repository will be better able to store files '
242 242 'beginning with a space or period')
243 243
244 244 @registerformatvariant
245 245 class generaldelta(requirementformatvariant):
246 246 name = 'generaldelta'
247 247
248 248 _requirement = 'generaldelta'
249 249
250 250 default = True
251 251
252 252 description = _('deltas within internal storage are unable to '
253 253 'choose optimal revisions; repository is larger and '
254 254 'slower than it could be; interaction with other '
255 255 'repositories may require extra network and CPU '
256 256 'resources, making "hg push" and "hg pull" slower')
257 257
258 258 upgrademessage = _('repository storage will be able to create '
259 259 'optimal deltas; new repository data will be '
260 260 'smaller and read times should decrease; '
261 261 'interacting with other repositories using this '
262 262 'storage model should require less network and '
263 263 'CPU resources, making "hg push" and "hg pull" '
264 264 'faster')
265 265
266 266 @registerformatvariant
267 267 class sparserevlog(requirementformatvariant):
268 268 name = 'sparserevlog'
269 269
270 270 _requirement = localrepo.SPARSEREVLOG_REQUIREMENT
271 271
272 272 default = False
273 273
274 274 description = _('in order to limit disk reading and memory usage on older '
275 275 'versions, the span of a delta chain from its root to its '
276 276 'end is limited, regardless of the relevant data in this '
277 277 'span. This can severely limit the quality of the delta '
278 278 'chains Mercurial is able to build, resulting in much more '
279 279 'storage space being taken and limiting the reusability of '
280 280 'on-disk deltas during exchange.'
281 281 )
282 282
283 283 upgrademessage = _('Revlog will support delta chains with more unused data '
284 284 'between payloads. These gaps will be skipped at read '
285 285 'time. This allows for better delta chains, yielding '
286 286 'better compression and faster exchange with the server.')
287 287
288 288 @registerformatvariant
289 289 class removecldeltachain(formatvariant):
290 290 name = 'plain-cl-delta'
291 291
292 292 default = True
293 293
294 294 description = _('changelog storage is using deltas instead of '
295 295 'raw entries; changelog reading and any '
296 296 'operation relying on changelog data are slower '
297 297 'than they could be')
298 298
299 299 upgrademessage = _('changelog storage will be reformatted to '
300 300 'store raw entries; changelog reading will be '
301 301 'faster; changelog size may be reduced')
302 302
303 303 @staticmethod
304 304 def fromrepo(repo):
305 305 # Mercurial 4.0 changed changelogs to not use delta chains. Search for
306 306 # changelogs with deltas.
307 307 cl = repo.changelog
308 308 chainbase = cl.chainbase
309 309 return all(rev == chainbase(rev) for rev in cl)
310 310
311 311 @staticmethod
312 312 def fromconfig(repo):
313 313 return True
314 314
315 315 @registerformatvariant
316 316 class compressionengine(formatvariant):
317 317 name = 'compression'
318 318 default = 'zlib'
319 319
320 320 description = _('Compression algorithm used to compress data. '
321 321 'Some engines are faster than others')
322 322
323 323 upgrademessage = _('revlog content will be recompressed with the new '
324 324 'algorithm.')
325 325
326 326 @classmethod
327 327 def fromrepo(cls, repo):
328 328 for req in repo.requirements:
329 329 if req.startswith('exp-compression-'):
330 330 return req.split('-', 2)[2]
331 331 return 'zlib'
332 332
333 333 @classmethod
334 334 def fromconfig(cls, repo):
335 335 return repo.ui.config('experimental', 'format.compression')
336 336
337 337 def finddeficiencies(repo):
338 338 """returns a list of deficiencies that the repo suffer from"""
339 339 deficiencies = []
340 340
341 341 # We could detect lack of revlogv1 and store here, but they were added
342 342 # in 0.9.2 and we don't support upgrading repos without these
343 343 # requirements, so let's not bother.
344 344
345 345 for fv in allformatvariant:
346 346 if not fv.fromrepo(repo):
347 347 deficiencies.append(fv)
348 348
349 349 return deficiencies
350 350
351 351 def findoptimizations(repo):
352 352 """Determine optimisation that could be used during upgrade"""
353 353 # These are unconditionally added. There is logic later that figures out
354 354 # which ones to apply.
355 355 optimizations = []
356 356
357 357 optimizations.append(improvement(
358 358 name='redeltaparent',
359 359 type=optimisation,
360 360 description=_('deltas within internal storage will be recalculated to '
361 361 'choose an optimal base revision where this was not '
362 362 'already done; the size of the repository may shrink and '
363 363 'various operations may become faster; the first time '
364 364 'this optimization is performed could slow down upgrade '
365 365 'execution considerably; subsequent invocations should '
366 366 'not run noticeably slower'),
367 367 upgrademessage=_('deltas within internal storage will choose a new '
368 368 'base revision if needed')))
369 369
370 370 optimizations.append(improvement(
371 371 name='redeltamultibase',
372 372 type=optimisation,
373 373 description=_('deltas within internal storage will be recalculated '
374 374 'against multiple base revisions and the smallest '
375 375 'difference will be used; the size of the repository may '
376 376 'shrink significantly when there are many merges; this '
377 377 'optimization will slow down execution in proportion to '
378 378 'the number of merges in the repository and the number '
379 379 'of files in the repository; this slowdown should not '
380 380 'be significant unless there are tens of thousands of '
381 381 'files and thousands of merges'),
382 382 upgrademessage=_('deltas within internal storage will choose an '
383 383 'optimal delta by computing deltas against multiple '
384 384 'parents; may slow down execution time '
385 385 'significantly')))
386 386
387 387 optimizations.append(improvement(
388 388 name='redeltaall',
389 389 type=optimisation,
390 390 description=_('deltas within internal storage will always be '
391 391 'recalculated without reusing prior deltas; this will '
392 392 'likely make execution run several times slower; this '
393 393 'optimization is typically not needed'),
394 394 upgrademessage=_('deltas within internal storage will be fully '
395 395 'recomputed; this will likely drastically slow down '
396 396 'execution time')))
397 397
398 398 optimizations.append(improvement(
399 399 name='redeltafulladd',
400 400 type=optimisation,
401 401 description=_('every revision will be re-added as if it were new '
402 402 'content. It will go through the full storage '
403 403 'mechanism giving extensions a chance to process it '
404 404 '(e.g. lfs). This is similar to "redeltaall" but even '
405 405 'slower since more logic is involved.'),
406 406 upgrademessage=_('each revision will be added as new content to the '
407 407 'internal storage; this will likely drastically slow '
408 408 'down execution time, but some extensions might need '
409 409 'it')))
410 410
411 411 return optimizations
412 412
413 413 def determineactions(repo, deficiencies, sourcereqs, destreqs):
414 414 """Determine upgrade actions that will be performed.
415 415
416 416 Given a list of improvements as returned by ``finddeficiencies`` and
417 417 ``findoptimizations``, determine the list of upgrade actions that
418 418 will be performed.
419 419
420 420 The role of this function is to filter improvements if needed, apply
421 421 recommended optimizations from the improvements list that make sense,
422 422 etc.
423 423
424 424 Returns a list of action names.
425 425 """
426 426 newactions = []
427 427
428 428 knownreqs = supporteddestrequirements(repo)
429 429
430 430 for d in deficiencies:
431 431 name = d.name
432 432
433 433 # If the action is a requirement that doesn't show up in the
434 434 # destination requirements, prune the action.
435 435 if name in knownreqs and name not in destreqs:
436 436 continue
437 437
438 438 newactions.append(d)
439 439
440 440 # FUTURE consider adding some optimizations here for certain transitions.
441 441 # e.g. adding generaldelta could schedule parent redeltas.
442 442
443 443 return newactions
444 444
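# End-to-end sketch of how these helpers combine (hypothetical repo object
# and destination requirements; this mirrors the calls made by the upgrade
# driver in this module):
#
#     deficiencies = finddeficiencies(repo)
#     optimizations = findoptimizations(repo)
#     actions = determineactions(repo, deficiencies,
#                                repo.requirements, destrequirements)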
445 445 def _revlogfrompath(repo, path):
446 446 """Obtain a revlog from a repo path.
447 447
448 448 An instance of the appropriate class is returned.
449 449 """
450 450 if path == '00changelog.i':
451 451 return changelog.changelog(repo.svfs)
452 452 elif path.endswith('00manifest.i'):
453 453 mandir = path[:-len('00manifest.i')]
454 454 return manifest.manifestrevlog(repo.svfs, tree=mandir)
455 455 else:
456 456 # reverse of "/".join(("data", path + ".i"))
457 457 return filelog.filelog(repo.svfs, path[5:-2])
458 458
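# Path-to-class mapping, illustrated (a sketch; the trailing '.i' and the
# 'data/' prefix are stripped to recover the tracked file name):
#
#     _revlogfrompath(repo, '00changelog.i')   -> changelog.changelog
#     _revlogfrompath(repo, '00manifest.i')    -> manifest.manifestrevlog
#     _revlogfrompath(repo, 'data/foo.txt.i')  -> filelog.filelog for 'foo.txt'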
459 def _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse, deltabothparents):
459 def _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse, forcedeltabothparents):
    """Copy revlogs between 2 repos."""
    revcount = 0
    srcsize = 0
    srcrawsize = 0
    dstsize = 0
    fcount = 0
    frevcount = 0
    fsrcsize = 0
    frawsize = 0
    fdstsize = 0
    mcount = 0
    mrevcount = 0
    msrcsize = 0
    mrawsize = 0
    mdstsize = 0
    crevcount = 0
    csrcsize = 0
    crawsize = 0
    cdstsize = 0
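
    # Counter naming above (illustrative summary): the 'f', 'm' and 'c'
    # prefixes stand for filelogs, manifests and changelog; the 'src', 'raw'
    # and 'dst' parts track stored source size, uncompressed tracked size and
    # migrated destination size.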

    # Perform a pass to collect metadata. This validates we can open all
    # source files and allows a unified progress bar to be displayed.
    for unencoded, encoded, size in srcrepo.store.walk():
        if unencoded.endswith('.d'):
            continue

        rl = _revlogfrompath(srcrepo, unencoded)

        info = rl.storageinfo(exclusivefiles=True, revisionscount=True,
                              trackedsize=True, storedsize=True)

        revcount += info['revisionscount'] or 0
        datasize = info['storedsize'] or 0
        rawsize = info['trackedsize'] or 0

        srcsize += datasize
        srcrawsize += rawsize

        # This is for the separate progress bars.
        if isinstance(rl, changelog.changelog):
            crevcount += len(rl)
            csrcsize += datasize
            crawsize += rawsize
        elif isinstance(rl, manifest.manifestrevlog):
            mcount += 1
            mrevcount += len(rl)
            msrcsize += datasize
            mrawsize += rawsize
        elif isinstance(rl, filelog.filelog):
            fcount += 1
            frevcount += len(rl)
            fsrcsize += datasize
            frawsize += rawsize
        else:
            raise error.ProgrammingError('unknown revlog type')

    if not revcount:
        return

    ui.write(_('migrating %d total revisions (%d in filelogs, %d in manifests, '
               '%d in changelog)\n') %
             (revcount, frevcount, mrevcount, crevcount))
    ui.write(_('migrating %s in store; %s tracked data\n') % (
             (util.bytecount(srcsize), util.bytecount(srcrawsize))))

    # Used to keep track of progress.
    progress = None
    def oncopiedrevision(rl, rev, node):
        progress.increment()
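
    # Note: oncopiedrevision() reads 'progress' from the enclosing scope, so
    # it automatically follows whichever progress bar the copy loop below has
    # most recently installed.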

    # Do the actual copying.
    # FUTURE this operation can be farmed off to worker processes.
    seen = set()
    for unencoded, encoded, size in srcrepo.store.walk():
        if unencoded.endswith('.d'):
            continue

        oldrl = _revlogfrompath(srcrepo, unencoded)
        newrl = _revlogfrompath(dstrepo, unencoded)

        if isinstance(oldrl, changelog.changelog) and 'c' not in seen:
            ui.write(_('finished migrating %d manifest revisions across %d '
                       'manifests; change in size: %s\n') %
                     (mrevcount, mcount, util.bytecount(mdstsize - msrcsize)))

            ui.write(_('migrating changelog containing %d revisions '
                       '(%s in store; %s tracked data)\n') %
                     (crevcount, util.bytecount(csrcsize),
                      util.bytecount(crawsize)))
            seen.add('c')
            progress = srcrepo.ui.makeprogress(_('changelog revisions'),
                                               total=crevcount)
        elif isinstance(oldrl, manifest.manifestrevlog) and 'm' not in seen:
            ui.write(_('finished migrating %d filelog revisions across %d '
                       'filelogs; change in size: %s\n') %
                     (frevcount, fcount, util.bytecount(fdstsize - fsrcsize)))

            ui.write(_('migrating %d manifests containing %d revisions '
                       '(%s in store; %s tracked data)\n') %
                     (mcount, mrevcount, util.bytecount(msrcsize),
                      util.bytecount(mrawsize)))
            seen.add('m')
            if progress:
                progress.complete()
            progress = srcrepo.ui.makeprogress(_('manifest revisions'),
                                               total=mrevcount)
        elif 'f' not in seen:
            ui.write(_('migrating %d filelogs containing %d revisions '
                       '(%s in store; %s tracked data)\n') %
                     (fcount, frevcount, util.bytecount(fsrcsize),
                      util.bytecount(frawsize)))
            seen.add('f')
            if progress:
                progress.complete()
            progress = srcrepo.ui.makeprogress(_('file revisions'),
                                               total=frevcount)

        ui.note(_('cloning %d revisions from %s\n') % (len(oldrl), unencoded))
        oldrl.clone(tr, newrl, addrevisioncb=oncopiedrevision,
                    deltareuse=deltareuse,
                    forcedeltabothparents=forcedeltabothparents)
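
        # 'forcedeltabothparents' is set when the 'redeltamultibase'
        # optimization was requested; the clone then computes deltas against
        # both parents of merges and keeps the smallest result.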

        info = newrl.storageinfo(storedsize=True)
        datasize = info['storedsize'] or 0

        dstsize += datasize

        if isinstance(newrl, changelog.changelog):
            cdstsize += datasize
        elif isinstance(newrl, manifest.manifestrevlog):
            mdstsize += datasize
        else:
            fdstsize += datasize

    progress.complete()

    ui.write(_('finished migrating %d changelog revisions; change in size: '
               '%s\n') % (crevcount, util.bytecount(cdstsize - csrcsize)))

    ui.write(_('finished migrating %d total revisions; total change in store '
               'size: %s\n') % (revcount, util.bytecount(dstsize - srcsize)))

def _filterstorefile(srcrepo, dstrepo, requirements, path, mode, st):
    """Determine whether to copy a store file during upgrade.

    This function is called when migrating store files from ``srcrepo`` to
    ``dstrepo`` as part of upgrading a repository.

    Args:
      srcrepo: repo we are copying from
      dstrepo: repo we are copying to
      requirements: set of requirements for ``dstrepo``
      path: store file being examined
      mode: the ``ST_MODE`` file type of ``path``
      st: ``stat`` data structure for ``path``

    Function should return ``True`` if the file is to be copied.
    """
    # Skip revlogs.
    if path.endswith(('.i', '.d')):
        return False
    # Skip transaction related files.
    if path.startswith('undo'):
        return False
    # Only copy regular files.
    if mode != stat.S_IFREG:
        return False
    # Skip other files that should not be copied.
    if path in ('lock', 'fncache'):
        return False

    return True
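
# Example (illustrative): under the rules above a regular store file such as
# 'phaseroots' is copied, while revlogs ('.i'/'.d'), 'undo*' transaction
# files, 'lock' and 'fncache' are skipped.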

def _finishdatamigration(ui, srcrepo, dstrepo, requirements):
    """Hook point for extensions to perform additional actions during upgrade.

    This function is called after revlogs and store files have been copied but
    before the new store is swapped into the original location.
    """

def _upgraderepo(ui, srcrepo, dstrepo, requirements, actions):
    """Do the low-level work of upgrading a repository.

    The upgrade is effectively performed as a copy between a source
    repository and a temporary destination repository.

    The source repository is unmodified for as long as possible so the
    upgrade can abort at any time without causing loss of service for
    readers and without corrupting the source repository.
    """
    assert srcrepo.currentwlock()
    assert dstrepo.currentwlock()

    ui.write(_('(it is safe to interrupt this process any time before '
               'data migration completes)\n'))

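    # Map the requested optimizations to revlog.clone() delta-reuse policies;
    # with no optimization requested, existing deltas are reused verbatim
    # (DELTAREUSEALWAYS).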
    if 'redeltaall' in actions:
        deltareuse = revlog.revlog.DELTAREUSENEVER
    elif 'redeltaparent' in actions:
        deltareuse = revlog.revlog.DELTAREUSESAMEREVS
    elif 'redeltamultibase' in actions:
        deltareuse = revlog.revlog.DELTAREUSESAMEREVS
    elif 'redeltafulladd' in actions:
        deltareuse = revlog.revlog.DELTAREUSEFULLADD
    else:
        deltareuse = revlog.revlog.DELTAREUSEALWAYS

    with dstrepo.transaction('upgrade') as tr:
        _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse,
                     'redeltamultibase' in actions)

    # Now copy other files in the store directory.
    # The sorted() makes execution deterministic.
    for p, kind, st in sorted(srcrepo.store.vfs.readdir('', stat=True)):
        if not _filterstorefile(srcrepo, dstrepo, requirements,
                                p, kind, st):
            continue

        srcrepo.ui.write(_('copying %s\n') % p)
        src = srcrepo.store.rawvfs.join(p)
        dst = dstrepo.store.rawvfs.join(p)
        util.copyfile(src, dst, copystat=True)

    _finishdatamigration(ui, srcrepo, dstrepo, requirements)

    ui.write(_('data fully migrated to temporary repository\n'))

    backuppath = pycompat.mkdtemp(prefix='upgradebackup.', dir=srcrepo.path)
    backupvfs = vfsmod.vfs(backuppath)

    # Make a backup of requires file first, as it is the first to be modified.
    util.copyfile(srcrepo.vfs.join('requires'), backupvfs.join('requires'))

    # We install an arbitrary requirement that clients must not support
    # as a mechanism to lock out new clients during the data swap. This is
    # better than allowing a client to continue while the repository is in
    # an inconsistent state.
    ui.write(_('marking source repository as being upgraded; clients will be '
               'unable to read from repository\n'))
    scmutil.writerequires(srcrepo.vfs,
                          srcrepo.requirements | {'upgradeinprogress'})

    ui.write(_('starting in-place swap of repository data\n'))
    ui.write(_('replaced files will be backed up at %s\n') %
             backuppath)

    # Now swap in the new store directory. Doing it as a rename should make
    # the operation nearly instantaneous and atomic (at least in well-behaved
    # environments).
    ui.write(_('replacing store...\n'))
    tstart = util.timer()
    util.rename(srcrepo.spath, backupvfs.join('store'))
    util.rename(dstrepo.spath, srcrepo.spath)
    elapsed = util.timer() - tstart
    ui.write(_('store replacement complete; repository was inconsistent for '
               '%0.1fs\n') % elapsed)

    # We first write the requirements file. Any new requirements will lock
    # out legacy clients.
    ui.write(_('finalizing requirements file and making repository readable '
               'again\n'))
    scmutil.writerequires(srcrepo.vfs, requirements)

    # The lock file from the old store won't be removed because nothing has a
    # reference to its new location. So clean it up manually. Alternatively, we
    # could update srcrepo.svfs and other variables to point to the new
    # location. This is simpler.
    backupvfs.unlink('store/lock')

    return backuppath
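
# Note (illustrative): the path returned above is reported to the user by
# upgraderepo() so the pre-upgrade store can be restored manually if needed.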

def upgraderepo(ui, repo, run=False, optimize=None):
    """Upgrade a repository in place."""
    optimize = set(optimize or [])
    repo = repo.unfiltered()

    # Ensure the repository can be upgraded.
    missingreqs = requiredsourcerequirements(repo) - repo.requirements
    if missingreqs:
        raise error.Abort(_('cannot upgrade repository; requirement '
                            'missing: %s') % _(', ').join(sorted(missingreqs)))

    blockedreqs = blocksourcerequirements(repo) & repo.requirements
    if blockedreqs:
        raise error.Abort(_('cannot upgrade repository; unsupported source '
                            'requirement: %s') %
                          _(', ').join(sorted(blockedreqs)))

    # FUTURE there is potentially a need to control the wanted requirements via
    # command arguments or via an extension hook point.
    newreqs = localrepo.newreporequirements(
        repo.ui, localrepo.defaultcreateopts(repo.ui))
    newreqs.update(preservedrequirements(repo))

    noremovereqs = (repo.requirements - newreqs -
                    supportremovedrequirements(repo))
    if noremovereqs:
        raise error.Abort(_('cannot upgrade repository; requirement would be '
                            'removed: %s') % _(', ').join(sorted(noremovereqs)))

    noaddreqs = (newreqs - repo.requirements -
                 allowednewrequirements(repo))
    if noaddreqs:
        raise error.Abort(_('cannot upgrade repository; do not support adding '
                            'requirement: %s') %
                          _(', ').join(sorted(noaddreqs)))

    unsupportedreqs = newreqs - supporteddestrequirements(repo)
    if unsupportedreqs:
        raise error.Abort(_('cannot upgrade repository; do not support '
                            'destination requirement: %s') %
                          _(', ').join(sorted(unsupportedreqs)))

    # Find and validate all improvements that can be made.
    alloptimizations = findoptimizations(repo)

    # Apply and validate arguments.
    optimizations = []
    for o in alloptimizations:
        if o.name in optimize:
            optimizations.append(o)
            optimize.discard(o.name)

    if optimize: # anything left is unknown
        raise error.Abort(_('unknown optimization action requested: %s') %
                          ', '.join(sorted(optimize)),
                          hint=_('run without arguments to see valid '
                                 'optimizations'))

    deficiencies = finddeficiencies(repo)
    actions = determineactions(repo, deficiencies, repo.requirements, newreqs)
    actions.extend(o for o in sorted(optimizations)
                   # determineactions() could already have added an
                   # optimisation, so avoid duplicates
                   if o not in actions)

    def printrequirements():
        ui.write(_('requirements\n'))
        ui.write(_('   preserved: %s\n') %
                 _(', ').join(sorted(newreqs & repo.requirements)))

        if repo.requirements - newreqs:
            ui.write(_('   removed: %s\n') %
                     _(', ').join(sorted(repo.requirements - newreqs)))

        if newreqs - repo.requirements:
            ui.write(_('   added: %s\n') %
                     _(', ').join(sorted(newreqs - repo.requirements)))

        ui.write('\n')

    def printupgradeactions():
        for a in actions:
            ui.write('%s\n   %s\n\n' % (a.name, a.upgrademessage))

    if not run:
        fromconfig = []
        onlydefault = []

        for d in deficiencies:
            if d.fromconfig(repo):
                fromconfig.append(d)
            elif d.default:
                onlydefault.append(d)

        if fromconfig or onlydefault:

            if fromconfig:
                ui.write(_('repository lacks features recommended by '
                           'current config options:\n\n'))
                for i in fromconfig:
                    ui.write('%s\n   %s\n\n' % (i.name, i.description))

            if onlydefault:
                ui.write(_('repository lacks features used by the default '
                           'config options:\n\n'))
                for i in onlydefault:
                    ui.write('%s\n   %s\n\n' % (i.name, i.description))

            ui.write('\n')
        else:
            ui.write(_('(no feature deficiencies found in existing '
                       'repository)\n'))

        ui.write(_('performing an upgrade with "--run" will make the following '
                   'changes:\n\n'))

        printrequirements()
        printupgradeactions()

        unusedoptimize = [i for i in alloptimizations if i not in actions]

        if unusedoptimize:
            ui.write(_('additional optimizations are available by specifying '
                       '"--optimize <name>":\n\n'))
            for i in unusedoptimize:
                ui.write(_('%s\n   %s\n\n') % (i.name, i.description))
        return

    # Else we're in the run=true case.
    ui.write(_('upgrade will perform the following actions:\n\n'))
    printrequirements()
    printupgradeactions()

    upgradeactions = [a.name for a in actions]

    ui.write(_('beginning upgrade...\n'))
    with repo.wlock(), repo.lock():
        ui.write(_('repository locked and read-only\n'))
        # Our strategy for upgrading the repository is to create a new,
        # temporary repository, write data to it, then do a swap of the
        # data. There are less heavyweight ways to do this, but it is easier
        # to create a new repo object than to instantiate all the components
        # (like the store) separately.
        tmppath = pycompat.mkdtemp(prefix='upgrade.', dir=repo.path)
        backuppath = None
        try:
            ui.write(_('creating temporary repository to stage migrated '
                       'data: %s\n') % tmppath)

            # clone ui without using ui.copy because repo.ui is protected
            repoui = repo.ui.__class__(repo.ui)
            dstrepo = hg.repository(repoui, path=tmppath, create=True)

            with dstrepo.wlock(), dstrepo.lock():
                backuppath = _upgraderepo(ui, repo, dstrepo, newreqs,
                                          upgradeactions)

        finally:
            ui.write(_('removing temporary repository %s\n') % tmppath)
            repo.vfs.rmtree(tmppath, forcibly=True)

        if backuppath:
            ui.warn(_('copy of old repository backed up at %s\n') %
                    backuppath)
            ui.warn(_('the old repository will not be deleted; remove '
                      'it to free up disk space once the upgraded '
                      'repository is verified\n'))
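
# Minimal usage sketch (illustrative; this mirrors what the
# 'debugupgraderepo' command does, and assumes this module is imported as
# 'upgrade' from mercurial):
#
#   from mercurial import hg, ui as uimod, upgrade
#   ui = uimod.ui.load()
#   repo = hg.repository(ui, '/path/to/repo')
#   upgrade.upgraderepo(ui, repo, run=False)   # report what would change
#   upgrade.upgraderepo(ui, repo, run=True, optimize=['redeltamultibase'])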