revlog: use revlog.display_id in integrity error...
marmoute
r47931:0a66eef0 default
@@ -1,3180 +1,3180 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # Blanked usage of all the names to prevent pyflakes warnings.
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
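# Editor's illustration (not part of the original module): the packed value
# splits back into its components exactly the way start() and flags() on the
# revlog class read it:
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   assert packed >> 16 == 4096                  # high bits: data offset
#   assert packed & 0xFFFF == REVIDX_ISCENSORED  # low 16 bits: storage flags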
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider the "pure" Python implementation "fast" because
158 158 # people using pure don't really have performance considerations (and a
159 159 # wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated with the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to the uncompressed length limit of indexformatng (2 GiB,
239 239 # the maximum value of a 4-byte signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
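# Hypothetical usage sketch (editor's addition; real callers are the
# changelog, manifest, and filelog layers, and `KIND_FILELOG` here assumes
# the constant from revlogutils.constants):
#
#   rl = revlog(opener, target=(KIND_FILELOG, b'foo'), radix=b'data/foo')
#   node = rl.node(0)          # nodeid of the first revision
#   text = rl.revision(node)   # fulltext, reconstructed from the delta chain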
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 radix,
293 293 postfix=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must reliably be set by normal code, but
311 311 test, debug, or performance measurement code might not set it to an
312 312 accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315
316 316 self.radix = radix
317 317
318 318 if postfix is None:
319 319 indexfile = b'%s.i' % self.radix
320 320 datafile = b'%s.d' % self.radix
321 321 elif postfix == b'a':
322 322 indexfile = b'%s.i.a' % self.radix
323 323 datafile = b'%s.d' % self.radix
324 324 else:
325 325 indexfile = b'%s.i.%s' % (self.radix, postfix)
326 326 datafile = b'%s.d.%s' % (self.radix, postfix)
327 327
328 328 self._indexfile = indexfile
329 329 self._datafile = datafile
330 330 self.nodemap_file = None
331 331 self.postfix = postfix
332 332 self.opener = opener
333 333 if persistentnodemap:
334 334 self.nodemap_file = nodemaputil.get_nodemap_file(self)
335 335
336 336 assert target[0] in ALL_KINDS
337 337 assert len(target) == 2
338 338 self.target = target
339 339 # When True, indexfile is opened with checkambig=True at writing, to
340 340 # avoid file stat ambiguity.
341 341 self._checkambig = checkambig
342 342 self._mmaplargeindex = mmaplargeindex
343 343 self._censorable = censorable
344 344 # 3-tuple of (node, rev, text) for a raw revision.
345 345 self._revisioncache = None
346 346 # Maps rev to chain base rev.
347 347 self._chainbasecache = util.lrucachedict(100)
348 348 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
349 349 self._chunkcache = (0, b'')
350 350 # How much data to read and cache into the raw revlog data cache.
351 351 self._chunkcachesize = 65536
352 352 self._maxchainlen = None
353 353 self._deltabothparents = True
354 354 self.index = None
355 355 self._nodemap_docket = None
356 356 # Mapping of partial identifiers to full nodes.
357 357 self._pcache = {}
358 358 # Mapping of revision integer to full node.
359 359 self._compengine = b'zlib'
360 360 self._compengineopts = {}
361 361 self._maxdeltachainspan = -1
362 362 self._withsparseread = False
363 363 self._sparserevlog = False
364 364 self._srdensitythreshold = 0.50
365 365 self._srmingapsize = 262144
366 366
367 367 # Make copy of flag processors so each revlog instance can support
368 368 # custom flags.
369 369 self._flagprocessors = dict(flagutil.flagprocessors)
370 370
371 371 # 2-tuple of file handles being used for active writing.
372 372 self._writinghandles = None
373 373
374 374 self._loadindex()
375 375
376 376 self._concurrencychecker = concurrencychecker
377 377
378 378 def _init_opts(self):
379 379 """process options (from above/config) to set up the associated default revlog mode
380 380
381 381 These values might be affected when actually reading on-disk information.
382 382
383 383 The relevant values are returned for use in _loadindex().
384 384
385 385 * newversionflags:
386 386 version header to use if we need to create a new revlog
387 387
388 388 * mmapindexthreshold:
389 389 minimal index size at which to start using mmap
390 390
391 391 * force_nodemap:
392 392 force the usage of a "development" version of the nodemap code
393 393 """
394 394 mmapindexthreshold = None
395 395 opts = self.opener.options
396 396
397 397 if b'revlogv2' in opts:
398 398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
399 399 elif b'revlogv1' in opts:
400 400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
401 401 if b'generaldelta' in opts:
402 402 newversionflags |= FLAG_GENERALDELTA
403 403 elif b'revlogv0' in self.opener.options:
404 404 newversionflags = REVLOGV0
405 405 else:
406 406 newversionflags = REVLOG_DEFAULT_VERSION
407 407
408 408 if b'chunkcachesize' in opts:
409 409 self._chunkcachesize = opts[b'chunkcachesize']
410 410 if b'maxchainlen' in opts:
411 411 self._maxchainlen = opts[b'maxchainlen']
412 412 if b'deltabothparents' in opts:
413 413 self._deltabothparents = opts[b'deltabothparents']
414 414 self._lazydelta = bool(opts.get(b'lazydelta', True))
415 415 self._lazydeltabase = False
416 416 if self._lazydelta:
417 417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
418 418 if b'compengine' in opts:
419 419 self._compengine = opts[b'compengine']
420 420 if b'zlib.level' in opts:
421 421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
422 422 if b'zstd.level' in opts:
423 423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
424 424 if b'maxdeltachainspan' in opts:
425 425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
426 426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
427 427 mmapindexthreshold = opts[b'mmapindexthreshold']
428 428 self.hassidedata = bool(opts.get(b'side-data', False))
429 429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430 430 withsparseread = bool(opts.get(b'with-sparse-read', False))
431 431 # sparse-revlog forces sparse-read
432 432 self._withsparseread = self._sparserevlog or withsparseread
433 433 if b'sparse-read-density-threshold' in opts:
434 434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435 435 if b'sparse-read-min-gap-size' in opts:
436 436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
437 437 if opts.get(b'enableellipsis'):
438 438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439 439
440 440 # revlog v0 doesn't have flag processors
441 441 for flag, processor in pycompat.iteritems(
442 442 opts.get(b'flagprocessors', {})
443 443 ):
444 444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445 445
446 446 if self._chunkcachesize <= 0:
447 447 raise error.RevlogError(
448 448 _(b'revlog chunk cache size %r is not greater than 0')
449 449 % self._chunkcachesize
450 450 )
451 451 elif self._chunkcachesize & (self._chunkcachesize - 1):
452 452 raise error.RevlogError(
453 453 _(b'revlog chunk cache size %r is not a power of 2')
454 454 % self._chunkcachesize
455 455 )
456 456 force_nodemap = opts.get(b'devel-force-nodemap', False)
457 457 return newversionflags, mmapindexthreshold, force_nodemap
458 458
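# Editor's note on the power-of-2 check above: for a positive integer n,
# `n & (n - 1)` clears the lowest set bit, so it is zero exactly when n is a
# power of two. Illustration:
#
#   assert 65536 & (65536 - 1) == 0  # 2**16 passes the check
#   assert 65537 & (65537 - 1) != 0  # rejected with "not a power of 2"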
459 459 def _loadindex(self):
460 460
461 461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
462 462 indexdata = b''
463 463 self._initempty = True
464 464 try:
465 465 with self._indexfp() as f:
466 466 if (
467 467 mmapindexthreshold is not None
468 468 and self.opener.fstat(f).st_size >= mmapindexthreshold
469 469 ):
470 470 # TODO: should .close() to release resources without
471 471 # relying on Python GC
472 472 indexdata = util.buffer(util.mmapread(f))
473 473 else:
474 474 indexdata = f.read()
475 475 if len(indexdata) > 0:
476 476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
477 477 self._initempty = False
478 478 else:
479 479 versionflags = newversionflags
480 480 except IOError as inst:
481 481 if inst.errno != errno.ENOENT:
482 482 raise
483 483
484 484 versionflags = newversionflags
485 485
486 486 flags = self._format_flags = versionflags & ~0xFFFF
487 487 fmt = self._format_version = versionflags & 0xFFFF
488 488
489 489 if fmt == REVLOGV0:
490 490 if flags:
491 491 raise error.RevlogError(
492 492 _(b'unknown flags (%#04x) in version %d revlog %s')
493 493 % (flags >> 16, fmt, self.display_id)
494 494 )
495 495
496 496 self._inline = False
497 497 self._generaldelta = False
498 498
499 499 elif fmt == REVLOGV1:
500 500 if flags & ~REVLOGV1_FLAGS:
501 501 raise error.RevlogError(
502 502 _(b'unknown flags (%#04x) in version %d revlog %s')
503 503 % (flags >> 16, fmt, self.display_id)
504 504 )
505 505
506 506 self._inline = versionflags & FLAG_INLINE_DATA
507 507 self._generaldelta = versionflags & FLAG_GENERALDELTA
508 508
509 509 elif fmt == REVLOGV2:
510 510 if flags & ~REVLOGV2_FLAGS:
511 511 raise error.RevlogError(
512 512 _(b'unknown flags (%#04x) in version %d revlog %s')
513 513 % (flags >> 16, fmt, self.display_id)
514 514 )
515 515
516 516 # There is a bug in the transaction handling when going from an
517 517 # inline revlog to a separate index and data file. Turn it off until
518 518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
519 519 # See issue6485
520 520 self._inline = False
521 521 # generaldelta implied by version 2 revlogs.
522 522 self._generaldelta = True
523 523
524 524 else:
525 525 raise error.RevlogError(
526 526 _(b'unknown version (%d) in revlog %s') % (fmt, self.display_id)
527 527 )
528 528
529 529 self.nodeconstants = sha1nodeconstants
530 530 self.nullid = self.nodeconstants.nullid
531 531
532 532 # sparse-revlog can't be on without general-delta (issue6056)
533 533 if not self._generaldelta:
534 534 self._sparserevlog = False
535 535
536 536 self._storedeltachains = True
537 537
538 538 devel_nodemap = (
539 539 self.nodemap_file
540 540 and force_nodemap
541 541 and parse_index_v1_nodemap is not None
542 542 )
543 543
544 544 use_rust_index = False
545 545 if rustrevlog is not None:
546 546 if self.nodemap_file is not None:
547 547 use_rust_index = True
548 548 else:
549 549 use_rust_index = self.opener.options.get(b'rust.index')
550 550
551 551 self._parse_index = parse_index_v1
552 552 if self._format_version == REVLOGV0:
553 553 self._parse_index = revlogv0.parse_index_v0
554 554 elif fmt == REVLOGV2:
555 555 self._parse_index = parse_index_v2
556 556 elif devel_nodemap:
557 557 self._parse_index = parse_index_v1_nodemap
558 558 elif use_rust_index:
559 559 self._parse_index = parse_index_v1_mixed
560 560 try:
561 561 d = self._parse_index(indexdata, self._inline)
562 562 index, _chunkcache = d
563 563 use_nodemap = (
564 564 not self._inline
565 565 and self.nodemap_file is not None
566 566 and util.safehasattr(index, 'update_nodemap_data')
567 567 )
568 568 if use_nodemap:
569 569 nodemap_data = nodemaputil.persisted_data(self)
570 570 if nodemap_data is not None:
571 571 docket = nodemap_data[0]
572 572 if (
573 573 len(d[0]) > docket.tip_rev
574 574 and d[0][docket.tip_rev][7] == docket.tip_node
575 575 ):
576 576 # no changelog tampering
577 577 self._nodemap_docket = docket
578 578 index.update_nodemap_data(*nodemap_data)
579 579 except (ValueError, IndexError):
580 580 raise error.RevlogError(
581 581 _(b"index %s is corrupted") % self.display_id
582 582 )
583 583 self.index, self._chunkcache = d
584 584 if not self._chunkcache:
585 585 self._chunkclear()
586 586 # revnum -> (chain-length, sum-delta-length)
587 587 self._chaininfocache = util.lrucachedict(500)
588 588 # revlog header -> revlog compressor
589 589 self._decompressors = {}
590 590
591 591 @util.propertycache
592 592 def revlog_kind(self):
593 593 return self.target[0]
594 594
595 595 @util.propertycache
596 596 def display_id(self):
597 597 """The public facing "ID" of the revlog that we use in messages"""
598 598 # Maybe we should build a user facing representation of
599 599 # revlog.target instead of using `self.radix`
600 600 return self.radix
601 601
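# Editor's note: this property exists so user-facing messages can name the
# revlog consistently; e.g. _loadindex() above reports corruption as
# (verbatim from this module):
#
#   raise error.RevlogError(_(b"index %s is corrupted") % self.display_id)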
602 602 @util.propertycache
603 603 def _compressor(self):
604 604 engine = util.compengines[self._compengine]
605 605 return engine.revlogcompressor(self._compengineopts)
606 606
607 607 def _indexfp(self, mode=b'r'):
608 608 """file object for the revlog's index file"""
609 609 args = {'mode': mode}
610 610 if mode != b'r':
611 611 args['checkambig'] = self._checkambig
612 612 if mode == b'w':
613 613 args['atomictemp'] = True
614 614 return self.opener(self._indexfile, **args)
615 615
616 616 def _datafp(self, mode=b'r'):
617 617 """file object for the revlog's data file"""
618 618 return self.opener(self._datafile, mode=mode)
619 619
620 620 @contextlib.contextmanager
621 621 def _datareadfp(self, existingfp=None):
622 622 """file object suitable to read data"""
623 623 # Use explicit file handle, if given.
624 624 if existingfp is not None:
625 625 yield existingfp
626 626
627 627 # Use a file handle being actively used for writes, if available.
628 628 # There is some danger to doing this because reads will seek the
629 629 # file. However, _writeentry() performs a SEEK_END before all writes,
630 630 # so we should be safe.
631 631 elif self._writinghandles:
632 632 if self._inline:
633 633 yield self._writinghandles[0]
634 634 else:
635 635 yield self._writinghandles[1]
636 636
637 637 # Otherwise open a new file handle.
638 638 else:
639 639 if self._inline:
640 640 func = self._indexfp
641 641 else:
642 642 func = self._datafp
643 643 with func() as fp:
644 644 yield fp
645 645
646 646 def tiprev(self):
647 647 return len(self.index) - 1
648 648
649 649 def tip(self):
650 650 return self.node(self.tiprev())
651 651
652 652 def __contains__(self, rev):
653 653 return 0 <= rev < len(self)
654 654
655 655 def __len__(self):
656 656 return len(self.index)
657 657
658 658 def __iter__(self):
659 659 return iter(pycompat.xrange(len(self)))
660 660
661 661 def revs(self, start=0, stop=None):
662 662 """iterate over all revs in this revlog (from start to stop)"""
663 663 return storageutil.iterrevs(len(self), start=start, stop=stop)
664 664
665 665 @property
666 666 def nodemap(self):
667 667 msg = (
668 668 b"revlog.nodemap is deprecated, "
669 669 b"use revlog.index.[has_node|rev|get_rev]"
670 670 )
671 671 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
672 672 return self.index.nodemap
673 673
674 674 @property
675 675 def _nodecache(self):
676 676 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
677 677 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
678 678 return self.index.nodemap
679 679
680 680 def hasnode(self, node):
681 681 try:
682 682 self.rev(node)
683 683 return True
684 684 except KeyError:
685 685 return False
686 686
687 687 def candelta(self, baserev, rev):
688 688 """whether two revisions (baserev, rev) can be deltaed or not"""
689 689 # Disable delta if either rev requires a content-changing flag
690 690 # processor (e.g. LFS). This is because such a flag processor can alter
691 691 # the rawtext content that the delta will be based on, and two clients
692 692 # could have the same revlog node with different flags (i.e. different
693 693 # rawtext contents) and the delta could be incompatible.
694 694 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
695 695 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
696 696 ):
697 697 return False
698 698 return True
699 699
700 700 def update_caches(self, transaction):
701 701 if self.nodemap_file is not None:
702 702 if transaction is None:
703 703 nodemaputil.update_persistent_nodemap(self)
704 704 else:
705 705 nodemaputil.setup_persistent_nodemap(transaction, self)
706 706
707 707 def clearcaches(self):
708 708 self._revisioncache = None
709 709 self._chainbasecache.clear()
710 710 self._chunkcache = (0, b'')
711 711 self._pcache = {}
712 712 self._nodemap_docket = None
713 713 self.index.clearcaches()
714 714 # The Python code is the one responsible for validating the docket; we
715 715 # end up having to refresh it here.
716 716 use_nodemap = (
717 717 not self._inline
718 718 and self.nodemap_file is not None
719 719 and util.safehasattr(self.index, 'update_nodemap_data')
720 720 )
721 721 if use_nodemap:
722 722 nodemap_data = nodemaputil.persisted_data(self)
723 723 if nodemap_data is not None:
724 724 self._nodemap_docket = nodemap_data[0]
725 725 self.index.update_nodemap_data(*nodemap_data)
726 726
727 727 def rev(self, node):
728 728 try:
729 729 return self.index.rev(node)
730 730 except TypeError:
731 731 raise
732 732 except error.RevlogError:
733 733 # parsers.c radix tree lookup failed
734 734 if (
735 735 node == self.nodeconstants.wdirid
736 736 or node in self.nodeconstants.wdirfilenodeids
737 737 ):
738 738 raise error.WdirUnsupported
739 739 raise error.LookupError(node, self.display_id, _(b'no node'))
740 740
741 741 # Accessors for index entries.
742 742
743 743 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
744 744 # are flags.
745 745 def start(self, rev):
746 746 return int(self.index[rev][0] >> 16)
747 747
748 748 def flags(self, rev):
749 749 return self.index[rev][0] & 0xFFFF
750 750
751 751 def length(self, rev):
752 752 return self.index[rev][1]
753 753
754 754 def sidedata_length(self, rev):
755 755 if not self.hassidedata:
756 756 return 0
757 757 return self.index[rev][9]
758 758
759 759 def rawsize(self, rev):
760 760 """return the length of the uncompressed text for a given revision"""
761 761 l = self.index[rev][2]
762 762 if l >= 0:
763 763 return l
764 764
765 765 t = self.rawdata(rev)
766 766 return len(t)
767 767
768 768 def size(self, rev):
769 769 """length of non-raw text (processed by a "read" flag processor)"""
770 770 # fast path: if no "read" flag processor could change the content,
771 771 # size is rawsize. note: ELLIPSIS is known to not change the content.
772 772 flags = self.flags(rev)
773 773 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
774 774 return self.rawsize(rev)
775 775
776 776 return len(self.revision(rev, raw=False))
777 777
778 778 def chainbase(self, rev):
779 779 base = self._chainbasecache.get(rev)
780 780 if base is not None:
781 781 return base
782 782
783 783 index = self.index
784 784 iterrev = rev
785 785 base = index[iterrev][3]
786 786 while base != iterrev:
787 787 iterrev = base
788 788 base = index[iterrev][3]
789 789
790 790 self._chainbasecache[rev] = base
791 791 return base
792 792
793 793 def linkrev(self, rev):
794 794 return self.index[rev][4]
795 795
796 796 def parentrevs(self, rev):
797 797 try:
798 798 entry = self.index[rev]
799 799 except IndexError:
800 800 if rev == wdirrev:
801 801 raise error.WdirUnsupported
802 802 raise
803 803 if entry[5] == nullrev:
804 804 return entry[6], entry[5]
805 805 else:
806 806 return entry[5], entry[6]
807 807
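# Editor's illustration: the swap above normalizes a null first parent into
# the second slot, so callers can rely on p1 being non-null whenever any
# parent is:
#
#   p1, p2 = rl.parentrevs(r)
#   assert not (p1 == nullrev and p2 != nullrev)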
808 808 # fast parentrevs(rev) where rev isn't filtered
809 809 _uncheckedparentrevs = parentrevs
810 810
811 811 def node(self, rev):
812 812 try:
813 813 return self.index[rev][7]
814 814 except IndexError:
815 815 if rev == wdirrev:
816 816 raise error.WdirUnsupported
817 817 raise
818 818
819 819 # Derived from index values.
820 820
821 821 def end(self, rev):
822 822 return self.start(rev) + self.length(rev)
823 823
824 824 def parents(self, node):
825 825 i = self.index
826 826 d = i[self.rev(node)]
827 827 # inline node() to avoid function call overhead
828 828 if d[5] == nullrev:
829 829 return i[d[6]][7], i[d[5]][7]
830 830 else:
831 831 return i[d[5]][7], i[d[6]][7]
832 832
833 833 def chainlen(self, rev):
834 834 return self._chaininfo(rev)[0]
835 835
836 836 def _chaininfo(self, rev):
837 837 chaininfocache = self._chaininfocache
838 838 if rev in chaininfocache:
839 839 return chaininfocache[rev]
840 840 index = self.index
841 841 generaldelta = self._generaldelta
842 842 iterrev = rev
843 843 e = index[iterrev]
844 844 clen = 0
845 845 compresseddeltalen = 0
846 846 while iterrev != e[3]:
847 847 clen += 1
848 848 compresseddeltalen += e[1]
849 849 if generaldelta:
850 850 iterrev = e[3]
851 851 else:
852 852 iterrev -= 1
853 853 if iterrev in chaininfocache:
854 854 t = chaininfocache[iterrev]
855 855 clen += t[0]
856 856 compresseddeltalen += t[1]
857 857 break
858 858 e = index[iterrev]
859 859 else:
860 860 # Add text length of base since decompressing that also takes
861 861 # work. For cache hits the length is already included.
862 862 compresseddeltalen += e[1]
863 863 r = (clen, compresseddeltalen)
864 864 chaininfocache[rev] = r
865 865 return r
866 866
867 867 def _deltachain(self, rev, stoprev=None):
868 868 """Obtain the delta chain for a revision.
869 869
870 870 ``stoprev`` specifies a revision to stop at. If not specified, we
871 871 stop at the base of the chain.
872 872
873 873 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
874 874 revs in ascending order and ``stopped`` is a bool indicating whether
875 875 ``stoprev`` was hit.
876 876 """
877 877 # Try C implementation.
878 878 try:
879 879 return self.index.deltachain(rev, stoprev, self._generaldelta)
880 880 except AttributeError:
881 881 pass
882 882
883 883 chain = []
884 884
885 885 # Alias to prevent attribute lookup in tight loop.
886 886 index = self.index
887 887 generaldelta = self._generaldelta
888 888
889 889 iterrev = rev
890 890 e = index[iterrev]
891 891 while iterrev != e[3] and iterrev != stoprev:
892 892 chain.append(iterrev)
893 893 if generaldelta:
894 894 iterrev = e[3]
895 895 else:
896 896 iterrev -= 1
897 897 e = index[iterrev]
898 898
899 899 if iterrev == stoprev:
900 900 stopped = True
901 901 else:
902 902 chain.append(iterrev)
903 903 stopped = False
904 904
905 905 chain.reverse()
906 906 return chain, stopped
907 907
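# Editor's sketch (simplified; it ignores flag processors and censoring):
# a fulltext can be rebuilt from a delta chain by decompressing the base
# chunk and applying each following delta in order, which is roughly what
# the revision-reading path does internally:
#
#   chain, stopped = rl._deltachain(rev)
#   text = rl._chunk(chain[0])                 # fulltext of the chain base
#   text = mdiff.patches(text, [rl._chunk(r) for r in chain[1:]])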
908 908 def ancestors(self, revs, stoprev=0, inclusive=False):
909 909 """Generate the ancestors of 'revs' in reverse revision order.
910 910 Does not generate revs lower than stoprev.
911 911
912 912 See the documentation for ancestor.lazyancestors for more details."""
913 913
914 914 # first, make sure start revisions aren't filtered
915 915 revs = list(revs)
916 916 checkrev = self.node
917 917 for r in revs:
918 918 checkrev(r)
919 919 # and we're sure ancestors aren't filtered as well
920 920
921 921 if rustancestor is not None:
922 922 lazyancestors = rustancestor.LazyAncestors
923 923 arg = self.index
924 924 else:
925 925 lazyancestors = ancestor.lazyancestors
926 926 arg = self._uncheckedparentrevs
927 927 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
928 928
929 929 def descendants(self, revs):
930 930 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
931 931
932 932 def findcommonmissing(self, common=None, heads=None):
933 933 """Return a tuple of the ancestors of common and the ancestors of heads
934 934 that are not ancestors of common. In revset terminology, we return the
935 935 tuple:
936 936
937 937 ::common, (::heads) - (::common)
938 938
939 939 The list is sorted by revision number, meaning it is
940 940 topologically sorted.
941 941
942 942 'heads' and 'common' are both lists of node IDs. If heads is
943 943 not supplied, uses all of the revlog's heads. If common is not
944 944 supplied, uses nullid."""
945 945 if common is None:
946 946 common = [self.nullid]
947 947 if heads is None:
948 948 heads = self.heads()
949 949
950 950 common = [self.rev(n) for n in common]
951 951 heads = [self.rev(n) for n in heads]
952 952
953 953 # we want the ancestors, but inclusive
954 954 class lazyset(object):
955 955 def __init__(self, lazyvalues):
956 956 self.addedvalues = set()
957 957 self.lazyvalues = lazyvalues
958 958
959 959 def __contains__(self, value):
960 960 return value in self.addedvalues or value in self.lazyvalues
961 961
962 962 def __iter__(self):
963 963 added = self.addedvalues
964 964 for r in added:
965 965 yield r
966 966 for r in self.lazyvalues:
967 967 if r not in added:
968 968 yield r
969 969
970 970 def add(self, value):
971 971 self.addedvalues.add(value)
972 972
973 973 def update(self, values):
974 974 self.addedvalues.update(values)
975 975
976 976 has = lazyset(self.ancestors(common))
977 977 has.add(nullrev)
978 978 has.update(common)
979 979
980 980 # take all ancestors from heads that aren't in has
981 981 missing = set()
982 982 visit = collections.deque(r for r in heads if r not in has)
983 983 while visit:
984 984 r = visit.popleft()
985 985 if r in missing:
986 986 continue
987 987 else:
988 988 missing.add(r)
989 989 for p in self.parentrevs(r):
990 990 if p not in has:
991 991 visit.append(p)
992 992 missing = list(missing)
993 993 missing.sort()
994 994 return has, [self.node(miss) for miss in missing]
995 995
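# Editor's illustration: in a linear history 0-1-2-3 with
# common=[node(1)] and heads=[node(3)], `has` lazily covers {nullrev, 0, 1}
# and the returned missing list is [node(2), node(3)], sorted by revision
# number (hence topologically).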
996 996 def incrementalmissingrevs(self, common=None):
997 997 """Return an object that can be used to incrementally compute the
998 998 revision numbers of the ancestors of arbitrary sets that are not
999 999 ancestors of common. This is an ancestor.incrementalmissingancestors
1000 1000 object.
1001 1001
1002 1002 'common' is a list of revision numbers. If common is not supplied, uses
1003 1003 nullrev.
1004 1004 """
1005 1005 if common is None:
1006 1006 common = [nullrev]
1007 1007
1008 1008 if rustancestor is not None:
1009 1009 return rustancestor.MissingAncestors(self.index, common)
1010 1010 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1011 1011
1012 1012 def findmissingrevs(self, common=None, heads=None):
1013 1013 """Return the revision numbers of the ancestors of heads that
1014 1014 are not ancestors of common.
1015 1015
1016 1016 More specifically, return a list of revision numbers corresponding to
1017 1017 nodes N such that every N satisfies the following constraints:
1018 1018
1019 1019 1. N is an ancestor of some node in 'heads'
1020 1020 2. N is not an ancestor of any node in 'common'
1021 1021
1022 1022 The list is sorted by revision number, meaning it is
1023 1023 topologically sorted.
1024 1024
1025 1025 'heads' and 'common' are both lists of revision numbers. If heads is
1026 1026 not supplied, uses all of the revlog's heads. If common is not
1027 1027 supplied, uses nullid."""
1028 1028 if common is None:
1029 1029 common = [nullrev]
1030 1030 if heads is None:
1031 1031 heads = self.headrevs()
1032 1032
1033 1033 inc = self.incrementalmissingrevs(common=common)
1034 1034 return inc.missingancestors(heads)
1035 1035
1036 1036 def findmissing(self, common=None, heads=None):
1037 1037 """Return the ancestors of heads that are not ancestors of common.
1038 1038
1039 1039 More specifically, return a list of nodes N such that every N
1040 1040 satisfies the following constraints:
1041 1041
1042 1042 1. N is an ancestor of some node in 'heads'
1043 1043 2. N is not an ancestor of any node in 'common'
1044 1044
1045 1045 The list is sorted by revision number, meaning it is
1046 1046 topologically sorted.
1047 1047
1048 1048 'heads' and 'common' are both lists of node IDs. If heads is
1049 1049 not supplied, uses all of the revlog's heads. If common is not
1050 1050 supplied, uses nullid."""
1051 1051 if common is None:
1052 1052 common = [self.nullid]
1053 1053 if heads is None:
1054 1054 heads = self.heads()
1055 1055
1056 1056 common = [self.rev(n) for n in common]
1057 1057 heads = [self.rev(n) for n in heads]
1058 1058
1059 1059 inc = self.incrementalmissingrevs(common=common)
1060 1060 return [self.node(r) for r in inc.missingancestors(heads)]
1061 1061
1062 1062 def nodesbetween(self, roots=None, heads=None):
1063 1063 """Return a topological path from 'roots' to 'heads'.
1064 1064
1065 1065 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1066 1066 topologically sorted list of all nodes N that satisfy both of
1067 1067 these constraints:
1068 1068
1069 1069 1. N is a descendant of some node in 'roots'
1070 1070 2. N is an ancestor of some node in 'heads'
1071 1071
1072 1072 Every node is considered to be both a descendant and an ancestor
1073 1073 of itself, so every reachable node in 'roots' and 'heads' will be
1074 1074 included in 'nodes'.
1075 1075
1076 1076 'outroots' is the list of reachable nodes in 'roots', i.e., the
1077 1077 subset of 'roots' that is returned in 'nodes'. Likewise,
1078 1078 'outheads' is the subset of 'heads' that is also in 'nodes'.
1079 1079
1080 1080 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1081 1081 unspecified, uses nullid as the only root. If 'heads' is
1082 1082 unspecified, uses list of all of the revlog's heads."""
1083 1083 nonodes = ([], [], [])
1084 1084 if roots is not None:
1085 1085 roots = list(roots)
1086 1086 if not roots:
1087 1087 return nonodes
1088 1088 lowestrev = min([self.rev(n) for n in roots])
1089 1089 else:
1090 1090 roots = [self.nullid] # Everybody's a descendant of nullid
1091 1091 lowestrev = nullrev
1092 1092 if (lowestrev == nullrev) and (heads is None):
1093 1093 # We want _all_ the nodes!
1094 1094 return (
1095 1095 [self.node(r) for r in self],
1096 1096 [self.nullid],
1097 1097 list(self.heads()),
1098 1098 )
1099 1099 if heads is None:
1100 1100 # All nodes are ancestors, so the latest ancestor is the last
1101 1101 # node.
1102 1102 highestrev = len(self) - 1
1103 1103 # Set ancestors to None to signal that every node is an ancestor.
1104 1104 ancestors = None
1105 1105 # Set heads to an empty dictionary for later discovery of heads
1106 1106 heads = {}
1107 1107 else:
1108 1108 heads = list(heads)
1109 1109 if not heads:
1110 1110 return nonodes
1111 1111 ancestors = set()
1112 1112 # Turn heads into a dictionary so we can remove 'fake' heads.
1113 1113 # Also, later we will be using it to filter out the heads we can't
1114 1114 # find from roots.
1115 1115 heads = dict.fromkeys(heads, False)
1116 1116 # Start at the top and keep marking parents until we're done.
1117 1117 nodestotag = set(heads)
1118 1118 # Remember where the top was so we can use it as a limit later.
1119 1119 highestrev = max([self.rev(n) for n in nodestotag])
1120 1120 while nodestotag:
1121 1121 # grab a node to tag
1122 1122 n = nodestotag.pop()
1123 1123 # Never tag nullid
1124 1124 if n == self.nullid:
1125 1125 continue
1126 1126 # A node's revision number represents its place in a
1127 1127 # topologically sorted list of nodes.
1128 1128 r = self.rev(n)
1129 1129 if r >= lowestrev:
1130 1130 if n not in ancestors:
1131 1131 # If we are possibly a descendant of one of the roots
1132 1132 # and we haven't already been marked as an ancestor
1133 1133 ancestors.add(n) # Mark as ancestor
1134 1134 # Add non-nullid parents to list of nodes to tag.
1135 1135 nodestotag.update(
1136 1136 [p for p in self.parents(n) if p != self.nullid]
1137 1137 )
1138 1138 elif n in heads: # We've seen it before, is it a fake head?
1139 1139 # So it is, real heads should not be the ancestors of
1140 1140 # any other heads.
1141 1141 heads.pop(n)
1142 1142 if not ancestors:
1143 1143 return nonodes
1144 1144 # Now that we have our set of ancestors, we want to remove any
1145 1145 # roots that are not ancestors.
1146 1146
1147 1147 # If one of the roots was nullid, everything is included anyway.
1148 1148 if lowestrev > nullrev:
1149 1149 # But, since we weren't, let's recompute the lowest rev to not
1150 1150 # include roots that aren't ancestors.
1151 1151
1152 1152 # Filter out roots that aren't ancestors of heads
1153 1153 roots = [root for root in roots if root in ancestors]
1154 1154 # Recompute the lowest revision
1155 1155 if roots:
1156 1156 lowestrev = min([self.rev(root) for root in roots])
1157 1157 else:
1158 1158 # No more roots? Return empty list
1159 1159 return nonodes
1160 1160 else:
1161 1161 # We are descending from nullid, and don't need to care about
1162 1162 # any other roots.
1163 1163 lowestrev = nullrev
1164 1164 roots = [self.nullid]
1165 1165 # Transform our roots list into a set.
1166 1166 descendants = set(roots)
1167 1167 # Also, keep the original roots so we can filter out roots that aren't
1168 1168 # 'real' roots (i.e. are descended from other roots).
1169 1169 roots = descendants.copy()
1170 1170 # Our topologically sorted list of output nodes.
1171 1171 orderedout = []
1172 1172 # Don't start at nullid since we don't want nullid in our output list,
1173 1173 # and if nullid shows up in descendants, empty parents will look like
1174 1174 # they're descendants.
1175 1175 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1176 1176 n = self.node(r)
1177 1177 isdescendant = False
1178 1178 if lowestrev == nullrev: # Everybody is a descendant of nullid
1179 1179 isdescendant = True
1180 1180 elif n in descendants:
1181 1181 # n is already a descendant
1182 1182 isdescendant = True
1183 1183 # This check only needs to be done here because all the roots
1184 1184 # will start being marked as descendants before the loop.
1185 1185 if n in roots:
1186 1186 # If n was a root, check if it's a 'real' root.
1187 1187 p = tuple(self.parents(n))
1188 1188 # If any of its parents are descendants, it's not a root.
1189 1189 if (p[0] in descendants) or (p[1] in descendants):
1190 1190 roots.remove(n)
1191 1191 else:
1192 1192 p = tuple(self.parents(n))
1193 1193 # A node is a descendant if either of its parents is a
1194 1194 # descendant. (We seeded the descendants set with the roots
1195 1195 # up there, remember?)
1196 1196 if (p[0] in descendants) or (p[1] in descendants):
1197 1197 descendants.add(n)
1198 1198 isdescendant = True
1199 1199 if isdescendant and ((ancestors is None) or (n in ancestors)):
1200 1200 # Only include nodes that are both descendants and ancestors.
1201 1201 orderedout.append(n)
1202 1202 if (ancestors is not None) and (n in heads):
1203 1203 # We're trying to figure out which heads are reachable
1204 1204 # from roots.
1205 1205 # Mark this head as having been reached
1206 1206 heads[n] = True
1207 1207 elif ancestors is None:
1208 1208 # Otherwise, we're trying to discover the heads.
1209 1209 # Assume this is a head because if it isn't, the next step
1210 1210 # will eventually remove it.
1211 1211 heads[n] = True
1212 1212 # But, obviously its parents aren't.
1213 1213 for p in self.parents(n):
1214 1214 heads.pop(p, None)
1215 1215 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1216 1216 roots = list(roots)
1217 1217 assert orderedout
1218 1218 assert roots
1219 1219 assert heads
1220 1220 return (orderedout, roots, heads)
1221 1221
1222 1222 def headrevs(self, revs=None):
1223 1223 if revs is None:
1224 1224 try:
1225 1225 return self.index.headrevs()
1226 1226 except AttributeError:
1227 1227 return self._headrevs()
1228 1228 if rustdagop is not None:
1229 1229 return rustdagop.headrevs(self.index, revs)
1230 1230 return dagop.headrevs(revs, self._uncheckedparentrevs)
1231 1231
1232 1232 def computephases(self, roots):
1233 1233 return self.index.computephasesmapsets(roots)
1234 1234
1235 1235 def _headrevs(self):
1236 1236 count = len(self)
1237 1237 if not count:
1238 1238 return [nullrev]
1239 1239 # we won't iterate over filtered revs, so nobody is a head at the start
1240 1240 ishead = [0] * (count + 1)
1241 1241 index = self.index
1242 1242 for r in self:
1243 1243 ishead[r] = 1 # I may be a head
1244 1244 e = index[r]
1245 1245 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1246 1246 return [r for r, val in enumerate(ishead) if val]
1247 1247
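# Editor's illustration of _headrevs(): every revision starts out flagged
# as a potential head and each entry then clears the flags of its parents,
# so in a linear history 0-1-2 only ishead[2] survives and [2] is returned.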
1248 1248 def heads(self, start=None, stop=None):
1249 1249 """return the list of all nodes that have no children
1250 1250
1251 1251 if start is specified, only heads that are descendants of
1252 1252 start will be returned
1253 1253 if stop is specified, it will consider all the revs from stop
1254 1254 as if they had no children
1255 1255 """
1256 1256 if start is None and stop is None:
1257 1257 if not len(self):
1258 1258 return [self.nullid]
1259 1259 return [self.node(r) for r in self.headrevs()]
1260 1260
1261 1261 if start is None:
1262 1262 start = nullrev
1263 1263 else:
1264 1264 start = self.rev(start)
1265 1265
1266 1266 stoprevs = {self.rev(n) for n in stop or []}
1267 1267
1268 1268 revs = dagop.headrevssubset(
1269 1269 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1270 1270 )
1271 1271
1272 1272 return [self.node(rev) for rev in revs]
1273 1273
1274 1274 def children(self, node):
1275 1275 """find the children of a given node"""
1276 1276 c = []
1277 1277 p = self.rev(node)
1278 1278 for r in self.revs(start=p + 1):
1279 1279 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1280 1280 if prevs:
1281 1281 for pr in prevs:
1282 1282 if pr == p:
1283 1283 c.append(self.node(r))
1284 1284 elif p == nullrev:
1285 1285 c.append(self.node(r))
1286 1286 return c
1287 1287
1288 1288 def commonancestorsheads(self, a, b):
1289 1289 """calculate all the heads of the common ancestors of nodes a and b"""
1290 1290 a, b = self.rev(a), self.rev(b)
1291 1291 ancs = self._commonancestorsheads(a, b)
1292 1292 return pycompat.maplist(self.node, ancs)
1293 1293
1294 1294 def _commonancestorsheads(self, *revs):
1295 1295 """calculate all the heads of the common ancestors of revs"""
1296 1296 try:
1297 1297 ancs = self.index.commonancestorsheads(*revs)
1298 1298 except (AttributeError, OverflowError): # C implementation failed
1299 1299 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1300 1300 return ancs
1301 1301
1302 1302 def isancestor(self, a, b):
1303 1303 """return True if node a is an ancestor of node b
1304 1304
1305 1305 A revision is considered an ancestor of itself."""
1306 1306 a, b = self.rev(a), self.rev(b)
1307 1307 return self.isancestorrev(a, b)
1308 1308
1309 1309 def isancestorrev(self, a, b):
1310 1310 """return True if revision a is an ancestor of revision b
1311 1311
1312 1312 A revision is considered an ancestor of itself.
1313 1313
1314 1314 The implementation of this is trivial but the use of
1315 1315 reachableroots is not."""
1316 1316 if a == nullrev:
1317 1317 return True
1318 1318 elif a == b:
1319 1319 return True
1320 1320 elif a > b:
1321 1321 return False
1322 1322 return bool(self.reachableroots(a, [b], [a], includepath=False))
1323 1323
1324 1324 def reachableroots(self, minroot, heads, roots, includepath=False):
1325 1325 """return (heads(::(<roots> and <roots>::<heads>)))
1326 1326
1327 1327 If includepath is True, return (<roots>::<heads>)."""
1328 1328 try:
1329 1329 return self.index.reachableroots2(
1330 1330 minroot, heads, roots, includepath
1331 1331 )
1332 1332 except AttributeError:
1333 1333 return dagop._reachablerootspure(
1334 1334 self.parentrevs, minroot, roots, heads, includepath
1335 1335 )
1336 1336
1337 1337 def ancestor(self, a, b):
1338 1338 """calculate the "best" common ancestor of nodes a and b"""
1339 1339
1340 1340 a, b = self.rev(a), self.rev(b)
1341 1341 try:
1342 1342 ancs = self.index.ancestors(a, b)
1343 1343 except (AttributeError, OverflowError):
1344 1344 ancs = ancestor.ancestors(self.parentrevs, a, b)
1345 1345 if ancs:
1346 1346 # choose a consistent winner when there's a tie
1347 1347 return min(map(self.node, ancs))
1348 1348 return self.nullid
1349 1349
1350 1350 def _match(self, id):
1351 1351 if isinstance(id, int):
1352 1352 # rev
1353 1353 return self.node(id)
1354 1354 if len(id) == self.nodeconstants.nodelen:
1355 1355 # possibly a binary node
1356 1356 # odds of a binary node being all hex in ASCII are 1 in 10**25
1357 1357 try:
1358 1358 node = id
1359 1359 self.rev(node) # quick search the index
1360 1360 return node
1361 1361 except error.LookupError:
1362 1362 pass # may be partial hex id
1363 1363 try:
1364 1364 # str(rev)
1365 1365 rev = int(id)
1366 1366 if b"%d" % rev != id:
1367 1367 raise ValueError
1368 1368 if rev < 0:
1369 1369 rev = len(self) + rev
1370 1370 if rev < 0 or rev >= len(self):
1371 1371 raise ValueError
1372 1372 return self.node(rev)
1373 1373 except (ValueError, OverflowError):
1374 1374 pass
1375 1375 if len(id) == 2 * self.nodeconstants.nodelen:
1376 1376 try:
1377 1377 # a full hex nodeid?
1378 1378 node = bin(id)
1379 1379 self.rev(node)
1380 1380 return node
1381 1381 except (TypeError, error.LookupError):
1382 1382 pass
1383 1383
1384 1384 def _partialmatch(self, id):
1385 1385 # we don't care about wdirfilenodeids as they should always be full hashes
1386 1386 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1387 1387 try:
1388 1388 partial = self.index.partialmatch(id)
1389 1389 if partial and self.hasnode(partial):
1390 1390 if maybewdir:
1391 1391 # single 'ff...' match in radix tree, ambiguous with wdir
1392 1392 raise error.RevlogError
1393 1393 return partial
1394 1394 if maybewdir:
1395 1395 # no 'ff...' match in radix tree, wdir identified
1396 1396 raise error.WdirUnsupported
1397 1397 return None
1398 1398 except error.RevlogError:
1399 1399 # parsers.c radix tree lookup gave multiple matches
1400 1400 # fast path: for unfiltered changelog, radix tree is accurate
1401 1401 if not getattr(self, 'filteredrevs', None):
1402 1402 raise error.AmbiguousPrefixLookupError(
1403 1403 id, self.display_id, _(b'ambiguous identifier')
1404 1404 )
1405 1405 # fall through to slow path that filters hidden revisions
1406 1406 except (AttributeError, ValueError):
1407 1407 # we are pure python, or key was too short to search radix tree
1408 1408 pass
1409 1409
1410 1410 if id in self._pcache:
1411 1411 return self._pcache[id]
1412 1412
1413 1413 if len(id) <= 40:
1414 1414 try:
1415 1415 # hex(node)[:...]
1416 1416 l = len(id) // 2 # grab an even number of digits
1417 1417 prefix = bin(id[: l * 2])
1418 1418 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1419 1419 nl = [
1420 1420 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1421 1421 ]
1422 1422 if self.nodeconstants.nullhex.startswith(id):
1423 1423 nl.append(self.nullid)
1424 1424 if len(nl) > 0:
1425 1425 if len(nl) == 1 and not maybewdir:
1426 1426 self._pcache[id] = nl[0]
1427 1427 return nl[0]
1428 1428 raise error.AmbiguousPrefixLookupError(
1429 1429 id, self.display_id, _(b'ambiguous identifier')
1430 1430 )
1431 1431 if maybewdir:
1432 1432 raise error.WdirUnsupported
1433 1433 return None
1434 1434 except TypeError:
1435 1435 pass
1436 1436
1437 1437 def lookup(self, id):
1438 1438 """locate a node based on:
1439 1439 - revision number or str(revision number)
1440 1440 - nodeid or subset of hex nodeid
1441 1441 """
1442 1442 n = self._match(id)
1443 1443 if n is not None:
1444 1444 return n
1445 1445 n = self._partialmatch(id)
1446 1446 if n:
1447 1447 return n
1448 1448
1449 1449 raise error.LookupError(id, self.display_id, _(b'no match found'))
1450 1450
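# Hypothetical usage sketch (editor's addition; the prefix below is made
# up and only resolves if it is unambiguous in the index):
#
#   rl.lookup(b'0')        # revision number, as a bytestring
#   rl.lookup(b'1e4e1b')   # hex nodeid prefix
#   rl.lookup(node)        # full binary nodeid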
1451 1451 def shortest(self, node, minlength=1):
1452 1452 """Find the shortest unambiguous prefix that matches node."""
1453 1453
1454 1454 def isvalid(prefix):
1455 1455 try:
1456 1456 matchednode = self._partialmatch(prefix)
1457 1457 except error.AmbiguousPrefixLookupError:
1458 1458 return False
1459 1459 except error.WdirUnsupported:
1460 1460 # single 'ff...' match
1461 1461 return True
1462 1462 if matchednode is None:
1463 1463 raise error.LookupError(node, self.display_id, _(b'no node'))
1464 1464 return True
1465 1465
1466 1466 def maybewdir(prefix):
1467 1467 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1468 1468
1469 1469 hexnode = hex(node)
1470 1470
1471 1471 def disambiguate(hexnode, minlength):
1472 1472 """Disambiguate against wdirid."""
1473 1473 for length in range(minlength, len(hexnode) + 1):
1474 1474 prefix = hexnode[:length]
1475 1475 if not maybewdir(prefix):
1476 1476 return prefix
1477 1477
1478 1478 if not getattr(self, 'filteredrevs', None):
1479 1479 try:
1480 1480 length = max(self.index.shortest(node), minlength)
1481 1481 return disambiguate(hexnode, length)
1482 1482 except error.RevlogError:
1483 1483 if node != self.nodeconstants.wdirid:
1484 1484 raise error.LookupError(
1485 1485 node, self.display_id, _(b'no node')
1486 1486 )
1487 1487 except AttributeError:
1488 1488 # Fall through to pure code
1489 1489 pass
1490 1490
1491 1491 if node == self.nodeconstants.wdirid:
1492 1492 for length in range(minlength, len(hexnode) + 1):
1493 1493 prefix = hexnode[:length]
1494 1494 if isvalid(prefix):
1495 1495 return prefix
1496 1496
1497 1497 for length in range(minlength, len(hexnode) + 1):
1498 1498 prefix = hexnode[:length]
1499 1499 if isvalid(prefix):
1500 1500 return disambiguate(hexnode, length)
1501 1501
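# Editor's illustration: shortest() grows the prefix until it is both
# unambiguous among stored nodes and distinguishable from the all-'f' wdir
# pseudo-identifier, so a 1-hex-digit collision yields a longer answer,
# e.g. b'1e4' rather than b'1' (hypothetical values).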
1502 1502 def cmp(self, node, text):
1503 1503 """compare text with a given file revision
1504 1504
1505 1505 returns True if text is different from what is stored.
1506 1506 """
1507 1507 p1, p2 = self.parents(node)
1508 1508 return storageutil.hashrevisionsha1(text, p1, p2) != node
1509 1509
1510 1510 def _cachesegment(self, offset, data):
1511 1511 """Add a segment to the revlog cache.
1512 1512
1513 1513 Accepts an absolute offset and the data that is at that location.
1514 1514 """
1515 1515 o, d = self._chunkcache
1516 1516 # try to add to existing cache
1517 1517 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1518 1518 self._chunkcache = o, d + data
1519 1519 else:
1520 1520 self._chunkcache = offset, data
1521 1521
1522 1522 def _readsegment(self, offset, length, df=None):
1523 1523 """Load a segment of raw data from the revlog.
1524 1524
1525 1525 Accepts an absolute offset, length to read, and an optional existing
1526 1526 file handle to read from.
1527 1527
1528 1528 If an existing file handle is passed, it will be seeked and the
1529 1529 original seek position will NOT be restored.
1530 1530
1531 1531 Returns a str or buffer of raw byte data.
1532 1532
1533 1533 Raises if the requested number of bytes could not be read.
1534 1534 """
1535 1535 # Cache data both forward and backward around the requested
1536 1536 # data, in a fixed size window. This helps speed up operations
1537 1537 # involving reading the revlog backwards.
1538 1538 cachesize = self._chunkcachesize
1539 1539 realoffset = offset & ~(cachesize - 1)
1540 1540 reallength = (
1541 1541 (offset + length + cachesize) & ~(cachesize - 1)
1542 1542 ) - realoffset
1543 1543 with self._datareadfp(df) as df:
1544 1544 df.seek(realoffset)
1545 1545 d = df.read(reallength)
1546 1546
1547 1547 self._cachesegment(realoffset, d)
1548 1548 if offset != realoffset or reallength != length:
1549 1549 startoffset = offset - realoffset
1550 1550 if len(d) - startoffset < length:
1551 1551 raise error.RevlogError(
1552 1552 _(
1553 1553 b'partial read of revlog %s; expected %d bytes from '
1554 1554 b'offset %d, got %d'
1555 1555 )
1556 1556 % (
1557 1557 self._indexfile if self._inline else self._datafile,
1558 1558 length,
1559 1559 realoffset,
1560 1560 len(d) - startoffset,
1561 1561 )
1562 1562 )
1563 1563
1564 1564 return util.buffer(d, startoffset, length)
1565 1565
1566 1566 if len(d) < length:
1567 1567 raise error.RevlogError(
1568 1568 _(
1569 1569 b'partial read of revlog %s; expected %d bytes from offset '
1570 1570 b'%d, got %d'
1571 1571 )
1572 1572 % (
1573 1573 self._indexfile if self._inline else self._datafile,
1574 1574 length,
1575 1575 offset,
1576 1576 len(d),
1577 1577 )
1578 1578 )
1579 1579
1580 1580 return d
1581 1581
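# Editor's illustration of the window rounding above, using the default
# 64 KiB chunk cache size: a 100-byte read at offset 70000 is widened to
# the enclosing aligned window before hitting the file:
#
#   cachesize = 65536
#   realoffset = 70000 & ~(cachesize - 1)                            # 65536
#   reallength = ((70000 + 100 + cachesize) & ~(cachesize - 1)) - realoffset
#   assert (realoffset, reallength) == (65536, 65536)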
1582 1582 def _getsegment(self, offset, length, df=None):
1583 1583 """Obtain a segment of raw data from the revlog.
1584 1584
1585 1585 Accepts an absolute offset, length of bytes to obtain, and an
1586 1586 optional file handle to the already-opened revlog. If the file
1587 1587 handle is used, its original seek position will not be preserved.
1588 1588
1589 1589 Requests for data may be returned from a cache.
1590 1590
1591 1591 Returns a str or a buffer instance of raw byte data.
1592 1592 """
1593 1593 o, d = self._chunkcache
1594 1594 l = len(d)
1595 1595
1596 1596 # is it in the cache?
1597 1597 cachestart = offset - o
1598 1598 cacheend = cachestart + length
1599 1599 if cachestart >= 0 and cacheend <= l:
1600 1600 if cachestart == 0 and cacheend == l:
1601 1601 return d # avoid a copy
1602 1602 return util.buffer(d, cachestart, cacheend - cachestart)
1603 1603
1604 1604 return self._readsegment(offset, length, df=df)
1605 1605
1606 1606 def _getsegmentforrevs(self, startrev, endrev, df=None):
1607 1607 """Obtain a segment of raw data corresponding to a range of revisions.
1608 1608
1609 1609 Accepts the start and end revisions and an optional already-open
1610 1610 file handle to be used for reading. If the file handle is used, its
1611 1611 seek position will not be preserved.
1612 1612
1613 1613 Requests for data may be satisfied by a cache.
1614 1614
1615 1615 Returns a 2-tuple of (offset, data) for the requested range of
1616 1616 revisions. Offset is the integer offset from the beginning of the
1617 1617 revlog and data is a str or buffer of the raw byte data.
1618 1618
1619 1619 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1620 1620 to determine where each revision's data begins and ends.
1621 1621 """
1622 1622 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1623 1623 # (functions are expensive).
1624 1624 index = self.index
1625 1625 istart = index[startrev]
1626 1626 start = int(istart[0] >> 16)
1627 1627 if startrev == endrev:
1628 1628 end = start + istart[1]
1629 1629 else:
1630 1630 iend = index[endrev]
1631 1631 end = int(iend[0] >> 16) + iend[1]
1632 1632
1633 1633 if self._inline:
1634 1634 start += (startrev + 1) * self.index.entry_size
1635 1635 end += (endrev + 1) * self.index.entry_size
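# In an inline revlog, index entries and chunk data are interleaved in
# the same file: rev r's data starts only after (r + 1) index entries,
# hence the adjustments above (illustrative: with a 64-byte entry
# size, rev 0's data begins at byte 64 of the file).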
1636 1636 length = end - start
1637 1637
1638 1638 return start, self._getsegment(start, length, df=df)
1639 1639
1640 1640 def _chunk(self, rev, df=None):
1641 1641 """Obtain a single decompressed chunk for a revision.
1642 1642
1643 1643 Accepts an integer revision and an optional already-open file handle
1644 1644 to be used for reading. If used, the seek position of the file will not
1645 1645 be preserved.
1646 1646
1647 1647 Returns a str holding uncompressed data for the requested revision.
1648 1648 """
1649 1649 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1650 1650
1651 1651 def _chunks(self, revs, df=None, targetsize=None):
1652 1652 """Obtain decompressed chunks for the specified revisions.
1653 1653
1654 1654 Accepts an iterable of numeric revisions that are assumed to be in
1655 1655 ascending order. Also accepts an optional already-open file handle
1656 1656 to be used for reading. If used, the seek position of the file will
1657 1657 not be preserved.
1658 1658
1659 1659 This function is similar to calling ``self._chunk()`` multiple times,
1660 1660 but is faster.
1661 1661
1662 1662 Returns a list with decompressed data for each requested revision.
1663 1663 """
1664 1664 if not revs:
1665 1665 return []
1666 1666 start = self.start
1667 1667 length = self.length
1668 1668 inline = self._inline
1669 1669 iosize = self.index.entry_size
1670 1670 buffer = util.buffer
1671 1671
1672 1672 l = []
1673 1673 ladd = l.append
1674 1674
1675 1675 if not self._withsparseread:
1676 1676 slicedchunks = (revs,)
1677 1677 else:
1678 1678 slicedchunks = deltautil.slicechunk(
1679 1679 self, revs, targetsize=targetsize
1680 1680 )
1681 1681
1682 1682 for revschunk in slicedchunks:
1683 1683 firstrev = revschunk[0]
1684 1684 # Skip trailing revisions with empty diff
1685 1685 for lastrev in revschunk[::-1]:
1686 1686 if length(lastrev) != 0:
1687 1687 break
1688 1688
1689 1689 try:
1690 1690 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1691 1691 except OverflowError:
1692 1692 # issue4215 - we can't cache a run of chunks greater than
1693 1693 # 2G on Windows
1694 1694 return [self._chunk(rev, df=df) for rev in revschunk]
1695 1695
1696 1696 decomp = self.decompress
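# `data` is one contiguous segment covering every revision in this
# slice; chunkstart is an absolute revlog offset, so subtracting the
# segment's base `offset` yields the chunk's position inside `data`.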
1697 1697 for rev in revschunk:
1698 1698 chunkstart = start(rev)
1699 1699 if inline:
1700 1700 chunkstart += (rev + 1) * iosize
1701 1701 chunklength = length(rev)
1702 1702 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1703 1703
1704 1704 return l
1705 1705
1706 1706 def _chunkclear(self):
1707 1707 """Clear the raw chunk cache."""
1708 1708 self._chunkcache = (0, b'')
1709 1709
1710 1710 def deltaparent(self, rev):
1711 1711 """return deltaparent of the given revision"""
1712 1712 base = self.index[rev][3]
1713 1713 if base == rev:
1714 1714 return nullrev
1715 1715 elif self._generaldelta:
1716 1716 return base
1717 1717 else:
1718 1718 return rev - 1
1719 1719
1720 1720 def issnapshot(self, rev):
1721 1721 """tells whether rev is a snapshot"""
1722 1722 if not self._sparserevlog:
1723 1723 return self.deltaparent(rev) == nullrev
1724 1724 elif util.safehasattr(self.index, b'issnapshot'):
1725 1725 # directly assign the method to cache the testing and access
1726 1726 self.issnapshot = self.index.issnapshot
1727 1727 return self.issnapshot(rev)
1728 1728 if rev == nullrev:
1729 1729 return True
1730 1730 entry = self.index[rev]
1731 1731 base = entry[3]
1732 1732 if base == rev:
1733 1733 return True
1734 1734 if base == nullrev:
1735 1735 return True
1736 1736 p1 = entry[5]
1737 1737 p2 = entry[6]
1738 1738 if base == p1 or base == p2:
1739 1739 return False
1740 1740 return self.issnapshot(base)
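# To summarize the fallback above: a revision is a snapshot when it is
# stored as full text (base == rev), as a diff against nullrev, or as
# a diff against another snapshot that is not one of its own parents
# (an "intermediate snapshot" in sparse-revlog terms).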
1741 1741
1742 1742 def snapshotdepth(self, rev):
1743 1743 """number of snapshot in the chain before this one"""
1744 1744 if not self.issnapshot(rev):
1745 1745 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1746 1746 return len(self._deltachain(rev)[0]) - 1
1747 1747
1748 1748 def revdiff(self, rev1, rev2):
1749 1749 """return or calculate a delta between two revisions
1750 1750
1751 1751 The delta calculated is in binary form and is intended to be written to
1752 1752 revlog data directly. So this function needs raw revision data.
1753 1753 """
1754 1754 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1755 1755 return bytes(self._chunk(rev2))
1756 1756
1757 1757 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1758 1758
1759 1759 def _processflags(self, text, flags, operation, raw=False):
1760 1760 """deprecated entry point to access flag processors"""
1761 1761 msg = b'_processflag(...) use the specialized variant'
1762 1762 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1763 1763 if raw:
1764 1764 return text, flagutil.processflagsraw(self, text, flags)
1765 1765 elif operation == b'read':
1766 1766 return flagutil.processflagsread(self, text, flags)
1767 1767 else: # write operation
1768 1768 return flagutil.processflagswrite(self, text, flags)
1769 1769
1770 1770 def revision(self, nodeorrev, _df=None, raw=False):
1771 1771 """return an uncompressed revision of a given node or revision
1772 1772 number.
1773 1773
1774 1774 _df - an existing file handle to read from. (internal-only)
1775 1775 raw - an optional argument specifying if the revision data is to be
1776 1776 treated as raw data when applying flag transforms. 'raw' should be set
1777 1777 to True when generating changegroups or in debug commands.
1778 1778 """
1779 1779 if raw:
1780 1780 msg = (
1781 1781 b'revlog.revision(..., raw=True) is deprecated, '
1782 1782 b'use revlog.rawdata(...)'
1783 1783 )
1784 1784 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1785 1785 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1786 1786
1787 1787 def sidedata(self, nodeorrev, _df=None):
1788 1788 """a map of extra data related to the changeset but not part of the hash
1789 1789
1790 1790 This function currently returns a dictionary. However, a more advanced
1791 1791 mapping object will likely be used in the future for more
1792 1792 efficient/lazy code.
1793 1793 """
1794 1794 return self._revisiondata(nodeorrev, _df)[1]
1795 1795
1796 1796 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1797 1797 # deal with <nodeorrev> argument type
1798 1798 if isinstance(nodeorrev, int):
1799 1799 rev = nodeorrev
1800 1800 node = self.node(rev)
1801 1801 else:
1802 1802 node = nodeorrev
1803 1803 rev = None
1804 1804
1805 1805 # fast path the special `nullid` rev
1806 1806 if node == self.nullid:
1807 1807 return b"", {}
1808 1808
1809 1809 # ``rawtext`` is the text as stored inside the revlog. Might be the
1810 1810 # revision or might need to be processed to retrieve the revision.
1811 1811 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1812 1812
1813 1813 if self.hassidedata:
1814 1814 if rev is None:
1815 1815 rev = self.rev(node)
1816 1816 sidedata = self._sidedata(rev)
1817 1817 else:
1818 1818 sidedata = {}
1819 1819
1820 1820 if raw and validated:
1821 1821 # if we don't want to process the raw text and that raw
1822 1822 # text is cached, we can exit early.
1823 1823 return rawtext, sidedata
1824 1824 if rev is None:
1825 1825 rev = self.rev(node)
1826 1826 # the revlog's flags for this revision
1827 1827 # (they usually alter its state or content)
1828 1828 flags = self.flags(rev)
1829 1829
1830 1830 if validated and flags == REVIDX_DEFAULT_FLAGS:
1831 1831 # no extra flags set, no flag processor runs, text = rawtext
1832 1832 return rawtext, sidedata
1833 1833
1834 1834 if raw:
1835 1835 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1836 1836 text = rawtext
1837 1837 else:
1838 1838 r = flagutil.processflagsread(self, rawtext, flags)
1839 1839 text, validatehash = r
1840 1840 if validatehash:
1841 1841 self.checkhash(text, node, rev=rev)
1842 1842 if not validated:
1843 1843 self._revisioncache = (node, rev, rawtext)
1844 1844
1845 1845 return text, sidedata
1846 1846
1847 1847 def _rawtext(self, node, rev, _df=None):
1848 1848 """return the possibly unvalidated rawtext for a revision
1849 1849
1850 1850 returns (rev, rawtext, validated)
1851 1851 """
1852 1852
1853 1853 # revision in the cache (could be useful to apply delta)
1854 1854 cachedrev = None
1855 1855 # An intermediate text to apply deltas to
1856 1856 basetext = None
1857 1857
1858 1858 # Check if we have the entry in cache
1859 1859 # The cache entry looks like (node, rev, rawtext)
1860 1860 if self._revisioncache:
1861 1861 if self._revisioncache[0] == node:
1862 1862 return (rev, self._revisioncache[2], True)
1863 1863 cachedrev = self._revisioncache[1]
1864 1864
1865 1865 if rev is None:
1866 1866 rev = self.rev(node)
1867 1867
1868 1868 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1869 1869 if stopped:
1870 1870 basetext = self._revisioncache[2]
1871 1871
1872 1872 # drop cache to save memory; the caller is expected to
1873 1873 # update self._revisioncache after validating the text
1874 1874 self._revisioncache = None
1875 1875
1876 1876 targetsize = None
1877 1877 rawsize = self.index[rev][2]
1878 1878 if 0 <= rawsize:
1879 1879 targetsize = 4 * rawsize
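# Heuristic passed down to deltautil.slicechunk: bound each read to
# roughly 4x the size of the text we will materialize, limiting read
# amplification on sparse delta chains (the factor is a tuning choice,
# not a hard guarantee).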
1880 1880
1881 1881 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1882 1882 if basetext is None:
1883 1883 basetext = bytes(bins[0])
1884 1884 bins = bins[1:]
1885 1885
1886 1886 rawtext = mdiff.patches(basetext, bins)
1887 1887 del basetext # let us have a chance to free memory early
1888 1888 return (rev, rawtext, False)
1889 1889
1890 1890 def _sidedata(self, rev):
1891 1891 """Return the sidedata for a given revision number."""
1892 1892 index_entry = self.index[rev]
1893 1893 sidedata_offset = index_entry[8]
1894 1894 sidedata_size = index_entry[9]
1895 1895
1896 1896 if self._inline:
1897 1897 sidedata_offset += self.index.entry_size * (1 + rev)
1898 1898 if sidedata_size == 0:
1899 1899 return {}
1900 1900
1901 1901 segment = self._getsegment(sidedata_offset, sidedata_size)
1902 1902 sidedata = sidedatautil.deserialize_sidedata(segment)
1903 1903 return sidedata
1904 1904
1905 1905 def rawdata(self, nodeorrev, _df=None):
1906 1906 """return an uncompressed raw data of a given node or revision number.
1907 1907
1908 1908 _df - an existing file handle to read from. (internal-only)
1909 1909 """
1910 1910 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1911 1911
1912 1912 def hash(self, text, p1, p2):
1913 1913 """Compute a node hash.
1914 1914
1915 1915 Available as a function so that subclasses can replace the hash
1916 1916 as needed.
1917 1917 """
1918 1918 return storageutil.hashrevisionsha1(text, p1, p2)
1919 1919
1920 1920 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1921 1921 """Check node hash integrity.
1922 1922
1923 1923 Available as a function so that subclasses can extend hash mismatch
1924 1924 behaviors as needed.
1925 1925 """
1926 1926 try:
1927 1927 if p1 is None and p2 is None:
1928 1928 p1, p2 = self.parents(node)
1929 1929 if node != self.hash(text, p1, p2):
1930 1930 # Clear the revision cache on hash failure. The revision cache
1931 1931 # only stores the raw revision and clearing the cache does have
1932 1932 # the side-effect that we won't have a cache hit when the raw
1933 1933 # revision data is accessed. But this case should be rare and
1934 1934 # it is extra work to teach the cache about the hash
1935 1935 # verification state.
1936 1936 if self._revisioncache and self._revisioncache[0] == node:
1937 1937 self._revisioncache = None
1938 1938
1939 1939 revornode = rev
1940 1940 if revornode is None:
1941 1941 revornode = templatefilters.short(hex(node))
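# report the failure against display_id, the user-facing name of this
# revlog (e.g. the tracked file's path for a filelog), rather than the
# raw index file name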
1942 1942 raise error.RevlogError(
1943 1943 _(b"integrity check failed on %s:%s")
1944 % (self._indexfile, pycompat.bytestr(revornode))
1944 % (self.display_id, pycompat.bytestr(revornode))
1945 1945 )
1946 1946 except error.RevlogError:
1947 1947 if self._censorable and storageutil.iscensoredtext(text):
1948 1948 raise error.CensoredNodeError(self._indexfile, node, text)
1949 1949 raise
1950 1950
1951 1951 def _enforceinlinesize(self, tr, fp=None):
1952 1952 """Check if the revlog is too big for inline and convert if so.
1953 1953
1954 1954 This should be called after revisions are added to the revlog. If the
1955 1955 revlog has grown too large to be an inline revlog, it will convert it
1956 1956 to use multiple index and data files.
1957 1957 """
1958 1958 tiprev = len(self) - 1
1959 1959 if (
1960 1960 not self._inline
1961 1961 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1962 1962 ):
1963 1963 return
1964 1964
1965 1965 troffset = tr.findoffset(self._indexfile)
1966 1966 if troffset is None:
1967 1967 raise error.RevlogError(
1968 1968 _(b"%s not found in the transaction") % self._indexfile
1969 1969 )
1970 1970 trindex = 0
1971 1971 tr.add(self._datafile, 0)
1972 1972
1973 1973 if fp:
1974 1974 fp.flush()
1975 1975 fp.close()
1976 1976 # We can't use the cached file handle after close(). So prevent
1977 1977 # its usage.
1978 1978 self._writinghandles = None
1979 1979
1980 1980 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1981 1981 for r in self:
1982 1982 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1983 1983 if troffset <= self.start(r):
1984 1984 trindex = r
1985 1985
1986 1986 with self._indexfp(b'w') as fp:
1987 1987 self._format_flags &= ~FLAG_INLINE_DATA
1988 1988 self._inline = False
1989 1989 for i in self:
1990 1990 e = self.index.entry_binary(i)
1991 1991 if i == 0:
1992 1992 header = self._format_flags | self._format_version
1993 1993 header = self.index.pack_header(header)
1994 1994 e = header + e
1995 1995 fp.write(e)
1996 1996
1997 1997 # the temp file replaces the real index when we exit the context
1998 1998 # manager
1999 1999
2000 2000 tr.replace(self._indexfile, trindex * self.index.entry_size)
2001 2001 nodemaputil.setup_persistent_nodemap(tr, self)
2002 2002 self._chunkclear()
2003 2003
2004 2004 def _nodeduplicatecallback(self, transaction, node):
2005 2005 """called when trying to add a node already stored."""
2006 2006
2007 2007 def addrevision(
2008 2008 self,
2009 2009 text,
2010 2010 transaction,
2011 2011 link,
2012 2012 p1,
2013 2013 p2,
2014 2014 cachedelta=None,
2015 2015 node=None,
2016 2016 flags=REVIDX_DEFAULT_FLAGS,
2017 2017 deltacomputer=None,
2018 2018 sidedata=None,
2019 2019 ):
2020 2020 """add a revision to the log
2021 2021
2022 2022 text - the revision data to add
2023 2023 transaction - the transaction object used for rollback
2024 2024 link - the linkrev data to add
2025 2025 p1, p2 - the parent nodeids of the revision
2026 2026 cachedelta - an optional precomputed delta
2027 2027 node - nodeid of revision; typically node is not specified, and it is
2028 2028 computed by default as hash(text, p1, p2); however, subclasses might
2029 2029 use a different hashing method (and override checkhash() in such a case)
2030 2030 flags - the known flags to set on the revision
2031 2031 deltacomputer - an optional deltacomputer instance shared between
2032 2032 multiple calls
2033 2033 """
2034 2034 if link == nullrev:
2035 2035 raise error.RevlogError(
2036 2036 _(b"attempted to add linkrev -1 to %s") % self._indexfile
2037 2037 )
2038 2038
2039 2039 if sidedata is None:
2040 2040 sidedata = {}
2041 2041 elif sidedata and not self.hassidedata:
2042 2042 raise error.ProgrammingError(
2043 2043 _(b"trying to add sidedata to a revlog who don't support them")
2044 2044 )
2045 2045
2046 2046 if flags:
2047 2047 node = node or self.hash(text, p1, p2)
2048 2048
2049 2049 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2050 2050
2051 2051 # If the flag processor modifies the revision data, ignore any provided
2052 2052 # cachedelta.
2053 2053 if rawtext != text:
2054 2054 cachedelta = None
2055 2055
2056 2056 if len(rawtext) > _maxentrysize:
2057 2057 raise error.RevlogError(
2058 2058 _(
2059 2059 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2060 2060 )
2061 2061 % (self._indexfile, len(rawtext))
2062 2062 )
2063 2063
2064 2064 node = node or self.hash(rawtext, p1, p2)
2065 2065 rev = self.index.get_rev(node)
2066 2066 if rev is not None:
2067 2067 return rev
2068 2068
2069 2069 if validatehash:
2070 2070 self.checkhash(rawtext, node, p1=p1, p2=p2)
2071 2071
2072 2072 return self.addrawrevision(
2073 2073 rawtext,
2074 2074 transaction,
2075 2075 link,
2076 2076 p1,
2077 2077 p2,
2078 2078 node,
2079 2079 flags,
2080 2080 cachedelta=cachedelta,
2081 2081 deltacomputer=deltacomputer,
2082 2082 sidedata=sidedata,
2083 2083 )
2084 2084
2085 2085 def addrawrevision(
2086 2086 self,
2087 2087 rawtext,
2088 2088 transaction,
2089 2089 link,
2090 2090 p1,
2091 2091 p2,
2092 2092 node,
2093 2093 flags,
2094 2094 cachedelta=None,
2095 2095 deltacomputer=None,
2096 2096 sidedata=None,
2097 2097 ):
2098 2098 """add a raw revision with known flags, node and parents
2099 2099 useful when reusing a revision not stored in this revlog (e.g. received
2100 2100 over the wire, or read from an external bundle).
2101 2101 """
2102 2102 dfh = None
2103 2103 if not self._inline:
2104 2104 dfh = self._datafp(b"a+")
2105 2105 ifh = self._indexfp(b"a+")
2106 2106 try:
2107 2107 return self._addrevision(
2108 2108 node,
2109 2109 rawtext,
2110 2110 transaction,
2111 2111 link,
2112 2112 p1,
2113 2113 p2,
2114 2114 flags,
2115 2115 cachedelta,
2116 2116 ifh,
2117 2117 dfh,
2118 2118 deltacomputer=deltacomputer,
2119 2119 sidedata=sidedata,
2120 2120 )
2121 2121 finally:
2122 2122 if dfh:
2123 2123 dfh.close()
2124 2124 ifh.close()
2125 2125
2126 2126 def compress(self, data):
2127 2127 """Generate a possibly-compressed representation of data."""
2128 2128 if not data:
2129 2129 return b'', data
2130 2130
2131 2131 compressed = self._compressor.compress(data)
2132 2132
2133 2133 if compressed:
2134 2134 # The revlog compressor added the header in the returned data.
2135 2135 return b'', compressed
2136 2136
2137 2137 if data[0:1] == b'\0':
2138 2138 return b'', data
2139 2139 return b'u', data
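# A quick sketch of the return convention: compress() yields a
# (header, data) pair. An empty header means the payload is
# self-describing (compressed chunks carry the engine's own header,
# and raw data starting with NUL needs no marker); b'u' explicitly
# tags data stored uncompressed. decompress() below dispatches on that
# first byte.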
2140 2140
2141 2141 def decompress(self, data):
2142 2142 """Decompress a revlog chunk.
2143 2143
2144 2144 The chunk is expected to begin with a header identifying the
2145 2145 format type so it can be routed to an appropriate decompressor.
2146 2146 """
2147 2147 if not data:
2148 2148 return data
2149 2149
2150 2150 # Revlogs are read much more frequently than they are written and many
2151 2151 # chunks only take microseconds to decompress, so performance is
2152 2152 # important here.
2153 2153 #
2154 2154 # We can make a few assumptions about revlogs:
2155 2155 #
2156 2156 # 1) the majority of chunks will be compressed (as opposed to inline
2157 2157 # raw data).
2158 2158 # 2) decompressing *any* data will likely be at least 10x slower than
2159 2159 # returning raw inline data.
2160 2160 # 3) we want to prioritize common and officially supported compression
2161 2161 # engines
2162 2162 #
2163 2163 # It follows that we want to optimize for "decompress compressed data
2164 2164 # when encoded with common and officially supported compression engines"
2165 2165 # case over "raw data" and "data encoded by less common or non-official
2166 2166 # compression engines." That is why we have the inline lookup first
2167 2167 # followed by the compengines lookup.
2168 2168 #
2169 2169 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2170 2170 # compressed chunks. And this matters for changelog and manifest reads.
2171 2171 t = data[0:1]
2172 2172
2173 2173 if t == b'x':
2174 2174 try:
2175 2175 return _zlibdecompress(data)
2176 2176 except zlib.error as e:
2177 2177 raise error.RevlogError(
2178 2178 _(b'revlog decompress error: %s')
2179 2179 % stringutil.forcebytestr(e)
2180 2180 )
2181 2181 # '\0' is more common than 'u' so it goes first.
2182 2182 elif t == b'\0':
2183 2183 return data
2184 2184 elif t == b'u':
2185 2185 return util.buffer(data, 1)
2186 2186
2187 2187 try:
2188 2188 compressor = self._decompressors[t]
2189 2189 except KeyError:
2190 2190 try:
2191 2191 engine = util.compengines.forrevlogheader(t)
2192 2192 compressor = engine.revlogcompressor(self._compengineopts)
2193 2193 self._decompressors[t] = compressor
2194 2194 except KeyError:
2195 2195 raise error.RevlogError(
2196 2196 _(b'unknown compression type %s') % binascii.hexlify(t)
2197 2197 )
2198 2198
2199 2199 return compressor.decompress(data)
2200 2200
2201 2201 def _addrevision(
2202 2202 self,
2203 2203 node,
2204 2204 rawtext,
2205 2205 transaction,
2206 2206 link,
2207 2207 p1,
2208 2208 p2,
2209 2209 flags,
2210 2210 cachedelta,
2211 2211 ifh,
2212 2212 dfh,
2213 2213 alwayscache=False,
2214 2214 deltacomputer=None,
2215 2215 sidedata=None,
2216 2216 ):
2217 2217 """internal function to add revisions to the log
2218 2218
2219 2219 see addrevision for argument descriptions.
2220 2220
2221 2221 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2222 2222
2223 2223 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2224 2224 be used.
2225 2225
2226 2226 invariants:
2227 2227 - rawtext is optional (can be None); if not set, cachedelta must be set.
2228 2228 if both are set, they must correspond to each other.
2229 2229 """
2230 2230 if node == self.nullid:
2231 2231 raise error.RevlogError(
2232 2232 _(b"%s: attempt to add null revision") % self._indexfile
2233 2233 )
2234 2234 if (
2235 2235 node == self.nodeconstants.wdirid
2236 2236 or node in self.nodeconstants.wdirfilenodeids
2237 2237 ):
2238 2238 raise error.RevlogError(
2239 2239 _(b"%s: attempt to add wdir revision") % self._indexfile
2240 2240 )
2241 2241
2242 2242 if self._inline:
2243 2243 fh = ifh
2244 2244 else:
2245 2245 fh = dfh
2246 2246
2247 2247 btext = [rawtext]
2248 2248
2249 2249 curr = len(self)
2250 2250 prev = curr - 1
2251 2251
2252 2252 offset = self._get_data_offset(prev)
2253 2253
2254 2254 if self._concurrencychecker:
2255 2255 if self._inline:
2256 2256 # offset is "as if" it were in the .d file, so we need to add on
2257 2257 # the size of the entry metadata.
2258 2258 self._concurrencychecker(
2259 2259 ifh, self._indexfile, offset + curr * self.index.entry_size
2260 2260 )
2261 2261 else:
2262 2262 # Entries in the .i are a consistent size.
2263 2263 self._concurrencychecker(
2264 2264 ifh, self._indexfile, curr * self.index.entry_size
2265 2265 )
2266 2266 self._concurrencychecker(dfh, self._datafile, offset)
2267 2267
2268 2268 p1r, p2r = self.rev(p1), self.rev(p2)
2269 2269
2270 2270 # full versions are inserted when the needed deltas
2271 2271 # become comparable to the uncompressed text
2272 2272 if rawtext is None:
2273 2273 # we need the rawtext size before it is changed by flag processors,
2274 2274 # which is the non-raw size. use revlog explicitly to avoid filelog's
2275 2275 # extra logic that might remove metadata size.
2276 2276 textlen = mdiff.patchedsize(
2277 2277 revlog.size(self, cachedelta[0]), cachedelta[1]
2278 2278 )
2279 2279 else:
2280 2280 textlen = len(rawtext)
2281 2281
2282 2282 if deltacomputer is None:
2283 2283 deltacomputer = deltautil.deltacomputer(self)
2284 2284
2285 2285 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2286 2286
2287 2287 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2288 2288
2289 2289 if sidedata and self.hassidedata:
2290 2290 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2291 2291 sidedata_offset = offset + deltainfo.deltalen
2292 2292 else:
2293 2293 serialized_sidedata = b""
2294 2294 # Don't store the offset if the sidedata is empty; that way
2295 2295 # we can easily detect empty sidedata, and it will be no different
2296 2296 # from the sidedata we add manually.
2297 2297 sidedata_offset = 0
2298 2298
2299 2299 e = (
2300 2300 offset_type(offset, flags),
2301 2301 deltainfo.deltalen,
2302 2302 textlen,
2303 2303 deltainfo.base,
2304 2304 link,
2305 2305 p1r,
2306 2306 p2r,
2307 2307 node,
2308 2308 sidedata_offset,
2309 2309 len(serialized_sidedata),
2310 2310 )
2311 2311
2312 2312 self.index.append(e)
2313 2313 entry = self.index.entry_binary(curr)
2314 2314 if curr == 0:
2315 2315 header = self._format_flags | self._format_version
2316 2316 header = self.index.pack_header(header)
2317 2317 entry = header + entry
2318 2318 self._writeentry(
2319 2319 transaction,
2320 2320 ifh,
2321 2321 dfh,
2322 2322 entry,
2323 2323 deltainfo.data,
2324 2324 link,
2325 2325 offset,
2326 2326 serialized_sidedata,
2327 2327 )
2328 2328
2329 2329 rawtext = btext[0]
2330 2330
2331 2331 if alwayscache and rawtext is None:
2332 2332 rawtext = deltacomputer.buildtext(revinfo, fh)
2333 2333
2334 2334 if type(rawtext) == bytes: # only accept immutable objects
2335 2335 self._revisioncache = (node, curr, rawtext)
2336 2336 self._chainbasecache[curr] = deltainfo.chainbase
2337 2337 return curr
2338 2338
2339 2339 def _get_data_offset(self, prev):
2340 2340 """Returns the current offset in the (in-transaction) data file.
2341 2341 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2342 2342 file to store that information: since sidedata can be rewritten to the
2343 2343 end of the data file within a transaction, you can have cases where, for
2344 2344 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2345 2345 to `n - 1`'s sidedata being written after `n`'s data.
2346 2346
2347 2347 TODO cache this in a docket file before getting out of experimental."""
2348 2348 if self._format_version != REVLOGV2:
2349 2349 return self.end(prev)
2350 2350
2351 2351 offset = 0
2352 2352 for rev, entry in enumerate(self.index):
2353 2353 sidedata_end = entry[8] + entry[9]
2354 2354 # Sidedata for a previous rev has potentially been written after
2355 2355 # this rev's end, so take the max.
2356 2356 offset = max(self.end(rev), offset, sidedata_end)
2357 2357 return offset
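# Illustrative example of why the max() is needed: if rev 1's sidedata
# was rewritten after rev 2's data was appended, entry[8] + entry[9]
# for rev 1 points past self.end(2), so scanning every entry is the
# only safe way to find the true end of the data file.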
2358 2358
2359 2359 def _writeentry(
2360 2360 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2361 2361 ):
2362 2362 # Files opened in a+ mode have inconsistent behavior on various
2363 2363 # platforms. Windows requires that a file positioning call be made
2364 2364 # when the file handle transitions between reads and writes. See
2365 2365 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2366 2366 # platforms, Python or the platform itself can be buggy. Some versions
2367 2367 # of Solaris have been observed to not append at the end of the file
2368 2368 # if the file was seeked to before the end. See issue4943 for more.
2369 2369 #
2370 2370 # We work around this issue by inserting a seek() before writing.
2371 2371 # Note: This is likely not necessary on Python 3. However, because
2372 2372 # the file handle is reused for reads and may be seeked there, we need
2373 2373 # to be careful before changing this.
2374 2374 ifh.seek(0, os.SEEK_END)
2375 2375 if dfh:
2376 2376 dfh.seek(0, os.SEEK_END)
2377 2377
2378 2378 curr = len(self) - 1
2379 2379 if not self._inline:
2380 2380 transaction.add(self._datafile, offset)
2381 2381 transaction.add(self._indexfile, curr * len(entry))
2382 2382 if data[0]:
2383 2383 dfh.write(data[0])
2384 2384 dfh.write(data[1])
2385 2385 if sidedata:
2386 2386 dfh.write(sidedata)
2387 2387 ifh.write(entry)
2388 2388 else:
2389 2389 offset += curr * self.index.entry_size
2390 2390 transaction.add(self._indexfile, offset)
2391 2391 ifh.write(entry)
2392 2392 ifh.write(data[0])
2393 2393 ifh.write(data[1])
2394 2394 if sidedata:
2395 2395 ifh.write(sidedata)
2396 2396 self._enforceinlinesize(transaction, ifh)
2397 2397 nodemaputil.setup_persistent_nodemap(transaction, self)
2398 2398
2399 2399 def addgroup(
2400 2400 self,
2401 2401 deltas,
2402 2402 linkmapper,
2403 2403 transaction,
2404 2404 alwayscache=False,
2405 2405 addrevisioncb=None,
2406 2406 duplicaterevisioncb=None,
2407 2407 ):
2408 2408 """
2409 2409 add a delta group
2410 2410
2411 2411 given a set of deltas, add them to the revision log. the
2412 2412 first delta is against its parent, which should be in our
2413 2413 log, the rest are against the previous delta.
2414 2414
2415 2415 If ``addrevisioncb`` is defined, it will be called with arguments of
2416 2416 this revlog and the revision number that was added.
2417 2417 """
2418 2418
2419 2419 if self._writinghandles:
2420 2420 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2421 2421
2422 2422 r = len(self)
2423 2423 end = 0
2424 2424 if r:
2425 2425 end = self.end(r - 1)
2426 2426 ifh = self._indexfp(b"a+")
2427 2427 isize = r * self.index.entry_size
2428 2428 if self._inline:
2429 2429 transaction.add(self._indexfile, end + isize)
2430 2430 dfh = None
2431 2431 else:
2432 2432 transaction.add(self._indexfile, isize)
2433 2433 transaction.add(self._datafile, end)
2434 2434 dfh = self._datafp(b"a+")
2435 2435
2436 2436 def flush():
2437 2437 if dfh:
2438 2438 dfh.flush()
2439 2439 ifh.flush()
2440 2440
2441 2441 self._writinghandles = (ifh, dfh)
2442 2442 empty = True
2443 2443
2444 2444 try:
2445 2445 deltacomputer = deltautil.deltacomputer(self)
2446 2446 # loop through our set of deltas
2447 2447 for data in deltas:
2448 2448 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2449 2449 link = linkmapper(linknode)
2450 2450 flags = flags or REVIDX_DEFAULT_FLAGS
2451 2451
2452 2452 rev = self.index.get_rev(node)
2453 2453 if rev is not None:
2454 2454 # this can happen if two branches make the same change
2455 2455 self._nodeduplicatecallback(transaction, rev)
2456 2456 if duplicaterevisioncb:
2457 2457 duplicaterevisioncb(self, rev)
2458 2458 empty = False
2459 2459 continue
2460 2460
2461 2461 for p in (p1, p2):
2462 2462 if not self.index.has_node(p):
2463 2463 raise error.LookupError(
2464 2464 p, self.radix, _(b'unknown parent')
2465 2465 )
2466 2466
2467 2467 if not self.index.has_node(deltabase):
2468 2468 raise error.LookupError(
2469 2469 deltabase, self.display_id, _(b'unknown delta base')
2470 2470 )
2471 2471
2472 2472 baserev = self.rev(deltabase)
2473 2473
2474 2474 if baserev != nullrev and self.iscensored(baserev):
2475 2475 # if base is censored, delta must be full replacement in a
2476 2476 # single patch operation
2477 2477 hlen = struct.calcsize(b">lll")
2478 2478 oldlen = self.rawsize(baserev)
2479 2479 newlen = len(delta) - hlen
2480 2480 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2481 2481 raise error.CensoredBaseError(
2482 2482 self._indexfile, self.node(baserev)
2483 2483 )
2484 2484
2485 2485 if not flags and self._peek_iscensored(baserev, delta, flush):
2486 2486 flags |= REVIDX_ISCENSORED
2487 2487
2488 2488 # We assume consumers of addrevisioncb will want to retrieve
2489 2489 # the added revision, which will require a call to
2490 2490 # revision(). revision() will fast path if there is a cache
2491 2491 # hit. So, we tell _addrevision() to always cache in this case.
2492 2492 # We're only using addgroup() in the context of changegroup
2493 2493 # generation so the revision data can always be handled as raw
2494 2494 # by the flagprocessor.
2495 2495 rev = self._addrevision(
2496 2496 node,
2497 2497 None,
2498 2498 transaction,
2499 2499 link,
2500 2500 p1,
2501 2501 p2,
2502 2502 flags,
2503 2503 (baserev, delta),
2504 2504 ifh,
2505 2505 dfh,
2506 2506 alwayscache=alwayscache,
2507 2507 deltacomputer=deltacomputer,
2508 2508 sidedata=sidedata,
2509 2509 )
2510 2510
2511 2511 if addrevisioncb:
2512 2512 addrevisioncb(self, rev)
2513 2513 empty = False
2514 2514
2515 2515 if not dfh and not self._inline:
2516 2516 # addrevision switched from inline to conventional
2517 2517 # reopen the index
2518 2518 ifh.close()
2519 2519 dfh = self._datafp(b"a+")
2520 2520 ifh = self._indexfp(b"a+")
2521 2521 self._writinghandles = (ifh, dfh)
2522 2522 finally:
2523 2523 self._writinghandles = None
2524 2524
2525 2525 if dfh:
2526 2526 dfh.close()
2527 2527 ifh.close()
2528 2528 return not empty
2529 2529
2530 2530 def iscensored(self, rev):
2531 2531 """Check if a file revision is censored."""
2532 2532 if not self._censorable:
2533 2533 return False
2534 2534
2535 2535 return self.flags(rev) & REVIDX_ISCENSORED
2536 2536
2537 2537 def _peek_iscensored(self, baserev, delta, flush):
2538 2538 """Quickly check if a delta produces a censored revision."""
2539 2539 if not self._censorable:
2540 2540 return False
2541 2541
2542 2542 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2543 2543
2544 2544 def getstrippoint(self, minlink):
2545 2545 """find the minimum rev that must be stripped to strip the linkrev
2546 2546
2547 2547 Returns a tuple containing the minimum rev and a set of all revs that
2548 2548 have linkrevs that will be broken by this strip.
2549 2549 """
2550 2550 return storageutil.resolvestripinfo(
2551 2551 minlink,
2552 2552 len(self) - 1,
2553 2553 self.headrevs(),
2554 2554 self.linkrev,
2555 2555 self.parentrevs,
2556 2556 )
2557 2557
2558 2558 def strip(self, minlink, transaction):
2559 2559 """truncate the revlog on the first revision with a linkrev >= minlink
2560 2560
2561 2561 This function is called when we're stripping revision minlink and
2562 2562 its descendants from the repository.
2563 2563
2564 2564 We have to remove all revisions with linkrev >= minlink, because
2565 2565 the equivalent changelog revisions will be renumbered after the
2566 2566 strip.
2567 2567
2568 2568 So we truncate the revlog on the first of these revisions, and
2569 2569 trust that the caller has saved the revisions that shouldn't be
2570 2570 removed and that it'll re-add them after this truncation.
2571 2571 """
2572 2572 if len(self) == 0:
2573 2573 return
2574 2574
2575 2575 rev, _ = self.getstrippoint(minlink)
2576 2576 if rev == len(self):
2577 2577 return
2578 2578
2579 2579 # first truncate the files on disk
2580 2580 end = self.start(rev)
2581 2581 if not self._inline:
2582 2582 transaction.add(self._datafile, end)
2583 2583 end = rev * self.index.entry_size
2584 2584 else:
2585 2585 end += rev * self.index.entry_size
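# Truncation points: with a separate data file, the index is cut at
# rev * entry_size; in an inline revlog, entries and data interleave,
# so the cut falls at the data offset start(rev) plus the rev index
# entries that precede it.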
2586 2586
2587 2587 transaction.add(self._indexfile, end)
2588 2588
2589 2589 # then reset internal state in memory to forget those revisions
2590 2590 self._revisioncache = None
2591 2591 self._chaininfocache = util.lrucachedict(500)
2592 2592 self._chunkclear()
2593 2593
2594 2594 del self.index[rev:-1]
2595 2595
2596 2596 def checksize(self):
2597 2597 """Check size of index and data files
2598 2598
2599 2599 return a (dd, di) tuple.
2600 2600 - dd: extra bytes for the "data" file
2601 2601 - di: extra bytes for the "index" file
2602 2602
2603 2603 A healthy revlog will return (0, 0).
2604 2604 """
2605 2605 expected = 0
2606 2606 if len(self):
2607 2607 expected = max(0, self.end(len(self) - 1))
2608 2608
2609 2609 try:
2610 2610 with self._datafp() as f:
2611 2611 f.seek(0, io.SEEK_END)
2612 2612 actual = f.tell()
2613 2613 dd = actual - expected
2614 2614 except IOError as inst:
2615 2615 if inst.errno != errno.ENOENT:
2616 2616 raise
2617 2617 dd = 0
2618 2618
2619 2619 try:
2620 2620 f = self.opener(self._indexfile)
2621 2621 f.seek(0, io.SEEK_END)
2622 2622 actual = f.tell()
2623 2623 f.close()
2624 2624 s = self.index.entry_size
2625 2625 i = max(0, actual // s)
2626 2626 di = actual - (i * s)
2627 2627 if self._inline:
2628 2628 databytes = 0
2629 2629 for r in self:
2630 2630 databytes += max(0, self.length(r))
2631 2631 dd = 0
2632 2632 di = actual - len(self) * s - databytes
2633 2633 except IOError as inst:
2634 2634 if inst.errno != errno.ENOENT:
2635 2635 raise
2636 2636 di = 0
2637 2637
2638 2638 return (dd, di)
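# Illustrative reading of the result: (0, 0) is a healthy revlog; a
# positive di such as (0, 3) means the index file carries 3 trailing
# bytes beyond a whole number of entries, typically from an
# interrupted write.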
2639 2639
2640 2640 def files(self):
2641 2641 res = [self._indexfile]
2642 2642 if not self._inline:
2643 2643 res.append(self._datafile)
2644 2644 return res
2645 2645
2646 2646 def emitrevisions(
2647 2647 self,
2648 2648 nodes,
2649 2649 nodesorder=None,
2650 2650 revisiondata=False,
2651 2651 assumehaveparentrevisions=False,
2652 2652 deltamode=repository.CG_DELTAMODE_STD,
2653 2653 sidedata_helpers=None,
2654 2654 ):
2655 2655 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2656 2656 raise error.ProgrammingError(
2657 2657 b'unhandled value for nodesorder: %s' % nodesorder
2658 2658 )
2659 2659
2660 2660 if nodesorder is None and not self._generaldelta:
2661 2661 nodesorder = b'storage'
2662 2662
2663 2663 if (
2664 2664 not self._storedeltachains
2665 2665 and deltamode != repository.CG_DELTAMODE_PREV
2666 2666 ):
2667 2667 deltamode = repository.CG_DELTAMODE_FULL
2668 2668
2669 2669 return storageutil.emitrevisions(
2670 2670 self,
2671 2671 nodes,
2672 2672 nodesorder,
2673 2673 revlogrevisiondelta,
2674 2674 deltaparentfn=self.deltaparent,
2675 2675 candeltafn=self.candelta,
2676 2676 rawsizefn=self.rawsize,
2677 2677 revdifffn=self.revdiff,
2678 2678 flagsfn=self.flags,
2679 2679 deltamode=deltamode,
2680 2680 revisiondata=revisiondata,
2681 2681 assumehaveparentrevisions=assumehaveparentrevisions,
2682 2682 sidedata_helpers=sidedata_helpers,
2683 2683 )
2684 2684
2685 2685 DELTAREUSEALWAYS = b'always'
2686 2686 DELTAREUSESAMEREVS = b'samerevs'
2687 2687 DELTAREUSENEVER = b'never'
2688 2688
2689 2689 DELTAREUSEFULLADD = b'fulladd'
2690 2690
2691 2691 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2692 2692
2693 2693 def clone(
2694 2694 self,
2695 2695 tr,
2696 2696 destrevlog,
2697 2697 addrevisioncb=None,
2698 2698 deltareuse=DELTAREUSESAMEREVS,
2699 2699 forcedeltabothparents=None,
2700 2700 sidedata_helpers=None,
2701 2701 ):
2702 2702 """Copy this revlog to another, possibly with format changes.
2703 2703
2704 2704 The destination revlog will contain the same revisions and nodes.
2705 2705 However, it may not be bit-for-bit identical due to e.g. delta encoding
2706 2706 differences.
2707 2707
2708 2708 The ``deltareuse`` argument controls how deltas from the existing revlog
2709 2709 are preserved in the destination revlog. The argument can have the
2710 2710 following values:
2711 2711
2712 2712 DELTAREUSEALWAYS
2713 2713 Deltas will always be reused (if possible), even if the destination
2714 2714 revlog would not select the same revisions for the delta. This is the
2715 2715 fastest mode of operation.
2716 2716 DELTAREUSESAMEREVS
2717 2717 Deltas will be reused if the destination revlog would pick the same
2718 2718 revisions for the delta. This mode strikes a balance between speed
2719 2719 and optimization.
2720 2720 DELTAREUSENEVER
2721 2721 Deltas will never be reused. This is the slowest mode of execution.
2722 2722 This mode can be used to recompute deltas (e.g. if the diff/delta
2723 2723 algorithm changes).
2724 2724 DELTAREUSEFULLADD
2725 2725 Revisions will be re-added as if they were new content. This is
2726 2726 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2727 2727 e.g. large file detection and handling.
2728 2728
2729 2729 Delta computation can be slow, so the choice of delta reuse policy can
2730 2730 significantly affect run time.
2731 2731
2732 2732 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2733 2733 two extremes. Deltas will be reused if they are appropriate. But if the
2734 2734 delta could choose a better revision, it will do so. This means if you
2735 2735 are converting a non-generaldelta revlog to a generaldelta revlog,
2736 2736 deltas will be recomputed if the delta's parent isn't a parent of the
2737 2737 revision.
2738 2738
2739 2739 In addition to the delta policy, the ``forcedeltabothparents``
2740 2740 argument controls whether to force computing deltas against both parents
2741 2741 for merges. If unset, the destination revlog's existing setting is kept.
2742 2742
2743 2743 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2744 2744 `sidedata_helpers`.
2745 2745 """
2746 2746 if deltareuse not in self.DELTAREUSEALL:
2747 2747 raise ValueError(
2748 2748 _(b'value for deltareuse invalid: %s') % deltareuse
2749 2749 )
2750 2750
2751 2751 if len(destrevlog):
2752 2752 raise ValueError(_(b'destination revlog is not empty'))
2753 2753
2754 2754 if getattr(self, 'filteredrevs', None):
2755 2755 raise ValueError(_(b'source revlog has filtered revisions'))
2756 2756 if getattr(destrevlog, 'filteredrevs', None):
2757 2757 raise ValueError(_(b'destination revlog has filtered revisions'))
2758 2758
2759 2759 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2760 2760 # if possible.
2761 2761 oldlazydelta = destrevlog._lazydelta
2762 2762 oldlazydeltabase = destrevlog._lazydeltabase
2763 2763 oldamd = destrevlog._deltabothparents
2764 2764
2765 2765 try:
2766 2766 if deltareuse == self.DELTAREUSEALWAYS:
2767 2767 destrevlog._lazydeltabase = True
2768 2768 destrevlog._lazydelta = True
2769 2769 elif deltareuse == self.DELTAREUSESAMEREVS:
2770 2770 destrevlog._lazydeltabase = False
2771 2771 destrevlog._lazydelta = True
2772 2772 elif deltareuse == self.DELTAREUSENEVER:
2773 2773 destrevlog._lazydeltabase = False
2774 2774 destrevlog._lazydelta = False
2775 2775
2776 2776 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2777 2777
2778 2778 self._clone(
2779 2779 tr,
2780 2780 destrevlog,
2781 2781 addrevisioncb,
2782 2782 deltareuse,
2783 2783 forcedeltabothparents,
2784 2784 sidedata_helpers,
2785 2785 )
2786 2786
2787 2787 finally:
2788 2788 destrevlog._lazydelta = oldlazydelta
2789 2789 destrevlog._lazydeltabase = oldlazydeltabase
2790 2790 destrevlog._deltabothparents = oldamd
2791 2791
2792 2792 def _clone(
2793 2793 self,
2794 2794 tr,
2795 2795 destrevlog,
2796 2796 addrevisioncb,
2797 2797 deltareuse,
2798 2798 forcedeltabothparents,
2799 2799 sidedata_helpers,
2800 2800 ):
2801 2801 """perform the core duty of `revlog.clone` after parameter processing"""
2802 2802 deltacomputer = deltautil.deltacomputer(destrevlog)
2803 2803 index = self.index
2804 2804 for rev in self:
2805 2805 entry = index[rev]
2806 2806
2807 2807 # Some classes override linkrev to take filtered revs into
2808 2808 # account. Use raw entry from index.
2809 2809 flags = entry[0] & 0xFFFF
2810 2810 linkrev = entry[4]
2811 2811 p1 = index[entry[5]][7]
2812 2812 p2 = index[entry[6]][7]
2813 2813 node = entry[7]
2814 2814
2815 2815 # (Possibly) reuse the delta from the revlog if allowed and
2816 2816 # the revlog chunk is a delta.
2817 2817 cachedelta = None
2818 2818 rawtext = None
2819 2819 if deltareuse == self.DELTAREUSEFULLADD:
2820 2820 text, sidedata = self._revisiondata(rev)
2821 2821
2822 2822 if sidedata_helpers is not None:
2823 2823 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2824 2824 self, sidedata_helpers, sidedata, rev
2825 2825 )
2826 2826 flags = flags | new_flags[0] & ~new_flags[1]
2827 2827
2828 2828 destrevlog.addrevision(
2829 2829 text,
2830 2830 tr,
2831 2831 linkrev,
2832 2832 p1,
2833 2833 p2,
2834 2834 cachedelta=cachedelta,
2835 2835 node=node,
2836 2836 flags=flags,
2837 2837 deltacomputer=deltacomputer,
2838 2838 sidedata=sidedata,
2839 2839 )
2840 2840 else:
2841 2841 if destrevlog._lazydelta:
2842 2842 dp = self.deltaparent(rev)
2843 2843 if dp != nullrev:
2844 2844 cachedelta = (dp, bytes(self._chunk(rev)))
2845 2845
2846 2846 sidedata = None
2847 2847 if not cachedelta:
2848 2848 rawtext, sidedata = self._revisiondata(rev)
2849 2849 if sidedata is None:
2850 2850 sidedata = self.sidedata(rev)
2851 2851
2852 2852 if sidedata_helpers is not None:
2853 2853 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2854 2854 self, sidedata_helpers, sidedata, rev
2855 2855 )
2856 2856 flags = flags | new_flags[0] & ~new_flags[1]
2857 2857
2858 2858 ifh = destrevlog.opener(
2859 2859 destrevlog._indexfile, b'a+', checkambig=False
2860 2860 )
2861 2861 dfh = None
2862 2862 if not destrevlog._inline:
2863 2863 dfh = destrevlog.opener(destrevlog._datafile, b'a+')
2864 2864 try:
2865 2865 destrevlog._addrevision(
2866 2866 node,
2867 2867 rawtext,
2868 2868 tr,
2869 2869 linkrev,
2870 2870 p1,
2871 2871 p2,
2872 2872 flags,
2873 2873 cachedelta,
2874 2874 ifh,
2875 2875 dfh,
2876 2876 deltacomputer=deltacomputer,
2877 2877 sidedata=sidedata,
2878 2878 )
2879 2879 finally:
2880 2880 if dfh:
2881 2881 dfh.close()
2882 2882 ifh.close()
2883 2883
2884 2884 if addrevisioncb:
2885 2885 addrevisioncb(self, rev, node)
2886 2886
2887 2887 def censorrevision(self, tr, censornode, tombstone=b''):
2888 2888 if self._format_version == REVLOGV0:
2889 2889 raise error.RevlogError(
2890 2890 _(b'cannot censor with version %d revlogs')
2891 2891 % self._format_version
2892 2892 )
2893 2893
2894 2894 censorrev = self.rev(censornode)
2895 2895 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2896 2896
2897 2897 if len(tombstone) > self.rawsize(censorrev):
2898 2898 raise error.Abort(
2899 2899 _(b'censor tombstone must be no longer than censored data')
2900 2900 )
2901 2901
2902 2902 # Rewriting the revlog in place is hard. Our strategy for censoring is
2903 2903 # to create a new revlog, copy all revisions to it, then replace the
2904 2904 # revlogs on transaction close.
2905 2905 #
2906 2906 # This is a bit dangerous. We could easily have a mismatch of state.
2907 2907 newrl = revlog(
2908 2908 self.opener,
2909 2909 target=self.target,
2910 2910 radix=self.radix,
2911 2911 postfix=b'tmpcensored',
2912 2912 censorable=True,
2913 2913 )
2914 2914 newrl._format_version = self._format_version
2915 2915 newrl._format_flags = self._format_flags
2916 2916 newrl._generaldelta = self._generaldelta
2917 2917 newrl._parse_index = self._parse_index
2918 2918
2919 2919 for rev in self.revs():
2920 2920 node = self.node(rev)
2921 2921 p1, p2 = self.parents(node)
2922 2922
2923 2923 if rev == censorrev:
2924 2924 newrl.addrawrevision(
2925 2925 tombstone,
2926 2926 tr,
2927 2927 self.linkrev(censorrev),
2928 2928 p1,
2929 2929 p2,
2930 2930 censornode,
2931 2931 REVIDX_ISCENSORED,
2932 2932 )
2933 2933
2934 2934 if newrl.deltaparent(rev) != nullrev:
2935 2935 raise error.Abort(
2936 2936 _(
2937 2937 b'censored revision stored as delta; '
2938 2938 b'cannot censor'
2939 2939 ),
2940 2940 hint=_(
2941 2941 b'censoring of revlogs is not '
2942 2942 b'fully implemented; please report '
2943 2943 b'this bug'
2944 2944 ),
2945 2945 )
2946 2946 continue
2947 2947
2948 2948 if self.iscensored(rev):
2949 2949 if self.deltaparent(rev) != nullrev:
2950 2950 raise error.Abort(
2951 2951 _(
2952 2952 b'cannot censor due to censored '
2953 2953 b'revision having delta stored'
2954 2954 )
2955 2955 )
2956 2956 rawtext = self._chunk(rev)
2957 2957 else:
2958 2958 rawtext = self.rawdata(rev)
2959 2959
2960 2960 newrl.addrawrevision(
2961 2961 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2962 2962 )
2963 2963
2964 2964 tr.addbackup(self._indexfile, location=b'store')
2965 2965 if not self._inline:
2966 2966 tr.addbackup(self._datafile, location=b'store')
2967 2967
2968 2968 self.opener.rename(newrl._indexfile, self._indexfile)
2969 2969 if not self._inline:
2970 2970 self.opener.rename(newrl._datafile, self._datafile)
2971 2971
2972 2972 self.clearcaches()
2973 2973 self._loadindex()
2974 2974
2975 2975 def verifyintegrity(self, state):
2976 2976 """Verifies the integrity of the revlog.
2977 2977
2978 2978 Yields ``revlogproblem`` instances describing problems that are
2979 2979 found.
2980 2980 """
2981 2981 dd, di = self.checksize()
2982 2982 if dd:
2983 2983 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2984 2984 if di:
2985 2985 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2986 2986
2987 2987 version = self._format_version
2988 2988
2989 2989 # The verifier tells us what version revlog we should be.
2990 2990 if version != state[b'expectedversion']:
2991 2991 yield revlogproblem(
2992 2992 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2993 2993 % (self.display_id, version, state[b'expectedversion'])
2994 2994 )
2995 2995
2996 2996 state[b'skipread'] = set()
2997 2997 state[b'safe_renamed'] = set()
2998 2998
2999 2999 for rev in self:
3000 3000 node = self.node(rev)
3001 3001
3002 3002 # Verify contents. 4 cases to care about:
3003 3003 #
3004 3004 # common: the most common case
3005 3005 # rename: with a rename
3006 3006 # meta: file content starts with b'\1\n', the metadata
3007 3007 # header defined in filelog.py, but without a rename
3008 3008 # ext: content stored externally
3009 3009 #
3010 3010 # More formally, their differences are shown below:
3011 3011 #
3012 3012 # | common | rename | meta | ext
3013 3013 # -------------------------------------------------------
3014 3014 # flags() | 0 | 0 | 0 | not 0
3015 3015 # renamed() | False | True | False | ?
3016 3016 # rawtext[0:2]=='\1\n'| False | True | True | ?
3017 3017 #
3018 3018 # "rawtext" means the raw text stored in revlog data, which
3019 3019 # could be retrieved by "rawdata(rev)". "text"
3020 3020 # mentioned below is "revision(rev)".
3021 3021 #
3022 3022 # There are 3 different lengths stored physically:
3023 3023 # 1. L1: rawsize, stored in revlog index
3024 3024 # 2. L2: len(rawtext), stored in revlog data
3025 3025 # 3. L3: len(text), stored in revlog data if flags==0, or
3026 3026 # possibly somewhere else if flags!=0
3027 3027 #
3028 3028 # L1 should be equal to L2. L3 could be different from them.
3029 3029 # "text" may or may not affect commit hash depending on flag
3030 3030 # processors (see flagutil.addflagprocessor).
3031 3031 #
3032 3032 # | common | rename | meta | ext
3033 3033 # -------------------------------------------------
3034 3034 # rawsize() | L1 | L1 | L1 | L1
3035 3035 # size() | L1 | L2-LM | L1(*) | L1 (?)
3036 3036 # len(rawtext) | L2 | L2 | L2 | L2
3037 3037 # len(text) | L2 | L2 | L2 | L3
3038 3038 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3039 3039 #
3040 3040 # LM: length of metadata, depending on rawtext
3041 3041 # (*): not ideal, see comment in filelog.size
3042 3042 # (?): could be "- len(meta)" if the resolved content has
3043 3043 # rename metadata
3044 3044 #
3045 3045 # Checks needed to be done:
3046 3046 # 1. length check: L1 == L2, in all cases.
3047 3047 # 2. hash check: depending on flag processor, we may need to
3048 3048 # use either "text" (external), or "rawtext" (in revlog).
3049 3049
3050 3050 try:
3051 3051 skipflags = state.get(b'skipflags', 0)
3052 3052 if skipflags:
3053 3053 skipflags &= self.flags(rev)
3054 3054
3055 3055 _verify_revision(self, skipflags, state, node)
3056 3056
3057 3057 l1 = self.rawsize(rev)
3058 3058 l2 = len(self.rawdata(node))
3059 3059
3060 3060 if l1 != l2:
3061 3061 yield revlogproblem(
3062 3062 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3063 3063 node=node,
3064 3064 )
3065 3065
3066 3066 except error.CensoredNodeError:
3067 3067 if state[b'erroroncensored']:
3068 3068 yield revlogproblem(
3069 3069 error=_(b'censored file data'), node=node
3070 3070 )
3071 3071 state[b'skipread'].add(node)
3072 3072 except Exception as e:
3073 3073 yield revlogproblem(
3074 3074 error=_(b'unpacking %s: %s')
3075 3075 % (short(node), stringutil.forcebytestr(e)),
3076 3076 node=node,
3077 3077 )
3078 3078 state[b'skipread'].add(node)
3079 3079
3080 3080 def storageinfo(
3081 3081 self,
3082 3082 exclusivefiles=False,
3083 3083 sharedfiles=False,
3084 3084 revisionscount=False,
3085 3085 trackedsize=False,
3086 3086 storedsize=False,
3087 3087 ):
3088 3088 d = {}
3089 3089
3090 3090 if exclusivefiles:
3091 3091 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3092 3092 if not self._inline:
3093 3093 d[b'exclusivefiles'].append((self.opener, self._datafile))
3094 3094
3095 3095 if sharedfiles:
3096 3096 d[b'sharedfiles'] = []
3097 3097
3098 3098 if revisionscount:
3099 3099 d[b'revisionscount'] = len(self)
3100 3100
3101 3101 if trackedsize:
3102 3102 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3103 3103
3104 3104 if storedsize:
3105 3105 d[b'storedsize'] = sum(
3106 3106 self.opener.stat(path).st_size for path in self.files()
3107 3107 )
3108 3108
3109 3109 return d
3110 3110
3111 3111 def rewrite_sidedata(self, helpers, startrev, endrev):
3112 3112 if not self.hassidedata:
3113 3113 return
3114 3114 # inline revlogs are not yet supported because they suffer from an issue when
3115 3115 # rewriting them (since it's not an append-only operation).
3116 3116 # See issue6485.
3117 3117 assert not self._inline
3118 3118 if not helpers[1] and not helpers[2]:
3119 3119 # Nothing to generate or remove
3120 3120 return
3121 3121
3122 3122 # the changelog implements a "delayed" writing mechanism that assumes
3123 3123 # all index data is written in append mode and is therefore incompatible
3124 3124 # with the seeked write done in this method. The use of such "delayed"
3125 3125 # writing will soon be removed for revlog versions that support side
3126 3126 # data, so for now, we only keep this simple assert to highlight the
3127 3127 # situation.
3128 3128 delayed = getattr(self, '_delayed', False)
3129 3129 diverted = getattr(self, '_divert', False)
3130 3130 if delayed and not diverted:
3131 3131 msg = "cannot rewrite_sidedata of a delayed revlog"
3132 3132 raise error.ProgrammingError(msg)
3133 3133
3134 3134 new_entries = []
3135 3135 # append the new sidedata
3136 3136 with self._datafp(b'a+') as fp:
3137 3137 # Maybe this bug still exists, see revlog._writeentry
3138 3138 fp.seek(0, os.SEEK_END)
3139 3139 current_offset = fp.tell()
3140 3140 for rev in range(startrev, endrev + 1):
3141 3141 entry = self.index[rev]
3142 3142 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3143 3143 store=self,
3144 3144 sidedata_helpers=helpers,
3145 3145 sidedata={},
3146 3146 rev=rev,
3147 3147 )
3148 3148
3149 3149 serialized_sidedata = sidedatautil.serialize_sidedata(
3150 3150 new_sidedata
3151 3151 )
3152 3152 if entry[8] != 0 or entry[9] != 0:
3153 3153 # rewriting entries that already have sidedata is not
3154 3154 # supported yet, because it introduces garbage data in the
3155 3155 # revlog.
3156 3156 msg = b"Rewriting existing sidedata is not supported yet"
3157 3157 raise error.Abort(msg)
3158 3158
3159 3159 # Apply (potential) flags to add and to remove after running
3160 3160 # the sidedata helpers
3161 3161 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3162 3162 entry = (new_offset_flags,) + entry[1:8]
3163 3163 entry += (current_offset, len(serialized_sidedata))
3164 3164
3165 3165 fp.write(serialized_sidedata)
3166 3166 new_entries.append(entry)
3167 3167 current_offset += len(serialized_sidedata)
3168 3168
3169 3169 # rewrite the new index entries
3170 3170 with self._indexfp(b'r+') as fp:
3171 3171 fp.seek(startrev * self.index.entry_size)
3172 3172 for i, e in enumerate(new_entries):
3173 3173 rev = startrev + i
3174 3174 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3175 3175 packed = self.index.entry_binary(rev)
3176 3176 if rev == 0:
3177 3177 header = self._format_flags | self._format_version
3178 3178 header = self.index.pack_header(header)
3179 3179 packed = header + packed
3180 3180 fp.write(packed)
@@ -1,1238 +1,1238 b''
1 1 #require no-reposimplestore no-chg
2 2
3 3 $ hg init requirements
4 4 $ cd requirements
5 5
6 6 # LFS not loaded by default.
7 7
8 8 $ hg config extensions
9 9 [1]
10 10
11 11 # Adding lfs to requires file will auto-load lfs extension.
12 12
13 13 $ echo lfs >> .hg/requires
14 14 $ hg config extensions
15 15 extensions.lfs=
16 16
17 17 # But only if there is no config entry for the extension already.
18 18
19 19 $ cat > .hg/hgrc << EOF
20 20 > [extensions]
21 21 > lfs=!
22 22 > EOF
23 23
24 24 $ hg config extensions
25 25 abort: repository requires features unknown to this Mercurial: lfs
26 26 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
27 27 [255]
28 28
29 29 $ cat > .hg/hgrc << EOF
30 30 > [extensions]
31 31 > lfs=
32 32 > EOF
33 33
34 34 $ hg config extensions
35 35 extensions.lfs=
36 36
37 37 $ cat > .hg/hgrc << EOF
38 38 > [extensions]
39 39 > lfs = missing.py
40 40 > EOF
41 41
42 42 $ hg config extensions
43 43 \*\*\* failed to import extension lfs from missing.py: [Errno *] $ENOENT$: 'missing.py' (glob)
44 44 abort: repository requires features unknown to this Mercurial: lfs
45 45 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
46 46 [255]
47 47
48 48 $ cd ..
49 49
50 50 # Initial setup
51 51
52 52 $ cat >> $HGRCPATH << EOF
53 53 > [extensions]
54 54 > lfs=
55 55 > [lfs]
56 56 > # Test deprecated config
57 57 > threshold=1000B
58 58 > EOF
59 59
60 60 $ LONG=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
61 61
62 62 # Prepare server and enable extension
63 63 $ hg init server
64 64 $ hg clone -q server client
65 65 $ cd client
66 66
67 67 # Commit small file
68 68 $ echo s > smallfile
69 69 $ echo '**.py = LF' > .hgeol
70 70 $ hg --config lfs.track='"size(\">1000B\")"' commit -Aqm "add small file"
71 71 hg: parse error: unsupported file pattern: size(">1000B")
72 72 (paths must be prefixed with "path:")
73 73 [10]
74 74 $ hg --config lfs.track='size(">1000B")' commit -Aqm "add small file"
75 75
76 76 # Commit large file
77 77 $ echo $LONG > largefile
78 78 $ grep lfs .hg/requires
79 79 [1]
80 80 $ hg commit --traceback -Aqm "add large file"
81 81 $ grep lfs .hg/requires
82 82 lfs
83 83
84 84 # Ensure metadata is stored
85 85 $ hg debugdata largefile 0
86 86 version https://git-lfs.github.com/spec/v1
87 87 oid sha256:f11e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b
88 88 size 1501
89 89 x-is-binary 0
90 90
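The pointer payload above is the plain 'key value' line format of the git-lfs spec. A rough sketch of reading it, for illustration only (hgext.lfs itself uses pointer.deserialize()):

    def parse_pointer(raw):
        # Each line of the pointer is 'key value'; split on the first space.
        lines = raw.decode('utf-8').splitlines()
        fields = dict(line.split(' ', 1) for line in lines)
        assert fields['version'] == 'https://git-lfs.github.com/spec/v1'
        oid = fields['oid'].split(':', 1)[1]  # drop the 'sha256:' prefix
        return oid, int(fields['size'])
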
91 91 # Check the blobstore is populated
92 92 $ find .hg/store/lfs/objects | sort
93 93 .hg/store/lfs/objects
94 94 .hg/store/lfs/objects/f1
95 95 .hg/store/lfs/objects/f1/1e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b
96 96
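The listing above shows the on-disk sharding: the first two hex digits of the oid become a subdirectory and the remaining 62 the file name. A small sketch of the mapping, assuming this two-level scheme is the only layout rule:

    import os.path

    def local_blob_path(store, oid):
        # .hg/store/lfs/objects/<first two hex digits>/<remaining 62 digits>
        return os.path.join(store, 'lfs', 'objects', oid[:2], oid[2:])
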
97 97 # Check the blob stored contains the actual contents of the file
98 98 $ cat .hg/store/lfs/objects/f1/1e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b
99 99 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
100 100
101 101 # Push changes to the server
102 102
103 103 $ hg push
104 104 pushing to $TESTTMP/server
105 105 searching for changes
106 106 abort: lfs.url needs to be configured
107 107 [255]
108 108
109 109 $ cat >> $HGRCPATH << EOF
110 110 > [lfs]
111 111 > url=file:$TESTTMP/dummy-remote/
112 112 > EOF
113 113
114 114 Pushing to a local non-lfs repo with the extension enabled will add the
115 115 lfs requirement
116 116
117 117 $ grep lfs $TESTTMP/server/.hg/requires
118 118 [1]
119 119 $ hg push -v | egrep -v '^(uncompressed| )'
120 120 pushing to $TESTTMP/server
121 121 searching for changes
122 122 lfs: found f11e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b in the local lfs store
123 123 2 changesets found
124 124 adding changesets
125 125 adding manifests
126 126 adding file changes
127 127 calling hook pretxnchangegroup.lfs: hgext.lfs.checkrequireslfs
128 128 added 2 changesets with 3 changes to 3 files
129 129 $ grep lfs $TESTTMP/server/.hg/requires
130 130 lfs
131 131
132 132 # Unknown URL scheme
133 133
134 134 $ hg push --config lfs.url=ftp://foobar
135 135 abort: lfs: unknown url scheme: ftp
136 136 [255]
137 137
138 138 $ cd ../
139 139
140 140 # Initialize new client (not cloning) and setup extension
141 141 $ hg init client2
142 142 $ cd client2
143 143 $ cat >> .hg/hgrc <<EOF
144 144 > [paths]
145 145 > default = $TESTTMP/server
146 146 > EOF
147 147
148 148 # Pull from server
149 149
150 150 Pulling a local lfs repo into a local non-lfs repo with the extension
151 151 enabled adds the lfs requirement
152 152
153 153 $ grep lfs .hg/requires $TESTTMP/server/.hg/requires
154 154 $TESTTMP/server/.hg/requires:lfs
155 155 $ hg pull default
156 156 pulling from $TESTTMP/server
157 157 requesting all changes
158 158 adding changesets
159 159 adding manifests
160 160 adding file changes
161 161 added 2 changesets with 3 changes to 3 files
162 162 new changesets 0ead593177f7:b88141481348
163 163 (run 'hg update' to get a working copy)
164 164 $ grep lfs .hg/requires $TESTTMP/server/.hg/requires
165 165 .hg/requires:lfs
166 166 $TESTTMP/server/.hg/requires:lfs
167 167
168 168 # Check the blobstore is not yet populated
169 169 $ [ -d .hg/store/lfs/objects ]
170 170 [1]
171 171
172 172 # Update to the last revision containing the large file
173 173 $ hg update
174 174 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
175 175
176 176 # Check the blobstore has been populated on update
177 177 $ find .hg/store/lfs/objects | sort
178 178 .hg/store/lfs/objects
179 179 .hg/store/lfs/objects/f1
180 180 .hg/store/lfs/objects/f1/1e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b
181 181
182 182 # Check the contents of the file are fetched from blobstore when requested
183 183 $ hg cat -r . largefile
184 184 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
185 185
186 186 # Check the file has been copied in the working copy
187 187 $ cat largefile
188 188 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
189 189
190 190 $ cd ..
191 191
192 192 # Check rename, and switch between large and small files
193 193
194 194 $ hg init repo3
195 195 $ cd repo3
196 196 $ cat >> .hg/hgrc << EOF
197 197 > [lfs]
198 198 > track=size(">10B")
199 199 > EOF
200 200
201 201 $ echo LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS > large
202 202 $ echo SHORTER > small
203 203 $ hg add . -q
204 204 $ hg commit -m 'commit with lfs content'
205 205
206 206 $ hg files -r . 'set:added()'
207 207 large
208 208 small
209 209 $ hg files -r . 'set:added() & lfs()'
210 210 large
211 211
212 212 $ hg mv large l
213 213 $ hg mv small s
214 214 $ hg status 'set:removed()'
215 215 R large
216 216 R small
217 217 $ hg status 'set:removed() & lfs()'
218 218 R large
219 219 $ hg commit -m 'renames'
220 220
221 221 $ hg cat -r . l -T '{rawdata}\n'
222 222 version https://git-lfs.github.com/spec/v1
223 223 oid sha256:66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
224 224 size 39
225 225 x-hg-copy large
226 226 x-hg-copyrev 2c531e0992ff3107c511b53cb82a91b6436de8b2
227 227 x-is-binary 0
228 228
229 229
230 230 $ hg files -r . 'set:copied()'
231 231 l
232 232 s
233 233 $ hg files -r . 'set:copied() & lfs()'
234 234 l
235 235 $ hg status --change . 'set:removed()'
236 236 R large
237 237 R small
238 238 $ hg status --change . 'set:removed() & lfs()'
239 239 R large
240 240
241 241 $ echo SHORT > l
242 242 $ echo BECOME-LARGER-FROM-SHORTER > s
243 243 $ hg commit -m 'large to small, small to large'
244 244
245 245 $ echo 1 >> l
246 246 $ echo 2 >> s
247 247 $ hg commit -m 'random modifications'
248 248
249 249 $ echo RESTORE-TO-BE-LARGE > l
250 250 $ echo SHORTER > s
251 251 $ hg commit -m 'switch large and small again'
252 252
253 253 # Test lfs_files template
254 254
255 255 $ hg log -r 'all()' -T '{rev} {join(lfs_files, ", ")}\n'
256 256 0 large
257 257 1 l, large
258 258 2 s
259 259 3 s
260 260 4 l
261 261
262 262 # Push and pull the above repo
263 263
264 264 $ hg --cwd .. init repo4
265 265 $ hg push ../repo4
266 266 pushing to ../repo4
267 267 searching for changes
268 268 adding changesets
269 269 adding manifests
270 270 adding file changes
271 271 added 5 changesets with 10 changes to 4 files
272 272
273 273 $ hg --cwd .. init repo5
274 274 $ hg --cwd ../repo5 pull ../repo3
275 275 pulling from ../repo3
276 276 requesting all changes
277 277 adding changesets
278 278 adding manifests
279 279 adding file changes
280 280 added 5 changesets with 10 changes to 4 files
281 281 new changesets fd47a419c4f7:5adf850972b9
282 282 (run 'hg update' to get a working copy)
283 283
284 284 $ cd ..
285 285
286 286 # Test clone
287 287
288 288 $ hg init repo6
289 289 $ cd repo6
290 290 $ cat >> .hg/hgrc << EOF
291 291 > [lfs]
292 292 > track=size(">30B")
293 293 > EOF
294 294
295 295 $ echo LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES > large
296 296 $ echo SMALL > small
297 297 $ hg commit -Aqm 'create a lfs file' large small
298 298 $ hg debuglfsupload -r 'all()' -v
299 299 lfs: found 8e92251415339ae9b148c8da89ed5ec665905166a1ab11b09dca8fad83344738 in the local lfs store
300 300
301 301 $ cd ..
302 302
303 303 $ hg clone repo6 repo7
304 304 updating to branch default
305 305 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
306 306 $ cd repo7
307 307 $ cat large
308 308 LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES
309 309 $ cat small
310 310 SMALL
311 311
312 312 $ cd ..
313 313
314 314 $ hg --config extensions.share= share repo7 sharedrepo
315 315 updating working directory
316 316 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
317 317 $ grep lfs sharedrepo/.hg/requires
318 318 lfs
319 319
320 320 # Test rename and status
321 321
322 322 $ hg init repo8
323 323 $ cd repo8
324 324 $ cat >> .hg/hgrc << EOF
325 325 > [lfs]
326 326 > track=size(">10B")
327 327 > EOF
328 328
329 329 $ echo THIS-IS-LFS-BECAUSE-10-BYTES > a1
330 330 $ echo SMALL > a2
331 331 $ hg commit -m a -A a1 a2
332 332 $ hg status
333 333 $ hg mv a1 b1
334 334 $ hg mv a2 a1
335 335 $ hg mv b1 a2
336 336 $ hg commit -m b
337 337 $ hg status
338 338 >>> with open('a2', 'wb') as f:
339 339 ... f.write(b'\1\nSTART-WITH-HG-FILELOG-METADATA') and None
340 340 >>> with open('a1', 'wb') as f:
341 341 ... f.write(b'\1\nMETA\n') and None
342 342 $ hg commit -m meta
343 343 $ hg status
344 344 $ hg log -T '{rev}: {file_copies} | {file_dels} | {file_adds}\n'
345 345 2: | |
346 346 1: a1 (a2)a2 (a1) | |
347 347 0: | | a1 a2
348 348
349 349 $ for n in a1 a2; do
350 350 > for r in 0 1 2; do
351 351 > printf '\n%s @ %s\n' $n $r
352 352 > hg debugdata $n $r
353 353 > done
354 354 > done
355 355
356 356 a1 @ 0
357 357 version https://git-lfs.github.com/spec/v1
358 358 oid sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
359 359 size 29
360 360 x-is-binary 0
361 361
362 362 a1 @ 1
363 363 \x01 (esc)
364 364 copy: a2
365 365 copyrev: 50470ad23cf937b1f4b9f80bfe54df38e65b50d9
366 366 \x01 (esc)
367 367 SMALL
368 368
369 369 a1 @ 2
370 370 \x01 (esc)
371 371 \x01 (esc)
372 372 \x01 (esc)
373 373 META
374 374
375 375 a2 @ 0
376 376 SMALL
377 377
378 378 a2 @ 1
379 379 version https://git-lfs.github.com/spec/v1
380 380 oid sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
381 381 size 29
382 382 x-hg-copy a1
383 383 x-hg-copyrev be23af27908a582af43e5cda209a5a9b319de8d4
384 384 x-is-binary 0
385 385
386 386 a2 @ 2
387 387 version https://git-lfs.github.com/spec/v1
388 388 oid sha256:876dadc86a8542f9798048f2c47f51dbf8e4359aed883e8ec80c5db825f0d943
389 389 size 32
390 390 x-is-binary 0
391 391
392 392 # Verify commit hashes include rename metadata
393 393
394 394 $ hg log -T '{rev}:{node|short} {desc}\n'
395 395 2:0fae949de7fa meta
396 396 1:9cd6bdffdac0 b
397 397 0:7f96794915f7 a
398 398
399 399 $ cd ..
400 400
401 401 # Test bundle
402 402
403 403 $ hg init repo9
404 404 $ cd repo9
405 405 $ cat >> .hg/hgrc << EOF
406 406 > [lfs]
407 407 > track=size(">10B")
408 408 > [diff]
409 409 > git=1
410 410 > EOF
411 411
412 412 $ for i in 0 single two three 4; do
413 413 > echo 'THIS-IS-LFS-'$i > a
414 414 > hg commit -m a-$i -A a
415 415 > done
416 416
417 417 $ hg update 2 -q
418 418 $ echo 'THIS-IS-LFS-2-CHILD' > a
419 419 $ hg commit -m branching -q
420 420
421 421 $ hg bundle --base 1 bundle.hg -v
422 422 lfs: found 5ab7a3739a5feec94a562d070a14f36dba7cad17e5484a4a89eea8e5f3166888 in the local lfs store
423 423 lfs: found a9c7d1cd6ce2b9bbdf46ed9a862845228717b921c089d0d42e3bcaed29eb612e in the local lfs store
424 424 lfs: found f693890c49c409ec33673b71e53f297681f76c1166daf33b2ad7ebf8b1d3237e in the local lfs store
425 425 lfs: found fda198fea753eb66a252e9856915e1f5cddbe41723bd4b695ece2604ad3c9f75 in the local lfs store
426 426 4 changesets found
427 427 uncompressed size of bundle content:
428 428 * (changelog) (glob)
429 429 * (manifests) (glob)
430 430 * a (glob)
431 431 $ hg --config extensions.strip= strip -r 2 --no-backup --force -q
432 432 $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a
433 433 5 branching
434 434 diff --git a/a b/a
435 435 --- a/a
436 436 +++ b/a
437 437 @@ -1,1 +1,1 @@
438 438 -THIS-IS-LFS-two
439 439 +THIS-IS-LFS-2-CHILD
440 440
441 441 4 a-4
442 442 diff --git a/a b/a
443 443 --- a/a
444 444 +++ b/a
445 445 @@ -1,1 +1,1 @@
446 446 -THIS-IS-LFS-three
447 447 +THIS-IS-LFS-4
448 448
449 449 3 a-three
450 450 diff --git a/a b/a
451 451 --- a/a
452 452 +++ b/a
453 453 @@ -1,1 +1,1 @@
454 454 -THIS-IS-LFS-two
455 455 +THIS-IS-LFS-three
456 456
457 457 2 a-two
458 458 diff --git a/a b/a
459 459 --- a/a
460 460 +++ b/a
461 461 @@ -1,1 +1,1 @@
462 462 -THIS-IS-LFS-single
463 463 +THIS-IS-LFS-two
464 464
465 465 1 a-single
466 466 diff --git a/a b/a
467 467 --- a/a
468 468 +++ b/a
469 469 @@ -1,1 +1,1 @@
470 470 -THIS-IS-LFS-0
471 471 +THIS-IS-LFS-single
472 472
473 473 0 a-0
474 474 diff --git a/a b/a
475 475 new file mode 100644
476 476 --- /dev/null
477 477 +++ b/a
478 478 @@ -0,0 +1,1 @@
479 479 +THIS-IS-LFS-0
480 480
481 481 $ hg bundle -R bundle.hg --base 1 bundle-again.hg -q
482 482 $ hg -R bundle-again.hg log -p -T '{rev} {desc}\n' a
483 483 5 branching
484 484 diff --git a/a b/a
485 485 --- a/a
486 486 +++ b/a
487 487 @@ -1,1 +1,1 @@
488 488 -THIS-IS-LFS-two
489 489 +THIS-IS-LFS-2-CHILD
490 490
491 491 4 a-4
492 492 diff --git a/a b/a
493 493 --- a/a
494 494 +++ b/a
495 495 @@ -1,1 +1,1 @@
496 496 -THIS-IS-LFS-three
497 497 +THIS-IS-LFS-4
498 498
499 499 3 a-three
500 500 diff --git a/a b/a
501 501 --- a/a
502 502 +++ b/a
503 503 @@ -1,1 +1,1 @@
504 504 -THIS-IS-LFS-two
505 505 +THIS-IS-LFS-three
506 506
507 507 2 a-two
508 508 diff --git a/a b/a
509 509 --- a/a
510 510 +++ b/a
511 511 @@ -1,1 +1,1 @@
512 512 -THIS-IS-LFS-single
513 513 +THIS-IS-LFS-two
514 514
515 515 1 a-single
516 516 diff --git a/a b/a
517 517 --- a/a
518 518 +++ b/a
519 519 @@ -1,1 +1,1 @@
520 520 -THIS-IS-LFS-0
521 521 +THIS-IS-LFS-single
522 522
523 523 0 a-0
524 524 diff --git a/a b/a
525 525 new file mode 100644
526 526 --- /dev/null
527 527 +++ b/a
528 528 @@ -0,0 +1,1 @@
529 529 +THIS-IS-LFS-0
530 530
531 531 $ cd ..
532 532
533 533 # Test isbinary
534 534
535 535 $ hg init repo10
536 536 $ cd repo10
537 537 $ cat >> .hg/hgrc << EOF
538 538 > [extensions]
539 539 > lfs=
540 540 > [lfs]
541 541 > track=all()
542 542 > EOF
543 543 $ "$PYTHON" <<'EOF'
544 544 > def write(path, content):
545 545 > with open(path, 'wb') as f:
546 546 > f.write(content)
547 547 > write('a', b'\0\0')
548 548 > write('b', b'\1\n')
549 549 > write('c', b'\1\n\0')
550 550 > write('d', b'xx')
551 551 > EOF
552 552 $ hg add a b c d
553 553 $ hg diff --stat
554 554 a | Bin
555 555 b | 1 +
556 556 c | Bin
557 557 d | 1 +
558 558 4 files changed, 2 insertions(+), 0 deletions(-)
559 559 $ hg commit -m binarytest
560 560 $ cat > $TESTTMP/dumpbinary.py << EOF
561 561 > from mercurial.utils import (
562 562 > stringutil,
563 563 > )
564 564 > def reposetup(ui, repo):
565 565 > for n in (b'a', b'b', b'c', b'd'):
566 566 > ui.write((b'%s: binary=%s\n')
567 567 > % (n, stringutil.pprint(repo[b'.'][n].isbinary())))
568 568 > EOF
569 569 $ hg --config extensions.dumpbinary=$TESTTMP/dumpbinary.py id --trace
570 570 a: binary=True
571 571 b: binary=False
572 572 c: binary=True
573 573 d: binary=False
574 574 b55353847f02 tip
575 575
576 576 Binary blobs don't need to be present to be skipped in filesets. (And their
577 577 absence doesn't cause an abort.)
578 578
579 579 $ rm .hg/store/lfs/objects/96/a296d224f285c67bee93c30f8a309157f0daa35dc5b87e410b78630a09cfc7
580 580 $ rm .hg/store/lfs/objects/92/f76135a4baf4faccb8586a60faf830c2bdfce147cefa188aaf4b790bd01b7e
581 581
582 582 $ hg files --debug -r . 'set:eol("unix")' --config 'experimental.lfs.disableusercache=True'
583 583 lfs: found c04b5bb1a5b2eb3e9cd4805420dba5a9d133da5b7adeeafb5474c4adae9faa80 in the local lfs store
584 584 2 b
585 585 lfs: found 5dde896887f6754c9b15bfe3a441ae4806df2fde94001311e08bf110622e0bbe in the local lfs store
586 586
587 587 $ hg files --debug -r . 'set:binary()' --config 'experimental.lfs.disableusercache=True'
588 588 2 a
589 589 3 c
590 590
591 591 $ cd ..
592 592
593 593 # Test fctx.cmp fastpath - diff without LFS blobs
594 594
595 595 $ hg init repo12
596 596 $ cd repo12
597 597 $ cat >> .hg/hgrc <<EOF
598 598 > [lfs]
599 599 > threshold=1
600 600 > EOF
601 601 $ cat > ../patch.diff <<EOF
602 602 > # HG changeset patch
603 603 > 2
604 604 >
605 605 > diff --git a/a b/a
606 606 > old mode 100644
607 607 > new mode 100755
608 608 > EOF
609 609
610 610 $ for i in 1 2 3; do
611 611 > cp ../repo10/a a
612 612 > if [ $i = 3 ]; then
613 613 > # make a content-only change
614 614 > hg import -q --bypass ../patch.diff
615 615 > hg update -q
616 616 > rm ../patch.diff
617 617 > else
618 618 > echo $i >> a
619 619 > hg commit -m $i -A a
620 620 > fi
621 621 > done
622 622 $ [ -d .hg/store/lfs/objects ]
623 623
624 624 $ cd ..
625 625
626 626 $ hg clone repo12 repo13 --noupdate
627 627 $ cd repo13
628 628 $ hg log --removed -p a -T '{desc}\n' --config diff.nobinary=1 --git
629 629 2
630 630 diff --git a/a b/a
631 631 old mode 100644
632 632 new mode 100755
633 633
634 634 2
635 635 diff --git a/a b/a
636 636 Binary file a has changed
637 637
638 638 1
639 639 diff --git a/a b/a
640 640 new file mode 100644
641 641 Binary file a has changed
642 642
643 643 $ [ -d .hg/store/lfs/objects ]
644 644 [1]
645 645
646 646 $ cd ..
647 647
648 648 # Test filter
649 649
650 650 $ hg init repo11
651 651 $ cd repo11
652 652 $ cat >> .hg/hgrc << EOF
653 653 > [lfs]
654 654 > track=(**.a & size(">5B")) | (**.b & !size(">5B"))
655 655 > | (**.c & "path:d" & !"path:d/c.c") | size(">10B")
656 656 > EOF
657 657
658 658 $ mkdir a
659 659 $ echo aaaaaa > a/1.a
660 660 $ echo a > a/2.a
661 661 $ echo aaaaaa > 1.b
662 662 $ echo a > 2.b
663 663 $ echo a > 1.c
664 664 $ mkdir d
665 665 $ echo a > d/c.c
666 666 $ echo a > d/d.c
667 667 $ echo aaaaaaaaaaaa > x
668 668 $ hg add . -q
669 669 $ hg commit -m files
670 670
671 671 $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
672 672 > if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
673 673 > echo "${p}: is lfs"
674 674 > else
675 675 > echo "${p}: not lfs"
676 676 > fi
677 677 > done
678 678 a/1.a: is lfs
679 679 a/2.a: not lfs
680 680 1.b: not lfs
681 681 2.b: is lfs
682 682 1.c: not lfs
683 683 d/c.c: not lfs
684 684 d/d.c: is lfs
685 685 x: is lfs
686 686
687 687 $ cd ..
688 688
689 689 # Verify the repos
690 690
691 691 $ cat > $TESTTMP/dumpflog.py << EOF
692 692 > # print raw revision sizes, flags, and hashes for certain files
693 693 > import hashlib
694 694 > from mercurial.node import short
695 695 > from mercurial import (
696 696 > pycompat,
697 697 > revlog,
698 698 > )
699 699 > from mercurial.utils import (
700 700 > procutil,
701 701 > stringutil,
702 702 > )
703 703 > def hash(rawtext):
704 704 > h = hashlib.sha512()
705 705 > h.update(rawtext)
706 706 > return pycompat.sysbytes(h.hexdigest()[:4])
707 707 > def reposetup(ui, repo):
708 708 > # these 2 files are interesting
709 709 > for name in [b'l', b's']:
710 710 > fl = repo.file(name)
711 711 > if len(fl) == 0:
712 712 > continue
713 713 > sizes = [fl._revlog.rawsize(i) for i in fl]
714 714 > texts = [fl.rawdata(i) for i in fl]
715 715 > flags = [int(fl._revlog.flags(i)) for i in fl]
716 716 > hashes = [hash(t) for t in texts]
717 717 > procutil.stdout.write(b' %s: rawsizes=%r flags=%r hashes=%s\n'
718 718 > % (name, sizes, flags, stringutil.pprint(hashes)))
719 719 > EOF
720 720
721 721 $ for i in client client2 server repo3 repo4 repo5 repo6 repo7 repo8 repo9 \
722 722 > repo10; do
723 723 > echo 'repo:' $i
724 724 > hg --cwd $i verify --config extensions.dumpflog=$TESTTMP/dumpflog.py -q
725 725 > done
726 726 repo: client
727 727 repo: client2
728 728 repo: server
729 729 repo: repo3
730 730 l: rawsizes=[211, 6, 8, 141] flags=[8192, 0, 0, 8192] hashes=['d2b8', '948c', 'cc88', '724d']
731 731 s: rawsizes=[74, 141, 141, 8] flags=[0, 8192, 8192, 0] hashes=['3c80', 'fce0', '874a', '826b']
732 732 repo: repo4
733 733 l: rawsizes=[211, 6, 8, 141] flags=[8192, 0, 0, 8192] hashes=['d2b8', '948c', 'cc88', '724d']
734 734 s: rawsizes=[74, 141, 141, 8] flags=[0, 8192, 8192, 0] hashes=['3c80', 'fce0', '874a', '826b']
735 735 repo: repo5
736 736 l: rawsizes=[211, 6, 8, 141] flags=[8192, 0, 0, 8192] hashes=['d2b8', '948c', 'cc88', '724d']
737 737 s: rawsizes=[74, 141, 141, 8] flags=[0, 8192, 8192, 0] hashes=['3c80', 'fce0', '874a', '826b']
738 738 repo: repo6
739 739 repo: repo7
740 740 repo: repo8
741 741 repo: repo9
742 742 repo: repo10
743 743
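The flag value 8192 in the lists above is bit 13 of the offset/flags field, i.e. the "externally stored" flag (REVIDX_EXTSTORED) that marks revisions whose rawtext is an LFS pointer; it is the same value passed to verify.skipflags further down. A one-line sanity check, assuming the standard v1 flag layout:

    # Bit 13 of the offset/flags field marks externally stored (LFS) rawtext.
    REVIDX_EXTSTORED = 1 << 13
    assert REVIDX_EXTSTORED == 8192
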
744 744 repo13 doesn't have any cached lfs files and its source never pushed its
745 745 files, so they don't exist in the remote store either. The blobs are
746 746 therefore taken from the user cache.
747 747
748 748 $ test -d $TESTTMP/repo13/.hg/store/lfs/objects
749 749 [1]
750 750
751 751 $ hg --config extensions.share= share repo13 repo14
752 752 updating working directory
753 753 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
754 754 $ hg -R repo14 -q verify
755 755
756 756 $ hg clone repo13 repo15
757 757 updating to branch default
758 758 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
759 759 $ hg -R repo15 -q verify
760 760
761 761 If the source repo doesn't have the blob (maybe it was pulled or cloned with
762 762 --noupdate), the blob is still accessible via the global cache to send to the
763 763 remote store.
764 764
765 765 $ rm -rf $TESTTMP/repo15/.hg/store/lfs
766 766 $ hg init repo16
767 767 $ hg -R repo15 push repo16
768 768 pushing to repo16
769 769 searching for changes
770 770 adding changesets
771 771 adding manifests
772 772 adding file changes
773 773 added 3 changesets with 2 changes to 1 files
774 774 $ hg -R repo15 -q verify
775 775
776 776 Test damaged file scenarios. (This also damages the usercache because of the
777 777 hardlinks.)
778 778
779 779 $ echo 'damage' >> repo5/.hg/store/lfs/objects/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
780 780
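The damage tests that follow rely on the blobstore's integrity invariant: a blob is valid only if the sha256 of its content equals its oid, so the appended bytes make every later check fail. For illustration (not the extension's own code):

    import hashlib

    def blob_is_valid(path, oid):
        with open(path, 'rb') as f:
            return hashlib.sha256(f.read()).hexdigest() == oid
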
781 781 Repo with damaged lfs objects in any revision will fail verification.
782 782
783 783 $ hg -R repo5 verify
784 784 checking changesets
785 785 checking manifests
786 786 crosschecking files in changesets and manifests
787 787 checking files
788 l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
789 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
788 l@1: unpacking 46a2f24864bc: integrity check failed on data/l:0
789 large@0: unpacking 2c531e0992ff: integrity check failed on data/large:0
790 790 checked 5 changesets with 10 changes to 4 files
791 791 2 integrity errors encountered!
792 792 (first damaged changeset appears to be 0)
793 793 [1]
794 794
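The expected-output change above (data/l:0 and data/large:0 instead of data/l.i:0 and data/large.i:0) shows integrity errors now naming the revlog by its display identifier rather than by its index file. A rough sketch of the message shape, assuming a display_id-style attribute rather than the exact upstream code:

    def integrity_error(display_id, rev):
        # bytes %-formatting works on Python 3.5+ (PEP 461)
        return b'integrity check failed on %s:%d' % (display_id, rev)

    assert integrity_error(b'data/l', 0) == b'integrity check failed on data/l:0'
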
795 795 Updates work after cloning a damaged repo, if the damaged lfs objects aren't in
796 796 the update destination. Those objects won't be added to the new repo's store
797 797 because they aren't accessed.
798 798
799 799 $ hg clone -v repo5 fromcorrupt
800 800 updating to branch default
801 801 resolving manifests
802 802 getting l
803 803 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the usercache
804 804 getting s
805 805 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
806 806 $ test -f fromcorrupt/.hg/store/lfs/objects/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
807 807 [1]
808 808
809 809 Verify will not try to download lfs blobs if told not to process lfs content.
810 810 The extension makes sure that the filelog.renamed() path is taken on a missing
811 811 blob, and the output shows that it isn't fetched.
812 812
813 813 $ cat > $TESTTMP/lfsrename.py <<EOF
814 814 > import sys
815 815 >
816 816 > from mercurial import (
817 817 > exthelper,
818 818 > pycompat,
819 819 > )
820 820 >
821 821 > from hgext.lfs import (
822 822 > pointer,
823 823 > wrapper,
824 824 > )
825 825 >
826 826 > eh = exthelper.exthelper()
827 827 > uisetup = eh.finaluisetup
828 828 >
829 829 > @eh.wrapfunction(wrapper, b'filelogrenamed')
830 830 > def filelogrenamed(orig, orig1, self, node):
831 831 > ret = orig(orig1, self, node)
832 832 > if wrapper._islfs(self._revlog, node) and ret:
833 833 > rawtext = self._revlog.rawdata(node)
834 834 > metadata = pointer.deserialize(rawtext)
835 835 > print('lfs blob %s renamed %s -> %s'
836 836 > % (pycompat.sysstr(metadata[b'oid']),
837 837 > pycompat.sysstr(ret[0]),
838 838 > pycompat.fsdecode(self._revlog.filename)))
839 839 > sys.stdout.flush()
840 840 > return ret
841 841 > EOF
842 842
843 843 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v --no-lfs \
844 844 > --config extensions.x=$TESTTMP/lfsrename.py
845 845 repository uses revlog format 1
846 846 checking changesets
847 847 checking manifests
848 848 crosschecking files in changesets and manifests
849 849 checking files
850 850 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
851 851 lfs blob sha256:66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e renamed large -> l
852 852 checked 5 changesets with 10 changes to 4 files
853 853
854 854 Verify will not try to download lfs blobs if told not to by the config option
855 855
856 856 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v \
857 857 > --config verify.skipflags=8192 \
858 858 > --config extensions.x=$TESTTMP/lfsrename.py
859 859 repository uses revlog format 1
860 860 checking changesets
861 861 checking manifests
862 862 crosschecking files in changesets and manifests
863 863 checking files
864 864 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
865 865 lfs blob sha256:66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e renamed large -> l
866 866 checked 5 changesets with 10 changes to 4 files
867 867
868 868 Verify will copy/link into the local store all lfs objects that aren't
869 869 already present. Bypass the corrupted usercache to show that verify works
870 870 when fed by the (uncorrupted) remote store.
871 871
872 872 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v
873 873 repository uses revlog format 1
874 874 checking changesets
875 875 checking manifests
876 876 crosschecking files in changesets and manifests
877 877 checking files
878 878 lfs: adding 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e to the usercache
879 879 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
880 880 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
881 881 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
882 882 lfs: adding 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 to the usercache
883 883 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
884 884 lfs: adding b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c to the usercache
885 885 lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
886 886 checked 5 changesets with 10 changes to 4 files
887 887
888 888 Verify will not copy/link a corrupted file from the usercache into the
889 889 local store and poison it. (The verify with a good remote now works.)
890 890
891 891 $ rm -r fromcorrupt/.hg/store/lfs/objects/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
892 892 $ hg -R fromcorrupt verify -v
893 893 repository uses revlog format 1
894 894 checking changesets
895 895 checking manifests
896 896 crosschecking files in changesets and manifests
897 897 checking files
898 l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
898 l@1: unpacking 46a2f24864bc: integrity check failed on data/l:0
899 899 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
900 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
900 large@0: unpacking 2c531e0992ff: integrity check failed on data/large:0
901 901 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
902 902 lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
903 903 checked 5 changesets with 10 changes to 4 files
904 904 2 integrity errors encountered!
905 905 (first damaged changeset appears to be 0)
906 906 [1]
907 907 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v
908 908 repository uses revlog format 1
909 909 checking changesets
910 910 checking manifests
911 911 crosschecking files in changesets and manifests
912 912 checking files
913 913 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the usercache
914 914 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
915 915 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
916 916 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
917 917 lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
918 918 checked 5 changesets with 10 changes to 4 files
919 919
920 920 Damaging a file required by the update destination fails the update.
921 921
922 922 $ echo 'damage' >> $TESTTMP/dummy-remote/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
923 923 $ hg --config lfs.usercache=emptycache clone -v repo5 fromcorrupt2
924 924 updating to branch default
925 925 resolving manifests
926 926 abort: corrupt remote lfs object: 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
927 927 [255]
928 928
929 929 A corrupted lfs blob is not transferred from a file://remotestore to the
930 930 usercache or local store.
931 931
932 932 $ test -f emptycache/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
933 933 [1]
934 934 $ test -f fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
935 935 [1]
936 936
937 937 $ hg -R fromcorrupt2 verify
938 938 checking changesets
939 939 checking manifests
940 940 crosschecking files in changesets and manifests
941 941 checking files
942 l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
943 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
942 l@1: unpacking 46a2f24864bc: integrity check failed on data/l:0
943 large@0: unpacking 2c531e0992ff: integrity check failed on data/large:0
944 944 checked 5 changesets with 10 changes to 4 files
945 945 2 integrity errors encountered!
946 946 (first damaged changeset appears to be 0)
947 947 [1]
948 948
949 949 Corrupt local files are not sent upstream. (The alternate dummy remote
950 950 avoids the corrupt lfs object in the original remote.)
951 951
952 952 $ mkdir $TESTTMP/dummy-remote2
953 953 $ hg init dest
954 954 $ hg -R fromcorrupt2 --config lfs.url=file:///$TESTTMP/dummy-remote2 push -v dest
955 955 pushing to dest
956 956 searching for changes
957 957 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
958 958 abort: detected corrupt lfs object: 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
959 959 (run hg verify)
960 960 [255]
961 961
962 962 $ hg -R fromcorrupt2 --config lfs.url=file:///$TESTTMP/dummy-remote2 verify -v
963 963 repository uses revlog format 1
964 964 checking changesets
965 965 checking manifests
966 966 crosschecking files in changesets and manifests
967 967 checking files
968 l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
968 l@1: unpacking 46a2f24864bc: integrity check failed on data/l:0
969 969 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
970 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
970 large@0: unpacking 2c531e0992ff: integrity check failed on data/large:0
971 971 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
972 972 lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
973 973 checked 5 changesets with 10 changes to 4 files
974 974 2 integrity errors encountered!
975 975 (first damaged changeset appears to be 0)
976 976 [1]
977 977
978 978 $ cat $TESTTMP/dummy-remote2/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256
979 979 sha256=22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
980 980 $ cat fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256
981 981 sha256=22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
982 982 $ test -f $TESTTMP/dummy-remote2/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
983 983 [1]
984 984
985 985 Accessing a corrupt file will complain
986 986
987 987 $ hg --cwd fromcorrupt2 cat -r 0 large
988 abort: integrity check failed on data/large.i:0
988 abort: integrity check failed on data/large:0
989 989 [50]
990 990
991 991 lfs -> normal -> lfs round trip conversions are possible. The 'none()'
992 992 predicate on the command line will override whatever is configured globally and
993 993 locally, and ensures everything converts to a regular file. For lfs -> normal,
994 994 there's no 'lfs' destination repo requirement. For normal -> lfs, there is.
995 995
996 996 $ hg --config extensions.convert= --config 'lfs.track=none()' \
997 997 > convert repo8 convert_normal
998 998 initializing destination convert_normal repository
999 999 scanning source...
1000 1000 sorting...
1001 1001 converting...
1002 1002 2 a
1003 1003 1 b
1004 1004 0 meta
1005 1005 $ grep 'lfs' convert_normal/.hg/requires
1006 1006 [1]
1007 1007 $ hg --cwd convert_normal cat a1 -r 0 -T '{rawdata}'
1008 1008 THIS-IS-LFS-BECAUSE-10-BYTES
1009 1009
1010 1010 $ hg --config extensions.convert= --config lfs.threshold=10B \
1011 1011 > convert convert_normal convert_lfs
1012 1012 initializing destination convert_lfs repository
1013 1013 scanning source...
1014 1014 sorting...
1015 1015 converting...
1016 1016 2 a
1017 1017 1 b
1018 1018 0 meta
1019 1019
1020 1020 $ hg --cwd convert_lfs cat -r 0 a1 -T '{rawdata}'
1021 1021 version https://git-lfs.github.com/spec/v1
1022 1022 oid sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
1023 1023 size 29
1024 1024 x-is-binary 0
1025 1025 $ hg --cwd convert_lfs debugdata a1 0
1026 1026 version https://git-lfs.github.com/spec/v1
1027 1027 oid sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
1028 1028 size 29
1029 1029 x-is-binary 0
1030 1030 $ hg --cwd convert_lfs log -r 0 -T "{lfs_files % '{lfspointer % '{key}={value}\n'}'}"
1031 1031 version=https://git-lfs.github.com/spec/v1
1032 1032 oid=sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
1033 1033 size=29
1034 1034 x-is-binary=0
1035 1035 $ hg --cwd convert_lfs log -r 0 \
1036 1036 > -T '{lfs_files % "{get(lfspointer, "oid")}\n"}{lfs_files % "{lfspointer.oid}\n"}'
1037 1037 sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
1038 1038 sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
1039 1039 $ hg --cwd convert_lfs log -r 0 -T '{lfs_files % "{lfspointer}\n"}'
1040 1040 version=https://git-lfs.github.com/spec/v1 oid=sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024 size=29 x-is-binary=0
1041 1041 $ hg --cwd convert_lfs \
1042 1042 > log -r 'all()' -T '{rev}: {lfs_files % "{file}: {lfsoid}\n"}'
1043 1043 0: a1: 5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
1044 1044 1: a2: 5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
1045 1045 2: a2: 876dadc86a8542f9798048f2c47f51dbf8e4359aed883e8ec80c5db825f0d943
1046 1046
1047 1047 $ grep 'lfs' convert_lfs/.hg/requires
1048 1048 lfs
1049 1049
1050 1050 The hashes in all stages of the conversion are unchanged.
1051 1051
1052 1052 $ hg -R repo8 log -T '{node|short}\n'
1053 1053 0fae949de7fa
1054 1054 9cd6bdffdac0
1055 1055 7f96794915f7
1056 1056 $ hg -R convert_normal log -T '{node|short}\n'
1057 1057 0fae949de7fa
1058 1058 9cd6bdffdac0
1059 1059 7f96794915f7
1060 1060 $ hg -R convert_lfs log -T '{node|short}\n'
1061 1061 0fae949de7fa
1062 1062 9cd6bdffdac0
1063 1063 7f96794915f7
1064 1064
1065 1065 This conversion is trickier, because the source contains deleted files (via `hg mv`)
1066 1066
1067 1067 $ hg --config extensions.convert= --config lfs.threshold=1000M \
1068 1068 > convert repo3 convert_normal2
1069 1069 initializing destination convert_normal2 repository
1070 1070 scanning source...
1071 1071 sorting...
1072 1072 converting...
1073 1073 4 commit with lfs content
1074 1074 3 renames
1075 1075 2 large to small, small to large
1076 1076 1 random modifications
1077 1077 0 switch large and small again
1078 1078 $ grep 'lfs' convert_normal2/.hg/requires
1079 1079 [1]
1080 1080 $ hg --cwd convert_normal2 debugdata large 0
1081 1081 LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS
1082 1082
1083 1083 $ hg --config extensions.convert= --config lfs.threshold=10B \
1084 1084 > convert convert_normal2 convert_lfs2
1085 1085 initializing destination convert_lfs2 repository
1086 1086 scanning source...
1087 1087 sorting...
1088 1088 converting...
1089 1089 4 commit with lfs content
1090 1090 3 renames
1091 1091 2 large to small, small to large
1092 1092 1 random modifications
1093 1093 0 switch large and small again
1094 1094 $ grep 'lfs' convert_lfs2/.hg/requires
1095 1095 lfs
1096 1096 $ hg --cwd convert_lfs2 debugdata large 0
1097 1097 version https://git-lfs.github.com/spec/v1
1098 1098 oid sha256:66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
1099 1099 size 39
1100 1100 x-is-binary 0
1101 1101
1102 1102 Committing deleted files works:
1103 1103
1104 1104 $ hg init $TESTTMP/repo-del
1105 1105 $ cd $TESTTMP/repo-del
1106 1106 $ echo 1 > A
1107 1107 $ hg commit -m 'add A' -A A
1108 1108 $ hg rm A
1109 1109 $ hg commit -m 'rm A'
1110 1110
1111 1111 Bad .hglfs files will block the commit with a useful message
1112 1112
1113 1113 $ cat > .hglfs << EOF
1114 1114 > [track]
1115 1115 > **.test = size(">5B")
1116 1116 > bad file ... no commit
1117 1117 > EOF
1118 1118
1119 1119 $ echo x > file.txt
1120 1120 $ hg ci -Aqm 'should fail'
1121 1121 config error at .hglfs:3: bad file ... no commit
1122 1122 [30]
1123 1123
1124 1124 $ cat > .hglfs << EOF
1125 1125 > [track]
1126 1126 > **.test = size(">5B")
1127 1127 > ** = nonexistent()
1128 1128 > EOF
1129 1129
1130 1130 $ hg ci -Aqm 'should fail'
1131 1131 abort: parse error in .hglfs: unknown identifier: nonexistent
1132 1132 [255]
1133 1133
1134 1134 '**' works out to mean all files.
1135 1135
1136 1136 $ cat > .hglfs << EOF
1137 1137 > [track]
1138 1138 > path:.hglfs = none()
1139 1139 > **.test = size(">5B")
1140 1140 > **.exclude = none()
1141 1141 > ** = size(">10B")
1142 1142 > EOF
1143 1143
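The deny-before-allow ordering above suggests first-match-wins semantics: [track] rules are tried in file order and the first matching pattern's fileset decides. A toy sketch under that assumption (real matching uses Mercurial patterns and filesets, not fnmatch):

    import fnmatch

    RULES = [
        ('.hglfs',    lambda size: False),      # path:.hglfs = none()
        ('*.test',    lambda size: size > 5),   # **.test = size(">5B")
        ('*.exclude', lambda size: False),      # **.exclude = none()
        ('*',         lambda size: size > 10),  # ** = size(">10B")
    ]

    def tracked_as_lfs(name, size):
        for pattern, predicate in RULES:
            if fnmatch.fnmatch(name, pattern):
                return predicate(size)
        return False

    assert tracked_as_lfs('lfs.test', 10)           # 'largefile\n' is 10 bytes
    assert not tracked_as_lfs('nolfs.exclude', 22)
    assert tracked_as_lfs('lfs.catchall', 18)
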
1144 1144 The LFS policy takes effect without tracking the .hglfs file
1145 1145
1146 1146 $ echo 'largefile' > lfs.test
1147 1147 $ echo '012345678901234567890' > nolfs.exclude
1148 1148 $ echo '01234567890123456' > lfs.catchall
1149 1149 $ hg add *
1150 1150 $ hg ci -qm 'before add .hglfs'
1151 1151 $ hg log -r . -T '{rev}: {lfs_files % "{file}: {lfsoid}\n"}\n'
1152 1152 2: lfs.catchall: d4ec46c2869ba22eceb42a729377432052d9dd75d82fc40390ebaadecee87ee9
1153 1153 lfs.test: 5489e6ced8c36a7b267292bde9fd5242a5f80a7482e8f23fa0477393dfaa4d6c
1154 1154
1155 1155 The .hglfs file works when tracked
1156 1156
1157 1157 $ echo 'largefile2' > lfs.test
1158 1158 $ echo '012345678901234567890a' > nolfs.exclude
1159 1159 $ echo '01234567890123456a' > lfs.catchall
1160 1160 $ hg ci -Aqm 'after adding .hglfs'
1161 1161 $ hg log -r . -T '{rev}: {lfs_files % "{file}: {lfsoid}\n"}\n'
1162 1162 3: lfs.catchall: 31f43b9c62b540126b0ad5884dc013d21a61c9329b77de1fceeae2fc58511573
1163 1163 lfs.test: 8acd23467967bc7b8cc5a280056589b0ba0b17ff21dbd88a7b6474d6290378a6
1164 1164
1165 1165 The LFS policy stops when the .hglfs is gone
1166 1166
1167 1167 $ mv .hglfs .hglfs_
1168 1168 $ echo 'largefile3' > lfs.test
1169 1169 $ echo '012345678901234567890abc' > nolfs.exclude
1170 1170 $ echo '01234567890123456abc' > lfs.catchall
1171 1171 $ hg ci -qm 'file test' -X .hglfs
1172 1172 $ hg log -r . -T '{rev}: {lfs_files % "{file}: {lfsoid}\n"}\n'
1173 1173 4:
1174 1174
1175 1175 $ mv .hglfs_ .hglfs
1176 1176 $ echo '012345678901234567890abc' > lfs.test
1177 1177 $ hg ci -m 'back to lfs'
1178 1178 $ hg rm lfs.test
1179 1179 $ hg ci -qm 'remove lfs'
1180 1180
1181 1181 {lfs_files} will list deleted files too
1182 1182
1183 1183 $ hg log -T "{lfs_files % '{rev} {file}: {lfspointer.oid}\n'}"
1184 1184 6 lfs.test:
1185 1185 5 lfs.test: sha256:43f8f41171b6f62a6b61ba4ce98a8a6c1649240a47ebafd43120aa215ac9e7f6
1186 1186 3 lfs.catchall: sha256:31f43b9c62b540126b0ad5884dc013d21a61c9329b77de1fceeae2fc58511573
1187 1187 3 lfs.test: sha256:8acd23467967bc7b8cc5a280056589b0ba0b17ff21dbd88a7b6474d6290378a6
1188 1188 2 lfs.catchall: sha256:d4ec46c2869ba22eceb42a729377432052d9dd75d82fc40390ebaadecee87ee9
1189 1189 2 lfs.test: sha256:5489e6ced8c36a7b267292bde9fd5242a5f80a7482e8f23fa0477393dfaa4d6c
1190 1190
1191 1191 $ hg log -r 'file("set:lfs()")' -T '{rev} {join(lfs_files, ", ")}\n'
1192 1192 2 lfs.catchall, lfs.test
1193 1193 3 lfs.catchall, lfs.test
1194 1194 5 lfs.test
1195 1195 6 lfs.test
1196 1196
1197 1197 $ cd ..
1198 1198
1199 1199 Unbundling adds a requirement to a non-lfs repo, if necessary.
1200 1200
1201 1201 $ hg bundle -R $TESTTMP/repo-del -qr 0 --base null nolfs.hg
1202 1202 $ hg bundle -R convert_lfs2 -qr tip --base null lfs.hg
1203 1203 $ hg init unbundle
1204 1204 $ hg pull -R unbundle -q nolfs.hg
1205 1205 $ grep lfs unbundle/.hg/requires
1206 1206 [1]
1207 1207 $ hg pull -R unbundle -q lfs.hg
1208 1208 $ grep lfs unbundle/.hg/requires
1209 1209 lfs
1210 1210
1211 1211 $ hg init no_lfs
1212 1212 $ cat >> no_lfs/.hg/hgrc <<EOF
1213 1213 > [experimental]
1214 1214 > changegroup3 = True
1215 1215 > [extensions]
1216 1216 > lfs=!
1217 1217 > EOF
1218 1218 $ cp -R no_lfs no_lfs2
1219 1219
1220 1220 Pushing from a local lfs repo to a local repo without an lfs requirement and
1221 1221 with lfs disabled fails.
1222 1222
1223 1223 $ hg push -R convert_lfs2 no_lfs
1224 1224 pushing to no_lfs
1225 1225 abort: required features are not supported in the destination: lfs
1226 1226 [255]
1227 1227 $ grep lfs no_lfs/.hg/requires
1228 1228 [1]
1229 1229
1230 1230 Pulling from a local lfs repo to a local repo without an lfs requirement and
1231 1231 with lfs disabled fails.
1232 1232
1233 1233 $ hg pull -R no_lfs2 convert_lfs2
1234 1234 pulling from convert_lfs2
1235 1235 abort: required features are not supported in the destination: lfs
1236 1236 [255]
1237 1237 $ grep lfs no_lfs2/.hg/requires
1238 1238 [1]
@@ -1,365 +1,365 b''
1 1 #require reporevlogstore
2 2
3 3 prepare repo
4 4
5 5 $ hg init a
6 6 $ cd a
7 7 $ echo "some text" > FOO.txt
8 8 $ echo "another text" > bar.txt
9 9 $ echo "more text" > QUICK.txt
10 10 $ hg add
11 11 adding FOO.txt
12 12 adding QUICK.txt
13 13 adding bar.txt
14 14 $ hg ci -mtest1
15 15
16 16 verify
17 17
18 18 $ hg verify
19 19 checking changesets
20 20 checking manifests
21 21 crosschecking files in changesets and manifests
22 22 checking files
23 23 checked 1 changesets with 3 changes to 3 files
24 24
25 25 verify with journal
26 26
27 27 $ touch .hg/store/journal
28 28 $ hg verify
29 29 abandoned transaction found - run hg recover
30 30 checking changesets
31 31 checking manifests
32 32 crosschecking files in changesets and manifests
33 33 checking files
34 34 checked 1 changesets with 3 changes to 3 files
35 35 $ rm .hg/store/journal
36 36
37 37 introduce some bugs in repo
38 38
39 39 $ cd .hg/store/data
40 40 $ mv _f_o_o.txt.i X_f_o_o.txt.i
41 41 $ mv bar.txt.i xbar.txt.i
42 42 $ rm _q_u_i_c_k.txt.i
43 43
44 44 $ hg verify
45 45 checking changesets
46 46 checking manifests
47 47 crosschecking files in changesets and manifests
48 48 checking files
49 49 warning: revlog 'data/FOO.txt.i' not in fncache!
50 50 0: empty or missing FOO.txt
51 51 FOO.txt@0: manifest refers to unknown revision f62022d3d590
52 52 warning: revlog 'data/QUICK.txt.i' not in fncache!
53 53 0: empty or missing QUICK.txt
54 54 QUICK.txt@0: manifest refers to unknown revision 88b857db8eba
55 55 warning: revlog 'data/bar.txt.i' not in fncache!
56 56 0: empty or missing bar.txt
57 57 bar.txt@0: manifest refers to unknown revision 256559129457
58 58 checked 1 changesets with 0 changes to 3 files
59 59 3 warnings encountered!
60 60 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
61 61 6 integrity errors encountered!
62 62 (first damaged changeset appears to be 0)
63 63 [1]
64 64
65 65 $ cd ../../..
66 66 $ cd ..
67 67
68 68 Set up a repo for testing missing revlog entries
69 69
70 70 $ hg init missing-entries
71 71 $ cd missing-entries
72 72 $ echo 0 > file
73 73 $ hg ci -Aqm0
74 74 $ cp -R .hg/store .hg/store-partial
75 75 $ echo 1 > file
76 76 $ hg ci -Aqm1
77 77 $ cp -R .hg/store .hg/store-full
78 78
79 79 Entire changelog missing
80 80
81 81 $ rm .hg/store/00changelog.*
82 82 $ hg verify -q
83 83 0: empty or missing changelog
84 84 manifest@0: d0b6632564d4 not in changesets
85 85 manifest@1: 941fc4534185 not in changesets
86 86 3 integrity errors encountered!
87 87 (first damaged changeset appears to be 0)
88 88 [1]
89 89 $ cp -R .hg/store-full/. .hg/store
90 90
91 91 Entire manifest log missing
92 92
93 93 $ rm .hg/store/00manifest.*
94 94 $ hg verify -q
95 95 0: empty or missing manifest
96 96 1 integrity errors encountered!
97 97 (first damaged changeset appears to be 0)
98 98 [1]
99 99 $ cp -R .hg/store-full/. .hg/store
100 100
101 101 Entire filelog missing
102 102
103 103 $ rm .hg/store/data/file.*
104 104 $ hg verify -q
105 105 warning: revlog 'data/file.i' not in fncache!
106 106 0: empty or missing file
107 107 file@0: manifest refers to unknown revision 362fef284ce2
108 108 file@1: manifest refers to unknown revision c10f2164107d
109 109 1 warnings encountered!
110 110 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
111 111 3 integrity errors encountered!
112 112 (first damaged changeset appears to be 0)
113 113 [1]
114 114 $ cp -R .hg/store-full/. .hg/store
115 115
116 116 Entire changelog and manifest log missing
117 117
118 118 $ rm .hg/store/00changelog.*
119 119 $ rm .hg/store/00manifest.*
120 120 $ hg verify -q
121 121 warning: orphan data file 'data/file.i'
122 122 1 warnings encountered!
123 123 $ cp -R .hg/store-full/. .hg/store
124 124
125 125 Entire changelog and filelog missing
126 126
127 127 $ rm .hg/store/00changelog.*
128 128 $ rm .hg/store/data/file.*
129 129 $ hg verify -q
130 130 0: empty or missing changelog
131 131 manifest@0: d0b6632564d4 not in changesets
132 132 manifest@1: 941fc4534185 not in changesets
133 133 warning: revlog 'data/file.i' not in fncache!
134 134 ?: empty or missing file
135 135 file@0: manifest refers to unknown revision 362fef284ce2
136 136 file@1: manifest refers to unknown revision c10f2164107d
137 137 1 warnings encountered!
138 138 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
139 139 6 integrity errors encountered!
140 140 (first damaged changeset appears to be 0)
141 141 [1]
142 142 $ cp -R .hg/store-full/. .hg/store
143 143
144 144 Entire manifest log and filelog missing
145 145
146 146 $ rm .hg/store/00manifest.*
147 147 $ rm .hg/store/data/file.*
148 148 $ hg verify -q
149 149 0: empty or missing manifest
150 150 warning: revlog 'data/file.i' not in fncache!
151 151 0: empty or missing file
152 152 1 warnings encountered!
153 153 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
154 154 2 integrity errors encountered!
155 155 (first damaged changeset appears to be 0)
156 156 [1]
157 157 $ cp -R .hg/store-full/. .hg/store
158 158
159 159 Changelog missing entry
160 160
161 161 $ cp -f .hg/store-partial/00changelog.* .hg/store
162 162 $ hg verify -q
163 163 manifest@?: rev 1 points to nonexistent changeset 1
164 164 manifest@?: 941fc4534185 not in changesets
165 165 file@?: rev 1 points to nonexistent changeset 1
166 166 (expected 0)
167 167 1 warnings encountered!
168 168 3 integrity errors encountered!
169 169 [1]
170 170 $ cp -R .hg/store-full/. .hg/store
171 171
172 172 Manifest log missing entry
173 173
174 174 $ cp -f .hg/store-partial/00manifest.* .hg/store
175 175 $ hg verify -q
176 176 manifest@1: changeset refers to unknown revision 941fc4534185
177 177 file@1: c10f2164107d not in manifests
178 178 2 integrity errors encountered!
179 179 (first damaged changeset appears to be 1)
180 180 [1]
181 181 $ cp -R .hg/store-full/. .hg/store
182 182
183 183 Filelog missing entry
184 184
185 185 $ cp -f .hg/store-partial/data/file.* .hg/store/data
186 186 $ hg verify -q
187 187 file@1: manifest refers to unknown revision c10f2164107d
188 188 1 integrity errors encountered!
189 189 (first damaged changeset appears to be 1)
190 190 [1]
191 191 $ cp -R .hg/store-full/. .hg/store
192 192
193 193 Changelog and manifest log missing entry
194 194
195 195 $ cp -f .hg/store-partial/00changelog.* .hg/store
196 196 $ cp -f .hg/store-partial/00manifest.* .hg/store
197 197 $ hg verify -q
198 198 file@?: rev 1 points to nonexistent changeset 1
199 199 (expected 0)
200 200 file@?: c10f2164107d not in manifests
201 201 1 warnings encountered!
202 202 2 integrity errors encountered!
203 203 [1]
204 204 $ cp -R .hg/store-full/. .hg/store
205 205
206 206 Changelog and filelog missing entry
207 207
208 208 $ cp -f .hg/store-partial/00changelog.* .hg/store
209 209 $ cp -f .hg/store-partial/data/file.* .hg/store/data
210 210 $ hg verify -q
211 211 manifest@?: rev 1 points to nonexistent changeset 1
212 212 manifest@?: 941fc4534185 not in changesets
213 213 file@?: manifest refers to unknown revision c10f2164107d
214 214 3 integrity errors encountered!
215 215 [1]
216 216 $ cp -R .hg/store-full/. .hg/store
217 217
218 218 Manifest and filelog missing entry
219 219
220 220 $ cp -f .hg/store-partial/00manifest.* .hg/store
221 221 $ cp -f .hg/store-partial/data/file.* .hg/store/data
222 222 $ hg verify -q
223 223 manifest@1: changeset refers to unknown revision 941fc4534185
224 224 1 integrity errors encountered!
225 225 (first damaged changeset appears to be 1)
226 226 [1]
227 227 $ cp -R .hg/store-full/. .hg/store
228 228
229 229 Corrupt changelog base node to cause failure to read revision
230 230
231 231 $ printf abcd | dd conv=notrunc of=.hg/store/00changelog.i bs=1 seek=16 \
232 232 > 2> /dev/null
233 233 $ hg verify -q
234 234 0: unpacking changeset 08b1860757c2: * (glob)
235 235 manifest@?: rev 0 points to unexpected changeset 0
236 236 manifest@?: d0b6632564d4 not in changesets
237 237 file@?: rev 0 points to unexpected changeset 0
238 238 (expected 1)
239 239 1 warnings encountered!
240 240 4 integrity errors encountered!
241 241 (first damaged changeset appears to be 0)
242 242 [1]
243 243 $ cp -R .hg/store-full/. .hg/store
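
Why seek=16: revlog v1 index entries are 64-byte records, and bytes 16-19 of
the first record hold the delta base revision, so the dd above turns rev 0's
base into garbage and reading the revision fails. The same offset is reused
for the manifest and filelog variants below. A sketch decoding the clobbered
entry (struct layout per the revlog v1 index format):

  import struct

  # 8 bytes offset/flags (the first 4 double as the version header for
  # rev 0), 4 bytes compressed length, 4 bytes uncompressed length,
  # 4 bytes delta base rev, 4 bytes link rev, 4 bytes p1, 4 bytes p2,
  # 20-byte sha1 node zero-padded to 32 bytes: 64 bytes in total
  entry = struct.Struct(b'>Qiiiiii20s12x')

  with open('.hg/store/00changelog.i', 'rb') as fp:
      fields = entry.unpack(fp.read(entry.size))

  # b'abcd' read as a big-endian integer is 0x61626364 == 1633837924,
  # far outside the valid revision range
  print('delta base of rev 0:', fields[3])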
244 244
245 245 Corrupt manifest log base node to cause failure to read revision
246 246
247 247 $ printf abcd | dd conv=notrunc of=.hg/store/00manifest.i bs=1 seek=16 \
248 248 > 2> /dev/null
249 249 $ hg verify -q
250 250 manifest@0: reading delta d0b6632564d4: * (glob)
251 251 file@0: 362fef284ce2 not in manifests
252 252 2 integrity errors encountered!
253 253 (first damaged changeset appears to be 0)
254 254 [1]
255 255 $ cp -R .hg/store-full/. .hg/store
256 256
257 257 Corrupt filelog base node to cause failure to read revision
258 258
259 259 $ printf abcd | dd conv=notrunc of=.hg/store/data/file.i bs=1 seek=16 \
260 260 > 2> /dev/null
261 261 $ hg verify -q
262 262 file@0: unpacking 362fef284ce2: * (glob)
263 263 1 integrity errors encountered!
264 264 (first damaged changeset appears to be 0)
265 265 [1]
266 266 $ cp -R .hg/store-full/. .hg/store
267 267
268 268 $ cd ..
269 269
270 270 test changelog without a manifest
271 271
272 272 $ hg init b
273 273 $ cd b
274 274 $ hg branch foo
275 275 marked working directory as branch foo
276 276 (branches are permanent and global, did you want a bookmark?)
277 277 $ hg ci -m branchfoo
278 278 $ hg verify
279 279 checking changesets
280 280 checking manifests
281 281 crosschecking files in changesets and manifests
282 282 checking files
283 283 checked 1 changesets with 0 changes to 0 files
284 284
285 285 test revlog corruption
286 286
287 287 $ touch a
288 288 $ hg add a
289 289 $ hg ci -m a
290 290
291 291 $ echo 'corrupted' > b
292 292 $ dd if=.hg/store/data/a.i of=start bs=1 count=20 2>/dev/null
293 293 $ cat start b > .hg/store/data/a.i
294 294
295 295 $ hg verify
296 296 checking changesets
297 297 checking manifests
298 298 crosschecking files in changesets and manifests
299 299 checking files
300 300 a@1: broken revlog! (index data/a is corrupted)
301 301 warning: orphan data file 'data/a.i'
302 302 checked 2 changesets with 0 changes to 1 files
303 303 1 warnings encountered!
304 304 1 integrity errors encountered!
305 305 (first damaged changeset appears to be 1)
306 306 [1]
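
The doctored index cannot hold even one index record: the replacement a.i is
the first 20 bytes of the real file plus the 10-byte string, while a revlog v1
index entry needs 64 bytes, hence "index data/a is corrupted". In short:

  # a sketch of the size argument, not Mercurial code
  size = 20 + len(b'corrupted\n')   # 30 bytes in total
  assert size < 64                  # no complete 64-byte index entry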
307 307
308 308 $ cd ..
309 309
310 310 test revlog format 0
311 311
312 312 $ revlog-formatv0.py
313 313 $ cd formatv0
314 314 $ hg verify
315 315 repository uses revlog format 0
316 316 checking changesets
317 317 checking manifests
318 318 crosschecking files in changesets and manifests
319 319 checking files
320 320 checked 1 changesets with 1 changes to 1 files
321 321 $ cd ..
322 322
323 323 test flag processor and skipflags
324 324
325 325 $ hg init skipflags
326 326 $ cd skipflags
327 327 $ cat >> .hg/hgrc <<EOF
328 328 > [extensions]
329 329 > flagprocessor=$RUNTESTDIR/flagprocessorext.py
330 330 > EOF
331 331 $ echo '[BASE64]content' > base64
332 332 $ hg commit -Aqm 'flag processor content' base64
333 333 $ hg verify
334 334 checking changesets
335 335 checking manifests
336 336 crosschecking files in changesets and manifests
337 337 checking files
338 338 checked 1 changesets with 1 changes to 1 files
339 339
340 340 $ cat >> $TESTTMP/break-base64.py <<EOF
341 341 > from __future__ import absolute_import
342 342 > import base64
343 343 > base64.b64decode=lambda x: x
344 344 > EOF
345 345 $ cat >> .hg/hgrc <<EOF
346 346 > breakbase64=$TESTTMP/break-base64.py
347 347 > EOF
348 348
349 349 $ hg verify
350 350 checking changesets
351 351 checking manifests
352 352 crosschecking files in changesets and manifests
353 353 checking files
354 base64@0: unpacking 794cee7777cb: integrity check failed on data/base64.i:0
354 base64@0: unpacking 794cee7777cb: integrity check failed on data/base64:0
355 355 checked 1 changesets with 1 changes to 1 files
356 356 1 integrity errors encountered!
357 357 (first damaged changeset appears to be 0)
358 358 [1]
359 359 $ hg verify --config verify.skipflags=2147483647
360 360 checking changesets
361 361 checking manifests
362 362 crosschecking files in changesets and manifests
363 363 checking files
364 364 checked 1 changesets with 1 changes to 1 files
365 365
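verify.skipflags is a bitmask ANDed against each revision's index flags; a
nonzero result makes verify skip the content check for that revision. The
value 2147483647 sets every bit, so the flagged base64 revision is no longer
re-hashed and verify passes. For illustration (flag values as defined in
mercurial.revlogutils.flagutil):

  skipflags = 2147483647
  assert skipflags == 0x7FFFFFFF == (1 << 31) - 1

  REVIDX_ISCENSORED = 1 << 15
  REVIDX_ELLIPSIS = 1 << 14
  REVIDX_EXTSTORED = 1 << 13

  for flag in (REVIDX_ISCENSORED, REVIDX_ELLIPSIS, REVIDX_EXTSTORED):
      assert flag & skipflags   # every defined flag bit is covered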