revlog-split: make sure the self._indexfile attribute is reset (issue6811)...
marmoute
r51705:f952be90 6.4.2 stable
@@ -1,3399 +1,3410 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 import weakref
22 23 import zlib
23 24
24 25 # import stuff from node for others to import from revlog
25 26 from .node import (
26 27 bin,
27 28 hex,
28 29 nullrev,
29 30 sha1nodeconstants,
30 31 short,
31 32 wdirrev,
32 33 )
33 34 from .i18n import _
34 35 from .pycompat import getattr
35 36 from .revlogutils.constants import (
36 37 ALL_KINDS,
37 38 CHANGELOGV2,
38 39 COMP_MODE_DEFAULT,
39 40 COMP_MODE_INLINE,
40 41 COMP_MODE_PLAIN,
41 42 DELTA_BASE_REUSE_NO,
42 43 DELTA_BASE_REUSE_TRY,
43 44 ENTRY_RANK,
44 45 FEATURES_BY_VERSION,
45 46 FLAG_GENERALDELTA,
46 47 FLAG_INLINE_DATA,
47 48 INDEX_HEADER,
48 49 KIND_CHANGELOG,
49 50 KIND_FILELOG,
50 51 RANK_UNKNOWN,
51 52 REVLOGV0,
52 53 REVLOGV1,
53 54 REVLOGV1_FLAGS,
54 55 REVLOGV2,
55 56 REVLOGV2_FLAGS,
56 57 REVLOG_DEFAULT_FLAGS,
57 58 REVLOG_DEFAULT_FORMAT,
58 59 REVLOG_DEFAULT_VERSION,
59 60 SUPPORTED_FLAGS,
60 61 )
61 62 from .revlogutils.flagutil import (
62 63 REVIDX_DEFAULT_FLAGS,
63 64 REVIDX_ELLIPSIS,
64 65 REVIDX_EXTSTORED,
65 66 REVIDX_FLAGS_ORDER,
66 67 REVIDX_HASCOPIESINFO,
67 68 REVIDX_ISCENSORED,
68 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 70 )
70 71 from .thirdparty import attr
71 72 from . import (
72 73 ancestor,
73 74 dagop,
74 75 error,
75 76 mdiff,
76 77 policy,
77 78 pycompat,
78 79 revlogutils,
79 80 templatefilters,
80 81 util,
81 82 )
82 83 from .interfaces import (
83 84 repository,
84 85 util as interfaceutil,
85 86 )
86 87 from .revlogutils import (
87 88 deltas as deltautil,
88 89 docket as docketutil,
89 90 flagutil,
90 91 nodemap as nodemaputil,
91 92 randomaccessfile,
92 93 revlogv0,
93 94 rewrite,
94 95 sidedata as sidedatautil,
95 96 )
96 97 from .utils import (
97 98 storageutil,
98 99 stringutil,
99 100 )
100 101
101 102 # blanket usage of all the names to prevent pyflakes complaints
102 103 # We need these names available in the module for extensions.
103 104
104 105 REVLOGV0
105 106 REVLOGV1
106 107 REVLOGV2
107 108 CHANGELOGV2
108 109 FLAG_INLINE_DATA
109 110 FLAG_GENERALDELTA
110 111 REVLOG_DEFAULT_FLAGS
111 112 REVLOG_DEFAULT_FORMAT
112 113 REVLOG_DEFAULT_VERSION
113 114 REVLOGV1_FLAGS
114 115 REVLOGV2_FLAGS
115 116 REVIDX_ISCENSORED
116 117 REVIDX_ELLIPSIS
117 118 REVIDX_HASCOPIESINFO
118 119 REVIDX_EXTSTORED
119 120 REVIDX_DEFAULT_FLAGS
120 121 REVIDX_FLAGS_ORDER
121 122 REVIDX_RAWTEXT_CHANGING_FLAGS
122 123
123 124 parsers = policy.importmod('parsers')
124 125 rustancestor = policy.importrust('ancestor')
125 126 rustdagop = policy.importrust('dagop')
126 127 rustrevlog = policy.importrust('revlog')
127 128
128 129 # Aliased for performance.
129 130 _zlibdecompress = zlib.decompress
130 131
131 132 # max size of inline data embedded into a revlog
132 133 _maxinline = 131072
133 134
134 135 # Flag processors for REVIDX_ELLIPSIS.
135 136 def ellipsisreadprocessor(rl, text):
136 137 return text, False
137 138
138 139
139 140 def ellipsiswriteprocessor(rl, text):
140 141 return text, False
141 142
142 143
143 144 def ellipsisrawprocessor(rl, text):
144 145 return False
145 146
146 147
147 148 ellipsisprocessor = (
148 149 ellipsisreadprocessor,
149 150 ellipsiswriteprocessor,
150 151 ellipsisrawprocessor,
151 152 )
152 153
153 154
154 155 def _verify_revision(rl, skipflags, state, node):
155 156 """Verify the integrity of the given revlog ``node`` while providing a hook
156 157 point for extensions to influence the operation."""
157 158 if skipflags:
158 159 state[b'skipread'].add(node)
159 160 else:
160 161 # Side-effect: read content and verify hash.
161 162 rl.revision(node)
162 163
163 164
164 165 # True if a fast implementation for persistent-nodemap is available
165 166 #
166 167 # We also consider the "pure" python implementation "fast" because
167 168 # people using pure don't really have performance considerations (and a
168 169 # wheelbarrow of other slowness sources)
169 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 171 parsers, 'BaseIndexObject'
171 172 )
172 173
173 174
174 175 @interfaceutil.implementer(repository.irevisiondelta)
175 176 @attr.s(slots=True)
176 177 class revlogrevisiondelta:
177 178 node = attr.ib()
178 179 p1node = attr.ib()
179 180 p2node = attr.ib()
180 181 basenode = attr.ib()
181 182 flags = attr.ib()
182 183 baserevisionsize = attr.ib()
183 184 revision = attr.ib()
184 185 delta = attr.ib()
185 186 sidedata = attr.ib()
186 187 protocol_flags = attr.ib()
187 188 linknode = attr.ib(default=None)
188 189
189 190
190 191 @interfaceutil.implementer(repository.iverifyproblem)
191 192 @attr.s(frozen=True)
192 193 class revlogproblem:
193 194 warning = attr.ib(default=None)
194 195 error = attr.ib(default=None)
195 196 node = attr.ib(default=None)
196 197
197 198
198 199 def parse_index_v1(data, inline):
199 200 # call the C implementation to parse the index data
200 201 index, cache = parsers.parse_index2(data, inline)
201 202 return index, cache
202 203
203 204
204 205 def parse_index_v2(data, inline):
205 206 # call the C implementation to parse the index data
206 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 208 return index, cache
208 209
209 210
210 211 def parse_index_cl_v2(data, inline):
211 212 # call the C implementation to parse the index data
212 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 214 return index, cache
214 215
215 216
216 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
217 218
218 219 def parse_index_v1_nodemap(data, inline):
219 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 221 return index, cache
221 222
222 223
223 224 else:
224 225 parse_index_v1_nodemap = None
225 226
226 227
227 228 def parse_index_v1_mixed(data, inline):
228 229 index, cache = parse_index_v1(data, inline)
229 230 return rustrevlog.MixedIndex(index), cache
230 231
231 232
232 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 234 # signed integer)
234 235 _maxentrysize = 0x7FFFFFFF
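A quick check of the comment above: 0x7FFFFFFF is the largest 4-byte signed integer, i.e. just under 2 GiB.

    assert 0x7FFFFFFF == 2**31 - 1 == 2147483647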
235 236
236 237 FILE_TOO_SHORT_MSG = _(
237 238 b'cannot read from revlog %s;'
238 239 b' expected %d bytes from offset %d, data size is %d'
239 240 )
240 241
241 242 hexdigits = b'0123456789abcdefABCDEF'
242 243
243 244
244 245 class revlog:
245 246 """
246 247 the underlying revision storage object
247 248
248 249 A revlog consists of two parts, an index and the revision data.
249 250
250 251 The index is a file with a fixed record size containing
251 252 information on each revision, including its nodeid (hash), the
252 253 nodeids of its parents, the position and offset of its data within
253 254 the data file, and the revision it's based on. Finally, each entry
254 255 contains a linkrev entry that can serve as a pointer to external
255 256 data.
256 257
257 258 The revision data itself is a linear collection of data chunks.
258 259 Each chunk represents a revision and is usually represented as a
259 260 delta against the previous chunk. To bound lookup time, runs of
260 261 deltas are limited to about 2 times the length of the original
261 262 version data. This makes retrieval of a version proportional to
262 263 its size, or O(1) relative to the number of revisions.
263 264
264 265 Both pieces of the revlog are written to in an append-only
265 266 fashion, which means we never need to rewrite a file to insert or
266 267 remove data, and can use some simple techniques to avoid the need
267 268 for locking while reading.
268 269
269 270 If checkambig, indexfile is opened with checkambig=True at
270 271 writing, to avoid file stat ambiguity.
271 272
272 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 274 index will be mmapped rather than read if it is larger than the
274 275 configured threshold.
275 276
276 277 If censorable is True, the revlog can have censored revisions.
277 278
278 279 If `upperboundcomp` is not None, this is the expected maximal gain from
279 280 compression for the data content.
280 281
281 282 `concurrencychecker` is an optional function that receives 3 arguments: a
282 283 file handle, a filename, and an expected position. It should check whether
283 284 the current position in the file handle is valid, and log/warn/fail (by
284 285 raising).
285 286
286 287 See mercurial/revlogutils/constants.py for details about the content of an
287 288 index entry.
288 289 """
289 290
290 291 _flagserrorclass = error.RevlogError
291 292
292 293 def __init__(
293 294 self,
294 295 opener,
295 296 target,
296 297 radix,
297 298 postfix=None, # only exists for `tmpcensored` now
298 299 checkambig=False,
299 300 mmaplargeindex=False,
300 301 censorable=False,
301 302 upperboundcomp=None,
302 303 persistentnodemap=False,
303 304 concurrencychecker=None,
304 305 trypending=False,
305 306 try_split=False,
306 307 canonical_parent_order=True,
307 308 ):
308 309 """
309 310 create a revlog object
310 311
311 312 opener is a function that abstracts the file opening operation
312 313 and can be used to implement COW semantics or the like.
313 314
314 315 `target`: a (KIND, ID) tuple that identifies the content stored in
315 316 this revlog. It helps the rest of the code to understand what the revlog
316 317 is about without having to resort to heuristics and index filename
317 318 analysis. Note that this must reliably be set by normal code, but
318 319 that test, debug, or performance measurement code might not set it to an
319 320 accurate value.
320 321 """
321 322 self.upperboundcomp = upperboundcomp
322 323
323 324 self.radix = radix
324 325
325 326 self._docket_file = None
326 327 self._indexfile = None
327 328 self._datafile = None
328 329 self._sidedatafile = None
329 330 self._nodemap_file = None
330 331 self.postfix = postfix
331 332 self._trypending = trypending
332 333 self._try_split = try_split
333 334 self.opener = opener
334 335 if persistentnodemap:
335 336 self._nodemap_file = nodemaputil.get_nodemap_file(self)
336 337
337 338 assert target[0] in ALL_KINDS
338 339 assert len(target) == 2
339 340 self.target = target
340 341 # When True, indexfile is opened with checkambig=True at writing, to
341 342 # avoid file stat ambiguity.
342 343 self._checkambig = checkambig
343 344 self._mmaplargeindex = mmaplargeindex
344 345 self._censorable = censorable
345 346 # 3-tuple of (node, rev, text) for a raw revision.
346 347 self._revisioncache = None
347 348 # Maps rev to chain base rev.
348 349 self._chainbasecache = util.lrucachedict(100)
349 350 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
350 351 self._chunkcache = (0, b'')
351 352 # How much data to read and cache into the raw revlog data cache.
352 353 self._chunkcachesize = 65536
353 354 self._maxchainlen = None
354 355 self._deltabothparents = True
355 356 self._candidate_group_chunk_size = 0
356 357 self._debug_delta = False
357 358 self.index = None
358 359 self._docket = None
359 360 self._nodemap_docket = None
360 361 # Mapping of partial identifiers to full nodes.
361 362 self._pcache = {}
362 363 # Mapping of revision integer to full node.
363 364 self._compengine = b'zlib'
364 365 self._compengineopts = {}
365 366 self._maxdeltachainspan = -1
366 367 self._withsparseread = False
367 368 self._sparserevlog = False
368 369 self.hassidedata = False
369 370 self._srdensitythreshold = 0.50
370 371 self._srmingapsize = 262144
371 372
372 373 # other optional features
373 374
374 375 # might remove rank configuration once the computation has no impact
375 376 self._compute_rank = False
376 377
377 378 # Make copy of flag processors so each revlog instance can support
378 379 # custom flags.
379 380 self._flagprocessors = dict(flagutil.flagprocessors)
380 381
381 382 # 3-tuple of file handles being used for active writing.
382 383 self._writinghandles = None
383 384 # prevent nesting of addgroup
384 385 self._adding_group = None
385 386
386 387 self._loadindex()
387 388
388 389 self._concurrencychecker = concurrencychecker
389 390
390 391 # parent order is supposed to be semantically irrelevant, so we
391 392 # normally resort parents to ensure that the first parent is non-null,
392 393 # if there is a non-null parent at all.
393 394 # filelog abuses the parent order as flag to mark some instances of
394 395 # meta-encoded files, so allow it to disable this behavior.
395 396 self.canonical_parent_order = canonical_parent_order
396 397
397 398 def _init_opts(self):
398 399 """process options (from above/config) to setup associated default revlog mode
399 400
400 401 These values might be affected when actually reading on disk information.
401 402
402 403 The relevant values are returned for use in _loadindex().
403 404
404 405 * newversionflags:
405 406 version header to use if we need to create a new revlog
406 407
407 408 * mmapindexthreshold:
408 409 minimal index size at which to start using mmap
409 410
410 411 * force_nodemap:
411 412 force the usage of a "development" version of the nodemap code
412 413 """
413 414 mmapindexthreshold = None
414 415 opts = self.opener.options
415 416
416 417 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
417 418 new_header = CHANGELOGV2
418 419 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
419 420 elif b'revlogv2' in opts:
420 421 new_header = REVLOGV2
421 422 elif b'revlogv1' in opts:
422 423 new_header = REVLOGV1 | FLAG_INLINE_DATA
423 424 if b'generaldelta' in opts:
424 425 new_header |= FLAG_GENERALDELTA
425 426 elif b'revlogv0' in self.opener.options:
426 427 new_header = REVLOGV0
427 428 else:
428 429 new_header = REVLOG_DEFAULT_VERSION
429 430
430 431 if b'chunkcachesize' in opts:
431 432 self._chunkcachesize = opts[b'chunkcachesize']
432 433 if b'maxchainlen' in opts:
433 434 self._maxchainlen = opts[b'maxchainlen']
434 435 if b'deltabothparents' in opts:
435 436 self._deltabothparents = opts[b'deltabothparents']
436 437 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
437 438 if dps_cgds:
438 439 self._candidate_group_chunk_size = dps_cgds
439 440 self._lazydelta = bool(opts.get(b'lazydelta', True))
440 441 self._lazydeltabase = False
441 442 if self._lazydelta:
442 443 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
443 444 if b'debug-delta' in opts:
444 445 self._debug_delta = opts[b'debug-delta']
445 446 if b'compengine' in opts:
446 447 self._compengine = opts[b'compengine']
447 448 if b'zlib.level' in opts:
448 449 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
449 450 if b'zstd.level' in opts:
450 451 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
451 452 if b'maxdeltachainspan' in opts:
452 453 self._maxdeltachainspan = opts[b'maxdeltachainspan']
453 454 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
454 455 mmapindexthreshold = opts[b'mmapindexthreshold']
455 456 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
456 457 withsparseread = bool(opts.get(b'with-sparse-read', False))
457 458 # sparse-revlog forces sparse-read
458 459 self._withsparseread = self._sparserevlog or withsparseread
459 460 if b'sparse-read-density-threshold' in opts:
460 461 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
461 462 if b'sparse-read-min-gap-size' in opts:
462 463 self._srmingapsize = opts[b'sparse-read-min-gap-size']
463 464 if opts.get(b'enableellipsis'):
464 465 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
465 466
466 467 # revlog v0 doesn't have flag processors
467 468 for flag, processor in opts.get(b'flagprocessors', {}).items():
468 469 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
469 470
470 471 if self._chunkcachesize <= 0:
471 472 raise error.RevlogError(
472 473 _(b'revlog chunk cache size %r is not greater than 0')
473 474 % self._chunkcachesize
474 475 )
475 476 elif self._chunkcachesize & (self._chunkcachesize - 1):
476 477 raise error.RevlogError(
477 478 _(b'revlog chunk cache size %r is not a power of 2')
478 479 % self._chunkcachesize
479 480 )
480 481 force_nodemap = opts.get(b'devel-force-nodemap', False)
481 482 return new_header, mmapindexthreshold, force_nodemap
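The chunk cache validation in _init_opts uses the classic single-bit test: a power of two has exactly one bit set, so subtracting one flips all lower bits and the AND comes out zero. In isolation (illustrative only):

    def is_power_of_two(n):
        # n & (n - 1) clears the lowest set bit; a zero result means one bit set
        return n > 0 and (n & (n - 1)) == 0

    assert is_power_of_two(65536)        # the default _chunkcachesize
    assert not is_power_of_two(65535)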
482 483
483 484 def _get_data(self, filepath, mmap_threshold, size=None):
484 485 """return a file content with or without mmap
485 486
486 487 If the file is missing return the empty string"""
487 488 try:
488 489 with self.opener(filepath) as fp:
489 490 if mmap_threshold is not None:
490 491 file_size = self.opener.fstat(fp).st_size
491 492 if file_size >= mmap_threshold:
492 493 if size is not None:
493 494 # avoid potential mmap crash
494 495 size = min(file_size, size)
495 496 # TODO: should .close() to release resources without
496 497 # relying on Python GC
497 498 if size is None:
498 499 return util.buffer(util.mmapread(fp))
499 500 else:
500 501 return util.buffer(util.mmapread(fp, size))
501 502 if size is None:
502 503 return fp.read()
503 504 else:
504 505 return fp.read(size)
505 506 except FileNotFoundError:
506 507 return b''
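A self-contained approximation of the mmap-versus-read decision above, using only the stdlib (`read_maybe_mmap` is a hypothetical helper, not Mercurial API; the size clamp mirrors the crash-avoidance note above):

    import mmap
    import os

    def read_maybe_mmap(path, threshold, size=None):
        try:
            with open(path, 'rb') as fp:
                file_size = os.fstat(fp.fileno()).st_size
                if threshold is not None and file_size >= threshold:
                    if size is not None:
                        # never map past end-of-file
                        size = min(file_size, size)
                    m = mmap.mmap(fp.fileno(), size or 0,
                                  access=mmap.ACCESS_READ)
                    return memoryview(m)
                return fp.read() if size is None else fp.read(size)
        except FileNotFoundError:
            return b''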
507 508
508 509 def _loadindex(self, docket=None):
509 510
510 511 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
511 512
512 513 if self.postfix is not None:
513 514 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
514 515 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
515 516 entry_point = b'%s.i.a' % self.radix
516 517 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
517 518 entry_point = b'%s.i.s' % self.radix
518 519 else:
519 520 entry_point = b'%s.i' % self.radix
520 521
521 522 if docket is not None:
522 523 self._docket = docket
523 524 self._docket_file = entry_point
524 525 else:
525 526 self._initempty = True
526 527 entry_data = self._get_data(entry_point, mmapindexthreshold)
527 528 if len(entry_data) > 0:
528 529 header = INDEX_HEADER.unpack(entry_data[:4])[0]
529 530 self._initempty = False
530 531 else:
531 532 header = new_header
532 533
533 534 self._format_flags = header & ~0xFFFF
534 535 self._format_version = header & 0xFFFF
535 536
536 537 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
537 538 if supported_flags is None:
538 539 msg = _(b'unknown version (%d) in revlog %s')
539 540 msg %= (self._format_version, self.display_id)
540 541 raise error.RevlogError(msg)
541 542 elif self._format_flags & ~supported_flags:
542 543 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
543 544 display_flag = self._format_flags >> 16
544 545 msg %= (display_flag, self._format_version, self.display_id)
545 546 raise error.RevlogError(msg)
546 547
547 548 features = FEATURES_BY_VERSION[self._format_version]
548 549 self._inline = features[b'inline'](self._format_flags)
549 550 self._generaldelta = features[b'generaldelta'](self._format_flags)
550 551 self.hassidedata = features[b'sidedata']
551 552
552 553 if not features[b'docket']:
553 554 self._indexfile = entry_point
554 555 index_data = entry_data
555 556 else:
556 557 self._docket_file = entry_point
557 558 if self._initempty:
558 559 self._docket = docketutil.default_docket(self, header)
559 560 else:
560 561 self._docket = docketutil.parse_docket(
561 562 self, entry_data, use_pending=self._trypending
562 563 )
563 564
564 565 if self._docket is not None:
565 566 self._indexfile = self._docket.index_filepath()
566 567 index_data = b''
567 568 index_size = self._docket.index_end
568 569 if index_size > 0:
569 570 index_data = self._get_data(
570 571 self._indexfile, mmapindexthreshold, size=index_size
571 572 )
572 573 if len(index_data) < index_size:
573 574 msg = _(b'too few index data for %s: got %d, expected %d')
574 575 msg %= (self.display_id, len(index_data), index_size)
575 576 raise error.RevlogError(msg)
576 577
577 578 self._inline = False
578 579 # generaldelta implied by version 2 revlogs.
579 580 self._generaldelta = True
580 581 # the logic for persistent nodemap will be dealt with within the
581 582 # main docket, so disable it for now.
582 583 self._nodemap_file = None
583 584
584 585 if self._docket is not None:
585 586 self._datafile = self._docket.data_filepath()
586 587 self._sidedatafile = self._docket.sidedata_filepath()
587 588 elif self.postfix is None:
588 589 self._datafile = b'%s.d' % self.radix
589 590 else:
590 591 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
591 592
592 593 self.nodeconstants = sha1nodeconstants
593 594 self.nullid = self.nodeconstants.nullid
594 595
595 596 # sparse-revlog can't be on without general-delta (issue6056)
596 597 if not self._generaldelta:
597 598 self._sparserevlog = False
598 599
599 600 self._storedeltachains = True
600 601
601 602 devel_nodemap = (
602 603 self._nodemap_file
603 604 and force_nodemap
604 605 and parse_index_v1_nodemap is not None
605 606 )
606 607
607 608 use_rust_index = False
608 609 if rustrevlog is not None:
609 610 if self._nodemap_file is not None:
610 611 use_rust_index = True
611 612 else:
612 613 use_rust_index = self.opener.options.get(b'rust.index')
613 614
614 615 self._parse_index = parse_index_v1
615 616 if self._format_version == REVLOGV0:
616 617 self._parse_index = revlogv0.parse_index_v0
617 618 elif self._format_version == REVLOGV2:
618 619 self._parse_index = parse_index_v2
619 620 elif self._format_version == CHANGELOGV2:
620 621 self._parse_index = parse_index_cl_v2
621 622 elif devel_nodemap:
622 623 self._parse_index = parse_index_v1_nodemap
623 624 elif use_rust_index:
624 625 self._parse_index = parse_index_v1_mixed
625 626 try:
626 627 d = self._parse_index(index_data, self._inline)
627 628 index, chunkcache = d
628 629 use_nodemap = (
629 630 not self._inline
630 631 and self._nodemap_file is not None
631 632 and util.safehasattr(index, 'update_nodemap_data')
632 633 )
633 634 if use_nodemap:
634 635 nodemap_data = nodemaputil.persisted_data(self)
635 636 if nodemap_data is not None:
636 637 docket = nodemap_data[0]
637 638 if (
638 639 len(d[0]) > docket.tip_rev
639 640 and d[0][docket.tip_rev][7] == docket.tip_node
640 641 ):
641 642 # no changelog tampering
642 643 self._nodemap_docket = docket
643 644 index.update_nodemap_data(*nodemap_data)
644 645 except (ValueError, IndexError):
645 646 raise error.RevlogError(
646 647 _(b"index %s is corrupted") % self.display_id
647 648 )
648 649 self.index = index
649 650 self._segmentfile = randomaccessfile.randomaccessfile(
650 651 self.opener,
651 652 (self._indexfile if self._inline else self._datafile),
652 653 self._chunkcachesize,
653 654 chunkcache,
654 655 )
655 656 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
656 657 self.opener,
657 658 self._sidedatafile,
658 659 self._chunkcachesize,
659 660 )
660 661 # revnum -> (chain-length, sum-delta-length)
661 662 self._chaininfocache = util.lrucachedict(500)
662 663 # revlog header -> revlog compressor
663 664 self._decompressors = {}
664 665
665 666 @util.propertycache
666 667 def revlog_kind(self):
667 668 return self.target[0]
668 669
669 670 @util.propertycache
670 671 def display_id(self):
671 672 """The public facing "ID" of the revlog that we use in message"""
672 673 if self.revlog_kind == KIND_FILELOG:
673 674 # Reference the file without the "data/" prefix, so it is familiar
674 675 # to the user.
675 676 return self.target[1]
676 677 else:
677 678 return self.radix
678 679
679 680 def _get_decompressor(self, t):
680 681 try:
681 682 compressor = self._decompressors[t]
682 683 except KeyError:
683 684 try:
684 685 engine = util.compengines.forrevlogheader(t)
685 686 compressor = engine.revlogcompressor(self._compengineopts)
686 687 self._decompressors[t] = compressor
687 688 except KeyError:
688 689 raise error.RevlogError(
689 690 _(b'unknown compression type %s') % binascii.hexlify(t)
690 691 )
691 692 return compressor
692 693
693 694 @util.propertycache
694 695 def _compressor(self):
695 696 engine = util.compengines[self._compengine]
696 697 return engine.revlogcompressor(self._compengineopts)
697 698
698 699 @util.propertycache
699 700 def _decompressor(self):
700 701 """the default decompressor"""
701 702 if self._docket is None:
702 703 return None
703 704 t = self._docket.default_compression_header
704 705 c = self._get_decompressor(t)
705 706 return c.decompress
706 707
707 708 def _indexfp(self):
708 709 """file object for the revlog's index file"""
709 710 return self.opener(self._indexfile, mode=b"r")
710 711
711 712 def __index_write_fp(self):
712 713 # You should not use this directly; use `_writing` instead
713 714 try:
714 715 f = self.opener(
715 716 self._indexfile, mode=b"r+", checkambig=self._checkambig
716 717 )
717 718 if self._docket is None:
718 719 f.seek(0, os.SEEK_END)
719 720 else:
720 721 f.seek(self._docket.index_end, os.SEEK_SET)
721 722 return f
722 723 except FileNotFoundError:
723 724 return self.opener(
724 725 self._indexfile, mode=b"w+", checkambig=self._checkambig
725 726 )
726 727
727 728 def __index_new_fp(self):
728 729 # You should not use this unless you are upgrading from an inline revlog
729 730 return self.opener(
730 731 self._indexfile,
731 732 mode=b"w",
732 733 checkambig=self._checkambig,
733 734 atomictemp=True,
734 735 )
735 736
736 737 def _datafp(self, mode=b'r'):
737 738 """file object for the revlog's data file"""
738 739 return self.opener(self._datafile, mode=mode)
739 740
740 741 @contextlib.contextmanager
741 742 def _sidedatareadfp(self):
742 743 """file object suitable to read sidedata"""
743 744 if self._writinghandles:
744 745 yield self._writinghandles[2]
745 746 else:
746 747 with self.opener(self._sidedatafile) as fp:
747 748 yield fp
748 749
749 750 def tiprev(self):
750 751 return len(self.index) - 1
751 752
752 753 def tip(self):
753 754 return self.node(self.tiprev())
754 755
755 756 def __contains__(self, rev):
756 757 return 0 <= rev < len(self)
757 758
758 759 def __len__(self):
759 760 return len(self.index)
760 761
761 762 def __iter__(self):
762 763 return iter(range(len(self)))
763 764
764 765 def revs(self, start=0, stop=None):
765 766 """iterate over all rev in this revlog (from start to stop)"""
766 767 return storageutil.iterrevs(len(self), start=start, stop=stop)
767 768
768 769 def hasnode(self, node):
769 770 try:
770 771 self.rev(node)
771 772 return True
772 773 except KeyError:
773 774 return False
774 775
775 776 def candelta(self, baserev, rev):
776 777 """whether two revisions (baserev, rev) can be delta-ed or not"""
777 778 # Disable delta if either rev requires a content-changing flag
778 779 # processor (ex. LFS). This is because such a flag processor can alter
779 780 # the rawtext content that the delta will be based on, and two clients
780 781 # could have the same revlog node with different flags (i.e. different
781 782 # rawtext contents) and the delta could be incompatible.
782 783 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
783 784 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
784 785 ):
785 786 return False
786 787 return True
787 788
788 789 def update_caches(self, transaction):
789 790 if self._nodemap_file is not None:
790 791 if transaction is None:
791 792 nodemaputil.update_persistent_nodemap(self)
792 793 else:
793 794 nodemaputil.setup_persistent_nodemap(transaction, self)
794 795
795 796 def clearcaches(self):
796 797 self._revisioncache = None
797 798 self._chainbasecache.clear()
798 799 self._segmentfile.clear_cache()
799 800 self._segmentfile_sidedata.clear_cache()
800 801 self._pcache = {}
801 802 self._nodemap_docket = None
802 803 self.index.clearcaches()
803 804 # The python code is the one responsible for validating the docket, so we
804 805 # end up having to refresh it here.
805 806 use_nodemap = (
806 807 not self._inline
807 808 and self._nodemap_file is not None
808 809 and util.safehasattr(self.index, 'update_nodemap_data')
809 810 )
810 811 if use_nodemap:
811 812 nodemap_data = nodemaputil.persisted_data(self)
812 813 if nodemap_data is not None:
813 814 self._nodemap_docket = nodemap_data[0]
814 815 self.index.update_nodemap_data(*nodemap_data)
815 816
816 817 def rev(self, node):
817 818 try:
818 819 return self.index.rev(node)
819 820 except TypeError:
820 821 raise
821 822 except error.RevlogError:
822 823 # parsers.c radix tree lookup failed
823 824 if (
824 825 node == self.nodeconstants.wdirid
825 826 or node in self.nodeconstants.wdirfilenodeids
826 827 ):
827 828 raise error.WdirUnsupported
828 829 raise error.LookupError(node, self.display_id, _(b'no node'))
829 830
830 831 # Accessors for index entries.
831 832
832 833 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
833 834 # are flags.
834 835 def start(self, rev):
835 836 return int(self.index[rev][0] >> 16)
836 837
837 838 def sidedata_cut_off(self, rev):
838 839 sd_cut_off = self.index[rev][8]
839 840 if sd_cut_off != 0:
840 841 return sd_cut_off
841 842 # This is some annoying dance, because entries without sidedata
842 843 # currently use 0 as their offset. (instead of previous-offset +
843 844 # previous-size)
844 845 #
845 846 # We should reconsider this sidedata → 0 sidedata_offset policy.
846 847 # In the meantime, we need this.
847 848 while 0 <= rev:
848 849 e = self.index[rev]
849 850 if e[9] != 0:
850 851 return e[8] + e[9]
851 852 rev -= 1
852 853 return 0
853 854
854 855 def flags(self, rev):
855 856 return self.index[rev][0] & 0xFFFF
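start() and flags() both decode the first index field, which packs a 48-bit data offset and 16 flag bits into a single integer; that is why one shifts right and the other masks. A toy illustration (`pack_offset_flags` is hypothetical):

    def pack_offset_flags(offset, flags):
        return (offset << 16) | flags

    entry0 = pack_offset_flags(1024, 0x1)
    assert entry0 >> 16 == 1024      # what start(rev) recovers
    assert entry0 & 0xFFFF == 0x1    # what flags(rev) recovers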
856 857
857 858 def length(self, rev):
858 859 return self.index[rev][1]
859 860
860 861 def sidedata_length(self, rev):
861 862 if not self.hassidedata:
862 863 return 0
863 864 return self.index[rev][9]
864 865
865 866 def rawsize(self, rev):
866 867 """return the length of the uncompressed text for a given revision"""
867 868 l = self.index[rev][2]
868 869 if l >= 0:
869 870 return l
870 871
871 872 t = self.rawdata(rev)
872 873 return len(t)
873 874
874 875 def size(self, rev):
875 876 """length of non-raw text (processed by a "read" flag processor)"""
876 877 # fast path: if no "read" flag processor could change the content,
877 878 # size is rawsize. note: ELLIPSIS is known to not change the content.
878 879 flags = self.flags(rev)
879 880 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
880 881 return self.rawsize(rev)
881 882
882 883 return len(self.revision(rev))
883 884
884 885 def fast_rank(self, rev):
885 886 """Return the rank of a revision if already known, or None otherwise.
886 887
887 888 The rank of a revision is the size of the sub-graph it defines as a
888 889 head. Equivalently, the rank of a revision `r` is the size of the set
889 890 `ancestors(r)`, `r` included.
890 891
891 892 This method returns the rank retrieved from the revlog in constant
892 893 time. It makes no attempt at computing unknown values for versions of
893 894 the revlog which do not persist the rank.
894 895 """
895 896 rank = self.index[rev][ENTRY_RANK]
896 897 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
897 898 return None
898 899 if rev == nullrev:
899 900 return 0 # convention
900 901 return rank
901 902
902 903 def chainbase(self, rev):
903 904 base = self._chainbasecache.get(rev)
904 905 if base is not None:
905 906 return base
906 907
907 908 index = self.index
908 909 iterrev = rev
909 910 base = index[iterrev][3]
910 911 while base != iterrev:
911 912 iterrev = base
912 913 base = index[iterrev][3]
913 914
914 915 self._chainbasecache[rev] = base
915 916 return base
916 917
917 918 def linkrev(self, rev):
918 919 return self.index[rev][4]
919 920
920 921 def parentrevs(self, rev):
921 922 try:
922 923 entry = self.index[rev]
923 924 except IndexError:
924 925 if rev == wdirrev:
925 926 raise error.WdirUnsupported
926 927 raise
927 928
928 929 if self.canonical_parent_order and entry[5] == nullrev:
929 930 return entry[6], entry[5]
930 931 else:
931 932 return entry[5], entry[6]
932 933
933 934 # fast parentrevs(rev) where rev isn't filtered
934 935 _uncheckedparentrevs = parentrevs
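A sketch of the canonical parent order applied above (and explained in __init__): a null first parent is swapped into second place, while filelogs can opt out because they abuse parent order to mark meta-encoded files. Illustrative only:

    nullrev = -1

    def canonical_parents(p1, p2):
        if p1 == nullrev:
            return p2, p1
        return p1, p2

    assert canonical_parents(nullrev, 5) == (5, nullrev)
    assert canonical_parents(3, 5) == (3, 5)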
935 936
936 937 def node(self, rev):
937 938 try:
938 939 return self.index[rev][7]
939 940 except IndexError:
940 941 if rev == wdirrev:
941 942 raise error.WdirUnsupported
942 943 raise
943 944
944 945 # Derived from index values.
945 946
946 947 def end(self, rev):
947 948 return self.start(rev) + self.length(rev)
948 949
949 950 def parents(self, node):
950 951 i = self.index
951 952 d = i[self.rev(node)]
952 953 # inline node() to avoid function call overhead
953 954 if self.canonical_parent_order and d[5] == self.nullid:
954 955 return i[d[6]][7], i[d[5]][7]
955 956 else:
956 957 return i[d[5]][7], i[d[6]][7]
957 958
958 959 def chainlen(self, rev):
959 960 return self._chaininfo(rev)[0]
960 961
961 962 def _chaininfo(self, rev):
962 963 chaininfocache = self._chaininfocache
963 964 if rev in chaininfocache:
964 965 return chaininfocache[rev]
965 966 index = self.index
966 967 generaldelta = self._generaldelta
967 968 iterrev = rev
968 969 e = index[iterrev]
969 970 clen = 0
970 971 compresseddeltalen = 0
971 972 while iterrev != e[3]:
972 973 clen += 1
973 974 compresseddeltalen += e[1]
974 975 if generaldelta:
975 976 iterrev = e[3]
976 977 else:
977 978 iterrev -= 1
978 979 if iterrev in chaininfocache:
979 980 t = chaininfocache[iterrev]
980 981 clen += t[0]
981 982 compresseddeltalen += t[1]
982 983 break
983 984 e = index[iterrev]
984 985 else:
985 986 # Add text length of base since decompressing that also takes
986 987 # work. For cache hits the length is already included.
987 988 compresseddeltalen += e[1]
988 989 r = (clen, compresseddeltalen)
989 990 chaininfocache[rev] = r
990 991 return r
991 992
992 993 def _deltachain(self, rev, stoprev=None):
993 994 """Obtain the delta chain for a revision.
994 995
995 996 ``stoprev`` specifies a revision to stop at. If not specified, we
996 997 stop at the base of the chain.
997 998
998 999 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
999 1000 revs in ascending order and ``stopped`` is a bool indicating whether
1000 1001 ``stoprev`` was hit.
1001 1002 """
1002 1003 # Try C implementation.
1003 1004 try:
1004 1005 return self.index.deltachain(rev, stoprev, self._generaldelta)
1005 1006 except AttributeError:
1006 1007 pass
1007 1008
1008 1009 chain = []
1009 1010
1010 1011 # Alias to prevent attribute lookup in tight loop.
1011 1012 index = self.index
1012 1013 generaldelta = self._generaldelta
1013 1014
1014 1015 iterrev = rev
1015 1016 e = index[iterrev]
1016 1017 while iterrev != e[3] and iterrev != stoprev:
1017 1018 chain.append(iterrev)
1018 1019 if generaldelta:
1019 1020 iterrev = e[3]
1020 1021 else:
1021 1022 iterrev -= 1
1022 1023 e = index[iterrev]
1023 1024
1024 1025 if iterrev == stoprev:
1025 1026 stopped = True
1026 1027 else:
1027 1028 chain.append(iterrev)
1028 1029 stopped = False
1029 1030
1030 1031 chain.reverse()
1031 1032 return chain, stopped
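A toy model of the pure-python walk above, assuming the general-delta layout where index[rev][3] holds the delta-base rev (a rev that is its own base ends the chain):

    toy_base = {0: 0, 1: 0, 2: 1, 3: 2}   # rev -> delta-base rev

    def deltachain(rev, stoprev=None):
        chain = []
        while toy_base[rev] != rev and rev != stoprev:
            chain.append(rev)
            rev = toy_base[rev]
        stopped = rev == stoprev
        if not stopped:
            chain.append(rev)
        chain.reverse()
        return chain, stopped

    assert deltachain(3) == ([0, 1, 2, 3], False)
    assert deltachain(3, stoprev=1) == ([2, 3], True)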
1032 1033
1033 1034 def ancestors(self, revs, stoprev=0, inclusive=False):
1034 1035 """Generate the ancestors of 'revs' in reverse revision order.
1035 1036 Does not generate revs lower than stoprev.
1036 1037
1037 1038 See the documentation for ancestor.lazyancestors for more details."""
1038 1039
1039 1040 # first, make sure start revisions aren't filtered
1040 1041 revs = list(revs)
1041 1042 checkrev = self.node
1042 1043 for r in revs:
1043 1044 checkrev(r)
1044 1045 # and we're sure ancestors aren't filtered as well
1045 1046
1046 1047 if rustancestor is not None and self.index.rust_ext_compat:
1047 1048 lazyancestors = rustancestor.LazyAncestors
1048 1049 arg = self.index
1049 1050 else:
1050 1051 lazyancestors = ancestor.lazyancestors
1051 1052 arg = self._uncheckedparentrevs
1052 1053 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1053 1054
1054 1055 def descendants(self, revs):
1055 1056 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1056 1057
1057 1058 def findcommonmissing(self, common=None, heads=None):
1058 1059 """Return a tuple of the ancestors of common and the ancestors of heads
1059 1060 that are not ancestors of common. In revset terminology, we return the
1060 1061 tuple:
1061 1062
1062 1063 ::common, (::heads) - (::common)
1063 1064
1064 1065 The list is sorted by revision number, meaning it is
1065 1066 topologically sorted.
1066 1067
1067 1068 'heads' and 'common' are both lists of node IDs. If heads is
1068 1069 not supplied, uses all of the revlog's heads. If common is not
1069 1070 supplied, uses nullid."""
1070 1071 if common is None:
1071 1072 common = [self.nullid]
1072 1073 if heads is None:
1073 1074 heads = self.heads()
1074 1075
1075 1076 common = [self.rev(n) for n in common]
1076 1077 heads = [self.rev(n) for n in heads]
1077 1078
1078 1079 # we want the ancestors, but inclusive
1079 1080 class lazyset:
1080 1081 def __init__(self, lazyvalues):
1081 1082 self.addedvalues = set()
1082 1083 self.lazyvalues = lazyvalues
1083 1084
1084 1085 def __contains__(self, value):
1085 1086 return value in self.addedvalues or value in self.lazyvalues
1086 1087
1087 1088 def __iter__(self):
1088 1089 added = self.addedvalues
1089 1090 for r in added:
1090 1091 yield r
1091 1092 for r in self.lazyvalues:
1092 1093 if not r in added:
1093 1094 yield r
1094 1095
1095 1096 def add(self, value):
1096 1097 self.addedvalues.add(value)
1097 1098
1098 1099 def update(self, values):
1099 1100 self.addedvalues.update(values)
1100 1101
1101 1102 has = lazyset(self.ancestors(common))
1102 1103 has.add(nullrev)
1103 1104 has.update(common)
1104 1105
1105 1106 # take all ancestors from heads that aren't in has
1106 1107 missing = set()
1107 1108 visit = collections.deque(r for r in heads if r not in has)
1108 1109 while visit:
1109 1110 r = visit.popleft()
1110 1111 if r in missing:
1111 1112 continue
1112 1113 else:
1113 1114 missing.add(r)
1114 1115 for p in self.parentrevs(r):
1115 1116 if p not in has:
1116 1117 visit.append(p)
1117 1118 missing = list(missing)
1118 1119 missing.sort()
1119 1120 return has, [self.node(miss) for miss in missing]
1120 1121
1121 1122 def incrementalmissingrevs(self, common=None):
1122 1123 """Return an object that can be used to incrementally compute the
1123 1124 revision numbers of the ancestors of arbitrary sets that are not
1124 1125 ancestors of common. This is an ancestor.incrementalmissingancestors
1125 1126 object.
1126 1127
1127 1128 'common' is a list of revision numbers. If common is not supplied, uses
1128 1129 nullrev.
1129 1130 """
1130 1131 if common is None:
1131 1132 common = [nullrev]
1132 1133
1133 1134 if rustancestor is not None and self.index.rust_ext_compat:
1134 1135 return rustancestor.MissingAncestors(self.index, common)
1135 1136 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1136 1137
1137 1138 def findmissingrevs(self, common=None, heads=None):
1138 1139 """Return the revision numbers of the ancestors of heads that
1139 1140 are not ancestors of common.
1140 1141
1141 1142 More specifically, return a list of revision numbers corresponding to
1142 1143 nodes N such that every N satisfies the following constraints:
1143 1144
1144 1145 1. N is an ancestor of some node in 'heads'
1145 1146 2. N is not an ancestor of any node in 'common'
1146 1147
1147 1148 The list is sorted by revision number, meaning it is
1148 1149 topologically sorted.
1149 1150
1150 1151 'heads' and 'common' are both lists of revision numbers. If heads is
1151 1152 not supplied, uses all of the revlog's heads. If common is not
1152 1153 supplied, uses nullid."""
1153 1154 if common is None:
1154 1155 common = [nullrev]
1155 1156 if heads is None:
1156 1157 heads = self.headrevs()
1157 1158
1158 1159 inc = self.incrementalmissingrevs(common=common)
1159 1160 return inc.missingancestors(heads)
1160 1161
1161 1162 def findmissing(self, common=None, heads=None):
1162 1163 """Return the ancestors of heads that are not ancestors of common.
1163 1164
1164 1165 More specifically, return a list of nodes N such that every N
1165 1166 satisfies the following constraints:
1166 1167
1167 1168 1. N is an ancestor of some node in 'heads'
1168 1169 2. N is not an ancestor of any node in 'common'
1169 1170
1170 1171 The list is sorted by revision number, meaning it is
1171 1172 topologically sorted.
1172 1173
1173 1174 'heads' and 'common' are both lists of node IDs. If heads is
1174 1175 not supplied, uses all of the revlog's heads. If common is not
1175 1176 supplied, uses nullid."""
1176 1177 if common is None:
1177 1178 common = [self.nullid]
1178 1179 if heads is None:
1179 1180 heads = self.heads()
1180 1181
1181 1182 common = [self.rev(n) for n in common]
1182 1183 heads = [self.rev(n) for n in heads]
1183 1184
1184 1185 inc = self.incrementalmissingrevs(common=common)
1185 1186 return [self.node(r) for r in inc.missingancestors(heads)]
1186 1187
1187 1188 def nodesbetween(self, roots=None, heads=None):
1188 1189 """Return a topological path from 'roots' to 'heads'.
1189 1190
1190 1191 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1191 1192 topologically sorted list of all nodes N that satisfy both of
1192 1193 these constraints:
1193 1194
1194 1195 1. N is a descendant of some node in 'roots'
1195 1196 2. N is an ancestor of some node in 'heads'
1196 1197
1197 1198 Every node is considered to be both a descendant and an ancestor
1198 1199 of itself, so every reachable node in 'roots' and 'heads' will be
1199 1200 included in 'nodes'.
1200 1201
1201 1202 'outroots' is the list of reachable nodes in 'roots', i.e., the
1202 1203 subset of 'roots' that is returned in 'nodes'. Likewise,
1203 1204 'outheads' is the subset of 'heads' that is also in 'nodes'.
1204 1205
1205 1206 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1206 1207 unspecified, uses nullid as the only root. If 'heads' is
1207 1208 unspecified, uses list of all of the revlog's heads."""
1208 1209 nonodes = ([], [], [])
1209 1210 if roots is not None:
1210 1211 roots = list(roots)
1211 1212 if not roots:
1212 1213 return nonodes
1213 1214 lowestrev = min([self.rev(n) for n in roots])
1214 1215 else:
1215 1216 roots = [self.nullid] # Everybody's a descendant of nullid
1216 1217 lowestrev = nullrev
1217 1218 if (lowestrev == nullrev) and (heads is None):
1218 1219 # We want _all_ the nodes!
1219 1220 return (
1220 1221 [self.node(r) for r in self],
1221 1222 [self.nullid],
1222 1223 list(self.heads()),
1223 1224 )
1224 1225 if heads is None:
1225 1226 # All nodes are ancestors, so the latest ancestor is the last
1226 1227 # node.
1227 1228 highestrev = len(self) - 1
1228 1229 # Set ancestors to None to signal that every node is an ancestor.
1229 1230 ancestors = None
1230 1231 # Set heads to an empty dictionary for later discovery of heads
1231 1232 heads = {}
1232 1233 else:
1233 1234 heads = list(heads)
1234 1235 if not heads:
1235 1236 return nonodes
1236 1237 ancestors = set()
1237 1238 # Turn heads into a dictionary so we can remove 'fake' heads.
1238 1239 # Also, later we will be using it to filter out the heads we can't
1239 1240 # find from roots.
1240 1241 heads = dict.fromkeys(heads, False)
1241 1242 # Start at the top and keep marking parents until we're done.
1242 1243 nodestotag = set(heads)
1243 1244 # Remember where the top was so we can use it as a limit later.
1244 1245 highestrev = max([self.rev(n) for n in nodestotag])
1245 1246 while nodestotag:
1246 1247 # grab a node to tag
1247 1248 n = nodestotag.pop()
1248 1249 # Never tag nullid
1249 1250 if n == self.nullid:
1250 1251 continue
1251 1252 # A node's revision number represents its place in a
1252 1253 # topologically sorted list of nodes.
1253 1254 r = self.rev(n)
1254 1255 if r >= lowestrev:
1255 1256 if n not in ancestors:
1256 1257 # If we are possibly a descendant of one of the roots
1257 1258 # and we haven't already been marked as an ancestor
1258 1259 ancestors.add(n) # Mark as ancestor
1259 1260 # Add non-nullid parents to list of nodes to tag.
1260 1261 nodestotag.update(
1261 1262 [p for p in self.parents(n) if p != self.nullid]
1262 1263 )
1263 1264 elif n in heads: # We've seen it before, is it a fake head?
1264 1265 # So it is, real heads should not be the ancestors of
1265 1266 # any other heads.
1266 1267 heads.pop(n)
1267 1268 if not ancestors:
1268 1269 return nonodes
1269 1270 # Now that we have our set of ancestors, we want to remove any
1270 1271 # roots that are not ancestors.
1271 1272
1272 1273 # If one of the roots was nullid, everything is included anyway.
1273 1274 if lowestrev > nullrev:
1274 1275 # But, since we weren't, let's recompute the lowest rev to not
1275 1276 # include roots that aren't ancestors.
1276 1277
1277 1278 # Filter out roots that aren't ancestors of heads
1278 1279 roots = [root for root in roots if root in ancestors]
1279 1280 # Recompute the lowest revision
1280 1281 if roots:
1281 1282 lowestrev = min([self.rev(root) for root in roots])
1282 1283 else:
1283 1284 # No more roots? Return empty list
1284 1285 return nonodes
1285 1286 else:
1286 1287 # We are descending from nullid, and don't need to care about
1287 1288 # any other roots.
1288 1289 lowestrev = nullrev
1289 1290 roots = [self.nullid]
1290 1291 # Transform our roots list into a set.
1291 1292 descendants = set(roots)
1292 1293 # Also, keep the original roots so we can filter out roots that aren't
1293 1294 # 'real' roots (i.e. are descended from other roots).
1294 1295 roots = descendants.copy()
1295 1296 # Our topologically sorted list of output nodes.
1296 1297 orderedout = []
1297 1298 # Don't start at nullid since we don't want nullid in our output list,
1298 1299 # and if nullid shows up in descendants, empty parents will look like
1299 1300 # they're descendants.
1300 1301 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1301 1302 n = self.node(r)
1302 1303 isdescendant = False
1303 1304 if lowestrev == nullrev: # Everybody is a descendant of nullid
1304 1305 isdescendant = True
1305 1306 elif n in descendants:
1306 1307 # n is already a descendant
1307 1308 isdescendant = True
1308 1309 # This check only needs to be done here because all the roots
1309 1310 # will start being marked as descendants before the loop.
1310 1311 if n in roots:
1311 1312 # If n was a root, check if it's a 'real' root.
1312 1313 p = tuple(self.parents(n))
1313 1314 # If any of its parents are descendants, it's not a root.
1314 1315 if (p[0] in descendants) or (p[1] in descendants):
1315 1316 roots.remove(n)
1316 1317 else:
1317 1318 p = tuple(self.parents(n))
1318 1319 # A node is a descendant if either of its parents is a
1319 1320 # descendant. (We seeded the descendants set with the roots
1320 1321 # up there, remember?)
1321 1322 if (p[0] in descendants) or (p[1] in descendants):
1322 1323 descendants.add(n)
1323 1324 isdescendant = True
1324 1325 if isdescendant and ((ancestors is None) or (n in ancestors)):
1325 1326 # Only include nodes that are both descendants and ancestors.
1326 1327 orderedout.append(n)
1327 1328 if (ancestors is not None) and (n in heads):
1328 1329 # We're trying to figure out which heads are reachable
1329 1330 # from roots.
1330 1331 # Mark this head as having been reached
1331 1332 heads[n] = True
1332 1333 elif ancestors is None:
1333 1334 # Otherwise, we're trying to discover the heads.
1334 1335 # Assume this is a head because if it isn't, the next step
1335 1336 # will eventually remove it.
1336 1337 heads[n] = True
1337 1338 # But, obviously its parents aren't.
1338 1339 for p in self.parents(n):
1339 1340 heads.pop(p, None)
1340 1341 heads = [head for head, flag in heads.items() if flag]
1341 1342 roots = list(roots)
1342 1343 assert orderedout
1343 1344 assert roots
1344 1345 assert heads
1345 1346 return (orderedout, roots, heads)
1346 1347
1347 1348 def headrevs(self, revs=None):
1348 1349 if revs is None:
1349 1350 try:
1350 1351 return self.index.headrevs()
1351 1352 except AttributeError:
1352 1353 return self._headrevs()
1353 1354 if rustdagop is not None and self.index.rust_ext_compat:
1354 1355 return rustdagop.headrevs(self.index, revs)
1355 1356 return dagop.headrevs(revs, self._uncheckedparentrevs)
1356 1357
1357 1358 def computephases(self, roots):
1358 1359 return self.index.computephasesmapsets(roots)
1359 1360
1360 1361 def _headrevs(self):
1361 1362 count = len(self)
1362 1363 if not count:
1363 1364 return [nullrev]
1364 1365 # we won't iterate over filtered revs so nobody is a head at start
1365 1366 ishead = [0] * (count + 1)
1366 1367 index = self.index
1367 1368 for r in self:
1368 1369 ishead[r] = 1 # I may be a head
1369 1370 e = index[r]
1370 1371 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1371 1372 return [r for r, val in enumerate(ishead) if val]
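The same single-pass idea on a toy DAG (illustrative): every rev starts as a head candidate and is demoted the moment it shows up as someone's parent.

    toy_parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}  # rev -> (p1, p2)

    ishead = [1] * len(toy_parents)
    for r, parents in toy_parents.items():
        for p in parents:
            if p >= 0:           # skip nullrev; a simplification of the above
                ishead[p] = 0
    assert [r for r, v in enumerate(ishead) if v] == [1, 2]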
1372 1373
1373 1374 def heads(self, start=None, stop=None):
1374 1375 """return the list of all nodes that have no children
1375 1376
1376 1377 if start is specified, only heads that are descendants of
1377 1378 start will be returned
1378 1379 if stop is specified, it will consider all the revs from stop
1379 1380 as if they had no children
1380 1381 """
1381 1382 if start is None and stop is None:
1382 1383 if not len(self):
1383 1384 return [self.nullid]
1384 1385 return [self.node(r) for r in self.headrevs()]
1385 1386
1386 1387 if start is None:
1387 1388 start = nullrev
1388 1389 else:
1389 1390 start = self.rev(start)
1390 1391
1391 1392 stoprevs = {self.rev(n) for n in stop or []}
1392 1393
1393 1394 revs = dagop.headrevssubset(
1394 1395 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1395 1396 )
1396 1397
1397 1398 return [self.node(rev) for rev in revs]
1398 1399
1399 1400 def children(self, node):
1400 1401 """find the children of a given node"""
1401 1402 c = []
1402 1403 p = self.rev(node)
1403 1404 for r in self.revs(start=p + 1):
1404 1405 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1405 1406 if prevs:
1406 1407 for pr in prevs:
1407 1408 if pr == p:
1408 1409 c.append(self.node(r))
1409 1410 elif p == nullrev:
1410 1411 c.append(self.node(r))
1411 1412 return c
1412 1413
1413 1414 def commonancestorsheads(self, a, b):
1414 1415 """calculate all the heads of the common ancestors of nodes a and b"""
1415 1416 a, b = self.rev(a), self.rev(b)
1416 1417 ancs = self._commonancestorsheads(a, b)
1417 1418 return pycompat.maplist(self.node, ancs)
1418 1419
1419 1420 def _commonancestorsheads(self, *revs):
1420 1421 """calculate all the heads of the common ancestors of revs"""
1421 1422 try:
1422 1423 ancs = self.index.commonancestorsheads(*revs)
1423 1424 except (AttributeError, OverflowError): # C implementation failed
1424 1425 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1425 1426 return ancs
1426 1427
1427 1428 def isancestor(self, a, b):
1428 1429 """return True if node a is an ancestor of node b
1429 1430
1430 1431 A revision is considered an ancestor of itself."""
1431 1432 a, b = self.rev(a), self.rev(b)
1432 1433 return self.isancestorrev(a, b)
1433 1434
1434 1435 def isancestorrev(self, a, b):
1435 1436 """return True if revision a is an ancestor of revision b
1436 1437
1437 1438 A revision is considered an ancestor of itself.
1438 1439
1439 1440 The implementation of this is trivial but the use of
1440 1441 reachableroots is not."""
1441 1442 if a == nullrev:
1442 1443 return True
1443 1444 elif a == b:
1444 1445 return True
1445 1446 elif a > b:
1446 1447 return False
1447 1448 return bool(self.reachableroots(a, [b], [a], includepath=False))
1448 1449
1449 1450 def reachableroots(self, minroot, heads, roots, includepath=False):
1450 1451 """return (heads(::(<roots> and <roots>::<heads>)))
1451 1452
1452 1453 If includepath is True, return (<roots>::<heads>)."""
1453 1454 try:
1454 1455 return self.index.reachableroots2(
1455 1456 minroot, heads, roots, includepath
1456 1457 )
1457 1458 except AttributeError:
1458 1459 return dagop._reachablerootspure(
1459 1460 self.parentrevs, minroot, roots, heads, includepath
1460 1461 )
1461 1462
1462 1463 def ancestor(self, a, b):
1463 1464 """calculate the "best" common ancestor of nodes a and b"""
1464 1465
1465 1466 a, b = self.rev(a), self.rev(b)
1466 1467 try:
1467 1468 ancs = self.index.ancestors(a, b)
1468 1469 except (AttributeError, OverflowError):
1469 1470 ancs = ancestor.ancestors(self.parentrevs, a, b)
1470 1471 if ancs:
1471 1472 # choose a consistent winner when there's a tie
1472 1473 return min(map(self.node, ancs))
1473 1474 return self.nullid
1474 1475
1475 1476 def _match(self, id):
1476 1477 if isinstance(id, int):
1477 1478 # rev
1478 1479 return self.node(id)
1479 1480 if len(id) == self.nodeconstants.nodelen:
1480 1481 # possibly a binary node
1481 1482 # odds of a binary node being all hex in ASCII are 1 in 10**25
1482 1483 try:
1483 1484 node = id
1484 1485 self.rev(node) # quick search the index
1485 1486 return node
1486 1487 except error.LookupError:
1487 1488 pass # may be partial hex id
1488 1489 try:
1489 1490 # str(rev)
1490 1491 rev = int(id)
1491 1492 if b"%d" % rev != id:
1492 1493 raise ValueError
1493 1494 if rev < 0:
1494 1495 rev = len(self) + rev
1495 1496 if rev < 0 or rev >= len(self):
1496 1497 raise ValueError
1497 1498 return self.node(rev)
1498 1499 except (ValueError, OverflowError):
1499 1500 pass
1500 1501 if len(id) == 2 * self.nodeconstants.nodelen:
1501 1502 try:
1502 1503 # a full hex nodeid?
1503 1504 node = bin(id)
1504 1505 self.rev(node)
1505 1506 return node
1506 1507 except (binascii.Error, error.LookupError):
1507 1508 pass
1508 1509
1509 1510 def _partialmatch(self, id):
1510 1511 # we don't care about wdirfilenodeids as they should always be full hashes
1511 1512 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1512 1513 ambiguous = False
1513 1514 try:
1514 1515 partial = self.index.partialmatch(id)
1515 1516 if partial and self.hasnode(partial):
1516 1517 if maybewdir:
1517 1518 # single 'ff...' match in radix tree, ambiguous with wdir
1518 1519 ambiguous = True
1519 1520 else:
1520 1521 return partial
1521 1522 elif maybewdir:
1522 1523 # no 'ff...' match in radix tree, wdir identified
1523 1524 raise error.WdirUnsupported
1524 1525 else:
1525 1526 return None
1526 1527 except error.RevlogError:
1527 1528 # parsers.c radix tree lookup gave multiple matches
1528 1529 # fast path: for unfiltered changelog, radix tree is accurate
1529 1530 if not getattr(self, 'filteredrevs', None):
1530 1531 ambiguous = True
1531 1532 # fall through to slow path that filters hidden revisions
1532 1533 except (AttributeError, ValueError):
1533 1534 # we are pure python, or key is not hex
1534 1535 pass
1535 1536 if ambiguous:
1536 1537 raise error.AmbiguousPrefixLookupError(
1537 1538 id, self.display_id, _(b'ambiguous identifier')
1538 1539 )
1539 1540
1540 1541 if id in self._pcache:
1541 1542 return self._pcache[id]
1542 1543
1543 1544 if len(id) <= 40:
1544 1545 # hex(node)[:...]
1545 1546 l = len(id) // 2 * 2 # grab an even number of digits
1546 1547 try:
1547 1548 # we're dropping the last digit, so let's check that it's hex,
1548 1549 # to avoid the expensive computation below if it's not
1549 1550 if len(id) % 2 > 0:
1550 1551 if not (id[-1] in hexdigits):
1551 1552 return None
1552 1553 prefix = bin(id[:l])
1553 1554 except binascii.Error:
1554 1555 pass
1555 1556 else:
1556 1557 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1557 1558 nl = [
1558 1559 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1559 1560 ]
1560 1561 if self.nodeconstants.nullhex.startswith(id):
1561 1562 nl.append(self.nullid)
1562 1563 if len(nl) > 0:
1563 1564 if len(nl) == 1 and not maybewdir:
1564 1565 self._pcache[id] = nl[0]
1565 1566 return nl[0]
1566 1567 raise error.AmbiguousPrefixLookupError(
1567 1568 id, self.display_id, _(b'ambiguous identifier')
1568 1569 )
1569 1570 if maybewdir:
1570 1571 raise error.WdirUnsupported
1571 1572 return None
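The even-length truncation used above, in isolation: hex decoding works on whole bytes, so an odd-length user prefix loses its last digit before bin()/unhexlify, and candidates are re-filtered against the full prefix afterwards.

    from binascii import hexlify, unhexlify

    user_prefix = b'f2a3b'             # odd number of hex digits
    l = len(user_prefix) // 2 * 2      # grab an even number of digits
    assert unhexlify(user_prefix[:l]) == b'\xf2\xa3'
    # a surviving candidate must still match the full odd-length prefix:
    assert hexlify(b'\xf2\xa3\xb0').startswith(user_prefix)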
1572 1573
1573 1574 def lookup(self, id):
1574 1575 """locate a node based on:
1575 1576 - revision number or str(revision number)
1576 1577 - nodeid or subset of hex nodeid
1577 1578 """
1578 1579 n = self._match(id)
1579 1580 if n is not None:
1580 1581 return n
1581 1582 n = self._partialmatch(id)
1582 1583 if n:
1583 1584 return n
1584 1585
1585 1586 raise error.LookupError(id, self.display_id, _(b'no match found'))
1586 1587
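# Illustrative sketch, assuming a hypothetical open revlog `rl`: the
# identifier forms `lookup` accepts.
#
#   rl.lookup(b'0')                  # b"0" parses as a revision number
#   rl.lookup(rl.node(0))            # full binary nodeid (via _match)
#   rl.lookup(hex(rl.node(0)))       # full hex nodeid (via _match)
#   rl.lookup(hex(rl.node(0))[:6])   # hex prefix (via _partialmatch);
#                                    # raises AmbiguousPrefixLookupError if
#                                    # several nodes share the prefix
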
1587 1588 def shortest(self, node, minlength=1):
1588 1589 """Find the shortest unambiguous prefix that matches node."""
1589 1590
1590 1591 def isvalid(prefix):
1591 1592 try:
1592 1593 matchednode = self._partialmatch(prefix)
1593 1594 except error.AmbiguousPrefixLookupError:
1594 1595 return False
1595 1596 except error.WdirUnsupported:
1596 1597 # single 'ff...' match
1597 1598 return True
1598 1599 if matchednode is None:
1599 1600 raise error.LookupError(node, self.display_id, _(b'no node'))
1600 1601 return True
1601 1602
1602 1603 def maybewdir(prefix):
1603 1604 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1604 1605
1605 1606 hexnode = hex(node)
1606 1607
1607 1608 def disambiguate(hexnode, minlength):
1608 1609 """Disambiguate against wdirid."""
1609 1610 for length in range(minlength, len(hexnode) + 1):
1610 1611 prefix = hexnode[:length]
1611 1612 if not maybewdir(prefix):
1612 1613 return prefix
1613 1614
1614 1615 if not getattr(self, 'filteredrevs', None):
1615 1616 try:
1616 1617 length = max(self.index.shortest(node), minlength)
1617 1618 return disambiguate(hexnode, length)
1618 1619 except error.RevlogError:
1619 1620 if node != self.nodeconstants.wdirid:
1620 1621 raise error.LookupError(
1621 1622 node, self.display_id, _(b'no node')
1622 1623 )
1623 1624 except AttributeError:
1624 1625 # Fall through to pure code
1625 1626 pass
1626 1627
1627 1628 if node == self.nodeconstants.wdirid:
1628 1629 for length in range(minlength, len(hexnode) + 1):
1629 1630 prefix = hexnode[:length]
1630 1631 if isvalid(prefix):
1631 1632 return prefix
1632 1633
1633 1634 for length in range(minlength, len(hexnode) + 1):
1634 1635 prefix = hexnode[:length]
1635 1636 if isvalid(prefix):
1636 1637 return disambiguate(hexnode, length)
1637 1638
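# Illustrative sketch, assuming a hypothetical open revlog `rl` and one of
# its nodes `node`: `shortest` and `lookup` are inverses in the sense that
# the returned prefix resolves back to the same node.
#
#   prefix = rl.shortest(node, minlength=4)
#   assert rl.lookup(prefix) == node
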
1638 1639 def cmp(self, node, text):
1639 1640 """compare text with a given file revision
1640 1641
1641 1642 returns True if text is different from what is stored.
1642 1643 """
1643 1644 p1, p2 = self.parents(node)
1644 1645 return storageutil.hashrevisionsha1(text, p1, p2) != node
1645 1646
1646 1647 def _getsegmentforrevs(self, startrev, endrev, df=None):
1647 1648 """Obtain a segment of raw data corresponding to a range of revisions.
1648 1649
1649 1650 Accepts the start and end revisions and an optional already-open
1650 1651 file handle to be used for reading. If the file handle is used, its
1651 1652 seek position will not be preserved.
1652 1653
1653 1654 Requests for data may be satisfied by a cache.
1654 1655
1655 1656 Returns a 2-tuple of (offset, data) for the requested range of
1656 1657 revisions. Offset is the integer offset from the beginning of the
1657 1658 revlog and data is a str or buffer of the raw byte data.
1658 1659
1659 1660 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1660 1661 to determine where each revision's data begins and ends.
1661 1662 """
1662 1663 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1663 1664 # (functions are expensive).
1664 1665 index = self.index
1665 1666 istart = index[startrev]
1666 1667 start = int(istart[0] >> 16)
1667 1668 if startrev == endrev:
1668 1669 end = start + istart[1]
1669 1670 else:
1670 1671 iend = index[endrev]
1671 1672 end = int(iend[0] >> 16) + iend[1]
1672 1673
1673 1674 if self._inline:
1674 1675 start += (startrev + 1) * self.index.entry_size
1675 1676 end += (endrev + 1) * self.index.entry_size
1676 1677 length = end - start
1677 1678
1678 1679 return start, self._segmentfile.read_chunk(start, length, df)
1679 1680
1680 1681 def _chunk(self, rev, df=None):
1681 1682 """Obtain a single decompressed chunk for a revision.
1682 1683
1683 1684 Accepts an integer revision and an optional already-open file handle
1684 1685 to be used for reading. If used, the seek position of the file will not
1685 1686 be preserved.
1686 1687
1687 1688 Returns a str holding uncompressed data for the requested revision.
1688 1689 """
1689 1690 compression_mode = self.index[rev][10]
1690 1691 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1691 1692 if compression_mode == COMP_MODE_PLAIN:
1692 1693 return data
1693 1694 elif compression_mode == COMP_MODE_DEFAULT:
1694 1695 return self._decompressor(data)
1695 1696 elif compression_mode == COMP_MODE_INLINE:
1696 1697 return self.decompress(data)
1697 1698 else:
1698 1699 msg = b'unknown compression mode %d'
1699 1700 msg %= compression_mode
1700 1701 raise error.RevlogError(msg)
1701 1702
1702 1703 def _chunks(self, revs, df=None, targetsize=None):
1703 1704 """Obtain decompressed chunks for the specified revisions.
1704 1705
1705 1706 Accepts an iterable of numeric revisions that are assumed to be in
1706 1707 ascending order. Also accepts an optional already-open file handle
1707 1708 to be used for reading. If used, the seek position of the file will
1708 1709 not be preserved.
1709 1710
1710 1711 This function is similar to calling ``self._chunk()`` multiple times,
1711 1712 but is faster.
1712 1713
1713 1714 Returns a list with decompressed data for each requested revision.
1714 1715 """
1715 1716 if not revs:
1716 1717 return []
1717 1718 start = self.start
1718 1719 length = self.length
1719 1720 inline = self._inline
1720 1721 iosize = self.index.entry_size
1721 1722 buffer = util.buffer
1722 1723
1723 1724 l = []
1724 1725 ladd = l.append
1725 1726
1726 1727 if not self._withsparseread:
1727 1728 slicedchunks = (revs,)
1728 1729 else:
1729 1730 slicedchunks = deltautil.slicechunk(
1730 1731 self, revs, targetsize=targetsize
1731 1732 )
1732 1733
1733 1734 for revschunk in slicedchunks:
1734 1735 firstrev = revschunk[0]
1735 1736 # Skip trailing revisions with empty diff
1736 1737 for lastrev in revschunk[::-1]:
1737 1738 if length(lastrev) != 0:
1738 1739 break
1739 1740
1740 1741 try:
1741 1742 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1742 1743 except OverflowError:
1743 1744 # issue4215 - we can't cache a run of chunks greater than
1744 1745 # 2G on Windows
1745 1746 return [self._chunk(rev, df=df) for rev in revschunk]
1746 1747
1747 1748 decomp = self.decompress
1748 1749 # self._decompressor might be None, but will not be used in that case
1749 1750 def_decomp = self._decompressor
1750 1751 for rev in revschunk:
1751 1752 chunkstart = start(rev)
1752 1753 if inline:
1753 1754 chunkstart += (rev + 1) * iosize
1754 1755 chunklength = length(rev)
1755 1756 comp_mode = self.index[rev][10]
1756 1757 c = buffer(data, chunkstart - offset, chunklength)
1757 1758 if comp_mode == COMP_MODE_PLAIN:
1758 1759 ladd(c)
1759 1760 elif comp_mode == COMP_MODE_INLINE:
1760 1761 ladd(decomp(c))
1761 1762 elif comp_mode == COMP_MODE_DEFAULT:
1762 1763 ladd(def_decomp(c))
1763 1764 else:
1764 1765 msg = b'unknown compression mode %d'
1765 1766 msg %= comp_mode
1766 1767 raise error.RevlogError(msg)
1767 1768
1768 1769 return l
1769 1770
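# Sketch of the carving performed above: each slice triggers one contiguous
# read starting at `offset`, and every revision's compressed chunk is taken
# as a zero-copy window into that segment (assuming `util.buffer` behaves
# roughly like memoryview here):
#
#   def carve(data, offset, chunkstart, chunklength):
#       rel = chunkstart - offset
#       return memoryview(data)[rel:rel + chunklength]
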
1770 1771 def deltaparent(self, rev):
1771 1772 """return deltaparent of the given revision"""
1772 1773 base = self.index[rev][3]
1773 1774 if base == rev:
1774 1775 return nullrev
1775 1776 elif self._generaldelta:
1776 1777 return base
1777 1778 else:
1778 1779 return rev - 1
1779 1780
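# Illustrative note, assuming a hypothetical revlog `rl`:
#
#   rl.deltaparent(r) == nullrev  ->  r is stored as a full text
#   rl.deltaparent(r) == b        ->  r is stored as a delta against b;
#                                     with generaldelta, b is the base
#                                     recorded in the index, otherwise it
#                                     is always r - 1
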
1780 1781 def issnapshot(self, rev):
1781 1782 """tells whether rev is a snapshot"""
1782 1783 if not self._sparserevlog:
1783 1784 return self.deltaparent(rev) == nullrev
1784 1785 elif util.safehasattr(self.index, b'issnapshot'):
1785 1786 # directly assign the method to cache the testing and access
1786 1787 self.issnapshot = self.index.issnapshot
1787 1788 return self.issnapshot(rev)
1788 1789 if rev == nullrev:
1789 1790 return True
1790 1791 entry = self.index[rev]
1791 1792 base = entry[3]
1792 1793 if base == rev:
1793 1794 return True
1794 1795 if base == nullrev:
1795 1796 return True
1796 1797 p1 = entry[5]
1797 1798 while self.length(p1) == 0:
1798 1799 b = self.deltaparent(p1)
1799 1800 if b == p1:
1800 1801 break
1801 1802 p1 = b
1802 1803 p2 = entry[6]
1803 1804 while self.length(p2) == 0:
1804 1805 b = self.deltaparent(p2)
1805 1806 if b == p2:
1806 1807 break
1807 1808 p2 = b
1808 1809 if base == p1 or base == p2:
1809 1810 return False
1810 1811 return self.issnapshot(base)
1811 1812
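# Minimal sketch of the recursion above (standalone; the empty-diff parent
# skipping is omitted, and `base_of`/`parents_of` are hypothetical
# accessors over a sparse revlog):
#
#   def issnapshot_sketch(rev, base_of, parents_of, nullrev=-1):
#       base = base_of(rev)
#       if base == rev or base == nullrev:
#           return True   # stored as a full text
#       if base in parents_of(rev):
#           return False  # plain delta against a parent
#       return issnapshot_sketch(base, base_of, parents_of, nullrev)
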
1812 1813 def snapshotdepth(self, rev):
1813 1814 """number of snapshot in the chain before this one"""
1814 1815 if not self.issnapshot(rev):
1815 1816 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1816 1817 return len(self._deltachain(rev)[0]) - 1
1817 1818
1818 1819 def revdiff(self, rev1, rev2):
1819 1820 """return or calculate a delta between two revisions
1820 1821
1821 1822 The delta calculated is in binary form and is intended to be written to
1822 1823 revlog data directly. So this function needs raw revision data.
1823 1824 """
1824 1825 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1825 1826 return bytes(self._chunk(rev2))
1826 1827
1827 1828 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1828 1829
1829 1830 def revision(self, nodeorrev, _df=None):
1830 1831 """return an uncompressed revision of a given node or revision
1831 1832 number.
1832 1833
1833 1834 _df - an existing file handle to read from. (internal-only)
1834 1835 """
1835 1836 return self._revisiondata(nodeorrev, _df)
1836 1837
1837 1838 def sidedata(self, nodeorrev, _df=None):
1838 1839 """a map of extra data related to the changeset but not part of the hash
1839 1840
1840 1841 This function currently returns a dictionary. However, a more advanced
1841 1842 mapping object will likely be used in the future for more
1842 1843 efficient/lazy code.
1843 1844 """
1844 1845 # deal with <nodeorrev> argument type
1845 1846 if isinstance(nodeorrev, int):
1846 1847 rev = nodeorrev
1847 1848 else:
1848 1849 rev = self.rev(nodeorrev)
1849 1850 return self._sidedata(rev)
1850 1851
1851 1852 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1852 1853 # deal with <nodeorrev> argument type
1853 1854 if isinstance(nodeorrev, int):
1854 1855 rev = nodeorrev
1855 1856 node = self.node(rev)
1856 1857 else:
1857 1858 node = nodeorrev
1858 1859 rev = None
1859 1860
1860 1861 # fast path the special `nullid` rev
1861 1862 if node == self.nullid:
1862 1863 return b""
1863 1864
1864 1865 # ``rawtext`` is the text as stored inside the revlog. Might be the
1865 1866 # revision or might need to be processed to retrieve the revision.
1866 1867 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1867 1868
1868 1869 if raw and validated:
1869 1870 # if we don't want to process the raw text and that raw
1870 1871 # text is cached, we can exit early.
1871 1872 return rawtext
1872 1873 if rev is None:
1873 1874 rev = self.rev(node)
1874 1875 # the revlog's flags for this revision
1875 1876 # (they usually alter its state or content)
1876 1877 flags = self.flags(rev)
1877 1878
1878 1879 if validated and flags == REVIDX_DEFAULT_FLAGS:
1879 1880 # no extra flags set, no flag processor runs, text = rawtext
1880 1881 return rawtext
1881 1882
1882 1883 if raw:
1883 1884 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1884 1885 text = rawtext
1885 1886 else:
1886 1887 r = flagutil.processflagsread(self, rawtext, flags)
1887 1888 text, validatehash = r
1888 1889 if validatehash:
1889 1890 self.checkhash(text, node, rev=rev)
1890 1891 if not validated:
1891 1892 self._revisioncache = (node, rev, rawtext)
1892 1893
1893 1894 return text
1894 1895
1895 1896 def _rawtext(self, node, rev, _df=None):
1896 1897 """return the possibly unvalidated rawtext for a revision
1897 1898
1898 1899 returns (rev, rawtext, validated)
1899 1900 """
1900 1901
1901 1902 # revision in the cache (could be useful to apply delta)
1902 1903 cachedrev = None
1903 1904 # An intermediate text to apply deltas to
1904 1905 basetext = None
1905 1906
1906 1907 # Check if we have the entry in cache
1907 1908 # The cache entry looks like (node, rev, rawtext)
1908 1909 if self._revisioncache:
1909 1910 if self._revisioncache[0] == node:
1910 1911 return (rev, self._revisioncache[2], True)
1911 1912 cachedrev = self._revisioncache[1]
1912 1913
1913 1914 if rev is None:
1914 1915 rev = self.rev(node)
1915 1916
1916 1917 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1917 1918 if stopped:
1918 1919 basetext = self._revisioncache[2]
1919 1920
1920 1921 # drop cache to save memory; the caller is expected to
1921 1922 # update self._revisioncache after validating the text
1922 1923 self._revisioncache = None
1923 1924
1924 1925 targetsize = None
1925 1926 rawsize = self.index[rev][2]
1926 1927 if 0 <= rawsize:
1927 1928 targetsize = 4 * rawsize
1928 1929
1929 1930 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1930 1931 if basetext is None:
1931 1932 basetext = bytes(bins[0])
1932 1933 bins = bins[1:]
1933 1934
1934 1935 rawtext = mdiff.patches(basetext, bins)
1935 1936 del basetext # let us have a chance to free memory early
1936 1937 return (rev, rawtext, False)
1937 1938
1938 1939 def _sidedata(self, rev):
1939 1940 """Return the sidedata for a given revision number."""
1940 1941 index_entry = self.index[rev]
1941 1942 sidedata_offset = index_entry[8]
1942 1943 sidedata_size = index_entry[9]
1943 1944
1944 1945 if self._inline:
1945 1946 sidedata_offset += self.index.entry_size * (1 + rev)
1946 1947 if sidedata_size == 0:
1947 1948 return {}
1948 1949
1949 1950 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1950 1951 filename = self._sidedatafile
1951 1952 end = self._docket.sidedata_end
1952 1953 offset = sidedata_offset
1953 1954 length = sidedata_size
1954 1955 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1955 1956 raise error.RevlogError(m)
1956 1957
1957 1958 comp_segment = self._segmentfile_sidedata.read_chunk(
1958 1959 sidedata_offset, sidedata_size
1959 1960 )
1960 1961
1961 1962 comp = self.index[rev][11]
1962 1963 if comp == COMP_MODE_PLAIN:
1963 1964 segment = comp_segment
1964 1965 elif comp == COMP_MODE_DEFAULT:
1965 1966 segment = self._decompressor(comp_segment)
1966 1967 elif comp == COMP_MODE_INLINE:
1967 1968 segment = self.decompress(comp_segment)
1968 1969 else:
1969 1970 msg = b'unknown compression mode %d'
1970 1971 msg %= comp
1971 1972 raise error.RevlogError(msg)
1972 1973
1973 1974 sidedata = sidedatautil.deserialize_sidedata(segment)
1974 1975 return sidedata
1975 1976
1976 1977 def rawdata(self, nodeorrev, _df=None):
1977 1978 """return an uncompressed raw data of a given node or revision number.
1978 1979
1979 1980 _df - an existing file handle to read from. (internal-only)
1980 1981 """
1981 1982 return self._revisiondata(nodeorrev, _df, raw=True)
1982 1983
1983 1984 def hash(self, text, p1, p2):
1984 1985 """Compute a node hash.
1985 1986
1986 1987 Available as a function so that subclasses can replace the hash
1987 1988 as needed.
1988 1989 """
1989 1990 return storageutil.hashrevisionsha1(text, p1, p2)
1990 1991
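# Minimal sketch of the default node scheme, assuming the historical revlog
# definition implemented by storageutil.hashrevisionsha1: SHA-1 over the two
# parent nodes (smallest first) followed by the text.
#
#   import hashlib
#
#   def hashrevision_sketch(text, p1, p2):
#       s = hashlib.sha1(min(p1, p2))
#       s.update(max(p1, p2))
#       s.update(text)
#       return s.digest()
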
1991 1992 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1992 1993 """Check node hash integrity.
1993 1994
1994 1995 Available as a function so that subclasses can extend hash mismatch
1995 1996 behaviors as needed.
1996 1997 """
1997 1998 try:
1998 1999 if p1 is None and p2 is None:
1999 2000 p1, p2 = self.parents(node)
2000 2001 if node != self.hash(text, p1, p2):
2001 2002 # Clear the revision cache on hash failure. The revision cache
2002 2003 # only stores the raw revision and clearing the cache does have
2003 2004 # the side-effect that we won't have a cache hit when the raw
2004 2005 # revision data is accessed. But this case should be rare and
2005 2006 # it is extra work to teach the cache about the hash
2006 2007 # verification state.
2007 2008 if self._revisioncache and self._revisioncache[0] == node:
2008 2009 self._revisioncache = None
2009 2010
2010 2011 revornode = rev
2011 2012 if revornode is None:
2012 2013 revornode = templatefilters.short(hex(node))
2013 2014 raise error.RevlogError(
2014 2015 _(b"integrity check failed on %s:%s")
2015 2016 % (self.display_id, pycompat.bytestr(revornode))
2016 2017 )
2017 2018 except error.RevlogError:
2018 2019 if self._censorable and storageutil.iscensoredtext(text):
2019 2020 raise error.CensoredNodeError(self.display_id, node, text)
2020 2021 raise
2021 2022
2022 2023 def _enforceinlinesize(self, tr, side_write=True):
2023 2024 """Check if the revlog is too big for inline and convert if so.
2024 2025
2025 2026 This should be called after revisions are added to the revlog. If the
2026 2027 revlog has grown too large to be an inline revlog, it will convert it
2027 2028 to use multiple index and data files.
2028 2029 """
2029 2030 tiprev = len(self) - 1
2030 2031 total_size = self.start(tiprev) + self.length(tiprev)
2031 2032 if not self._inline or total_size < _maxinline:
2032 2033 return
2033 2034
2034 2035 troffset = tr.findoffset(self._indexfile)
2035 2036 if troffset is None:
2036 2037 raise error.RevlogError(
2037 2038 _(b"%s not found in the transaction") % self._indexfile
2038 2039 )
2039 2040 if troffset:
2040 2041 tr.addbackup(self._indexfile, for_offset=True)
2041 2042 tr.add(self._datafile, 0)
2042 2043
2043 2044 existing_handles = False
2044 2045 if self._writinghandles is not None:
2045 2046 existing_handles = True
2046 2047 fp = self._writinghandles[0]
2047 2048 fp.flush()
2048 2049 fp.close()
2049 2050 # We can't use the cached file handle after close(). So prevent
2050 2051 # its usage.
2051 2052 self._writinghandles = None
2052 2053 self._segmentfile.writing_handle = None
2053 2054 # No need to deal with the sidedata writing handle as it is only
2054 2055 # relevant for revlog-v2, which is never inline and therefore
2055 2056 # never reaches this code
2056 2057 if side_write:
2057 2058 old_index_file_path = self._indexfile
2058 2059 new_index_file_path = self._indexfile + b'.s'
2059 2060 opener = self.opener
2061 weak_self = weakref.ref(self)
2060 2062
2061 2063 fncache = getattr(opener, 'fncache', None)
2062 2064 if fncache is not None:
2063 2065 fncache.addignore(new_index_file_path)
2064 2066
2065 2067 # the "split" index replace the real index when the transaction is finalized
2066 2068 def finalize_callback(tr):
2067 2069 opener.rename(
2068 2070 new_index_file_path,
2069 2071 old_index_file_path,
2070 2072 checkambig=True,
2071 2073 )
2074 maybe_self = weak_self()
2075 if maybe_self is not None:
2076 maybe_self._indexfile = old_index_file_path
2077
2078 def abort_callback(tr):
2079 maybe_self = weak_self()
2080 if maybe_self is not None:
2081 maybe_self._indexfile = old_index_file_path
2072 2082
2073 2083 tr.registertmp(new_index_file_path)
2074 2084 if self.target[1] is not None:
2075 finalize_id = b'000-revlog-split-%d-%s' % self.target
2085 callback_id = b'000-revlog-split-%d-%s' % self.target
2076 2086 else:
2077 finalize_id = b'000-revlog-split-%d' % self.target[0]
2078 tr.addfinalize(finalize_id, finalize_callback)
2087 callback_id = b'000-revlog-split-%d' % self.target[0]
2088 tr.addfinalize(callback_id, finalize_callback)
2089 tr.addabort(callback_id, abort_callback)
2079 2090
2080 2091 new_dfh = self._datafp(b'w+')
2081 2092 new_dfh.truncate(0) # drop any potentially existing data
2082 2093 try:
2083 2094 with self._indexfp() as read_ifh:
2084 2095 for r in self:
2085 2096 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2086 2097 new_dfh.flush()
2087 2098
2088 2099 if side_write:
2089 2100 self._indexfile = new_index_file_path
2090 2101 with self.__index_new_fp() as fp:
2091 2102 self._format_flags &= ~FLAG_INLINE_DATA
2092 2103 self._inline = False
2093 2104 for i in self:
2094 2105 e = self.index.entry_binary(i)
2095 2106 if i == 0 and self._docket is None:
2096 2107 header = self._format_flags | self._format_version
2097 2108 header = self.index.pack_header(header)
2098 2109 e = header + e
2099 2110 fp.write(e)
2100 2111 if self._docket is not None:
2101 2112 self._docket.index_end = fp.tell()
2102 2113
2103 2114 # If we don't use side-write, the temp file replaces the real
2104 2115 # index when we exit the context manager
2105 2116
2106 2117 nodemaputil.setup_persistent_nodemap(tr, self)
2107 2118 self._segmentfile = randomaccessfile.randomaccessfile(
2108 2119 self.opener,
2109 2120 self._datafile,
2110 2121 self._chunkcachesize,
2111 2122 )
2112 2123
2113 2124 if existing_handles:
2114 2125 # switched from inline to conventional; reopen the index
2115 2126 ifh = self.__index_write_fp()
2116 2127 self._writinghandles = (ifh, new_dfh, None)
2117 2128 self._segmentfile.writing_handle = new_dfh
2118 2129 new_dfh = None
2119 2130 # No need to deal with the sidedata writing handle as it is only
2120 2131 # relevant for revlog-v2, which is never inline and therefore
2121 2132 # never reaches this code
2122 2133 finally:
2123 2134 if new_dfh is not None:
2124 2135 new_dfh.close()
2125 2136
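# Note on the weakref dance above (issue6811): the finalize/abort callbacks
# registered on the transaction can outlive this revlog object, so they
# capture weakref.ref(self) rather than self. A minimal standalone sketch of
# the pattern (Holder and reset are hypothetical):
#
#   import weakref
#
#   class Holder:
#       def reset(self):
#           pass  # e.g. restore an attribute such as self._indexfile
#
#       def make_callback(self):
#           weak_self = weakref.ref(self)  # no strong reference kept
#
#           def callback():
#               maybe_self = weak_self()   # None once Holder was collected
#               if maybe_self is not None:
#                   maybe_self.reset()     # safe: object is still alive
#
#           return callback
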
2126 2137 def _nodeduplicatecallback(self, transaction, node):
2127 2138 """called when trying to add a node already stored."""
2128 2139
2129 2140 @contextlib.contextmanager
2130 2141 def reading(self):
2131 2142 """Context manager that keeps data and sidedata files open for reading"""
2132 2143 with self._segmentfile.reading():
2133 2144 with self._segmentfile_sidedata.reading():
2134 2145 yield
2135 2146
2136 2147 @contextlib.contextmanager
2137 2148 def _writing(self, transaction):
2138 2149 if self._trypending:
2139 2150 msg = b'trying to write in a `trypending` revlog: %s'
2140 2151 msg %= self.display_id
2141 2152 raise error.ProgrammingError(msg)
2142 2153 if self._writinghandles is not None:
2143 2154 yield
2144 2155 else:
2145 2156 ifh = dfh = sdfh = None
2146 2157 try:
2147 2158 r = len(self)
2148 2159 # opening the data file.
2149 2160 dsize = 0
2150 2161 if r:
2151 2162 dsize = self.end(r - 1)
2152 2163 dfh = None
2153 2164 if not self._inline:
2154 2165 try:
2155 2166 dfh = self._datafp(b"r+")
2156 2167 if self._docket is None:
2157 2168 dfh.seek(0, os.SEEK_END)
2158 2169 else:
2159 2170 dfh.seek(self._docket.data_end, os.SEEK_SET)
2160 2171 except FileNotFoundError:
2161 2172 dfh = self._datafp(b"w+")
2162 2173 transaction.add(self._datafile, dsize)
2163 2174 if self._sidedatafile is not None:
2164 2175 # revlog-v2 is never inline; help Pytype
2165 2176 assert dfh is not None
2166 2177 try:
2167 2178 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2168 2179 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2169 2180 except FileNotFoundError:
2170 2181 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2171 2182 transaction.add(
2172 2183 self._sidedatafile, self._docket.sidedata_end
2173 2184 )
2174 2185
2175 2186 # opening the index file.
2176 2187 isize = r * self.index.entry_size
2177 2188 ifh = self.__index_write_fp()
2178 2189 if self._inline:
2179 2190 transaction.add(self._indexfile, dsize + isize)
2180 2191 else:
2181 2192 transaction.add(self._indexfile, isize)
2182 2193 # exposing all file handles for writing.
2183 2194 self._writinghandles = (ifh, dfh, sdfh)
2184 2195 self._segmentfile.writing_handle = ifh if self._inline else dfh
2185 2196 self._segmentfile_sidedata.writing_handle = sdfh
2186 2197 yield
2187 2198 if self._docket is not None:
2188 2199 self._write_docket(transaction)
2189 2200 finally:
2190 2201 self._writinghandles = None
2191 2202 self._segmentfile.writing_handle = None
2192 2203 self._segmentfile_sidedata.writing_handle = None
2193 2204 if dfh is not None:
2194 2205 dfh.close()
2195 2206 if sdfh is not None:
2196 2207 sdfh.close()
2197 2208 # closing the index file last to avoid exposing references to
2198 2209 # potentially unflushed data content.
2199 2210 if ifh is not None:
2200 2211 ifh.close()
2201 2212
2202 2213 def _write_docket(self, transaction):
2203 2214 """write the current docket on disk
2204 2215
2205 2216 Exists as a method to help the changelog implement transaction logic
2206 2217
2207 2218 We could also imagine using the same transaction logic for all revlogs
2208 2219 since dockets are cheap."""
2209 2220 self._docket.write(transaction)
2210 2221
2211 2222 def addrevision(
2212 2223 self,
2213 2224 text,
2214 2225 transaction,
2215 2226 link,
2216 2227 p1,
2217 2228 p2,
2218 2229 cachedelta=None,
2219 2230 node=None,
2220 2231 flags=REVIDX_DEFAULT_FLAGS,
2221 2232 deltacomputer=None,
2222 2233 sidedata=None,
2223 2234 ):
2224 2235 """add a revision to the log
2225 2236
2226 2237 text - the revision data to add
2227 2238 transaction - the transaction object used for rollback
2228 2239 link - the linkrev data to add
2229 2240 p1, p2 - the parent nodeids of the revision
2230 2241 cachedelta - an optional precomputed delta
2231 2242 node - nodeid of revision; typically node is not specified, and it is
2232 2243 computed by default as hash(text, p1, p2); however, subclasses might
2233 2244 use a different hashing method (and override checkhash() in such a case)
2234 2245 flags - the known flags to set on the revision
2235 2246 deltacomputer - an optional deltacomputer instance shared between
2236 2247 multiple calls
2237 2248 """
2238 2249 if link == nullrev:
2239 2250 raise error.RevlogError(
2240 2251 _(b"attempted to add linkrev -1 to %s") % self.display_id
2241 2252 )
2242 2253
2243 2254 if sidedata is None:
2244 2255 sidedata = {}
2245 2256 elif sidedata and not self.hassidedata:
2246 2257 raise error.ProgrammingError(
2247 2258 _(b"trying to add sidedata to a revlog who don't support them")
2248 2259 )
2249 2260
2250 2261 if flags:
2251 2262 node = node or self.hash(text, p1, p2)
2252 2263
2253 2264 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2254 2265
2255 2266 # If the flag processor modifies the revision data, ignore any provided
2256 2267 # cachedelta.
2257 2268 if rawtext != text:
2258 2269 cachedelta = None
2259 2270
2260 2271 if len(rawtext) > _maxentrysize:
2261 2272 raise error.RevlogError(
2262 2273 _(
2263 2274 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2264 2275 )
2265 2276 % (self.display_id, len(rawtext))
2266 2277 )
2267 2278
2268 2279 node = node or self.hash(rawtext, p1, p2)
2269 2280 rev = self.index.get_rev(node)
2270 2281 if rev is not None:
2271 2282 return rev
2272 2283
2273 2284 if validatehash:
2274 2285 self.checkhash(rawtext, node, p1=p1, p2=p2)
2275 2286
2276 2287 return self.addrawrevision(
2277 2288 rawtext,
2278 2289 transaction,
2279 2290 link,
2280 2291 p1,
2281 2292 p2,
2282 2293 node,
2283 2294 flags,
2284 2295 cachedelta=cachedelta,
2285 2296 deltacomputer=deltacomputer,
2286 2297 sidedata=sidedata,
2287 2298 )
2288 2299
2289 2300 def addrawrevision(
2290 2301 self,
2291 2302 rawtext,
2292 2303 transaction,
2293 2304 link,
2294 2305 p1,
2295 2306 p2,
2296 2307 node,
2297 2308 flags,
2298 2309 cachedelta=None,
2299 2310 deltacomputer=None,
2300 2311 sidedata=None,
2301 2312 ):
2302 2313 """add a raw revision with known flags, node and parents
2303 2314 useful when reusing a revision not stored in this revlog (e.g. received
2304 2315 over the wire, or read from an external bundle).
2305 2316 """
2306 2317 with self._writing(transaction):
2307 2318 return self._addrevision(
2308 2319 node,
2309 2320 rawtext,
2310 2321 transaction,
2311 2322 link,
2312 2323 p1,
2313 2324 p2,
2314 2325 flags,
2315 2326 cachedelta,
2316 2327 deltacomputer=deltacomputer,
2317 2328 sidedata=sidedata,
2318 2329 )
2319 2330
2320 2331 def compress(self, data):
2321 2332 """Generate a possibly-compressed representation of data."""
2322 2333 if not data:
2323 2334 return b'', data
2324 2335
2325 2336 compressed = self._compressor.compress(data)
2326 2337
2327 2338 if compressed:
2328 2339 # The revlog compressor added the header in the returned data.
2329 2340 return b'', compressed
2330 2341
2331 2342 if data[0:1] == b'\0':
2332 2343 return b'', data
2333 2344 return b'u', data
2334 2345
2335 2346 def decompress(self, data):
2336 2347 """Decompress a revlog chunk.
2337 2348
2338 2349 The chunk is expected to begin with a header identifying the
2339 2350 format type so it can be routed to an appropriate decompressor.
2340 2351 """
2341 2352 if not data:
2342 2353 return data
2343 2354
2344 2355 # Revlogs are read much more frequently than they are written and many
2345 2356 # chunks only take microseconds to decompress, so performance is
2346 2357 # important here.
2347 2358 #
2348 2359 # We can make a few assumptions about revlogs:
2349 2360 #
2350 2361 # 1) the majority of chunks will be compressed (as opposed to inline
2351 2362 # raw data).
2352 2363 # 2) decompressing *any* data will likely be at least 10x slower than
2353 2364 # returning raw inline data.
2354 2365 # 3) we want to prioritize common and officially supported compression
2355 2366 # engines
2356 2367 #
2357 2368 # It follows that we want to optimize for "decompress compressed data
2358 2369 # when encoded with common and officially supported compression engines"
2359 2370 # case over "raw data" and "data encoded by less common or non-official
2360 2371 # compression engines." That is why we have the inline lookup first
2361 2372 # followed by the compengines lookup.
2362 2373 #
2363 2374 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2364 2375 # compressed chunks. And this matters for changelog and manifest reads.
2365 2376 t = data[0:1]
2366 2377
2367 2378 if t == b'x':
2368 2379 try:
2369 2380 return _zlibdecompress(data)
2370 2381 except zlib.error as e:
2371 2382 raise error.RevlogError(
2372 2383 _(b'revlog decompress error: %s')
2373 2384 % stringutil.forcebytestr(e)
2374 2385 )
2375 2386 # '\0' is more common than 'u' so it goes first.
2376 2387 elif t == b'\0':
2377 2388 return data
2378 2389 elif t == b'u':
2379 2390 return util.buffer(data, 1)
2380 2391
2381 2392 compressor = self._get_decompressor(t)
2382 2393
2383 2394 return compressor.decompress(data)
2384 2395
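# Sketch of the chunk-header contract shared by compress() and decompress()
# above: the first byte of a stored chunk routes the payload.
#
#   import zlib
#
#   def route_sketch(data):
#       t = data[0:1]
#       if t == b'x':
#           return zlib.decompress(data)  # zlib streams start with 0x78 'x'
#       elif t == b'\0':
#           return data                   # raw data stored verbatim
#       elif t == b'u':
#           return data[1:]               # explicit "uncompressed" header
#       raise LookupError(t)              # delegate to another engine
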
2385 2396 def _addrevision(
2386 2397 self,
2387 2398 node,
2388 2399 rawtext,
2389 2400 transaction,
2390 2401 link,
2391 2402 p1,
2392 2403 p2,
2393 2404 flags,
2394 2405 cachedelta,
2395 2406 alwayscache=False,
2396 2407 deltacomputer=None,
2397 2408 sidedata=None,
2398 2409 ):
2399 2410 """internal function to add revisions to the log
2400 2411
2401 2412 see addrevision for argument descriptions.
2402 2413
2403 2414 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2404 2415
2405 2416 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2406 2417 be used.
2407 2418
2408 2419 invariants:
2409 2420 - rawtext is optional (can be None); if not set, cachedelta must be set.
2410 2421 if both are set, they must correspond to each other.
2411 2422 """
2412 2423 if node == self.nullid:
2413 2424 raise error.RevlogError(
2414 2425 _(b"%s: attempt to add null revision") % self.display_id
2415 2426 )
2416 2427 if (
2417 2428 node == self.nodeconstants.wdirid
2418 2429 or node in self.nodeconstants.wdirfilenodeids
2419 2430 ):
2420 2431 raise error.RevlogError(
2421 2432 _(b"%s: attempt to add wdir revision") % self.display_id
2422 2433 )
2423 2434 if self._writinghandles is None:
2424 2435 msg = b'adding revision outside `revlog._writing` context'
2425 2436 raise error.ProgrammingError(msg)
2426 2437
2427 2438 if self._inline:
2428 2439 fh = self._writinghandles[0]
2429 2440 else:
2430 2441 fh = self._writinghandles[1]
2431 2442
2432 2443 btext = [rawtext]
2433 2444
2434 2445 curr = len(self)
2435 2446 prev = curr - 1
2436 2447
2437 2448 offset = self._get_data_offset(prev)
2438 2449
2439 2450 if self._concurrencychecker:
2440 2451 ifh, dfh, sdfh = self._writinghandles
2441 2452 # XXX no checking for the sidedata file
2442 2453 if self._inline:
2443 2454 # offset is "as if" it were in the .d file, so we need to add on
2444 2455 # the size of the entry metadata.
2445 2456 self._concurrencychecker(
2446 2457 ifh, self._indexfile, offset + curr * self.index.entry_size
2447 2458 )
2448 2459 else:
2449 2460 # Entries in the .i are a consistent size.
2450 2461 self._concurrencychecker(
2451 2462 ifh, self._indexfile, curr * self.index.entry_size
2452 2463 )
2453 2464 self._concurrencychecker(dfh, self._datafile, offset)
2454 2465
2455 2466 p1r, p2r = self.rev(p1), self.rev(p2)
2456 2467
2457 2468 # full versions are inserted when the needed deltas
2458 2469 # become comparable to the uncompressed text
2459 2470 if rawtext is None:
2460 2471 # need rawtext size, before it is changed by flag processors, which is
2461 2472 # the non-raw size. use revlog explicitly to avoid filelog's extra
2462 2473 # logic that might remove metadata size.
2463 2474 textlen = mdiff.patchedsize(
2464 2475 revlog.size(self, cachedelta[0]), cachedelta[1]
2465 2476 )
2466 2477 else:
2467 2478 textlen = len(rawtext)
2468 2479
2469 2480 if deltacomputer is None:
2470 2481 write_debug = None
2471 2482 if self._debug_delta:
2472 2483 write_debug = transaction._report
2473 2484 deltacomputer = deltautil.deltacomputer(
2474 2485 self, write_debug=write_debug
2475 2486 )
2476 2487
2477 2488 if cachedelta is not None and len(cachedelta) == 2:
2478 2489 # If the cached delta has no information about how it should be
2479 2490 # reused, add the default reuse instruction according to the
2480 2491 # revlog's configuration.
2481 2492 if self._generaldelta and self._lazydeltabase:
2482 2493 delta_base_reuse = DELTA_BASE_REUSE_TRY
2483 2494 else:
2484 2495 delta_base_reuse = DELTA_BASE_REUSE_NO
2485 2496 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2486 2497
2487 2498 revinfo = revlogutils.revisioninfo(
2488 2499 node,
2489 2500 p1,
2490 2501 p2,
2491 2502 btext,
2492 2503 textlen,
2493 2504 cachedelta,
2494 2505 flags,
2495 2506 )
2496 2507
2497 2508 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2498 2509
2499 2510 compression_mode = COMP_MODE_INLINE
2500 2511 if self._docket is not None:
2501 2512 default_comp = self._docket.default_compression_header
2502 2513 r = deltautil.delta_compression(default_comp, deltainfo)
2503 2514 compression_mode, deltainfo = r
2504 2515
2505 2516 sidedata_compression_mode = COMP_MODE_INLINE
2506 2517 if sidedata and self.hassidedata:
2507 2518 sidedata_compression_mode = COMP_MODE_PLAIN
2508 2519 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2509 2520 sidedata_offset = self._docket.sidedata_end
2510 2521 h, comp_sidedata = self.compress(serialized_sidedata)
2511 2522 if (
2512 2523 h != b'u'
2513 2524 and comp_sidedata[0:1] != b'\0'
2514 2525 and len(comp_sidedata) < len(serialized_sidedata)
2515 2526 ):
2516 2527 assert not h
2517 2528 if (
2518 2529 comp_sidedata[0:1]
2519 2530 == self._docket.default_compression_header
2520 2531 ):
2521 2532 sidedata_compression_mode = COMP_MODE_DEFAULT
2522 2533 serialized_sidedata = comp_sidedata
2523 2534 else:
2524 2535 sidedata_compression_mode = COMP_MODE_INLINE
2525 2536 serialized_sidedata = comp_sidedata
2526 2537 else:
2527 2538 serialized_sidedata = b""
2528 2539 # Don't store the offset if the sidedata is empty; that way
2529 2540 # we can easily detect empty sidedata, and it will be no different
2530 2541 # from sidedata we add manually.
2531 2542 sidedata_offset = 0
2532 2543
2533 2544 rank = RANK_UNKNOWN
2534 2545 if self._compute_rank:
2535 2546 if (p1r, p2r) == (nullrev, nullrev):
2536 2547 rank = 1
2537 2548 elif p1r != nullrev and p2r == nullrev:
2538 2549 rank = 1 + self.fast_rank(p1r)
2539 2550 elif p1r == nullrev and p2r != nullrev:
2540 2551 rank = 1 + self.fast_rank(p2r)
2541 2552 else: # merge node
2542 2553 if rustdagop is not None and self.index.rust_ext_compat:
2543 2554 rank = rustdagop.rank(self.index, p1r, p2r)
2544 2555 else:
2545 2556 pmin, pmax = sorted((p1r, p2r))
2546 2557 rank = 1 + self.fast_rank(pmax)
2547 2558 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2548 2559
2549 2560 e = revlogutils.entry(
2550 2561 flags=flags,
2551 2562 data_offset=offset,
2552 2563 data_compressed_length=deltainfo.deltalen,
2553 2564 data_uncompressed_length=textlen,
2554 2565 data_compression_mode=compression_mode,
2555 2566 data_delta_base=deltainfo.base,
2556 2567 link_rev=link,
2557 2568 parent_rev_1=p1r,
2558 2569 parent_rev_2=p2r,
2559 2570 node_id=node,
2560 2571 sidedata_offset=sidedata_offset,
2561 2572 sidedata_compressed_length=len(serialized_sidedata),
2562 2573 sidedata_compression_mode=sidedata_compression_mode,
2563 2574 rank=rank,
2564 2575 )
2565 2576
2566 2577 self.index.append(e)
2567 2578 entry = self.index.entry_binary(curr)
2568 2579 if curr == 0 and self._docket is None:
2569 2580 header = self._format_flags | self._format_version
2570 2581 header = self.index.pack_header(header)
2571 2582 entry = header + entry
2572 2583 self._writeentry(
2573 2584 transaction,
2574 2585 entry,
2575 2586 deltainfo.data,
2576 2587 link,
2577 2588 offset,
2578 2589 serialized_sidedata,
2579 2590 sidedata_offset,
2580 2591 )
2581 2592
2582 2593 rawtext = btext[0]
2583 2594
2584 2595 if alwayscache and rawtext is None:
2585 2596 rawtext = deltacomputer.buildtext(revinfo, fh)
2586 2597
2587 2598 if type(rawtext) == bytes: # only accept immutable objects
2588 2599 self._revisioncache = (node, curr, rawtext)
2589 2600 self._chainbasecache[curr] = deltainfo.chainbase
2590 2601 return curr
2591 2602
2592 2603 def _get_data_offset(self, prev):
2593 2604 """Returns the current offset in the (in-transaction) data file.
2594 2605 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2595 2606 file to store that information: since sidedata can be rewritten to the
2596 2607 end of the data file within a transaction, you can have cases where, for
2597 2608 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2598 2609 to `n - 1`'s sidedata being written after `n`'s data.
2599 2610
2600 2611 TODO cache this in a docket file before getting out of experimental."""
2601 2612 if self._docket is None:
2602 2613 return self.end(prev)
2603 2614 else:
2604 2615 return self._docket.data_end
2605 2616
2606 2617 def _writeentry(
2607 2618 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2608 2619 ):
2609 2620 # Files opened in a+ mode have inconsistent behavior on various
2610 2621 # platforms. Windows requires that a file positioning call be made
2611 2622 # when the file handle transitions between reads and writes. See
2612 2623 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2613 2624 # platforms, Python or the platform itself can be buggy. Some versions
2614 2625 # of Solaris have been observed to not append at the end of the file
2615 2626 # if the file was seeked to before the end. See issue4943 for more.
2616 2627 #
2617 2628 # We work around this issue by inserting a seek() before writing.
2618 2629 # Note: This is likely not necessary on Python 3. However, because
2619 2630 # the file handle is reused for reads and may be seeked there, we need
2620 2631 # to be careful before changing this.
2621 2632 if self._writinghandles is None:
2622 2633 msg = b'adding revision outside `revlog._writing` context'
2623 2634 raise error.ProgrammingError(msg)
2624 2635 ifh, dfh, sdfh = self._writinghandles
2625 2636 if self._docket is None:
2626 2637 ifh.seek(0, os.SEEK_END)
2627 2638 else:
2628 2639 ifh.seek(self._docket.index_end, os.SEEK_SET)
2629 2640 if dfh:
2630 2641 if self._docket is None:
2631 2642 dfh.seek(0, os.SEEK_END)
2632 2643 else:
2633 2644 dfh.seek(self._docket.data_end, os.SEEK_SET)
2634 2645 if sdfh:
2635 2646 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2636 2647
2637 2648 curr = len(self) - 1
2638 2649 if not self._inline:
2639 2650 transaction.add(self._datafile, offset)
2640 2651 if self._sidedatafile:
2641 2652 transaction.add(self._sidedatafile, sidedata_offset)
2642 2653 transaction.add(self._indexfile, curr * len(entry))
2643 2654 if data[0]:
2644 2655 dfh.write(data[0])
2645 2656 dfh.write(data[1])
2646 2657 if sidedata:
2647 2658 sdfh.write(sidedata)
2648 2659 ifh.write(entry)
2649 2660 else:
2650 2661 offset += curr * self.index.entry_size
2651 2662 transaction.add(self._indexfile, offset)
2652 2663 ifh.write(entry)
2653 2664 ifh.write(data[0])
2654 2665 ifh.write(data[1])
2655 2666 assert not sidedata
2656 2667 self._enforceinlinesize(transaction)
2657 2668 if self._docket is not None:
2658 2669 # revlog-v2 always has 3 writing handles, help Pytype
2659 2670 wh1 = self._writinghandles[0]
2660 2671 wh2 = self._writinghandles[1]
2661 2672 wh3 = self._writinghandles[2]
2662 2673 assert wh1 is not None
2663 2674 assert wh2 is not None
2664 2675 assert wh3 is not None
2665 2676 self._docket.index_end = wh1.tell()
2666 2677 self._docket.data_end = wh2.tell()
2667 2678 self._docket.sidedata_end = wh3.tell()
2668 2679
2669 2680 nodemaputil.setup_persistent_nodemap(transaction, self)
2670 2681
2671 2682 def addgroup(
2672 2683 self,
2673 2684 deltas,
2674 2685 linkmapper,
2675 2686 transaction,
2676 2687 alwayscache=False,
2677 2688 addrevisioncb=None,
2678 2689 duplicaterevisioncb=None,
2679 2690 debug_info=None,
2680 2691 delta_base_reuse_policy=None,
2681 2692 ):
2682 2693 """
2683 2694 add a delta group
2684 2695
2685 2696 Given a set of deltas, add them to the revision log. The
2686 2697 first delta is against its parent, which should be in our
2687 2698 log; the rest are against the previous delta.
2688 2699
2689 2700 If ``addrevisioncb`` is defined, it will be called with arguments of
2690 2701 this revlog and the node that was added.
2691 2702 """
2692 2703
2693 2704 if self._adding_group:
2694 2705 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2695 2706
2696 2707 # read the default delta-base reuse policy from revlog config if the
2697 2708 # group did not specify one.
2698 2709 if delta_base_reuse_policy is None:
2699 2710 if self._generaldelta and self._lazydeltabase:
2700 2711 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2701 2712 else:
2702 2713 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2703 2714
2704 2715 self._adding_group = True
2705 2716 empty = True
2706 2717 try:
2707 2718 with self._writing(transaction):
2708 2719 write_debug = None
2709 2720 if self._debug_delta:
2710 2721 write_debug = transaction._report
2711 2722 deltacomputer = deltautil.deltacomputer(
2712 2723 self,
2713 2724 write_debug=write_debug,
2714 2725 debug_info=debug_info,
2715 2726 )
2716 2727 # loop through our set of deltas
2717 2728 for data in deltas:
2718 2729 (
2719 2730 node,
2720 2731 p1,
2721 2732 p2,
2722 2733 linknode,
2723 2734 deltabase,
2724 2735 delta,
2725 2736 flags,
2726 2737 sidedata,
2727 2738 ) = data
2728 2739 link = linkmapper(linknode)
2729 2740 flags = flags or REVIDX_DEFAULT_FLAGS
2730 2741
2731 2742 rev = self.index.get_rev(node)
2732 2743 if rev is not None:
2733 2744 # this can happen if two branches make the same change
2734 2745 self._nodeduplicatecallback(transaction, rev)
2735 2746 if duplicaterevisioncb:
2736 2747 duplicaterevisioncb(self, rev)
2737 2748 empty = False
2738 2749 continue
2739 2750
2740 2751 for p in (p1, p2):
2741 2752 if not self.index.has_node(p):
2742 2753 raise error.LookupError(
2743 2754 p, self.radix, _(b'unknown parent')
2744 2755 )
2745 2756
2746 2757 if not self.index.has_node(deltabase):
2747 2758 raise error.LookupError(
2748 2759 deltabase, self.display_id, _(b'unknown delta base')
2749 2760 )
2750 2761
2751 2762 baserev = self.rev(deltabase)
2752 2763
2753 2764 if baserev != nullrev and self.iscensored(baserev):
2754 2765 # if base is censored, the delta must be a full replacement in a
2755 2766 # single patch operation
2756 2767 hlen = struct.calcsize(b">lll")
2757 2768 oldlen = self.rawsize(baserev)
2758 2769 newlen = len(delta) - hlen
2759 2770 if delta[:hlen] != mdiff.replacediffheader(
2760 2771 oldlen, newlen
2761 2772 ):
2762 2773 raise error.CensoredBaseError(
2763 2774 self.display_id, self.node(baserev)
2764 2775 )
2765 2776
2766 2777 if not flags and self._peek_iscensored(baserev, delta):
2767 2778 flags |= REVIDX_ISCENSORED
2768 2779
2769 2780 # We assume consumers of addrevisioncb will want to retrieve
2770 2781 # the added revision, which will require a call to
2771 2782 # revision(). revision() will fast path if there is a cache
2772 2783 # hit. So, we tell _addrevision() to always cache in this case.
2773 2784 # We're only using addgroup() in the context of changegroup
2774 2785 # generation so the revision data can always be handled as raw
2775 2786 # by the flagprocessor.
2776 2787 rev = self._addrevision(
2777 2788 node,
2778 2789 None,
2779 2790 transaction,
2780 2791 link,
2781 2792 p1,
2782 2793 p2,
2783 2794 flags,
2784 2795 (baserev, delta, delta_base_reuse_policy),
2785 2796 alwayscache=alwayscache,
2786 2797 deltacomputer=deltacomputer,
2787 2798 sidedata=sidedata,
2788 2799 )
2789 2800
2790 2801 if addrevisioncb:
2791 2802 addrevisioncb(self, rev)
2792 2803 empty = False
2793 2804 finally:
2794 2805 self._adding_group = False
2795 2806 return not empty
2796 2807
2797 2808 def iscensored(self, rev):
2798 2809 """Check if a file revision is censored."""
2799 2810 if not self._censorable:
2800 2811 return False
2801 2812
2802 2813 return self.flags(rev) & REVIDX_ISCENSORED
2803 2814
2804 2815 def _peek_iscensored(self, baserev, delta):
2805 2816 """Quickly check if a delta produces a censored revision."""
2806 2817 if not self._censorable:
2807 2818 return False
2808 2819
2809 2820 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2810 2821
2811 2822 def getstrippoint(self, minlink):
2812 2823 """find the minimum rev that must be stripped to strip the linkrev
2813 2824
2814 2825 Returns a tuple containing the minimum rev and a set of all revs that
2815 2826 have linkrevs that will be broken by this strip.
2816 2827 """
2817 2828 return storageutil.resolvestripinfo(
2818 2829 minlink,
2819 2830 len(self) - 1,
2820 2831 self.headrevs(),
2821 2832 self.linkrev,
2822 2833 self.parentrevs,
2823 2834 )
2824 2835
2825 2836 def strip(self, minlink, transaction):
2826 2837 """truncate the revlog on the first revision with a linkrev >= minlink
2827 2838
2828 2839 This function is called when we're stripping revision minlink and
2829 2840 its descendants from the repository.
2830 2841
2831 2842 We have to remove all revisions with linkrev >= minlink, because
2832 2843 the equivalent changelog revisions will be renumbered after the
2833 2844 strip.
2834 2845
2835 2846 So we truncate the revlog on the first of these revisions, and
2836 2847 trust that the caller has saved the revisions that shouldn't be
2837 2848 removed and that it'll re-add them after this truncation.
2838 2849 """
2839 2850 if len(self) == 0:
2840 2851 return
2841 2852
2842 2853 rev, _ = self.getstrippoint(minlink)
2843 2854 if rev == len(self):
2844 2855 return
2845 2856
2846 2857 # first truncate the files on disk
2847 2858 data_end = self.start(rev)
2848 2859 if not self._inline:
2849 2860 transaction.add(self._datafile, data_end)
2850 2861 end = rev * self.index.entry_size
2851 2862 else:
2852 2863 end = data_end + (rev * self.index.entry_size)
2853 2864
2854 2865 if self._sidedatafile:
2855 2866 sidedata_end = self.sidedata_cut_off(rev)
2856 2867 transaction.add(self._sidedatafile, sidedata_end)
2857 2868
2858 2869 transaction.add(self._indexfile, end)
2859 2870 if self._docket is not None:
2860 2871 # XXX we could, leverage the docket while stripping. However it is
2861 2872 # not powerfull enough at the time of this comment
2862 2873 self._docket.index_end = end
2863 2874 self._docket.data_end = data_end
2864 2875 self._docket.sidedata_end = sidedata_end
2865 2876 self._docket.write(transaction, stripping=True)
2866 2877
2867 2878 # then reset internal state in memory to forget those revisions
2868 2879 self._revisioncache = None
2869 2880 self._chaininfocache = util.lrucachedict(500)
2870 2881 self._segmentfile.clear_cache()
2871 2882 self._segmentfile_sidedata.clear_cache()
2872 2883
2873 2884 del self.index[rev:-1]
2874 2885
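# Illustrative flow, assuming a hypothetical revlog `rl` and transaction
# `tr`: callers first ask where truncation must happen, save the revisions
# above that point elsewhere, then strip.
#
#   rev, broken = rl.getstrippoint(minlink)  # broken: revs whose linkrevs
#                                            # this strip will invalidate
#   # ... caller bundles/saves revisions >= rev ...
#   rl.strip(minlink, tr)                    # truncates the files at `rev`
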
2875 2886 def checksize(self):
2876 2887 """Check size of index and data files
2877 2888
2878 2889 return a (dd, di) tuple.
2879 2890 - dd: extra bytes for the "data" file
2880 2891 - di: extra bytes for the "index" file
2881 2892
2882 2893 A healthy revlog will return (0, 0).
2883 2894 """
2884 2895 expected = 0
2885 2896 if len(self):
2886 2897 expected = max(0, self.end(len(self) - 1))
2887 2898
2888 2899 try:
2889 2900 with self._datafp() as f:
2890 2901 f.seek(0, io.SEEK_END)
2891 2902 actual = f.tell()
2892 2903 dd = actual - expected
2893 2904 except FileNotFoundError:
2894 2905 dd = 0
2895 2906
2896 2907 try:
2897 2908 f = self.opener(self._indexfile)
2898 2909 f.seek(0, io.SEEK_END)
2899 2910 actual = f.tell()
2900 2911 f.close()
2901 2912 s = self.index.entry_size
2902 2913 i = max(0, actual // s)
2903 2914 di = actual - (i * s)
2904 2915 if self._inline:
2905 2916 databytes = 0
2906 2917 for r in self:
2907 2918 databytes += max(0, self.length(r))
2908 2919 dd = 0
2909 2920 di = actual - len(self) * s - databytes
2910 2921 except FileNotFoundError:
2911 2922 di = 0
2912 2923
2913 2924 return (dd, di)
2914 2925
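# Illustrative reading of the (dd, di) tuple, assuming a hypothetical
# revlog `rl`:
#
#   dd, di = rl.checksize()
#   assert (dd, di) == (0, 0), 'revlog has trailing/missing bytes'
#   # dd != 0: the data file does not end where the index says it should
#   # di != 0: the index file holds extra bytes beyond whole entries (for
#   #          inline revlogs, beyond entries plus interleaved data)
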
2915 2926 def files(self):
2916 2927 res = [self._indexfile]
2917 2928 if self._docket_file is None:
2918 2929 if not self._inline:
2919 2930 res.append(self._datafile)
2920 2931 else:
2921 2932 res.append(self._docket_file)
2922 2933 res.extend(self._docket.old_index_filepaths(include_empty=False))
2923 2934 if self._docket.data_end:
2924 2935 res.append(self._datafile)
2925 2936 res.extend(self._docket.old_data_filepaths(include_empty=False))
2926 2937 if self._docket.sidedata_end:
2927 2938 res.append(self._sidedatafile)
2928 2939 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2929 2940 return res
2930 2941
2931 2942 def emitrevisions(
2932 2943 self,
2933 2944 nodes,
2934 2945 nodesorder=None,
2935 2946 revisiondata=False,
2936 2947 assumehaveparentrevisions=False,
2937 2948 deltamode=repository.CG_DELTAMODE_STD,
2938 2949 sidedata_helpers=None,
2939 2950 debug_info=None,
2940 2951 ):
2941 2952 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2942 2953 raise error.ProgrammingError(
2943 2954 b'unhandled value for nodesorder: %s' % nodesorder
2944 2955 )
2945 2956
2946 2957 if nodesorder is None and not self._generaldelta:
2947 2958 nodesorder = b'storage'
2948 2959
2949 2960 if (
2950 2961 not self._storedeltachains
2951 2962 and deltamode != repository.CG_DELTAMODE_PREV
2952 2963 ):
2953 2964 deltamode = repository.CG_DELTAMODE_FULL
2954 2965
2955 2966 return storageutil.emitrevisions(
2956 2967 self,
2957 2968 nodes,
2958 2969 nodesorder,
2959 2970 revlogrevisiondelta,
2960 2971 deltaparentfn=self.deltaparent,
2961 2972 candeltafn=self.candelta,
2962 2973 rawsizefn=self.rawsize,
2963 2974 revdifffn=self.revdiff,
2964 2975 flagsfn=self.flags,
2965 2976 deltamode=deltamode,
2966 2977 revisiondata=revisiondata,
2967 2978 assumehaveparentrevisions=assumehaveparentrevisions,
2968 2979 sidedata_helpers=sidedata_helpers,
2969 2980 debug_info=debug_info,
2970 2981 )
2971 2982
2972 2983 DELTAREUSEALWAYS = b'always'
2973 2984 DELTAREUSESAMEREVS = b'samerevs'
2974 2985 DELTAREUSENEVER = b'never'
2975 2986
2976 2987 DELTAREUSEFULLADD = b'fulladd'
2977 2988
2978 2989 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2979 2990
2980 2991 def clone(
2981 2992 self,
2982 2993 tr,
2983 2994 destrevlog,
2984 2995 addrevisioncb=None,
2985 2996 deltareuse=DELTAREUSESAMEREVS,
2986 2997 forcedeltabothparents=None,
2987 2998 sidedata_helpers=None,
2988 2999 ):
2989 3000 """Copy this revlog to another, possibly with format changes.
2990 3001
2991 3002 The destination revlog will contain the same revisions and nodes.
2992 3003 However, it may not be bit-for-bit identical due to e.g. delta encoding
2993 3004 differences.
2994 3005
2995 3006 The ``deltareuse`` argument controls how deltas from the existing revlog
2996 3007 are preserved in the destination revlog. The argument can have the
2997 3008 following values:
2998 3009
2999 3010 DELTAREUSEALWAYS
3000 3011 Deltas will always be reused (if possible), even if the destination
3001 3012 revlog would not select the same revisions for the delta. This is the
3002 3013 fastest mode of operation.
3003 3014 DELTAREUSESAMEREVS
3004 3015 Deltas will be reused if the destination revlog would pick the same
3005 3016 revisions for the delta. This mode strikes a balance between speed
3006 3017 and optimization.
3007 3018 DELTAREUSENEVER
3008 3019 Deltas will never be reused. This is the slowest mode of execution.
3009 3020 This mode can be used to recompute deltas (e.g. if the diff/delta
3010 3021 algorithm changes).
3011 3022 DELTAREUSEFULLADD
3012 3023 Revisions will be re-added as if they were new content. This is
3013 3024 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3014 3025 e.g. large file detection and handling.
3015 3026
3016 3027 Delta computation can be slow, so the choice of delta reuse policy can
3017 3028 significantly affect run time.
3018 3029
3019 3030 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3020 3031 two extremes. Deltas will be reused if they are appropriate. But if the
3021 3032 delta could choose a better revision, it will do so. This means if you
3022 3033 are converting a non-generaldelta revlog to a generaldelta revlog,
3023 3034 deltas will be recomputed if the delta's parent isn't a parent of the
3024 3035 revision.
3025 3036
3026 3037 In addition to the delta policy, the ``forcedeltabothparents``
3027 3038 argument controls whether to force computing deltas against both parents
3028 3039 for merges. If unset, the destination revlog's current setting is used.
3029 3040
3030 3041 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3031 3042 `sidedata_helpers`.
3032 3043 """
3033 3044 if deltareuse not in self.DELTAREUSEALL:
3034 3045 raise ValueError(
3035 3046 _(b'value for deltareuse invalid: %s') % deltareuse
3036 3047 )
3037 3048
3038 3049 if len(destrevlog):
3039 3050 raise ValueError(_(b'destination revlog is not empty'))
3040 3051
3041 3052 if getattr(self, 'filteredrevs', None):
3042 3053 raise ValueError(_(b'source revlog has filtered revisions'))
3043 3054 if getattr(destrevlog, 'filteredrevs', None):
3044 3055 raise ValueError(_(b'destination revlog has filtered revisions'))
3045 3056
3046 3057 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3047 3058 # if possible.
3048 3059 oldlazydelta = destrevlog._lazydelta
3049 3060 oldlazydeltabase = destrevlog._lazydeltabase
3050 3061 oldamd = destrevlog._deltabothparents
3051 3062
3052 3063 try:
3053 3064 if deltareuse == self.DELTAREUSEALWAYS:
3054 3065 destrevlog._lazydeltabase = True
3055 3066 destrevlog._lazydelta = True
3056 3067 elif deltareuse == self.DELTAREUSESAMEREVS:
3057 3068 destrevlog._lazydeltabase = False
3058 3069 destrevlog._lazydelta = True
3059 3070 elif deltareuse == self.DELTAREUSENEVER:
3060 3071 destrevlog._lazydeltabase = False
3061 3072 destrevlog._lazydelta = False
3062 3073
3063 3074 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3064 3075
3065 3076 self._clone(
3066 3077 tr,
3067 3078 destrevlog,
3068 3079 addrevisioncb,
3069 3080 deltareuse,
3070 3081 forcedeltabothparents,
3071 3082 sidedata_helpers,
3072 3083 )
3073 3084
3074 3085 finally:
3075 3086 destrevlog._lazydelta = oldlazydelta
3076 3087 destrevlog._lazydeltabase = oldlazydeltabase
3077 3088 destrevlog._deltabothparents = oldamd
3078 3089
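# Illustrative use, assuming hypothetical source/destination revlogs `src`
# and `dst` (empty, unfiltered) and a transaction `tr`:
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)   # recompute deltas
#   # or, fastest path, reusing every existing delta:
#   # src.clone(tr, dst, deltareuse=src.DELTAREUSEALWAYS)
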
3079 3090 def _clone(
3080 3091 self,
3081 3092 tr,
3082 3093 destrevlog,
3083 3094 addrevisioncb,
3084 3095 deltareuse,
3085 3096 forcedeltabothparents,
3086 3097 sidedata_helpers,
3087 3098 ):
3088 3099 """perform the core duty of `revlog.clone` after parameter processing"""
3089 3100 write_debug = None
3090 3101 if self._debug_delta:
3091 3102 write_debug = tr._report
3092 3103 deltacomputer = deltautil.deltacomputer(
3093 3104 destrevlog,
3094 3105 write_debug=write_debug,
3095 3106 )
3096 3107 index = self.index
3097 3108 for rev in self:
3098 3109 entry = index[rev]
3099 3110
3100 3111 # Some classes override linkrev to take filtered revs into
3101 3112 # account. Use raw entry from index.
3102 3113 flags = entry[0] & 0xFFFF
3103 3114 linkrev = entry[4]
3104 3115 p1 = index[entry[5]][7]
3105 3116 p2 = index[entry[6]][7]
3106 3117 node = entry[7]
3107 3118
3108 3119 # (Possibly) reuse the delta from the revlog if allowed and
3109 3120 # the revlog chunk is a delta.
3110 3121 cachedelta = None
3111 3122 rawtext = None
3112 3123 if deltareuse == self.DELTAREUSEFULLADD:
3113 3124 text = self._revisiondata(rev)
3114 3125 sidedata = self.sidedata(rev)
3115 3126
3116 3127 if sidedata_helpers is not None:
3117 3128 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3118 3129 self, sidedata_helpers, sidedata, rev
3119 3130 )
3120 3131 flags = flags | new_flags[0] & ~new_flags[1]
3121 3132
3122 3133 destrevlog.addrevision(
3123 3134 text,
3124 3135 tr,
3125 3136 linkrev,
3126 3137 p1,
3127 3138 p2,
3128 3139 cachedelta=cachedelta,
3129 3140 node=node,
3130 3141 flags=flags,
3131 3142 deltacomputer=deltacomputer,
3132 3143 sidedata=sidedata,
3133 3144 )
3134 3145 else:
3135 3146 if destrevlog._lazydelta:
3136 3147 dp = self.deltaparent(rev)
3137 3148 if dp != nullrev:
3138 3149 cachedelta = (dp, bytes(self._chunk(rev)))
3139 3150
3140 3151 sidedata = None
3141 3152 if not cachedelta:
3142 3153 rawtext = self._revisiondata(rev)
3143 3154 sidedata = self.sidedata(rev)
3144 3155 if sidedata is None:
3145 3156 sidedata = self.sidedata(rev)
3146 3157
3147 3158 if sidedata_helpers is not None:
3148 3159 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3149 3160 self, sidedata_helpers, sidedata, rev
3150 3161 )
3151 3162 flags = flags | new_flags[0] & ~new_flags[1]
3152 3163
3153 3164 with destrevlog._writing(tr):
3154 3165 destrevlog._addrevision(
3155 3166 node,
3156 3167 rawtext,
3157 3168 tr,
3158 3169 linkrev,
3159 3170 p1,
3160 3171 p2,
3161 3172 flags,
3162 3173 cachedelta,
3163 3174 deltacomputer=deltacomputer,
3164 3175 sidedata=sidedata,
3165 3176 )
3166 3177
3167 3178 if addrevisioncb:
3168 3179 addrevisioncb(self, rev, node)
3169 3180
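# Aside: the repeated `flags | new_flags[0] & ~new_flags[1]` update above
# leans on Python operator precedence (~ binds tighter than &, and & tighter
# than |), i.e. it reads as flags | (to_add & ~to_remove). A self-checking
# toy example with made-up flag bits:
FLAG_A = 1 << 0  # hypothetical flags, for illustration only
FLAG_B = 1 << 1

flags = FLAG_A
to_add, to_remove = FLAG_A | FLAG_B, FLAG_B  # helper output: (add, remove)
merged = flags | to_add & ~to_remove
assert merged == flags | (to_add & ~to_remove) == FLAG_A
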
3170 3181 def censorrevision(self, tr, censornode, tombstone=b''):
3171 3182 if self._format_version == REVLOGV0:
3172 3183 raise error.RevlogError(
3173 3184 _(b'cannot censor with version %d revlogs')
3174 3185 % self._format_version
3175 3186 )
3176 3187 elif self._format_version == REVLOGV1:
3177 3188 rewrite.v1_censor(self, tr, censornode, tombstone)
3178 3189 else:
3179 3190 rewrite.v2_censor(self, tr, censornode, tombstone)
3180 3191
3181 3192 def verifyintegrity(self, state):
3182 3193 """Verifies the integrity of the revlog.
3183 3194
3184 3195 Yields ``revlogproblem`` instances describing problems that are
3185 3196 found.
3186 3197 """
3187 3198 dd, di = self.checksize()
3188 3199 if dd:
3189 3200 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3190 3201 if di:
3191 3202 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3192 3203
3193 3204 version = self._format_version
3194 3205
3195 3206 # The verifier tells us what version revlog we should be.
3196 3207 if version != state[b'expectedversion']:
3197 3208 yield revlogproblem(
3198 3209 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3199 3210 % (self.display_id, version, state[b'expectedversion'])
3200 3211 )
3201 3212
3202 3213 state[b'skipread'] = set()
3203 3214 state[b'safe_renamed'] = set()
3204 3215
3205 3216 for rev in self:
3206 3217 node = self.node(rev)
3207 3218
3208 3219 # Verify contents. 4 cases to care about:
3209 3220 #
3210 3221 # common: the most common case
3211 3222 # rename: with a rename
3212 3223 # meta: file content starts with b'\1\n', the metadata
3213 3224 # header defined in filelog.py, but without a rename
3214 3225 # ext: content stored externally
3215 3226 #
3216 3227 # More formally, their differences are shown below:
3217 3228 #
3218 3229 # | common | rename | meta | ext
3219 3230 # -------------------------------------------------------
3220 3231 # flags() | 0 | 0 | 0 | not 0
3221 3232 # renamed() | False | True | False | ?
3222 3233 # rawtext[0:2]=='\1\n'| False | True | True | ?
3223 3234 #
3224 3235 # "rawtext" means the raw text stored in revlog data, which
3225 3236 # could be retrieved by "rawdata(rev)". "text"
3226 3237 # mentioned below is "revision(rev)".
3227 3238 #
3228 3239 # There are 3 different lengths stored physically:
3229 3240 # 1. L1: rawsize, stored in revlog index
3230 3241 # 2. L2: len(rawtext), stored in revlog data
3231 3242 # 3. L3: len(text), stored in revlog data if flags==0, or
3232 3243 # possibly somewhere else if flags!=0
3233 3244 #
3234 3245 # L1 should be equal to L2. L3 could be different from them.
3235 3246 # "text" may or may not affect commit hash depending on flag
3236 3247 # processors (see flagutil.addflagprocessor).
3237 3248 #
3238 3249 # | common | rename | meta | ext
3239 3250 # -------------------------------------------------
3240 3251 # rawsize() | L1 | L1 | L1 | L1
3241 3252 # size() | L1 | L2-LM | L1(*) | L1 (?)
3242 3253 # len(rawtext) | L2 | L2 | L2 | L2
3243 3254 # len(text) | L2 | L2 | L2 | L3
3244 3255 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3245 3256 #
3246 3257 # LM: length of metadata, depending on rawtext
3247 3258 # (*): not ideal, see comment in filelog.size
3248 3259 # (?): could be "- len(meta)" if the resolved content has
3249 3260 # rename metadata
3250 3261 #
3251 3262 # Checks that need to be done:
3252 3263 # 1. length check: L1 == L2, in all cases.
3253 3264 # 2. hash check: depending on flag processor, we may need to
3254 3265 # use either "text" (external), or "rawtext" (in revlog).
3255 3266
3256 3267 try:
3257 3268 skipflags = state.get(b'skipflags', 0)
3258 3269 if skipflags:
3259 3270 skipflags &= self.flags(rev)
3260 3271
3261 3272 _verify_revision(self, skipflags, state, node)
3262 3273
3263 3274 l1 = self.rawsize(rev)
3264 3275 l2 = len(self.rawdata(node))
3265 3276
3266 3277 if l1 != l2:
3267 3278 yield revlogproblem(
3268 3279 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3269 3280 node=node,
3270 3281 )
3271 3282
3272 3283 except error.CensoredNodeError:
3273 3284 if state[b'erroroncensored']:
3274 3285 yield revlogproblem(
3275 3286 error=_(b'censored file data'), node=node
3276 3287 )
3277 3288 state[b'skipread'].add(node)
3278 3289 except Exception as e:
3279 3290 yield revlogproblem(
3280 3291 error=_(b'unpacking %s: %s')
3281 3292 % (short(node), stringutil.forcebytestr(e)),
3282 3293 node=node,
3283 3294 )
3284 3295 state[b'skipread'].add(node)
3285 3296
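# Aside: check #1 from the comment block above (L1 == L2) reduces to
# comparing the index's recorded rawsize with the actual raw text length.
# A reduced sketch, assuming `rl` behaves like the revlog above:
def check_lengths(rl, rev, node):
    l1 = rl.rawsize(rev)        # L1: rawsize, stored in the revlog index
    l2 = len(rl.rawdata(node))  # L2: len(rawtext), stored in revlog data
    if l1 != l2:
        return b'unpacked size is %d, %d expected' % (l2, l1)
    return None
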
3286 3297 def storageinfo(
3287 3298 self,
3288 3299 exclusivefiles=False,
3289 3300 sharedfiles=False,
3290 3301 revisionscount=False,
3291 3302 trackedsize=False,
3292 3303 storedsize=False,
3293 3304 ):
3294 3305 d = {}
3295 3306
3296 3307 if exclusivefiles:
3297 3308 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3298 3309 if not self._inline:
3299 3310 d[b'exclusivefiles'].append((self.opener, self._datafile))
3300 3311
3301 3312 if sharedfiles:
3302 3313 d[b'sharedfiles'] = []
3303 3314
3304 3315 if revisionscount:
3305 3316 d[b'revisionscount'] = len(self)
3306 3317
3307 3318 if trackedsize:
3308 3319 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3309 3320
3310 3321 if storedsize:
3311 3322 d[b'storedsize'] = sum(
3312 3323 self.opener.stat(path).st_size for path in self.files()
3313 3324 )
3314 3325
3315 3326 return d
3316 3327
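# Aside: callers opt into each storageinfo() field; only the requested keys
# appear in the result. A hypothetical call against a revlog `rl`:
#
#     info = rl.storageinfo(revisionscount=True, trackedsize=True)
#     info[b'revisionscount']   # -> len(rl)
#     info[b'trackedsize']      # -> sum of rawsize over all revisions
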
3317 3328 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3318 3329 if not self.hassidedata:
3319 3330 return
3320 3331 # revlog formats with sidedata support do not support inline data
3321 3332 assert not self._inline
3322 3333 if not helpers[1] and not helpers[2]:
3323 3334 # Nothing to generate or remove
3324 3335 return
3325 3336
3326 3337 new_entries = []
3327 3338 # append the new sidedata
3328 3339 with self._writing(transaction):
3329 3340 ifh, dfh, sdfh = self._writinghandles
3330 3341 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3331 3342
3332 3343 current_offset = sdfh.tell()
3333 3344 for rev in range(startrev, endrev + 1):
3334 3345 entry = self.index[rev]
3335 3346 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3336 3347 store=self,
3337 3348 sidedata_helpers=helpers,
3338 3349 sidedata={},
3339 3350 rev=rev,
3340 3351 )
3341 3352
3342 3353 serialized_sidedata = sidedatautil.serialize_sidedata(
3343 3354 new_sidedata
3344 3355 )
3345 3356
3346 3357 sidedata_compression_mode = COMP_MODE_INLINE
3347 3358 if serialized_sidedata and self.hassidedata:
3348 3359 sidedata_compression_mode = COMP_MODE_PLAIN
3349 3360 h, comp_sidedata = self.compress(serialized_sidedata)
3350 3361 if (
3351 3362 h != b'u'
3352 3363 and comp_sidedata[0] != b'\0'
3353 3364 and len(comp_sidedata) < len(serialized_sidedata)
3354 3365 ):
3355 3366 assert not h
3356 3367 if (
3357 3368 comp_sidedata[0]
3358 3369 == self._docket.default_compression_header
3359 3370 ):
3360 3371 sidedata_compression_mode = COMP_MODE_DEFAULT
3361 3372 serialized_sidedata = comp_sidedata
3362 3373 else:
3363 3374 sidedata_compression_mode = COMP_MODE_INLINE
3364 3375 serialized_sidedata = comp_sidedata
3365 3376 if entry[8] != 0 or entry[9] != 0:
3366 3377 # rewriting entries that already have sidedata is not
3367 3378 # supported yet, because it introduces garbage data in the
3368 3379 # revlog.
3369 3380 msg = b"rewriting existing sidedata is not supported yet"
3370 3381 raise error.Abort(msg)
3371 3382
3372 3383 # Apply (potential) flags to add and to remove after running
3373 3384 # the sidedata helpers
3374 3385 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3375 3386 entry_update = (
3376 3387 current_offset,
3377 3388 len(serialized_sidedata),
3378 3389 new_offset_flags,
3379 3390 sidedata_compression_mode,
3380 3391 )
3381 3392
3382 3393 # the sidedata computation might have moved the file cursors around
3383 3394 sdfh.seek(current_offset, os.SEEK_SET)
3384 3395 sdfh.write(serialized_sidedata)
3385 3396 new_entries.append(entry_update)
3386 3397 current_offset += len(serialized_sidedata)
3387 3398 self._docket.sidedata_end = sdfh.tell()
3388 3399
3389 3400 # rewrite the new index entries
3390 3401 ifh.seek(startrev * self.index.entry_size)
3391 3402 for i, e in enumerate(new_entries):
3392 3403 rev = startrev + i
3393 3404 self.index.replace_sidedata_info(rev, *e)
3394 3405 packed = self.index.entry_binary(rev)
3395 3406 if rev == 0 and self._docket is None:
3396 3407 header = self._format_flags | self._format_version
3397 3408 header = self.index.pack_header(header)
3398 3409 packed = header + packed
3399 3410 ifh.write(packed)
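
# Aside: the compression-mode decision in rewrite_sidedata() above, pulled
# out as a standalone sketch (string stand-ins for the COMP_MODE_* constants,
# hypothetical `compress` callable, non-empty sidedata path only):
def pick_sidedata_mode(compress, raw, default_header):
    h, comp = compress(raw)
    # keep the compressed form only when it is headerless, unambiguous,
    # and actually smaller than the serialized sidedata
    if h != b'u' and comp[0:1] != b'\0' and len(comp) < len(raw):
        if comp[0:1] == default_header:
            return 'COMP_MODE_DEFAULT', comp  # engine implied by the docket
        return 'COMP_MODE_INLINE', comp       # engine named in the chunk
    return 'COMP_MODE_PLAIN', raw             # store uncompressed
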
@@ -1,757 +1,751 b''
1 1 #require no-reposimplestore no-chg
2 2
3 3 Set up a server
4 4
5 5 $ hg init server
6 6 $ cd server
7 7 $ cat >> .hg/hgrc << EOF
8 8 > [extensions]
9 9 > clonebundles =
10 10 > EOF
11 11
12 12 $ touch foo
13 13 $ hg -q commit -A -m 'add foo'
14 14 $ touch bar
15 15 $ hg -q commit -A -m 'add bar'
16 16
17 17 $ hg serve -d -p $HGPORT --pid-file hg.pid --accesslog access.log
18 18 $ cat hg.pid >> $DAEMON_PIDS
19 19 $ cd ..
20 20
21 21 Missing manifest should not result in server lookup
22 22
23 23 $ hg --verbose clone -U http://localhost:$HGPORT no-manifest
24 24 requesting all changes
25 25 adding changesets
26 26 adding manifests
27 27 adding file changes
28 28 added 2 changesets with 2 changes to 2 files
29 29 new changesets 53245c60e682:aaff8d2ffbbf
30 30 (sent 3 HTTP requests and * bytes; received * bytes in responses) (glob)
31 31
32 32 $ cat server/access.log
33 33 * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
34 34 $LOCALIP - - [$LOGDATE$] "GET /?cmd=batch HTTP/1.1" 200 - x-hgarg-1:cmds=heads+%3Bknown+nodes%3D x-hgproto-1:0.1 0.2 comp=$USUAL_COMPRESSIONS$ partial-pull (glob)
35 35 $LOCALIP - - [$LOGDATE$] "GET /?cmd=getbundle HTTP/1.1" 200 - x-hgarg-1:bookmarks=1&$USUAL_BUNDLE_CAPS$&cg=1&common=0000000000000000000000000000000000000000&heads=aaff8d2ffbbf07a46dd1f05d8ae7877e3f56e2a2&listkeys=bookmarks&phases=1 x-hgproto-1:0.1 0.2 comp=$USUAL_COMPRESSIONS$ partial-pull (glob)
36 36
37 37 Empty manifest file results in retrieval
38 38 (the extension only checks if the manifest file exists)
39 39
40 40 $ touch server/.hg/clonebundles.manifest
41 41 $ hg --verbose clone -U http://localhost:$HGPORT empty-manifest
42 42 no clone bundles available on remote; falling back to regular clone
43 43 requesting all changes
44 44 adding changesets
45 45 adding manifests
46 46 adding file changes
47 47 added 2 changesets with 2 changes to 2 files
48 48 new changesets 53245c60e682:aaff8d2ffbbf
49 49 (sent 4 HTTP requests and * bytes; received * bytes in responses) (glob)
50 50
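A clonebundles.manifest holds one entry per line: a URL followed by optional
space-separated KEY=VALUE attributes, with values percent-encoded. A minimal
parsing sketch in Python; this is illustrative, not Mercurial's own parser:

  from urllib.parse import unquote

  def parse_manifest(data):
      entries = []
      for line in data.splitlines():
          fields = line.split()
          if not fields:
              continue
          attrs = {k: unquote(v)
                   for k, _, v in (f.partition('=') for f in fields[1:]) if v}
          entries.append((fields[0], attrs))
      return entries
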
51 51 Manifest file with invalid URL aborts
52 52
53 53 $ echo 'http://does.not.exist/bundle.hg' > server/.hg/clonebundles.manifest
54 54 $ hg clone http://localhost:$HGPORT 404-url
55 55 applying clone bundle from http://does.not.exist/bundle.hg
56 56 error fetching bundle: (.* not known|(\[Errno -?\d+] )?([Nn]o address associated with (host)?name|Temporary failure in name resolution|Name does not resolve)) (re) (no-windows !)
57 57 error fetching bundle: [Errno 1100*] getaddrinfo failed (glob) (windows !)
58 58 abort: error applying bundle
59 59 (if this error persists, consider contacting the server operator or disable clone bundles via "--config ui.clonebundles=false")
60 60 [255]
61 61
62 62 Server is not running aborts
63 63
64 64 $ echo "http://localhost:$HGPORT1/bundle.hg" > server/.hg/clonebundles.manifest
65 65 $ hg clone http://localhost:$HGPORT server-not-runner
66 66 applying clone bundle from http://localhost:$HGPORT1/bundle.hg
67 67 error fetching bundle: (.* refused.*|Protocol not supported|(.* )?\$EADDRNOTAVAIL\$|.* No route to host) (re)
68 68 abort: error applying bundle
69 69 (if this error persists, consider contacting the server operator or disable clone bundles via "--config ui.clonebundles=false")
70 70 [255]
71 71
72 72 Server returns 404
73 73
74 74 $ "$PYTHON" $TESTDIR/dumbhttp.py -p $HGPORT1 --pid http.pid
75 75 $ cat http.pid >> $DAEMON_PIDS
76 76 $ hg clone http://localhost:$HGPORT running-404
77 77 applying clone bundle from http://localhost:$HGPORT1/bundle.hg
78 78 HTTP error fetching bundle: HTTP Error 404: File not found
79 79 abort: error applying bundle
80 80 (if this error persists, consider contacting the server operator or disable clone bundles via "--config ui.clonebundles=false")
81 81 [255]
82 82
83 83 We can override failure to fall back to regular clone
84 84
85 85 $ hg --config ui.clonebundlefallback=true clone -U http://localhost:$HGPORT 404-fallback
86 86 applying clone bundle from http://localhost:$HGPORT1/bundle.hg
87 87 HTTP error fetching bundle: HTTP Error 404: File not found
88 88 falling back to normal clone
89 89 requesting all changes
90 90 adding changesets
91 91 adding manifests
92 92 adding file changes
93 93 added 2 changesets with 2 changes to 2 files
94 94 new changesets 53245c60e682:aaff8d2ffbbf
95 95
96 96 Bundle with partial content works
97 97
98 98 $ hg -R server bundle --type gzip-v1 --base null -r 53245c60e682 partial.hg
99 99 1 changesets found
100 100
101 101 We verify exact bundle content as an extra check against accidental future
102 102 changes. If this output changes, we could break old clients.
103 103
104 104 $ f --size --hexdump partial.hg
105 105 partial.hg: size=207
106 106 0000: 48 47 31 30 47 5a 78 9c 63 60 60 98 17 ac 12 93 |HG10GZx.c``.....|
107 107 0010: f0 ac a9 23 45 70 cb bf 0d 5f 59 4e 4a 7f 79 21 |...#Ep..._YNJ.y!|
108 108 0020: 9b cc 40 24 20 a0 d7 ce 2c d1 38 25 cd 24 25 d5 |..@$ ...,.8%.$%.|
109 109 0030: d8 c2 22 cd 38 d9 24 cd 22 d5 c8 22 cd 24 cd 32 |..".8.$."..".$.2|
110 110 0040: d1 c2 d0 c4 c8 d2 32 d1 38 39 29 c9 34 cd d4 80 |......2.89).4...|
111 111 0050: ab 24 b5 b8 84 cb 40 c1 80 2b 2d 3f 9f 8b 2b 31 |.$....@..+-?..+1|
112 112 0060: 25 45 01 c8 80 9a d2 9b 65 fb e5 9e 45 bf 8d 7f |%E......e...E...|
113 113 0070: 9f c6 97 9f 2b 44 34 67 d9 ec 8e 0f a0 92 0b 75 |....+D4g.......u|
114 114 0080: 41 d6 24 59 18 a4 a4 9a a6 18 1a 5b 98 9b 5a 98 |A.$Y.......[..Z.|
115 115 0090: 9a 18 26 9b a6 19 98 1a 99 99 26 a6 18 9a 98 24 |..&.......&....$|
116 116 00a0: 26 59 a6 25 5a 98 a5 18 a6 24 71 41 35 b1 43 dc |&Y.%Z....$qA5.C.|
117 117 00b0: 16 b2 83 f7 e9 45 8b d2 56 c7 a3 1f 82 52 d7 8a |.....E..V....R..|
118 118 00c0: 78 ed fc d5 76 f1 36 35 dc 05 00 36 ed 5e c7 |x...v.65...6.^.|
119 119
120 120 $ echo "http://localhost:$HGPORT1/partial.hg" > server/.hg/clonebundles.manifest
121 121 $ hg clone -U http://localhost:$HGPORT partial-bundle
122 122 applying clone bundle from http://localhost:$HGPORT1/partial.hg
123 123 adding changesets
124 124 adding manifests
125 125 adding file changes
126 126 added 1 changesets with 1 changes to 1 files
127 127 finished applying clone bundle
128 128 searching for changes
129 129 adding changesets
130 130 adding manifests
131 131 adding file changes
132 132 added 1 changesets with 1 changes to 1 files
133 133 new changesets aaff8d2ffbbf
134 134 1 local changesets published
135 135
136 136 Incremental pull doesn't fetch bundle
137 137
138 138 $ hg clone -r 53245c60e682 -U http://localhost:$HGPORT partial-clone
139 139 adding changesets
140 140 adding manifests
141 141 adding file changes
142 142 added 1 changesets with 1 changes to 1 files
143 143 new changesets 53245c60e682
144 144
145 145 $ cd partial-clone
146 146 $ hg pull
147 147 pulling from http://localhost:$HGPORT/
148 148 searching for changes
149 149 adding changesets
150 150 adding manifests
151 151 adding file changes
152 152 added 1 changesets with 1 changes to 1 files
153 153 new changesets aaff8d2ffbbf
154 154 (run 'hg update' to get a working copy)
155 155 $ cd ..
156 156
157 157 Bundle with full content works
158 158
159 159 $ hg -R server bundle --type gzip-v2 --base null -r tip full.hg
160 160 2 changesets found
161 161
162 162 Again, we perform an extra check against bundle content changes. If this content
163 163 changes, clone bundles produced by new Mercurial versions may not be readable
164 164 by old clients.
165 165
166 166 $ f --size --hexdump full.hg
167 167 full.hg: size=442
168 168 0000: 48 47 32 30 00 00 00 0e 43 6f 6d 70 72 65 73 73 |HG20....Compress|
169 169 0010: 69 6f 6e 3d 47 5a 78 9c 63 60 60 d0 e4 76 f6 70 |ion=GZx.c``..v.p|
170 170 0020: f4 73 77 75 0f f2 0f 0d 60 00 02 46 46 76 26 4e |.swu....`..FFv&N|
171 171 0030: c6 b2 d4 a2 e2 cc fc 3c 03 a3 bc a4 e4 8c c4 bc |.......<........|
172 172 0040: f4 d4 62 23 06 06 e6 19 40 f9 4d c1 2a 31 09 cf |..b#....@.M.*1..|
173 173 0050: 9a 3a 52 04 b7 fc db f0 95 e5 a4 f4 97 17 b2 c9 |.:R.............|
174 174 0060: 0c 14 00 02 e6 d9 99 25 1a a7 a4 99 a4 a4 1a 5b |.......%.......[|
175 175 0070: 58 a4 19 27 9b a4 59 a4 1a 59 a4 99 a4 59 26 5a |X..'..Y..Y...Y&Z|
176 176 0080: 18 9a 18 59 5a 26 1a 27 27 25 99 a6 99 1a 70 95 |...YZ&.''%....p.|
177 177 0090: a4 16 97 70 19 28 18 70 a5 e5 e7 73 71 25 a6 a4 |...p.(.p...sq%..|
178 178 00a0: 28 00 19 20 17 af fa df ab ff 7b 3f fb 92 dc 8b |(.. ......{?....|
179 179 00b0: 1f 62 bb 9e b7 d7 d9 87 3d 5a 44 89 2f b0 99 87 |.b......=ZD./...|
180 180 00c0: ec e2 54 63 43 e3 b4 64 43 73 23 33 43 53 0b 63 |..TcC..dCs#3CS.c|
181 181 00d0: d3 14 23 03 a0 fb 2c 2c 0c d3 80 1e 30 49 49 b1 |..#...,,....0II.|
182 182 00e0: 4c 4a 32 48 33 30 b0 34 42 b8 38 29 b1 08 e2 62 |LJ2H30.4B.8)...b|
183 183 00f0: 20 03 6a ca c2 2c db 2f f7 2c fa 6d fc fb 34 be | .j..,./.,.m..4.|
184 184 0100: fc 5c 21 a2 39 cb 66 77 7c 00 0d c3 59 17 14 58 |.\!.9.fw|...Y..X|
185 185 0110: 49 16 06 29 a9 a6 29 86 c6 16 e6 a6 16 a6 26 86 |I..)..).......&.|
186 186 0120: c9 a6 69 06 a6 46 66 a6 89 29 86 26 26 89 49 96 |..i..Ff..).&&.I.|
187 187 0130: 69 89 16 66 29 86 29 49 5c 20 07 3e 16 fe 23 ae |i..f).)I\ .>..#.|
188 188 0140: 26 da 1c ab 10 1f d1 f8 e3 b3 ef cd dd fc 0c 93 |&...............|
189 189 0150: 88 75 34 36 75 04 82 55 17 14 36 a4 38 10 04 d8 |.u46u..U..6.8...|
190 190 0160: 21 01 9a b1 83 f7 e9 45 8b d2 56 c7 a3 1f 82 52 |!......E..V....R|
191 191 0170: d7 8a 78 ed fc d5 76 f1 36 25 81 89 c7 ad ec 90 |..x...v.6%......|
192 192 0180: 54 47 75 2b 89 48 b1 b2 62 c9 89 c9 19 a9 56 45 |TGu+.H..b.....VE|
193 193 0190: a9 65 ba 49 45 89 79 c9 19 ba 60 01 a0 14 23 58 |.e.IE.y...`...#X|
194 194 01a0: 81 35 c8 7d 40 cc 04 e2 a4 a4 a6 25 96 e6 94 60 |.5.}@......%...`|
195 195 01b0: 33 17 5f 54 00 00 d3 1b 0d 4c |3._T.....L|
196 196
197 197 $ echo "http://localhost:$HGPORT1/full.hg" > server/.hg/clonebundles.manifest
198 198 $ hg clone -U http://localhost:$HGPORT full-bundle
199 199 applying clone bundle from http://localhost:$HGPORT1/full.hg
200 200 adding changesets
201 201 adding manifests
202 202 adding file changes
203 203 added 2 changesets with 2 changes to 2 files
204 204 finished applying clone bundle
205 205 searching for changes
206 206 no changes found
207 207 2 local changesets published
208 208
209 209 Feature works over SSH
210 210
211 211 $ hg clone -U ssh://user@dummy/server ssh-full-clone
212 212 applying clone bundle from http://localhost:$HGPORT1/full.hg
213 213 adding changesets
214 214 adding manifests
215 215 adding file changes
216 216 added 2 changesets with 2 changes to 2 files
217 217 finished applying clone bundle
218 218 searching for changes
219 219 no changes found
220 220 2 local changesets published
221 221
222 222 Entry with unknown BUNDLESPEC is filtered and not used
223 223
224 224 $ cat > server/.hg/clonebundles.manifest << EOF
225 225 > http://bad.entry1 BUNDLESPEC=UNKNOWN
226 226 > http://bad.entry2 BUNDLESPEC=xz-v1
227 227 > http://bad.entry3 BUNDLESPEC=none-v100
228 228 > http://localhost:$HGPORT1/full.hg BUNDLESPEC=gzip-v2
229 229 > EOF
230 230
231 231 $ hg clone -U http://localhost:$HGPORT filter-unknown-type
232 232 applying clone bundle from http://localhost:$HGPORT1/full.hg
233 233 adding changesets
234 234 adding manifests
235 235 adding file changes
236 236 added 2 changesets with 2 changes to 2 files
237 237 finished applying clone bundle
238 238 searching for changes
239 239 no changes found
240 240 2 local changesets published
241 241
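The filtering just demonstrated boils down to parsing each advertised
BUNDLESPEC and dropping entries whose compression engine or format is unknown
(entries without any BUNDLESPEC are kept and tried optimistically). A rough
Python sketch, with an illustrative known-set and ignoring the separate
requirements check exercised later:

  KNOWN_COMPRESSIONS = {'gzip', 'bzip2', 'none', 'zstd'}  # illustrative
  KNOWN_FORMATS = {'v1', 'v2', 'packed1'}                 # illustrative

  def usable(attrs):
      spec = attrs.get('BUNDLESPEC')
      if spec is None:
          return True  # no spec advertised: try it anyway
      comp, _, version = spec.partition(';')[0].partition('-')
      return comp in KNOWN_COMPRESSIONS and version in KNOWN_FORMATS
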
242 242 Automatic fallback when all entries are filtered
243 243
244 244 $ cat > server/.hg/clonebundles.manifest << EOF
245 245 > http://bad.entry BUNDLESPEC=UNKNOWN
246 246 > EOF
247 247
248 248 $ hg clone -U http://localhost:$HGPORT filter-all
249 249 no compatible clone bundles available on server; falling back to regular clone
250 250 (you may want to report this to the server operator)
251 251 requesting all changes
252 252 adding changesets
253 253 adding manifests
254 254 adding file changes
255 255 added 2 changesets with 2 changes to 2 files
256 256 new changesets 53245c60e682:aaff8d2ffbbf
257 257
258 258 We require a Python version that supports SNI. Therefore, URLs requiring SNI
259 259 are not filtered.
260 260
261 261 $ cp full.hg sni.hg
262 262 $ cat > server/.hg/clonebundles.manifest << EOF
263 263 > http://localhost:$HGPORT1/sni.hg REQUIRESNI=true
264 264 > http://localhost:$HGPORT1/full.hg
265 265 > EOF
266 266
267 267 $ hg clone -U http://localhost:$HGPORT sni-supported
268 268 applying clone bundle from http://localhost:$HGPORT1/sni.hg
269 269 adding changesets
270 270 adding manifests
271 271 adding file changes
272 272 added 2 changesets with 2 changes to 2 files
273 273 finished applying clone bundle
274 274 searching for changes
275 275 no changes found
276 276 2 local changesets published
277 277
278 278 Stream clone bundles are supported
279 279
280 280 $ hg -R server debugcreatestreamclonebundle packed.hg
281 281 writing 613 bytes for 4 files
282 282 bundle requirements: generaldelta, revlogv1, sparserevlog (no-rust no-zstd !)
283 283 bundle requirements: generaldelta, revlog-compression-zstd, revlogv1, sparserevlog (no-rust zstd !)
284 284 bundle requirements: generaldelta, revlog-compression-zstd, revlogv1, sparserevlog (rust !)
285 285
286 286 No bundle spec should work
287 287
288 288 $ cat > server/.hg/clonebundles.manifest << EOF
289 289 > http://localhost:$HGPORT1/packed.hg
290 290 > EOF
291 291
292 292 $ hg clone -U http://localhost:$HGPORT stream-clone-no-spec
293 293 applying clone bundle from http://localhost:$HGPORT1/packed.hg
294 294 4 files to transfer, 613 bytes of data
295 295 transferred 613 bytes in *.* seconds (*) (glob)
296 296 finished applying clone bundle
297 297 searching for changes
298 298 no changes found
299 299
300 300 Bundle spec without parameters should work
301 301
302 302 $ cat > server/.hg/clonebundles.manifest << EOF
303 303 > http://localhost:$HGPORT1/packed.hg BUNDLESPEC=none-packed1
304 304 > EOF
305 305
306 306 $ hg clone -U http://localhost:$HGPORT stream-clone-vanilla-spec
307 307 applying clone bundle from http://localhost:$HGPORT1/packed.hg
308 308 4 files to transfer, 613 bytes of data
309 309 transferred 613 bytes in *.* seconds (*) (glob)
310 310 finished applying clone bundle
311 311 searching for changes
312 312 no changes found
313 313
314 314 Bundle spec with format requirements should work
315 315
316 316 $ cat > server/.hg/clonebundles.manifest << EOF
317 317 > http://localhost:$HGPORT1/packed.hg BUNDLESPEC=none-packed1;requirements%3Drevlogv1
318 318 > EOF
319 319
320 320 $ hg clone -U http://localhost:$HGPORT stream-clone-supported-requirements
321 321 applying clone bundle from http://localhost:$HGPORT1/packed.hg
322 322 4 files to transfer, 613 bytes of data
323 323 transferred 613 bytes in *.* seconds (*) (glob)
324 324 finished applying clone bundle
325 325 searching for changes
326 326 no changes found
327 327
328 328 Stream bundle spec with unknown requirements should be filtered out
329 329
330 330 $ cat > server/.hg/clonebundles.manifest << EOF
331 331 > http://localhost:$HGPORT1/packed.hg BUNDLESPEC=none-packed1;requirements%3Drevlogv42
332 332 > EOF
333 333
334 334 $ hg clone -U http://localhost:$HGPORT stream-clone-unsupported-requirements
335 335 no compatible clone bundles available on server; falling back to regular clone
336 336 (you may want to report this to the server operator)
337 337 requesting all changes
338 338 adding changesets
339 339 adding manifests
340 340 adding file changes
341 341 added 2 changesets with 2 changes to 2 files
342 342 new changesets 53245c60e682:aaff8d2ffbbf
343 343
344 344 Set up manifest for testing preferences
345 345 (Remember, the TYPE does not have to match reality - the URL is
346 346 important)
347 347
348 348 $ cp full.hg gz-a.hg
349 349 $ cp full.hg gz-b.hg
350 350 $ cp full.hg bz2-a.hg
351 351 $ cp full.hg bz2-b.hg
352 352 $ cat > server/.hg/clonebundles.manifest << EOF
353 353 > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 extra=a
354 354 > http://localhost:$HGPORT1/bz2-a.hg BUNDLESPEC=bzip2-v2 extra=a
355 355 > http://localhost:$HGPORT1/gz-b.hg BUNDLESPEC=gzip-v2 extra=b
356 356 > http://localhost:$HGPORT1/bz2-b.hg BUNDLESPEC=bzip2-v2 extra=b
357 357 > EOF
358 358
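All of the preference cases below follow from one idea: build a sort key from
ui.clonebundleprefers where matching an earlier preference ranks an entry
higher, with ties keeping manifest order. A minimal sketch (hypothetical
helper, assuming attrs already include keys like COMPRESSION derived from the
BUNDLESPEC; not the exact Mercurial implementation):

  def sort_by_prefers(entries, prefers):
      # entries: [(url, attrs)]; prefers: [(key, value)] in priority order
      def score(attrs):
          # lower tuples sort first: a prefers[0] match beats a prefers[1] match
          return tuple(0 if attrs.get(k) == v else 1 for k, v in prefers)
      # sorted() is stable, so equally-scored entries keep manifest order
      return sorted(entries, key=lambda e: score(e[1]))
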
359 359 Preferring an undefined attribute will take first entry
360 360
361 361 $ hg --config ui.clonebundleprefers=foo=bar clone -U http://localhost:$HGPORT prefer-foo
362 362 applying clone bundle from http://localhost:$HGPORT1/gz-a.hg
363 363 adding changesets
364 364 adding manifests
365 365 adding file changes
366 366 added 2 changesets with 2 changes to 2 files
367 367 finished applying clone bundle
368 368 searching for changes
369 369 no changes found
370 370 2 local changesets published
371 371
372 372 Preferring bz2 type will download first entry of that type
373 373
374 374 $ hg --config ui.clonebundleprefers=COMPRESSION=bzip2 clone -U http://localhost:$HGPORT prefer-bz
375 375 applying clone bundle from http://localhost:$HGPORT1/bz2-a.hg
376 376 adding changesets
377 377 adding manifests
378 378 adding file changes
379 379 added 2 changesets with 2 changes to 2 files
380 380 finished applying clone bundle
381 381 searching for changes
382 382 no changes found
383 383 2 local changesets published
384 384
385 385 Preferring multiple values of an option works
386 386
387 387 $ hg --config ui.clonebundleprefers=COMPRESSION=unknown,COMPRESSION=bzip2 clone -U http://localhost:$HGPORT prefer-multiple-bz
388 388 applying clone bundle from http://localhost:$HGPORT1/bz2-a.hg
389 389 adding changesets
390 390 adding manifests
391 391 adding file changes
392 392 added 2 changesets with 2 changes to 2 files
393 393 finished applying clone bundle
394 394 searching for changes
395 395 no changes found
396 396 2 local changesets published
397 397
398 398 Sorting multiple values should get us back to original first entry
399 399
400 400 $ hg --config ui.clonebundleprefers=BUNDLESPEC=unknown,BUNDLESPEC=gzip-v2,BUNDLESPEC=bzip2-v2 clone -U http://localhost:$HGPORT prefer-multiple-gz
401 401 applying clone bundle from http://localhost:$HGPORT1/gz-a.hg
402 402 adding changesets
403 403 adding manifests
404 404 adding file changes
405 405 added 2 changesets with 2 changes to 2 files
406 406 finished applying clone bundle
407 407 searching for changes
408 408 no changes found
409 409 2 local changesets published
410 410
411 411 Preferring multiple attributes has correct order
412 412
413 413 $ hg --config ui.clonebundleprefers=extra=b,BUNDLESPEC=bzip2-v2 clone -U http://localhost:$HGPORT prefer-separate-attributes
414 414 applying clone bundle from http://localhost:$HGPORT1/bz2-b.hg
415 415 adding changesets
416 416 adding manifests
417 417 adding file changes
418 418 added 2 changesets with 2 changes to 2 files
419 419 finished applying clone bundle
420 420 searching for changes
421 421 no changes found
422 422 2 local changesets published
423 423
424 424 Test where attribute is missing from some entries
425 425
426 426 $ cat > server/.hg/clonebundles.manifest << EOF
427 427 > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2
428 428 > http://localhost:$HGPORT1/bz2-a.hg BUNDLESPEC=bzip2-v2
429 429 > http://localhost:$HGPORT1/gz-b.hg BUNDLESPEC=gzip-v2 extra=b
430 430 > http://localhost:$HGPORT1/bz2-b.hg BUNDLESPEC=bzip2-v2 extra=b
431 431 > EOF
432 432
433 433 $ hg --config ui.clonebundleprefers=extra=b clone -U http://localhost:$HGPORT prefer-partially-defined-attribute
434 434 applying clone bundle from http://localhost:$HGPORT1/gz-b.hg
435 435 adding changesets
436 436 adding manifests
437 437 adding file changes
438 438 added 2 changesets with 2 changes to 2 files
439 439 finished applying clone bundle
440 440 searching for changes
441 441 no changes found
442 442 2 local changesets published
443 443
444 444 Test a bad attribute list
445 445
446 446 $ hg --config ui.clonebundleprefers=bad clone -U http://localhost:$HGPORT bad-input
447 447 abort: invalid ui.clonebundleprefers item: bad
448 448 (each comma separated item should be key=value pairs)
449 449 [255]
450 450 $ hg --config ui.clonebundleprefers=key=val,bad,key2=val2 clone \
451 451 > -U http://localhost:$HGPORT bad-input
452 452 abort: invalid ui.clonebundleprefers item: bad
453 453 (each comma separated item should be key=value pairs)
454 454 [255]
455 455
456 456
457 457 Test interaction between clone bundles and --stream
458 458
459 459 A manifest with just a gzip bundle
460 460
461 461 $ cat > server/.hg/clonebundles.manifest << EOF
462 462 > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2
463 463 > EOF
464 464
465 465 $ hg clone -U --stream http://localhost:$HGPORT uncompressed-gzip
466 466 no compatible clone bundles available on server; falling back to regular clone
467 467 (you may want to report this to the server operator)
468 468 streaming all changes
469 469 9 files to transfer, 816 bytes of data
470 470 transferred 816 bytes in * seconds (*) (glob)
471 471
472 472 A manifest with a stream clone but no BUNDLESPEC
473 473
474 474 $ cat > server/.hg/clonebundles.manifest << EOF
475 475 > http://localhost:$HGPORT1/packed.hg
476 476 > EOF
477 477
478 478 $ hg clone -U --stream http://localhost:$HGPORT uncompressed-no-bundlespec
479 479 no compatible clone bundles available on server; falling back to regular clone
480 480 (you may want to report this to the server operator)
481 481 streaming all changes
482 482 9 files to transfer, 816 bytes of data
483 483 transferred 816 bytes in * seconds (*) (glob)
484 484
485 485 A manifest with a gzip bundle and a stream clone
486 486
487 487 $ cat > server/.hg/clonebundles.manifest << EOF
488 488 > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2
489 489 > http://localhost:$HGPORT1/packed.hg BUNDLESPEC=none-packed1
490 490 > EOF
491 491
492 492 $ hg clone -U --stream http://localhost:$HGPORT uncompressed-gzip-packed
493 493 applying clone bundle from http://localhost:$HGPORT1/packed.hg
494 494 4 files to transfer, 613 bytes of data
495 495 transferred 613 bytes in * seconds (*) (glob)
496 496 finished applying clone bundle
497 497 searching for changes
498 498 no changes found
499 499
500 500 A manifest with a gzip bundle and stream clone with supported requirements
501 501
502 502 $ cat > server/.hg/clonebundles.manifest << EOF
503 503 > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2
504 504 > http://localhost:$HGPORT1/packed.hg BUNDLESPEC=none-packed1;requirements%3Drevlogv1
505 505 > EOF
506 506
507 507 $ hg clone -U --stream http://localhost:$HGPORT uncompressed-gzip-packed-requirements
508 508 applying clone bundle from http://localhost:$HGPORT1/packed.hg
509 509 4 files to transfer, 613 bytes of data
510 510 transferred 613 bytes in * seconds (*) (glob)
511 511 finished applying clone bundle
512 512 searching for changes
513 513 no changes found
514 514
515 515 A manifest with a gzip bundle and a stream clone with unsupported requirements
516 516
517 517 $ cat > server/.hg/clonebundles.manifest << EOF
518 518 > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2
519 519 > http://localhost:$HGPORT1/packed.hg BUNDLESPEC=none-packed1;requirements%3Drevlogv42
520 520 > EOF
521 521
522 522 $ hg clone -U --stream http://localhost:$HGPORT uncompressed-gzip-packed-unsupported-requirements
523 523 no compatible clone bundles available on server; falling back to regular clone
524 524 (you may want to report this to the server operator)
525 525 streaming all changes
526 526 9 files to transfer, 816 bytes of data
527 527 transferred 816 bytes in * seconds (*) (glob)
528 528
529 529 Test clone bundle retrieved through bundle2
530 530
531 531 $ cat << EOF >> $HGRCPATH
532 532 > [extensions]
533 533 > largefiles=
534 534 > EOF
535 535 $ killdaemons.py
536 536 $ hg -R server serve -d -p $HGPORT --pid-file hg.pid --accesslog access.log
537 537 $ cat hg.pid >> $DAEMON_PIDS
538 538
539 539 $ hg -R server debuglfput gz-a.hg
540 540 1f74b3d08286b9b3a16fb3fa185dd29219cbc6ae
541 541
542 542 $ cat > server/.hg/clonebundles.manifest << EOF
543 543 > largefile://1f74b3d08286b9b3a16fb3fa185dd29219cbc6ae BUNDLESPEC=gzip-v2
544 544 > EOF
545 545
546 546 $ hg clone -U http://localhost:$HGPORT largefile-provided --traceback
547 547 applying clone bundle from largefile://1f74b3d08286b9b3a16fb3fa185dd29219cbc6ae
548 548 adding changesets
549 549 adding manifests
550 550 adding file changes
551 551 added 2 changesets with 2 changes to 2 files
552 552 finished applying clone bundle
553 553 searching for changes
554 554 no changes found
555 555 2 local changesets published
556 556 $ killdaemons.py
557 557
558 558 A manifest with a gzip bundle requiring too much memory for a 16MB system and working
559 559 on a 32MB system.
560 560
561 561 $ "$PYTHON" $TESTDIR/dumbhttp.py -p $HGPORT1 --pid http.pid
562 562 $ cat http.pid >> $DAEMON_PIDS
563 563 $ hg -R server serve -d -p $HGPORT --pid-file hg.pid --accesslog access.log
564 564 $ cat hg.pid >> $DAEMON_PIDS
565 565
566 566 $ cat > server/.hg/clonebundles.manifest << EOF
567 567 > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 REQUIREDRAM=12MB
568 568 > EOF
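The two runs below hinge on a single comparison: an entry is filtered out when
its advertised REQUIREDRAM exceeds two thirds of ui.available-memory. A tiny
sketch with the values from this test:

  MB = 1024 * 1024

  def ram_ok(required, available):
      # usable only if the bundle needs no more than 2/3 of system memory
      return required <= 2 * available // 3

  assert not ram_ok(12 * MB, 16 * MB)  # filtered on the 16MB system
  assert ram_ok(12 * MB, 32 * MB)      # applied on the 32MB system
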
569 569
570 570 $ hg clone -U --debug --config ui.available-memory=16MB http://localhost:$HGPORT gzip-too-large
571 571 using http://localhost:$HGPORT/
572 572 sending capabilities command
573 573 sending clonebundles command
574 574 filtering http://localhost:$HGPORT1/gz-a.hg as it needs more than 2/3 of system memory
575 575 no compatible clone bundles available on server; falling back to regular clone
576 576 (you may want to report this to the server operator)
577 577 query 1; heads
578 578 sending batch command
579 579 requesting all changes
580 580 sending getbundle command
581 581 bundle2-input-bundle: with-transaction
582 582 bundle2-input-part: "changegroup" (params: 1 mandatory 1 advisory) supported
583 583 adding changesets
584 584 add changeset 53245c60e682
585 585 add changeset aaff8d2ffbbf
586 586 adding manifests
587 587 adding file changes
588 588 adding bar revisions
589 589 adding foo revisions
590 590 bundle2-input-part: total payload size 920
591 591 bundle2-input-part: "listkeys" (params: 1 mandatory) supported
592 592 bundle2-input-part: "phase-heads" supported
593 593 bundle2-input-part: total payload size 24
594 594 bundle2-input-bundle: 3 parts total
595 595 checking for updated bookmarks
596 596 updating the branch cache
597 597 added 2 changesets with 2 changes to 2 files
598 598 new changesets 53245c60e682:aaff8d2ffbbf
599 599 calling hook changegroup.lfiles: hgext.largefiles.reposetup.checkrequireslfiles
600 600 updating the branch cache
601 601 (sent 4 HTTP requests and * bytes; received * bytes in responses) (glob)
602 602
603 603 $ hg clone -U --debug --config ui.available-memory=32MB http://localhost:$HGPORT gzip-too-large2
604 604 using http://localhost:$HGPORT/
605 605 sending capabilities command
606 606 sending clonebundles command
607 607 applying clone bundle from http://localhost:$HGPORT1/gz-a.hg
608 608 bundle2-input-bundle: 1 params with-transaction
609 609 bundle2-input-part: "changegroup" (params: 1 mandatory 1 advisory) supported
610 610 adding changesets
611 611 add changeset 53245c60e682
612 612 add changeset aaff8d2ffbbf
613 613 adding manifests
614 614 adding file changes
615 615 adding bar revisions
616 616 adding foo revisions
617 617 bundle2-input-part: total payload size 920
618 618 bundle2-input-part: "cache:rev-branch-cache" (advisory) supported
619 619 bundle2-input-part: total payload size 59
620 620 bundle2-input-bundle: 2 parts total
621 621 updating the branch cache
622 622 added 2 changesets with 2 changes to 2 files
623 623 finished applying clone bundle
624 624 query 1; heads
625 625 sending batch command
626 626 searching for changes
627 627 all remote heads known locally
628 628 no changes found
629 629 sending getbundle command
630 630 bundle2-input-bundle: with-transaction
631 631 bundle2-input-part: "listkeys" (params: 1 mandatory) supported
632 632 bundle2-input-part: "phase-heads" supported
633 633 bundle2-input-part: total payload size 24
634 634 bundle2-input-bundle: 2 parts total
635 635 checking for updated bookmarks
636 636 2 local changesets published
637 637 calling hook changegroup.lfiles: hgext.largefiles.reposetup.checkrequireslfiles
638 638 updating the branch cache
639 639 (sent 4 HTTP requests and * bytes; received * bytes in responses) (glob)
640 640 $ killdaemons.py
641 641
642 642 Testing a clone bundle that involves revlog splitting (issue6811)
643 643 ==================================================================
644 644
645 645 $ cat >> $HGRCPATH << EOF
646 646 > [format]
647 647 > revlog-compression=none
648 648 > use-persistent-nodemap=no
649 649 > EOF
650 650
651 651 $ hg init server-revlog-split/
652 652 $ cd server-revlog-split
653 653 $ cat >> .hg/hgrc << EOF
654 654 > [extensions]
655 655 > clonebundles =
656 656 > EOF
657 657 $ echo foo > A
658 658 $ hg add A
659 659 $ hg commit -m 'initial commit'
660 660 IMPORTANT: the revlogs must not be split
661 661 $ ls -1 .hg/store/00manifest.*
662 662 .hg/store/00manifest.i
663 663 $ ls -1 .hg/store/data/_a.*
664 664 .hg/store/data/_a.i
665 665
666 666 do a big enough update to split the revlogs
667 667
668 668 $ $TESTDIR/seq.py 100000 > A
669 669 $ mkdir foo
670 670 $ cd foo
671 671 $ touch `$TESTDIR/seq.py 10000`
672 672 $ cd ..
673 673 $ hg add -q foo
674 674 $ hg commit -m 'split the manifest and one filelog'
675 675
676 676 IMPORTANT: now the revlogs must be split
677 677 $ ls -1 .hg/store/00manifest.*
678 678 .hg/store/00manifest.d
679 679 .hg/store/00manifest.i
680 680 $ ls -1 .hg/store/data/_a.*
681 681 .hg/store/data/_a.d
682 682 .hg/store/data/_a.i
683 683
684 684 Add an extra commit on top of that
685 685
686 686 $ echo foo >> A
687 687 $ hg commit -m 'one extra commit'
688 688
689 689 $ cd ..
690 690
691 691 Do a bundle that contains the split, but not the extra commit on top
692 692
693 693 $ hg bundle --exact --rev '::(default~1)' -R server-revlog-split/ --type gzip-v2 split-test.hg
694 694 2 changesets found
695 695
696 696 $ cat > server-revlog-split/.hg/clonebundles.manifest << EOF
697 697 > http://localhost:$HGPORT1/split-test.hg BUNDLESPEC=gzip-v2
698 698 > EOF
699 699
700 700 start the necessary server
701 701
702 702 $ "$PYTHON" $TESTDIR/dumbhttp.py -p $HGPORT1 --pid http.pid
703 703 $ cat http.pid >> $DAEMON_PIDS
704 704 $ hg -R server-revlog-split serve -d -p $HGPORT --pid-file hg.pid --accesslog access.log
705 705 $ cat hg.pid >> $DAEMON_PIDS
706 706
707 707 Check that clone works fine
708 708 ===========================
709 709
710 710 Here, the initial clone will trigger a revlog split (which is a bit
711 711 clownish in itself, but whatever). The split revlogs will then see
712 712 additional data added to them in the subsequent pull. This should not be a problem.
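
For context on the "split": a small revlog keeps its data inline in the .i
index file; once the data outgrows an inline size threshold, it is split into
a separate .d file, which is exactly the transition issue6811 races against. A
toy sketch of which files a revlog occupies (threshold value assumed for
illustration):

  MAX_INLINE = 131072  # assumed inline threshold, for illustration only

  def revlog_files(data_size, base='.hg/store/00manifest'):
      if data_size <= MAX_INLINE:
          return [base + '.i']           # index and data share the .i file
      return [base + '.i', base + '.d']  # split: data moved out to .d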
713 713
714 714 $ hg clone http://localhost:$HGPORT revlog-split-in-the-bundle
715 715 applying clone bundle from http://localhost:$HGPORT1/split-test.hg
716 716 adding changesets
717 717 adding manifests
718 718 adding file changes
719 719 added 2 changesets with 10002 changes to 10001 files
720 720 finished applying clone bundle
721 721 searching for changes
722 722 adding changesets
723 723 adding manifests
724 724 adding file changes
725 725 added 1 changesets with 1 changes to 1 files
726 726 new changesets e3879eaa1db7
727 727 2 local changesets published
728 728 updating to branch default
729 729 10001 files updated, 0 files merged, 0 files removed, 0 files unresolved
730 730
731 731 check the results
732 732
733 733 $ cd revlog-split-in-the-bundle
734 734 $ f --size .hg/store/00manifest.*
735 735 .hg/store/00manifest.d: size=499037
736 .hg/store/00manifest.i: size=192 (missing-correct-output !)
737 .hg/store/00manifest.i: size=128 (known-bad-output !)
738 .hg/store/00manifest.i.s: size=64 (known-bad-output !)
736 .hg/store/00manifest.i: size=192
739 737 $ f --size .hg/store/data/_a.*
740 738 .hg/store/data/_a.d: size=588917
741 739 .hg/store/data/_a.i: size=192
742 740
743 741 manifest should work
744 742
745 743 $ hg files -r tip | wc -l
746 \s*10001 (re) (missing-correct-output !)
747 abort: 00manifest@4941afd6b8e298d932227572c5c303cbc14301bd: no node (known-bad-output !)
748 0 (known-bad-output !)
744 \s*10001 (re)
749 745
750 746 file content should work
751 747
752 748 $ hg cat -r tip A | wc -l
753 \s*100001 (re) (missing-correct-output !)
754 abort: 00manifest@4941afd6b8e298d932227572c5c303cbc14301bd: no node (known-bad-output !)
755 0 (known-bad-output !)
749 \s*100001 (re)
756 750
757 751