stream-clone: fix a crash when a repo with an empty revlog is cloned
Arseniy Alekseyev
r51970:74c004a5 stable
@@ -1,3530 +1,3533 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 DELTA_BASE_REUSE_NO,
43 43 DELTA_BASE_REUSE_TRY,
44 44 ENTRY_RANK,
45 45 FEATURES_BY_VERSION,
46 46 FLAG_GENERALDELTA,
47 47 FLAG_INLINE_DATA,
48 48 INDEX_HEADER,
49 49 KIND_CHANGELOG,
50 50 KIND_FILELOG,
51 51 RANK_UNKNOWN,
52 52 REVLOGV0,
53 53 REVLOGV1,
54 54 REVLOGV1_FLAGS,
55 55 REVLOGV2,
56 56 REVLOGV2_FLAGS,
57 57 REVLOG_DEFAULT_FLAGS,
58 58 REVLOG_DEFAULT_FORMAT,
59 59 REVLOG_DEFAULT_VERSION,
60 60 SUPPORTED_FLAGS,
61 61 )
62 62 from .revlogutils.flagutil import (
63 63 REVIDX_DEFAULT_FLAGS,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 REVIDX_FLAGS_ORDER,
67 67 REVIDX_HASCOPIESINFO,
68 68 REVIDX_ISCENSORED,
69 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 70 )
71 71 from .thirdparty import attr
72 72 from . import (
73 73 ancestor,
74 74 dagop,
75 75 error,
76 76 mdiff,
77 77 policy,
78 78 pycompat,
79 79 revlogutils,
80 80 templatefilters,
81 81 util,
82 82 )
83 83 from .interfaces import (
84 84 repository,
85 85 util as interfaceutil,
86 86 )
87 87 from .revlogutils import (
88 88 deltas as deltautil,
89 89 docket as docketutil,
90 90 flagutil,
91 91 nodemap as nodemaputil,
92 92 randomaccessfile,
93 93 revlogv0,
94 94 rewrite,
95 95 sidedata as sidedatautil,
96 96 )
97 97 from .utils import (
98 98 storageutil,
99 99 stringutil,
100 100 )
101 101
102 102 # blanket usage of all the names to silence pyflakes warnings
103 103 # We need these names available in the module for extensions.
104 104
105 105 REVLOGV0
106 106 REVLOGV1
107 107 REVLOGV2
108 108 CHANGELOGV2
109 109 FLAG_INLINE_DATA
110 110 FLAG_GENERALDELTA
111 111 REVLOG_DEFAULT_FLAGS
112 112 REVLOG_DEFAULT_FORMAT
113 113 REVLOG_DEFAULT_VERSION
114 114 REVLOGV1_FLAGS
115 115 REVLOGV2_FLAGS
116 116 REVIDX_ISCENSORED
117 117 REVIDX_ELLIPSIS
118 118 REVIDX_HASCOPIESINFO
119 119 REVIDX_EXTSTORED
120 120 REVIDX_DEFAULT_FLAGS
121 121 REVIDX_FLAGS_ORDER
122 122 REVIDX_RAWTEXT_CHANGING_FLAGS
123 123
124 124 parsers = policy.importmod('parsers')
125 125 rustancestor = policy.importrust('ancestor')
126 126 rustdagop = policy.importrust('dagop')
127 127 rustrevlog = policy.importrust('revlog')
128 128
129 129 # Aliased for performance.
130 130 _zlibdecompress = zlib.decompress
131 131
132 132 # max size of inline data embedded into a revlog
133 133 _maxinline = 131072
134 134
135 135 # Flag processors for REVIDX_ELLIPSIS.
136 136 def ellipsisreadprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsiswriteprocessor(rl, text):
141 141 return text, False
142 142
143 143
144 144 def ellipsisrawprocessor(rl, text):
145 145 return False
146 146
147 147
148 148 ellipsisprocessor = (
149 149 ellipsisreadprocessor,
150 150 ellipsiswriteprocessor,
151 151 ellipsisrawprocessor,
152 152 )
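# A flag processor is registered as a (read, write, raw) triple. The read
# and write transforms return (text, validatehash); returning False for
# validatehash, as the ellipsis processors above do, signals that the
# stored hash cannot be checked against the transformed text. The raw
# processor returns a single bool saying whether the rawtext is safe to
# hash-check. (This summary is inferred from the processors above and
# flagutil's conventions.)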
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider the "pure" python implementation to be "fast" because
168 168 # people using pure python don't really have performance considerations
169 169 # (and a wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @interfaceutil.implementer(repository.irevisiondelta)
176 176 @attr.s(slots=True)
177 177 class revlogrevisiondelta:
178 178 node = attr.ib()
179 179 p1node = attr.ib()
180 180 p2node = attr.ib()
181 181 basenode = attr.ib()
182 182 flags = attr.ib()
183 183 baserevisionsize = attr.ib()
184 184 revision = attr.ib()
185 185 delta = attr.ib()
186 186 sidedata = attr.ib()
187 187 protocol_flags = attr.ib()
188 188 linknode = attr.ib(default=None)
189 189
190 190
191 191 @interfaceutil.implementer(repository.iverifyproblem)
192 192 @attr.s(frozen=True)
193 193 class revlogproblem:
194 194 warning = attr.ib(default=None)
195 195 error = attr.ib(default=None)
196 196 node = attr.ib(default=None)
197 197
198 198
199 199 def parse_index_v1(data, inline):
200 200 # call the C implementation to parse the index data
201 201 index, cache = parsers.parse_index2(data, inline)
202 202 return index, cache
203 203
204 204
205 205 def parse_index_v2(data, inline):
206 206 # call the C implementation to parse the index data
207 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 208 return index, cache
209 209
210 210
211 211 def parse_index_cl_v2(data, inline):
212 212 # call the C implementation to parse the index data
213 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 214 return index, cache
215 215
216 216
217 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
218 218
219 219 def parse_index_v1_nodemap(data, inline):
220 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 221 return index, cache
222 222
223 223
224 224 else:
225 225 parse_index_v1_nodemap = None
226 226
227 227
228 228 def parse_index_v1_mixed(data, inline):
229 229 index, cache = parse_index_v1(data, inline)
230 230 return rustrevlog.MixedIndex(index), cache
231 231
232 232
233 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 234 # signed integer)
235 235 _maxentrysize = 0x7FFFFFFF
236 236
237 237 FILE_TOO_SHORT_MSG = _(
238 238 b'cannot read from revlog %s;'
239 239 b' expected %d bytes from offset %d, data size is %d'
240 240 )
241 241
242 242 hexdigits = b'0123456789abcdefABCDEF'
243 243
244 244
245 245 class revlog:
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
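# A worked example of the delta-run bound described above: if a file's
# full text is ~100 kB, deltas keep accumulating on its chain only until
# the chain's total data reaches roughly 200 kB (2x the original), after
# which a fresh full text is stored. Reconstructing any revision thus
# reads an amount of data proportional to that revision's size.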
290 290
291 291 _flagserrorclass = error.RevlogError
292 292
293 293 @staticmethod
294 294 def is_inline_index(header_bytes):
295 if len(header_bytes) == 0:
296 return True
297
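# (The early return above is the fix this changeset is about: an empty
# revlog has a zero-length index file, so there is no 4-byte header to
# unpack. Treating it as inline matches the default for new, empty
# revlogs and avoids crashing when such a repository is stream-cloned.)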
295 298 header = INDEX_HEADER.unpack(header_bytes)[0]
296 299
297 300 _format_flags = header & ~0xFFFF
298 301 _format_version = header & 0xFFFF
299 302
300 303 features = FEATURES_BY_VERSION[_format_version]
301 304 return features[b'inline'](_format_flags)
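# For intuition, a hypothetical call using the v1 constants imported
# above: INDEX_HEADER is a 4-byte header whose low 16 bits carry the
# version and whose remaining bits carry flags, so
#   revlog.is_inline_index(INDEX_HEADER.pack(REVLOGV1 | FLAG_INLINE_DATA))
# would be expected to return True, while a bare REVLOGV1 header would
# not.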
302 305
303 306 def __init__(
304 307 self,
305 308 opener,
306 309 target,
307 310 radix,
308 311 postfix=None, # only exist for `tmpcensored` now
309 312 checkambig=False,
310 313 mmaplargeindex=False,
311 314 censorable=False,
312 315 upperboundcomp=None,
313 316 persistentnodemap=False,
314 317 concurrencychecker=None,
315 318 trypending=False,
316 319 try_split=False,
317 320 canonical_parent_order=True,
318 321 ):
319 322 """
320 323 create a revlog object
321 324
322 325 opener is a function that abstracts the file opening operation
323 326 and can be used to implement COW semantics or the like.
324 327
325 328 `target`: a (KIND, ID) tuple that identifies the content stored in
326 329 this revlog. It helps the rest of the code to understand what the revlog
327 330 is about without having to resort to heuristics and index filename
328 331 analysis. Note that this must be reliably set by normal code, but
329 332 test, debug, or performance measurement code might not set this to an
330 333 accurate value.
331 334 """
332 335 self.upperboundcomp = upperboundcomp
333 336
334 337 self.radix = radix
335 338
336 339 self._docket_file = None
337 340 self._indexfile = None
338 341 self._datafile = None
339 342 self._sidedatafile = None
340 343 self._nodemap_file = None
341 344 self.postfix = postfix
342 345 self._trypending = trypending
343 346 self._try_split = try_split
344 347 self.opener = opener
345 348 if persistentnodemap:
346 349 self._nodemap_file = nodemaputil.get_nodemap_file(self)
347 350
348 351 assert target[0] in ALL_KINDS
349 352 assert len(target) == 2
350 353 self.target = target
351 354 # When True, indexfile is opened with checkambig=True at writing, to
352 355 # avoid file stat ambiguity.
353 356 self._checkambig = checkambig
354 357 self._mmaplargeindex = mmaplargeindex
355 358 self._censorable = censorable
356 359 # 3-tuple of (node, rev, text) for a raw revision.
357 360 self._revisioncache = None
358 361 # Maps rev to chain base rev.
359 362 self._chainbasecache = util.lrucachedict(100)
360 363 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 364 self._chunkcache = (0, b'')
362 365 # How much data to read and cache into the raw revlog data cache.
363 366 self._chunkcachesize = 65536
364 367 self._maxchainlen = None
365 368 self._deltabothparents = True
366 369 self._candidate_group_chunk_size = 0
367 370 self._debug_delta = False
368 371 self.index = None
369 372 self._docket = None
370 373 self._nodemap_docket = None
371 374 # Mapping of partial identifiers to full nodes.
372 375 self._pcache = {}
373 376 # Mapping of revision integer to full node.
374 377 self._compengine = b'zlib'
375 378 self._compengineopts = {}
376 379 self._maxdeltachainspan = -1
377 380 self._withsparseread = False
378 381 self._sparserevlog = False
379 382 self.hassidedata = False
380 383 self._srdensitythreshold = 0.50
381 384 self._srmingapsize = 262144
382 385
383 386 # other optional features
384 387
385 388 # might remove rank configuration once the computation has no impact
386 389 self._compute_rank = False
387 390
388 391 # Make copy of flag processors so each revlog instance can support
389 392 # custom flags.
390 393 self._flagprocessors = dict(flagutil.flagprocessors)
391 394
392 395 # 3-tuple of file handles being used for active writing.
393 396 self._writinghandles = None
394 397 # prevent nesting of addgroup
395 398 self._adding_group = None
396 399
397 400 self._loadindex()
398 401
399 402 self._concurrencychecker = concurrencychecker
400 403
401 404 # parent order is supposed to be semantically irrelevant, so we
402 405 # normally resort parents to ensure that the first parent is non-null,
403 406 # if there is a non-null parent at all.
404 407 # filelog abuses the parent order as flag to mark some instances of
405 408 # meta-encoded files, so allow it to disable this behavior.
406 409 self.canonical_parent_order = canonical_parent_order
407 410
408 411 def _init_opts(self):
409 412 """process options (from above/config) to setup associated default revlog mode
410 413
411 414 These values might be affected when actually reading on disk information.
412 415
413 416 The relevant values are returned for use in _loadindex().
414 417
415 418 * newversionflags:
416 419 version header to use if we need to create a new revlog
417 420
418 421 * mmapindexthreshold:
419 422 minimal index size at which to start using mmap
420 423
421 424 * force_nodemap:
422 425 force the usage of a "development" version of the nodemap code
423 426 """
424 427 mmapindexthreshold = None
425 428 opts = self.opener.options
426 429
427 430 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
428 431 new_header = CHANGELOGV2
429 432 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
430 433 elif b'revlogv2' in opts:
431 434 new_header = REVLOGV2
432 435 elif b'revlogv1' in opts:
433 436 new_header = REVLOGV1 | FLAG_INLINE_DATA
434 437 if b'generaldelta' in opts:
435 438 new_header |= FLAG_GENERALDELTA
436 439 elif b'revlogv0' in self.opener.options:
437 440 new_header = REVLOGV0
438 441 else:
439 442 new_header = REVLOG_DEFAULT_VERSION
440 443
441 444 if b'chunkcachesize' in opts:
442 445 self._chunkcachesize = opts[b'chunkcachesize']
443 446 if b'maxchainlen' in opts:
444 447 self._maxchainlen = opts[b'maxchainlen']
445 448 if b'deltabothparents' in opts:
446 449 self._deltabothparents = opts[b'deltabothparents']
447 450 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
448 451 if dps_cgds:
449 452 self._candidate_group_chunk_size = dps_cgds
450 453 self._lazydelta = bool(opts.get(b'lazydelta', True))
451 454 self._lazydeltabase = False
452 455 if self._lazydelta:
453 456 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
454 457 if b'debug-delta' in opts:
455 458 self._debug_delta = opts[b'debug-delta']
456 459 if b'compengine' in opts:
457 460 self._compengine = opts[b'compengine']
458 461 if b'zlib.level' in opts:
459 462 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
460 463 if b'zstd.level' in opts:
461 464 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
462 465 if b'maxdeltachainspan' in opts:
463 466 self._maxdeltachainspan = opts[b'maxdeltachainspan']
464 467 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
465 468 mmapindexthreshold = opts[b'mmapindexthreshold']
466 469 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
467 470 withsparseread = bool(opts.get(b'with-sparse-read', False))
468 471 # sparse-revlog forces sparse-read
469 472 self._withsparseread = self._sparserevlog or withsparseread
470 473 if b'sparse-read-density-threshold' in opts:
471 474 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
472 475 if b'sparse-read-min-gap-size' in opts:
473 476 self._srmingapsize = opts[b'sparse-read-min-gap-size']
474 477 if opts.get(b'enableellipsis'):
475 478 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
476 479
477 480 # revlog v0 doesn't have flag processors
478 481 for flag, processor in opts.get(b'flagprocessors', {}).items():
479 482 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
480 483
481 484 if self._chunkcachesize <= 0:
482 485 raise error.RevlogError(
483 486 _(b'revlog chunk cache size %r is not greater than 0')
484 487 % self._chunkcachesize
485 488 )
486 489 elif self._chunkcachesize & (self._chunkcachesize - 1):
487 490 raise error.RevlogError(
488 491 _(b'revlog chunk cache size %r is not a power of 2')
489 492 % self._chunkcachesize
490 493 )
491 494 force_nodemap = opts.get(b'devel-force-nodemap', False)
492 495 return new_header, mmapindexthreshold, force_nodemap
493 496
494 497 def _get_data(self, filepath, mmap_threshold, size=None):
495 498 """return a file content with or without mmap
496 499
497 500 If the file is missing return the empty string"""
498 501 try:
499 502 with self.opener(filepath) as fp:
500 503 if mmap_threshold is not None:
501 504 file_size = self.opener.fstat(fp).st_size
502 505 if file_size >= mmap_threshold:
503 506 if size is not None:
504 507 # avoid potential mmap crash
505 508 size = min(file_size, size)
506 509 # TODO: should .close() to release resources without
507 510 # relying on Python GC
508 511 if size is None:
509 512 return util.buffer(util.mmapread(fp))
510 513 else:
511 514 return util.buffer(util.mmapread(fp, size))
512 515 if size is None:
513 516 return fp.read()
514 517 else:
515 518 return fp.read(size)
516 519 except FileNotFoundError:
517 520 return b''
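# Note on the mmap path above: when `size` is given (docket-based revlogs
# pass the expected data size), the mapping is first clamped to the real
# file size, since mmapping past the end of a truncated file could crash;
# the caller then notices the short read and reports corruption instead.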
518 521
519 522 def get_streams(self, max_linkrev, force_inline=False):
520 523 n = len(self)
521 524 index = self.index
522 525 while n > 0:
523 526 linkrev = index[n - 1][4]
524 527 if linkrev < max_linkrev:
525 528 break
526 529 # note: this loop will rarely go through multiple iterations, since
527 530 # it only traverses commits created during the current streaming
528 531 # pull operation.
529 532 #
530 533 # If this becomes a problem, using a binary search should cap the
531 534 # runtime of this.
532 535 n = n - 1
533 536 if n == 0:
534 537 # no data to send
535 538 return []
536 539 index_size = n * index.entry_size
537 540 data_size = self.end(n - 1)
538 541
539 542 # XXX we might have been split (or stripped) since the object
540 543 # initialization. We need to close this race too, perhaps by having a
541 544 # way to pre-open the files we feed to the revlog and never closing
542 545 # them before we are done streaming.
543 546
544 547 if self._inline:
545 548
546 549 def get_stream():
547 550 with self._indexfp() as fp:
548 551 yield None
549 552 size = index_size + data_size
550 553 if size <= 65536:
551 554 yield fp.read(size)
552 555 else:
553 556 yield from util.filechunkiter(fp, limit=size)
554 557
555 558 inline_stream = get_stream()
556 559 next(inline_stream)
557 560 return [
558 561 (self._indexfile, inline_stream, index_size + data_size),
559 562 ]
560 563 elif force_inline:
561 564
562 565 def get_stream():
563 566 with self._datafp() as fp_d:
564 567 yield None
565 568
566 569 for rev in range(n):
567 570 idx = self.index.entry_binary(rev)
568 571 if rev == 0 and self._docket is None:
569 572 # re-inject the inline flag
570 573 header = self._format_flags
571 574 header |= self._format_version
572 575 header |= FLAG_INLINE_DATA
573 576 header = self.index.pack_header(header)
574 577 idx = header + idx
575 578 yield idx
576 579 yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]
577 580
578 581 inline_stream = get_stream()
579 582 next(inline_stream)
580 583 return [
581 584 (self._indexfile, inline_stream, index_size + data_size),
582 585 ]
583 586 else:
584 587
585 588 def get_index_stream():
586 589 with self._indexfp() as fp:
587 590 yield None
588 591 if index_size <= 65536:
589 592 yield fp.read(index_size)
590 593 else:
591 594 yield from util.filechunkiter(fp, limit=index_size)
592 595
593 596 def get_data_stream():
594 597 with self._datafp() as fp:
595 598 yield None
596 599 if data_size <= 65536:
597 600 yield fp.read(data_size)
598 601 else:
599 602 yield from util.filechunkiter(fp, limit=data_size)
600 603
601 604 index_stream = get_index_stream()
602 605 next(index_stream)
603 606 data_stream = get_data_stream()
604 607 next(data_stream)
605 608 return [
606 609 (self._datafile, data_stream, data_size),
607 610 (self._indexfile, index_stream, index_size),
608 611 ]
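# In all three branches above, the generator is primed with next() before
# being returned: each generator opens its file and yields None first, so
# the file handles are guaranteed to be open by the time the caller
# consumes the streams, narrowing the split/strip race noted above.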
609 612
610 613 def _loadindex(self, docket=None):
611 614
612 615 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
613 616
614 617 if self.postfix is not None:
615 618 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
616 619 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
617 620 entry_point = b'%s.i.a' % self.radix
618 621 elif self._try_split and self.opener.exists(self._split_index_file):
619 622 entry_point = self._split_index_file
620 623 else:
621 624 entry_point = b'%s.i' % self.radix
622 625
623 626 if docket is not None:
624 627 self._docket = docket
625 628 self._docket_file = entry_point
626 629 else:
627 630 self._initempty = True
628 631 entry_data = self._get_data(entry_point, mmapindexthreshold)
629 632 if len(entry_data) > 0:
630 633 header = INDEX_HEADER.unpack(entry_data[:4])[0]
631 634 self._initempty = False
632 635 else:
633 636 header = new_header
634 637
635 638 self._format_flags = header & ~0xFFFF
636 639 self._format_version = header & 0xFFFF
637 640
638 641 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
639 642 if supported_flags is None:
640 643 msg = _(b'unknown version (%d) in revlog %s')
641 644 msg %= (self._format_version, self.display_id)
642 645 raise error.RevlogError(msg)
643 646 elif self._format_flags & ~supported_flags:
644 647 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
645 648 display_flag = self._format_flags >> 16
646 649 msg %= (display_flag, self._format_version, self.display_id)
647 650 raise error.RevlogError(msg)
648 651
649 652 features = FEATURES_BY_VERSION[self._format_version]
650 653 self._inline = features[b'inline'](self._format_flags)
651 654 self._generaldelta = features[b'generaldelta'](self._format_flags)
652 655 self.hassidedata = features[b'sidedata']
653 656
654 657 if not features[b'docket']:
655 658 self._indexfile = entry_point
656 659 index_data = entry_data
657 660 else:
658 661 self._docket_file = entry_point
659 662 if self._initempty:
660 663 self._docket = docketutil.default_docket(self, header)
661 664 else:
662 665 self._docket = docketutil.parse_docket(
663 666 self, entry_data, use_pending=self._trypending
664 667 )
665 668
666 669 if self._docket is not None:
667 670 self._indexfile = self._docket.index_filepath()
668 671 index_data = b''
669 672 index_size = self._docket.index_end
670 673 if index_size > 0:
671 674 index_data = self._get_data(
672 675 self._indexfile, mmapindexthreshold, size=index_size
673 676 )
674 677 if len(index_data) < index_size:
675 678 msg = _(b'too few index data for %s: got %d, expected %d')
676 679 msg %= (self.display_id, len(index_data), index_size)
677 680 raise error.RevlogError(msg)
678 681
679 682 self._inline = False
680 683 # generaldelta implied by version 2 revlogs.
681 684 self._generaldelta = True
682 685 # the logic for persistent nodemap will be dealt with within the
683 686 # main docket, so disable it for now.
684 687 self._nodemap_file = None
685 688
686 689 if self._docket is not None:
687 690 self._datafile = self._docket.data_filepath()
688 691 self._sidedatafile = self._docket.sidedata_filepath()
689 692 elif self.postfix is None:
690 693 self._datafile = b'%s.d' % self.radix
691 694 else:
692 695 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
693 696
694 697 self.nodeconstants = sha1nodeconstants
695 698 self.nullid = self.nodeconstants.nullid
696 699
697 700 # sparse-revlog can't be on without general-delta (issue6056)
698 701 if not self._generaldelta:
699 702 self._sparserevlog = False
700 703
701 704 self._storedeltachains = True
702 705
703 706 devel_nodemap = (
704 707 self._nodemap_file
705 708 and force_nodemap
706 709 and parse_index_v1_nodemap is not None
707 710 )
708 711
709 712 use_rust_index = False
710 713 if rustrevlog is not None:
711 714 if self._nodemap_file is not None:
712 715 use_rust_index = True
713 716 else:
714 717 use_rust_index = self.opener.options.get(b'rust.index')
715 718
716 719 self._parse_index = parse_index_v1
717 720 if self._format_version == REVLOGV0:
718 721 self._parse_index = revlogv0.parse_index_v0
719 722 elif self._format_version == REVLOGV2:
720 723 self._parse_index = parse_index_v2
721 724 elif self._format_version == CHANGELOGV2:
722 725 self._parse_index = parse_index_cl_v2
723 726 elif devel_nodemap:
724 727 self._parse_index = parse_index_v1_nodemap
725 728 elif use_rust_index:
726 729 self._parse_index = parse_index_v1_mixed
727 730 try:
728 731 d = self._parse_index(index_data, self._inline)
729 732 index, chunkcache = d
730 733 use_nodemap = (
731 734 not self._inline
732 735 and self._nodemap_file is not None
733 736 and util.safehasattr(index, 'update_nodemap_data')
734 737 )
735 738 if use_nodemap:
736 739 nodemap_data = nodemaputil.persisted_data(self)
737 740 if nodemap_data is not None:
738 741 docket = nodemap_data[0]
739 742 if (
740 743 len(d[0]) > docket.tip_rev
741 744 and d[0][docket.tip_rev][7] == docket.tip_node
742 745 ):
743 746 # no changelog tampering
744 747 self._nodemap_docket = docket
745 748 index.update_nodemap_data(*nodemap_data)
746 749 except (ValueError, IndexError):
747 750 raise error.RevlogError(
748 751 _(b"index %s is corrupted") % self.display_id
749 752 )
750 753 self.index = index
751 754 self._segmentfile = randomaccessfile.randomaccessfile(
752 755 self.opener,
753 756 (self._indexfile if self._inline else self._datafile),
754 757 self._chunkcachesize,
755 758 chunkcache,
756 759 )
757 760 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
758 761 self.opener,
759 762 self._sidedatafile,
760 763 self._chunkcachesize,
761 764 )
762 765 # revnum -> (chain-length, sum-delta-length)
763 766 self._chaininfocache = util.lrucachedict(500)
764 767 # revlog header -> revlog compressor
765 768 self._decompressors = {}
766 769
767 770 def get_revlog(self):
768 771 """simple function to mirror API of other not-really-revlog API"""
769 772 return self
770 773
771 774 @util.propertycache
772 775 def revlog_kind(self):
773 776 return self.target[0]
774 777
775 778 @util.propertycache
776 779 def display_id(self):
777 780 """The public facing "ID" of the revlog that we use in message"""
778 781 if self.revlog_kind == KIND_FILELOG:
779 782 # Reference the file without the "data/" prefix, so it is familiar
780 783 # to the user.
781 784 return self.target[1]
782 785 else:
783 786 return self.radix
784 787
785 788 def _get_decompressor(self, t):
786 789 try:
787 790 compressor = self._decompressors[t]
788 791 except KeyError:
789 792 try:
790 793 engine = util.compengines.forrevlogheader(t)
791 794 compressor = engine.revlogcompressor(self._compengineopts)
792 795 self._decompressors[t] = compressor
793 796 except KeyError:
794 797 raise error.RevlogError(
795 798 _(b'unknown compression type %s') % binascii.hexlify(t)
796 799 )
797 800 return compressor
798 801
799 802 @util.propertycache
800 803 def _compressor(self):
801 804 engine = util.compengines[self._compengine]
802 805 return engine.revlogcompressor(self._compengineopts)
803 806
804 807 @util.propertycache
805 808 def _decompressor(self):
806 809 """the default decompressor"""
807 810 if self._docket is None:
808 811 return None
809 812 t = self._docket.default_compression_header
810 813 c = self._get_decompressor(t)
811 814 return c.decompress
812 815
813 816 def _indexfp(self):
814 817 """file object for the revlog's index file"""
815 818 return self.opener(self._indexfile, mode=b"r")
816 819
817 820 def __index_write_fp(self):
818 821 # You should not use this directly; use `_writing` instead
819 822 try:
820 823 f = self.opener(
821 824 self._indexfile, mode=b"r+", checkambig=self._checkambig
822 825 )
823 826 if self._docket is None:
824 827 f.seek(0, os.SEEK_END)
825 828 else:
826 829 f.seek(self._docket.index_end, os.SEEK_SET)
827 830 return f
828 831 except FileNotFoundError:
829 832 return self.opener(
830 833 self._indexfile, mode=b"w+", checkambig=self._checkambig
831 834 )
832 835
833 836 def __index_new_fp(self):
834 837 # You should not use this unless you are upgrading from inline revlog
835 838 return self.opener(
836 839 self._indexfile,
837 840 mode=b"w",
838 841 checkambig=self._checkambig,
839 842 atomictemp=True,
840 843 )
841 844
842 845 def _datafp(self, mode=b'r'):
843 846 """file object for the revlog's data file"""
844 847 return self.opener(self._datafile, mode=mode)
845 848
846 849 @contextlib.contextmanager
847 850 def _sidedatareadfp(self):
848 851 """file object suitable to read sidedata"""
849 852 if self._writinghandles:
850 853 yield self._writinghandles[2]
851 854 else:
852 855 with self.opener(self._sidedatafile) as fp:
853 856 yield fp
854 857
855 858 def tiprev(self):
856 859 return len(self.index) - 1
857 860
858 861 def tip(self):
859 862 return self.node(self.tiprev())
860 863
861 864 def __contains__(self, rev):
862 865 return 0 <= rev < len(self)
863 866
864 867 def __len__(self):
865 868 return len(self.index)
866 869
867 870 def __iter__(self):
868 871 return iter(range(len(self)))
869 872
870 873 def revs(self, start=0, stop=None):
871 874 """iterate over all rev in this revlog (from start to stop)"""
872 875 return storageutil.iterrevs(len(self), start=start, stop=stop)
873 876
874 877 def hasnode(self, node):
875 878 try:
876 879 self.rev(node)
877 880 return True
878 881 except KeyError:
879 882 return False
880 883
881 884 def candelta(self, baserev, rev):
882 885 """whether two revisions (baserev, rev) can be delta-ed or not"""
883 886 # Disable delta if either rev requires a content-changing flag
884 887 # processor (ex. LFS). This is because such flag processor can alter
885 888 # the rawtext content that the delta will be based on, and two clients
886 889 # could have a same revlog node with different flags (i.e. different
887 890 # rawtext contents) and the delta could be incompatible.
888 891 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
889 892 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
890 893 ):
891 894 return False
892 895 return True
893 896
894 897 def update_caches(self, transaction):
895 898 if self._nodemap_file is not None:
896 899 if transaction is None:
897 900 nodemaputil.update_persistent_nodemap(self)
898 901 else:
899 902 nodemaputil.setup_persistent_nodemap(transaction, self)
900 903
901 904 def clearcaches(self):
902 905 self._revisioncache = None
903 906 self._chainbasecache.clear()
904 907 self._segmentfile.clear_cache()
905 908 self._segmentfile_sidedata.clear_cache()
906 909 self._pcache = {}
907 910 self._nodemap_docket = None
908 911 self.index.clearcaches()
909 912 # The python code is the one responsible for validating the docket, so
910 913 # we end up having to refresh it here.
911 914 use_nodemap = (
912 915 not self._inline
913 916 and self._nodemap_file is not None
914 917 and util.safehasattr(self.index, 'update_nodemap_data')
915 918 )
916 919 if use_nodemap:
917 920 nodemap_data = nodemaputil.persisted_data(self)
918 921 if nodemap_data is not None:
919 922 self._nodemap_docket = nodemap_data[0]
920 923 self.index.update_nodemap_data(*nodemap_data)
921 924
922 925 def rev(self, node):
923 926 try:
924 927 return self.index.rev(node)
925 928 except TypeError:
926 929 raise
927 930 except error.RevlogError:
928 931 # parsers.c radix tree lookup failed
929 932 if (
930 933 node == self.nodeconstants.wdirid
931 934 or node in self.nodeconstants.wdirfilenodeids
932 935 ):
933 936 raise error.WdirUnsupported
934 937 raise error.LookupError(node, self.display_id, _(b'no node'))
935 938
936 939 # Accessors for index entries.
937 940
938 941 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
939 942 # are flags.
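# For reference, the full entry layout as inferred from the accessors
# in this class:
#   e[0] = offset << 16 | flags     e[1] = compressed length
#   e[2] = uncompressed length      e[3] = delta-base rev
#   e[4] = linkrev                  e[5], e[6] = parent revs
#   e[7] = node id                  e[8] = sidedata offset
#   e[9] = sidedata length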
940 943 def start(self, rev):
941 944 return int(self.index[rev][0] >> 16)
942 945
943 946 def sidedata_cut_off(self, rev):
944 947 sd_cut_off = self.index[rev][8]
945 948 if sd_cut_off != 0:
946 949 return sd_cut_off
947 950 # This is some annoying dance, because entries without sidedata
948 951 # currently use 0 as their offset (instead of previous-offset +
949 952 # previous-size).
950 953 #
951 954 # We should reconsider this "no sidedata -> 0 sidedata_offset" policy.
952 955 # In the meantime, we need this.
953 956 while 0 <= rev:
954 957 e = self.index[rev]
955 958 if e[9] != 0:
956 959 return e[8] + e[9]
957 960 rev -= 1
958 961 return 0
959 962
960 963 def flags(self, rev):
961 964 return self.index[rev][0] & 0xFFFF
962 965
963 966 def length(self, rev):
964 967 return self.index[rev][1]
965 968
966 969 def sidedata_length(self, rev):
967 970 if not self.hassidedata:
968 971 return 0
969 972 return self.index[rev][9]
970 973
971 974 def rawsize(self, rev):
972 975 """return the length of the uncompressed text for a given revision"""
973 976 l = self.index[rev][2]
974 977 if l >= 0:
975 978 return l
976 979
977 980 t = self.rawdata(rev)
978 981 return len(t)
979 982
980 983 def size(self, rev):
981 984 """length of non-raw text (processed by a "read" flag processor)"""
982 985 # fast path: if no "read" flag processor could change the content,
983 986 # size is rawsize. note: ELLIPSIS is known to not change the content.
984 987 flags = self.flags(rev)
985 988 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
986 989 return self.rawsize(rev)
987 990
988 991 return len(self.revision(rev))
989 992
990 993 def fast_rank(self, rev):
991 994 """Return the rank of a revision if already known, or None otherwise.
992 995
993 996 The rank of a revision is the size of the sub-graph it defines as a
994 997 head. Equivalently, the rank of a revision `r` is the size of the set
995 998 `ancestors(r)`, `r` included.
996 999
997 1000 This method returns the rank retrieved from the revlog in constant
998 1001 time. It makes no attempt at computing unknown values for versions of
999 1002 the revlog which do not persist the rank.
1000 1003 """
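# Example: in a linear history 0 <- 1 <- 2, the rank of rev 2 is 3,
# since ancestors(2) = {0, 1, 2} (a revision is its own ancestor).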
1001 1004 rank = self.index[rev][ENTRY_RANK]
1002 1005 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1003 1006 return None
1004 1007 if rev == nullrev:
1005 1008 return 0 # convention
1006 1009 return rank
1007 1010
1008 1011 def chainbase(self, rev):
1009 1012 base = self._chainbasecache.get(rev)
1010 1013 if base is not None:
1011 1014 return base
1012 1015
1013 1016 index = self.index
1014 1017 iterrev = rev
1015 1018 base = index[iterrev][3]
1016 1019 while base != iterrev:
1017 1020 iterrev = base
1018 1021 base = index[iterrev][3]
1019 1022
1020 1023 self._chainbasecache[rev] = base
1021 1024 return base
1022 1025
1023 1026 def linkrev(self, rev):
1024 1027 return self.index[rev][4]
1025 1028
1026 1029 def parentrevs(self, rev):
1027 1030 try:
1028 1031 entry = self.index[rev]
1029 1032 except IndexError:
1030 1033 if rev == wdirrev:
1031 1034 raise error.WdirUnsupported
1032 1035 raise
1033 1036
1034 1037 if self.canonical_parent_order and entry[5] == nullrev:
1035 1038 return entry[6], entry[5]
1036 1039 else:
1037 1040 return entry[5], entry[6]
1038 1041
1039 1042 # fast parentrevs(rev) where rev isn't filtered
1040 1043 _uncheckedparentrevs = parentrevs
1041 1044
1042 1045 def node(self, rev):
1043 1046 try:
1044 1047 return self.index[rev][7]
1045 1048 except IndexError:
1046 1049 if rev == wdirrev:
1047 1050 raise error.WdirUnsupported
1048 1051 raise
1049 1052
1050 1053 # Derived from index values.
1051 1054
1052 1055 def end(self, rev):
1053 1056 return self.start(rev) + self.length(rev)
1054 1057
1055 1058 def parents(self, node):
1056 1059 i = self.index
1057 1060 d = i[self.rev(node)]
1058 1061 # inline node() to avoid function call overhead
1059 1062 if self.canonical_parent_order and d[5] == self.nullid:
1060 1063 return i[d[6]][7], i[d[5]][7]
1061 1064 else:
1062 1065 return i[d[5]][7], i[d[6]][7]
1063 1066
1064 1067 def chainlen(self, rev):
1065 1068 return self._chaininfo(rev)[0]
1066 1069
1067 1070 def _chaininfo(self, rev):
1068 1071 chaininfocache = self._chaininfocache
1069 1072 if rev in chaininfocache:
1070 1073 return chaininfocache[rev]
1071 1074 index = self.index
1072 1075 generaldelta = self._generaldelta
1073 1076 iterrev = rev
1074 1077 e = index[iterrev]
1075 1078 clen = 0
1076 1079 compresseddeltalen = 0
1077 1080 while iterrev != e[3]:
1078 1081 clen += 1
1079 1082 compresseddeltalen += e[1]
1080 1083 if generaldelta:
1081 1084 iterrev = e[3]
1082 1085 else:
1083 1086 iterrev -= 1
1084 1087 if iterrev in chaininfocache:
1085 1088 t = chaininfocache[iterrev]
1086 1089 clen += t[0]
1087 1090 compresseddeltalen += t[1]
1088 1091 break
1089 1092 e = index[iterrev]
1090 1093 else:
1091 1094 # Add text length of base since decompressing that also takes
1092 1095 # work. For cache hits the length is already included.
1093 1096 compresseddeltalen += e[1]
1094 1097 r = (clen, compresseddeltalen)
1095 1098 chaininfocache[rev] = r
1096 1099 return r
1097 1100
1098 1101 def _deltachain(self, rev, stoprev=None):
1099 1102 """Obtain the delta chain for a revision.
1100 1103
1101 1104 ``stoprev`` specifies a revision to stop at. If not specified, we
1102 1105 stop at the base of the chain.
1103 1106
1104 1107 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1105 1108 revs in ascending order and ``stopped`` is a bool indicating whether
1106 1109 ``stoprev`` was hit.
1107 1110 """
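# Example (general delta): if rev 5 stores a delta against rev 3, and
# rev 3 is a full snapshot (its delta base is itself), then
# _deltachain(5) returns ([3, 5], False): the chain in ascending order,
# and no early stop.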
1108 1111 # Try C implementation.
1109 1112 try:
1110 1113 return self.index.deltachain(rev, stoprev, self._generaldelta)
1111 1114 except AttributeError:
1112 1115 pass
1113 1116
1114 1117 chain = []
1115 1118
1116 1119 # Alias to prevent attribute lookup in tight loop.
1117 1120 index = self.index
1118 1121 generaldelta = self._generaldelta
1119 1122
1120 1123 iterrev = rev
1121 1124 e = index[iterrev]
1122 1125 while iterrev != e[3] and iterrev != stoprev:
1123 1126 chain.append(iterrev)
1124 1127 if generaldelta:
1125 1128 iterrev = e[3]
1126 1129 else:
1127 1130 iterrev -= 1
1128 1131 e = index[iterrev]
1129 1132
1130 1133 if iterrev == stoprev:
1131 1134 stopped = True
1132 1135 else:
1133 1136 chain.append(iterrev)
1134 1137 stopped = False
1135 1138
1136 1139 chain.reverse()
1137 1140 return chain, stopped
1138 1141
1139 1142 def ancestors(self, revs, stoprev=0, inclusive=False):
1140 1143 """Generate the ancestors of 'revs' in reverse revision order.
1141 1144 Does not generate revs lower than stoprev.
1142 1145
1143 1146 See the documentation for ancestor.lazyancestors for more details."""
1144 1147
1145 1148 # first, make sure start revisions aren't filtered
1146 1149 revs = list(revs)
1147 1150 checkrev = self.node
1148 1151 for r in revs:
1149 1152 checkrev(r)
1150 1153 # and we're sure ancestors aren't filtered as well
1151 1154
1152 1155 if rustancestor is not None and self.index.rust_ext_compat:
1153 1156 lazyancestors = rustancestor.LazyAncestors
1154 1157 arg = self.index
1155 1158 else:
1156 1159 lazyancestors = ancestor.lazyancestors
1157 1160 arg = self._uncheckedparentrevs
1158 1161 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1159 1162
1160 1163 def descendants(self, revs):
1161 1164 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1162 1165
1163 1166 def findcommonmissing(self, common=None, heads=None):
1164 1167 """Return a tuple of the ancestors of common and the ancestors of heads
1165 1168 that are not ancestors of common. In revset terminology, we return the
1166 1169 tuple:
1167 1170
1168 1171 ::common, (::heads) - (::common)
1169 1172
1170 1173 The list is sorted by revision number, meaning it is
1171 1174 topologically sorted.
1172 1175
1173 1176 'heads' and 'common' are both lists of node IDs. If heads is
1174 1177 not supplied, uses all of the revlog's heads. If common is not
1175 1178 supplied, uses nullid."""
1176 1179 if common is None:
1177 1180 common = [self.nullid]
1178 1181 if heads is None:
1179 1182 heads = self.heads()
1180 1183
1181 1184 common = [self.rev(n) for n in common]
1182 1185 heads = [self.rev(n) for n in heads]
1183 1186
1184 1187 # we want the ancestors, but inclusive
1185 1188 class lazyset:
1186 1189 def __init__(self, lazyvalues):
1187 1190 self.addedvalues = set()
1188 1191 self.lazyvalues = lazyvalues
1189 1192
1190 1193 def __contains__(self, value):
1191 1194 return value in self.addedvalues or value in self.lazyvalues
1192 1195
1193 1196 def __iter__(self):
1194 1197 added = self.addedvalues
1195 1198 for r in added:
1196 1199 yield r
1197 1200 for r in self.lazyvalues:
1198 1201 if not r in added:
1199 1202 yield r
1200 1203
1201 1204 def add(self, value):
1202 1205 self.addedvalues.add(value)
1203 1206
1204 1207 def update(self, values):
1205 1208 self.addedvalues.update(values)
1206 1209
1207 1210 has = lazyset(self.ancestors(common))
1208 1211 has.add(nullrev)
1209 1212 has.update(common)
1210 1213
1211 1214 # take all ancestors from heads that aren't in has
1212 1215 missing = set()
1213 1216 visit = collections.deque(r for r in heads if r not in has)
1214 1217 while visit:
1215 1218 r = visit.popleft()
1216 1219 if r in missing:
1217 1220 continue
1218 1221 else:
1219 1222 missing.add(r)
1220 1223 for p in self.parentrevs(r):
1221 1224 if p not in has:
1222 1225 visit.append(p)
1223 1226 missing = list(missing)
1224 1227 missing.sort()
1225 1228 return has, [self.node(miss) for miss in missing]
1226 1229
1227 1230 def incrementalmissingrevs(self, common=None):
1228 1231 """Return an object that can be used to incrementally compute the
1229 1232 revision numbers of the ancestors of arbitrary sets that are not
1230 1233 ancestors of common. This is an ancestor.incrementalmissingancestors
1231 1234 object.
1232 1235
1233 1236 'common' is a list of revision numbers. If common is not supplied, uses
1234 1237 nullrev.
1235 1238 """
1236 1239 if common is None:
1237 1240 common = [nullrev]
1238 1241
1239 1242 if rustancestor is not None and self.index.rust_ext_compat:
1240 1243 return rustancestor.MissingAncestors(self.index, common)
1241 1244 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1242 1245
1243 1246 def findmissingrevs(self, common=None, heads=None):
1244 1247 """Return the revision numbers of the ancestors of heads that
1245 1248 are not ancestors of common.
1246 1249
1247 1250 More specifically, return a list of revision numbers corresponding to
1248 1251 nodes N such that every N satisfies the following constraints:
1249 1252
1250 1253 1. N is an ancestor of some node in 'heads'
1251 1254 2. N is not an ancestor of any node in 'common'
1252 1255
1253 1256 The list is sorted by revision number, meaning it is
1254 1257 topologically sorted.
1255 1258
1256 1259 'heads' and 'common' are both lists of revision numbers. If heads is
1257 1260 not supplied, uses all of the revlog's heads. If common is not
1258 1261 supplied, uses nullid."""
1259 1262 if common is None:
1260 1263 common = [nullrev]
1261 1264 if heads is None:
1262 1265 heads = self.headrevs()
1263 1266
1264 1267 inc = self.incrementalmissingrevs(common=common)
1265 1268 return inc.missingancestors(heads)
1266 1269
1267 1270 def findmissing(self, common=None, heads=None):
1268 1271 """Return the ancestors of heads that are not ancestors of common.
1269 1272
1270 1273 More specifically, return a list of nodes N such that every N
1271 1274 satisfies the following constraints:
1272 1275
1273 1276 1. N is an ancestor of some node in 'heads'
1274 1277 2. N is not an ancestor of any node in 'common'
1275 1278
1276 1279 The list is sorted by revision number, meaning it is
1277 1280 topologically sorted.
1278 1281
1279 1282 'heads' and 'common' are both lists of node IDs. If heads is
1280 1283 not supplied, uses all of the revlog's heads. If common is not
1281 1284 supplied, uses nullid."""
1282 1285 if common is None:
1283 1286 common = [self.nullid]
1284 1287 if heads is None:
1285 1288 heads = self.heads()
1286 1289
1287 1290 common = [self.rev(n) for n in common]
1288 1291 heads = [self.rev(n) for n in heads]
1289 1292
1290 1293 inc = self.incrementalmissingrevs(common=common)
1291 1294 return [self.node(r) for r in inc.missingancestors(heads)]
1292 1295
1293 1296 def nodesbetween(self, roots=None, heads=None):
1294 1297 """Return a topological path from 'roots' to 'heads'.
1295 1298
1296 1299 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1297 1300 topologically sorted list of all nodes N that satisfy both of
1298 1301 these constraints:
1299 1302
1300 1303 1. N is a descendant of some node in 'roots'
1301 1304 2. N is an ancestor of some node in 'heads'
1302 1305
1303 1306 Every node is considered to be both a descendant and an ancestor
1304 1307 of itself, so every reachable node in 'roots' and 'heads' will be
1305 1308 included in 'nodes'.
1306 1309
1307 1310 'outroots' is the list of reachable nodes in 'roots', i.e., the
1308 1311 subset of 'roots' that is returned in 'nodes'. Likewise,
1309 1312 'outheads' is the subset of 'heads' that is also in 'nodes'.
1310 1313
1311 1314 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1312 1315 unspecified, uses nullid as the only root. If 'heads' is
1313 1316 unspecified, uses list of all of the revlog's heads."""
1314 1317 nonodes = ([], [], [])
1315 1318 if roots is not None:
1316 1319 roots = list(roots)
1317 1320 if not roots:
1318 1321 return nonodes
1319 1322 lowestrev = min([self.rev(n) for n in roots])
1320 1323 else:
1321 1324 roots = [self.nullid] # Everybody's a descendant of nullid
1322 1325 lowestrev = nullrev
1323 1326 if (lowestrev == nullrev) and (heads is None):
1324 1327 # We want _all_ the nodes!
1325 1328 return (
1326 1329 [self.node(r) for r in self],
1327 1330 [self.nullid],
1328 1331 list(self.heads()),
1329 1332 )
1330 1333 if heads is None:
1331 1334 # All nodes are ancestors, so the latest ancestor is the last
1332 1335 # node.
1333 1336 highestrev = len(self) - 1
1334 1337 # Set ancestors to None to signal that every node is an ancestor.
1335 1338 ancestors = None
1336 1339 # Set heads to an empty dictionary for later discovery of heads
1337 1340 heads = {}
1338 1341 else:
1339 1342 heads = list(heads)
1340 1343 if not heads:
1341 1344 return nonodes
1342 1345 ancestors = set()
1343 1346 # Turn heads into a dictionary so we can remove 'fake' heads.
1344 1347 # Also, later we will be using it to filter out the heads we can't
1345 1348 # find from roots.
1346 1349 heads = dict.fromkeys(heads, False)
1347 1350 # Start at the top and keep marking parents until we're done.
1348 1351 nodestotag = set(heads)
1349 1352 # Remember where the top was so we can use it as a limit later.
1350 1353 highestrev = max([self.rev(n) for n in nodestotag])
1351 1354 while nodestotag:
1352 1355 # grab a node to tag
1353 1356 n = nodestotag.pop()
1354 1357 # Never tag nullid
1355 1358 if n == self.nullid:
1356 1359 continue
1357 1360 # A node's revision number represents its place in a
1358 1361 # topologically sorted list of nodes.
1359 1362 r = self.rev(n)
1360 1363 if r >= lowestrev:
1361 1364 if n not in ancestors:
1362 1365 # If we are possibly a descendant of one of the roots
1363 1366 # and we haven't already been marked as an ancestor
1364 1367 ancestors.add(n) # Mark as ancestor
1365 1368 # Add non-nullid parents to list of nodes to tag.
1366 1369 nodestotag.update(
1367 1370 [p for p in self.parents(n) if p != self.nullid]
1368 1371 )
1369 1372 elif n in heads: # We've seen it before, is it a fake head?
1370 1373 # So it is, real heads should not be the ancestors of
1371 1374 # any other heads.
1372 1375 heads.pop(n)
1373 1376 if not ancestors:
1374 1377 return nonodes
1375 1378 # Now that we have our set of ancestors, we want to remove any
1376 1379 # roots that are not ancestors.
1377 1380
1378 1381 # If one of the roots was nullid, everything is included anyway.
1379 1382 if lowestrev > nullrev:
1380 1383 # But, since we weren't, let's recompute the lowest rev to not
1381 1384 # include roots that aren't ancestors.
1382 1385
1383 1386 # Filter out roots that aren't ancestors of heads
1384 1387 roots = [root for root in roots if root in ancestors]
1385 1388 # Recompute the lowest revision
1386 1389 if roots:
1387 1390 lowestrev = min([self.rev(root) for root in roots])
1388 1391 else:
1389 1392 # No more roots? Return empty list
1390 1393 return nonodes
1391 1394 else:
1392 1395 # We are descending from nullid, and don't need to care about
1393 1396 # any other roots.
1394 1397 lowestrev = nullrev
1395 1398 roots = [self.nullid]
1396 1399 # Transform our roots list into a set.
1397 1400 descendants = set(roots)
1398 1401 # Also, keep the original roots so we can filter out roots that aren't
1399 1402 # 'real' roots (i.e. are descended from other roots).
1400 1403 roots = descendants.copy()
1401 1404 # Our topologically sorted list of output nodes.
1402 1405 orderedout = []
1403 1406 # Don't start at nullid since we don't want nullid in our output list,
1404 1407 # and if nullid shows up in descendants, empty parents will look like
1405 1408 # they're descendants.
1406 1409 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1407 1410 n = self.node(r)
1408 1411 isdescendant = False
1409 1412 if lowestrev == nullrev: # Everybody is a descendant of nullid
1410 1413 isdescendant = True
1411 1414 elif n in descendants:
1412 1415 # n is already a descendant
1413 1416 isdescendant = True
1414 1417 # This check only needs to be done here because all the roots
1415 1418 # will start being marked as descendants before the loop.
1416 1419 if n in roots:
1417 1420 # If n was a root, check if it's a 'real' root.
1418 1421 p = tuple(self.parents(n))
1419 1422 # If any of its parents are descendants, it's not a root.
1420 1423 if (p[0] in descendants) or (p[1] in descendants):
1421 1424 roots.remove(n)
1422 1425 else:
1423 1426 p = tuple(self.parents(n))
1424 1427 # A node is a descendant if either of its parents are
1425 1428 # descendants. (We seeded the descendants set with the roots
1426 1429 # up there, remember?)
1427 1430 if (p[0] in descendants) or (p[1] in descendants):
1428 1431 descendants.add(n)
1429 1432 isdescendant = True
1430 1433 if isdescendant and ((ancestors is None) or (n in ancestors)):
1431 1434 # Only include nodes that are both descendants and ancestors.
1432 1435 orderedout.append(n)
1433 1436 if (ancestors is not None) and (n in heads):
1434 1437 # We're trying to figure out which heads are reachable
1435 1438 # from roots.
1436 1439 # Mark this head as having been reached
1437 1440 heads[n] = True
1438 1441 elif ancestors is None:
1439 1442 # Otherwise, we're trying to discover the heads.
1440 1443 # Assume this is a head because if it isn't, the next step
1441 1444 # will eventually remove it.
1442 1445 heads[n] = True
1443 1446 # But, obviously its parents aren't.
1444 1447 for p in self.parents(n):
1445 1448 heads.pop(p, None)
1446 1449 heads = [head for head, flag in heads.items() if flag]
1447 1450 roots = list(roots)
1448 1451 assert orderedout
1449 1452 assert roots
1450 1453 assert heads
1451 1454 return (orderedout, roots, heads)
1452 1455
1453 1456 def headrevs(self, revs=None):
1454 1457 if revs is None:
1455 1458 try:
1456 1459 return self.index.headrevs()
1457 1460 except AttributeError:
1458 1461 return self._headrevs()
1459 1462 if rustdagop is not None and self.index.rust_ext_compat:
1460 1463 return rustdagop.headrevs(self.index, revs)
1461 1464 return dagop.headrevs(revs, self._uncheckedparentrevs)
1462 1465
1463 1466 def computephases(self, roots):
1464 1467 return self.index.computephasesmapsets(roots)
1465 1468
1466 1469 def _headrevs(self):
1467 1470 count = len(self)
1468 1471 if not count:
1469 1472 return [nullrev]
1470 1473 # we won't iterate over filtered revs, so nobody is a head at the start
1471 1474 ishead = [0] * (count + 1)
1472 1475 index = self.index
1473 1476 for r in self:
1474 1477 ishead[r] = 1 # I may be a head
1475 1478 e = index[r]
1476 1479 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1477 1480 return [r for r, val in enumerate(ishead) if val]
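# (Subtlety above: ishead has count + 1 slots so that nullrev parents,
# which index as -1, harmlessly clear the spare trailing slot instead of
# clobbering a real revision's entry.)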
1478 1481
1479 1482 def heads(self, start=None, stop=None):
1480 1483 """return the list of all nodes that have no children
1481 1484
1482 1485 if start is specified, only heads that are descendants of
1483 1486 start will be returned
1484 1487 if stop is specified, it will consider all the revs from stop
1485 1488 as if they had no children
1486 1489 """
1487 1490 if start is None and stop is None:
1488 1491 if not len(self):
1489 1492 return [self.nullid]
1490 1493 return [self.node(r) for r in self.headrevs()]
1491 1494
1492 1495 if start is None:
1493 1496 start = nullrev
1494 1497 else:
1495 1498 start = self.rev(start)
1496 1499
1497 1500 stoprevs = {self.rev(n) for n in stop or []}
1498 1501
1499 1502 revs = dagop.headrevssubset(
1500 1503 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1501 1504 )
1502 1505
1503 1506 return [self.node(rev) for rev in revs]
1504 1507
1505 1508 def children(self, node):
1506 1509 """find the children of a given node"""
1507 1510 c = []
1508 1511 p = self.rev(node)
1509 1512 for r in self.revs(start=p + 1):
1510 1513 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1511 1514 if prevs:
1512 1515 for pr in prevs:
1513 1516 if pr == p:
1514 1517 c.append(self.node(r))
1515 1518 elif p == nullrev:
1516 1519 c.append(self.node(r))
1517 1520 return c
1518 1521
1519 1522 def commonancestorsheads(self, a, b):
1520 1523 """calculate all the heads of the common ancestors of nodes a and b"""
1521 1524 a, b = self.rev(a), self.rev(b)
1522 1525 ancs = self._commonancestorsheads(a, b)
1523 1526 return pycompat.maplist(self.node, ancs)
1524 1527
1525 1528 def _commonancestorsheads(self, *revs):
1526 1529 """calculate all the heads of the common ancestors of revs"""
1527 1530 try:
1528 1531 ancs = self.index.commonancestorsheads(*revs)
1529 1532 except (AttributeError, OverflowError): # C implementation failed
1530 1533 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1531 1534 return ancs
1532 1535
1533 1536 def isancestor(self, a, b):
1534 1537 """return True if node a is an ancestor of node b
1535 1538
1536 1539 A revision is considered an ancestor of itself."""
1537 1540 a, b = self.rev(a), self.rev(b)
1538 1541 return self.isancestorrev(a, b)
1539 1542
1540 1543 def isancestorrev(self, a, b):
1541 1544 """return True if revision a is an ancestor of revision b
1542 1545
1543 1546 A revision is considered an ancestor of itself.
1544 1547
1545 1548 The implementation of this is trivial but the use of
1546 1549 reachableroots is not."""
1547 1550 if a == nullrev:
1548 1551 return True
1549 1552 elif a == b:
1550 1553 return True
1551 1554 elif a > b:
1552 1555 return False
1553 1556 return bool(self.reachableroots(a, [b], [a], includepath=False))
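# The reachableroots call above answers "is a an ancestor of b?" by
# computing heads(::(a and a::b)) (see reachableroots below); the result
# is non-empty exactly when some path leads from b down to a.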
1554 1557
1555 1558 def reachableroots(self, minroot, heads, roots, includepath=False):
1556 1559 """return (heads(::(<roots> and <roots>::<heads>)))
1557 1560
1558 1561 If includepath is True, return (<roots>::<heads>)."""
1559 1562 try:
1560 1563 return self.index.reachableroots2(
1561 1564 minroot, heads, roots, includepath
1562 1565 )
1563 1566 except AttributeError:
1564 1567 return dagop._reachablerootspure(
1565 1568 self.parentrevs, minroot, roots, heads, includepath
1566 1569 )
1567 1570
1568 1571 def ancestor(self, a, b):
1569 1572 """calculate the "best" common ancestor of nodes a and b"""
1570 1573
1571 1574 a, b = self.rev(a), self.rev(b)
1572 1575 try:
1573 1576 ancs = self.index.ancestors(a, b)
1574 1577 except (AttributeError, OverflowError):
1575 1578 ancs = ancestor.ancestors(self.parentrevs, a, b)
1576 1579 if ancs:
1577 1580 # choose a consistent winner when there's a tie
1578 1581 return min(map(self.node, ancs))
1579 1582 return self.nullid
1580 1583
1581 1584 def _match(self, id):
1582 1585 if isinstance(id, int):
1583 1586 # rev
1584 1587 return self.node(id)
1585 1588 if len(id) == self.nodeconstants.nodelen:
1586 1589 # possibly a binary node
1587 1590 # odds of a binary node being all hex in ASCII are 1 in 10**25
1588 1591 try:
1589 1592 node = id
1590 1593 self.rev(node) # quick search the index
1591 1594 return node
1592 1595 except error.LookupError:
1593 1596 pass # may be partial hex id
1594 1597 try:
1595 1598 # str(rev)
1596 1599 rev = int(id)
1597 1600 if b"%d" % rev != id:
1598 1601 raise ValueError
1599 1602 if rev < 0:
1600 1603 rev = len(self) + rev
1601 1604 if rev < 0 or rev >= len(self):
1602 1605 raise ValueError
1603 1606 return self.node(rev)
1604 1607 except (ValueError, OverflowError):
1605 1608 pass
1606 1609 if len(id) == 2 * self.nodeconstants.nodelen:
1607 1610 try:
1608 1611 # a full hex nodeid?
1609 1612 node = bin(id)
1610 1613 self.rev(node)
1611 1614 return node
1612 1615 except (binascii.Error, error.LookupError):
1613 1616 pass
1614 1617
1615 1618 def _partialmatch(self, id):
1616 1619 # we don't care about wdirfilenodeids as they should always be full hashes
1617 1620 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1618 1621 ambiguous = False
1619 1622 try:
1620 1623 partial = self.index.partialmatch(id)
1621 1624 if partial and self.hasnode(partial):
1622 1625 if maybewdir:
1623 1626 # single 'ff...' match in radix tree, ambiguous with wdir
1624 1627 ambiguous = True
1625 1628 else:
1626 1629 return partial
1627 1630 elif maybewdir:
1628 1631 # no 'ff...' match in radix tree, wdir identified
1629 1632 raise error.WdirUnsupported
1630 1633 else:
1631 1634 return None
1632 1635 except error.RevlogError:
1633 1636 # parsers.c radix tree lookup gave multiple matches
1634 1637 # fast path: for unfiltered changelog, radix tree is accurate
1635 1638 if not getattr(self, 'filteredrevs', None):
1636 1639 ambiguous = True
1637 1640 # fall through to slow path that filters hidden revisions
1638 1641 except (AttributeError, ValueError):
1639 1642 # we are pure python, or key is not hex
1640 1643 pass
1641 1644 if ambiguous:
1642 1645 raise error.AmbiguousPrefixLookupError(
1643 1646 id, self.display_id, _(b'ambiguous identifier')
1644 1647 )
1645 1648
1646 1649 if id in self._pcache:
1647 1650 return self._pcache[id]
1648 1651
1649 1652 if len(id) <= 40:
1650 1653 # hex(node)[:...]
1651 1654 l = len(id) // 2 * 2 # grab an even number of digits
1652 1655 try:
1653 1656 # we're dropping the last digit, so let's check that it's hex,
1654 1657 # to avoid the expensive computation below if it's not
1655 1658 if len(id) % 2 > 0:
1656 1659 if not (id[-1] in hexdigits):
1657 1660 return None
1658 1661 prefix = bin(id[:l])
1659 1662 except binascii.Error:
1660 1663 pass
1661 1664 else:
1662 1665 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1663 1666 nl = [
1664 1667 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1665 1668 ]
1666 1669 if self.nodeconstants.nullhex.startswith(id):
1667 1670 nl.append(self.nullid)
1668 1671 if len(nl) > 0:
1669 1672 if len(nl) == 1 and not maybewdir:
1670 1673 self._pcache[id] = nl[0]
1671 1674 return nl[0]
1672 1675 raise error.AmbiguousPrefixLookupError(
1673 1676 id, self.display_id, _(b'ambiguous identifier')
1674 1677 )
1675 1678 if maybewdir:
1676 1679 raise error.WdirUnsupported
1677 1680 return None
1678 1681
1679 1682 def lookup(self, id):
1680 1683 """locate a node based on:
1681 1684 - revision number or str(revision number)
1682 1685 - nodeid or subset of hex nodeid
1683 1686 """
1684 1687 n = self._match(id)
1685 1688 if n is not None:
1686 1689 return n
1687 1690 n = self._partialmatch(id)
1688 1691 if n:
1689 1692 return n
1690 1693
1691 1694 raise error.LookupError(id, self.display_id, _(b'no match found'))
1692 1695
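# Forms accepted by ``lookup`` (hypothetical values), resolved through
# ``_match`` and ``_partialmatch`` above:
#   rlog.lookup(5)          # a revision number
#   rlog.lookup(b'5')       # str(revision number)
#   rlog.lookup(b'-1')      # negative numbers count back from the tip
#   rlog.lookup(node)       # a full binary nodeid
#   rlog.lookup(b'a1b2c3')  # an unambiguous hex nodeid prefix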
1693 1696 def shortest(self, node, minlength=1):
1694 1697 """Find the shortest unambiguous prefix that matches node."""
1695 1698
1696 1699 def isvalid(prefix):
1697 1700 try:
1698 1701 matchednode = self._partialmatch(prefix)
1699 1702 except error.AmbiguousPrefixLookupError:
1700 1703 return False
1701 1704 except error.WdirUnsupported:
1702 1705 # single 'ff...' match
1703 1706 return True
1704 1707 if matchednode is None:
1705 1708 raise error.LookupError(node, self.display_id, _(b'no node'))
1706 1709 return True
1707 1710
1708 1711 def maybewdir(prefix):
1709 1712 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1710 1713
1711 1714 hexnode = hex(node)
1712 1715
1713 1716 def disambiguate(hexnode, minlength):
1714 1717 """Disambiguate against wdirid."""
1715 1718 for length in range(minlength, len(hexnode) + 1):
1716 1719 prefix = hexnode[:length]
1717 1720 if not maybewdir(prefix):
1718 1721 return prefix
1719 1722
1720 1723 if not getattr(self, 'filteredrevs', None):
1721 1724 try:
1722 1725 length = max(self.index.shortest(node), minlength)
1723 1726 return disambiguate(hexnode, length)
1724 1727 except error.RevlogError:
1725 1728 if node != self.nodeconstants.wdirid:
1726 1729 raise error.LookupError(
1727 1730 node, self.display_id, _(b'no node')
1728 1731 )
1729 1732 except AttributeError:
1730 1733 # Fall through to pure code
1731 1734 pass
1732 1735
1733 1736 if node == self.nodeconstants.wdirid:
1734 1737 for length in range(minlength, len(hexnode) + 1):
1735 1738 prefix = hexnode[:length]
1736 1739 if isvalid(prefix):
1737 1740 return prefix
1738 1741
1739 1742 for length in range(minlength, len(hexnode) + 1):
1740 1743 prefix = hexnode[:length]
1741 1744 if isvalid(prefix):
1742 1745 return disambiguate(hexnode, length)
1743 1746
1744 1747 def cmp(self, node, text):
1745 1748 """compare text with a given file revision
1746 1749
1747 1750 returns True if text is different from what is stored.
1748 1751 """
1749 1752 p1, p2 = self.parents(node)
1750 1753 return storageutil.hashrevisionsha1(text, p1, p2) != node
1751 1754
1752 1755 def _getsegmentforrevs(self, startrev, endrev, df=None):
1753 1756 """Obtain a segment of raw data corresponding to a range of revisions.
1754 1757
1755 1758 Accepts the start and end revisions and an optional already-open
1756 1759 file handle to be used for reading. If the file handle is used, its
1757 1760 seek position will not be preserved.
1758 1761
1759 1762 Requests for data may be satisfied by a cache.
1760 1763
1761 1764 Returns a 2-tuple of (offset, data) for the requested range of
1762 1765 revisions. Offset is the integer offset from the beginning of the
1763 1766 revlog and data is a str or buffer of the raw byte data.
1764 1767
1765 1768 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1766 1769 to determine where each revision's data begins and ends.
1767 1770 """
1768 1771 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1769 1772 # (functions are expensive).
1770 1773 index = self.index
1771 1774 istart = index[startrev]
1772 1775 start = int(istart[0] >> 16)
1773 1776 if startrev == endrev:
1774 1777 end = start + istart[1]
1775 1778 else:
1776 1779 iend = index[endrev]
1777 1780 end = int(iend[0] >> 16) + iend[1]
1778 1781
1779 1782 if self._inline:
1780 1783 start += (startrev + 1) * self.index.entry_size
1781 1784 end += (endrev + 1) * self.index.entry_size
1782 1785 length = end - start
1783 1786
1784 1787 return start, self._segmentfile.read_chunk(start, length, df)
1785 1788
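# Usage sketch (hypothetical caller, non-inline revlog): slicing one
# revision's raw chunk back out of the returned segment:
#   offset, data = rlog._getsegmentforrevs(rev, rev)
#   chunk = bytes(data[rlog.start(rev) - offset:][:rlog.length(rev)])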
1786 1789 def _chunk(self, rev, df=None):
1787 1790 """Obtain a single decompressed chunk for a revision.
1788 1791
1789 1792 Accepts an integer revision and an optional already-open file handle
1790 1793 to be used for reading. If used, the seek position of the file will not
1791 1794 be preserved.
1792 1795
1793 1796 Returns a str holding uncompressed data for the requested revision.
1794 1797 """
1795 1798 compression_mode = self.index[rev][10]
1796 1799 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1797 1800 if compression_mode == COMP_MODE_PLAIN:
1798 1801 return data
1799 1802 elif compression_mode == COMP_MODE_DEFAULT:
1800 1803 return self._decompressor(data)
1801 1804 elif compression_mode == COMP_MODE_INLINE:
1802 1805 return self.decompress(data)
1803 1806 else:
1804 1807 msg = b'unknown compression mode %d'
1805 1808 msg %= compression_mode
1806 1809 raise error.RevlogError(msg)
1807 1810
1808 1811 def _chunks(self, revs, df=None, targetsize=None):
1809 1812 """Obtain decompressed chunks for the specified revisions.
1810 1813
1811 1814 Accepts an iterable of numeric revisions that are assumed to be in
1812 1815 ascending order. Also accepts an optional already-open file handle
1813 1816 to be used for reading. If used, the seek position of the file will
1814 1817 not be preserved.
1815 1818
1816 1819 This function is similar to calling ``self._chunk()`` multiple times,
1817 1820 but is faster.
1818 1821
1819 1822 Returns a list with decompressed data for each requested revision.
1820 1823 """
1821 1824 if not revs:
1822 1825 return []
1823 1826 start = self.start
1824 1827 length = self.length
1825 1828 inline = self._inline
1826 1829 iosize = self.index.entry_size
1827 1830 buffer = util.buffer
1828 1831
1829 1832 l = []
1830 1833 ladd = l.append
1831 1834
1832 1835 if not self._withsparseread:
1833 1836 slicedchunks = (revs,)
1834 1837 else:
1835 1838 slicedchunks = deltautil.slicechunk(
1836 1839 self, revs, targetsize=targetsize
1837 1840 )
1838 1841
1839 1842 for revschunk in slicedchunks:
1840 1843 firstrev = revschunk[0]
1841 1844 # Skip trailing revisions with empty diff
1842 1845 for lastrev in revschunk[::-1]:
1843 1846 if length(lastrev) != 0:
1844 1847 break
1845 1848
1846 1849 try:
1847 1850 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1848 1851 except OverflowError:
1849 1852 # issue4215 - we can't cache a run of chunks greater than
1850 1853 # 2G on Windows
1851 1854 return [self._chunk(rev, df=df) for rev in revschunk]
1852 1855
1853 1856 decomp = self.decompress
1854 1857 # self._decompressor might be None, but will not be used in that case
1855 1858 def_decomp = self._decompressor
1856 1859 for rev in revschunk:
1857 1860 chunkstart = start(rev)
1858 1861 if inline:
1859 1862 chunkstart += (rev + 1) * iosize
1860 1863 chunklength = length(rev)
1861 1864 comp_mode = self.index[rev][10]
1862 1865 c = buffer(data, chunkstart - offset, chunklength)
1863 1866 if comp_mode == COMP_MODE_PLAIN:
1864 1867 ladd(c)
1865 1868 elif comp_mode == COMP_MODE_INLINE:
1866 1869 ladd(decomp(c))
1867 1870 elif comp_mode == COMP_MODE_DEFAULT:
1868 1871 ladd(def_decomp(c))
1869 1872 else:
1870 1873 msg = b'unknown compression mode %d'
1871 1874 msg %= comp_mode
1872 1875 raise error.RevlogError(msg)
1873 1876
1874 1877 return l
1875 1878
1876 1879 def deltaparent(self, rev):
1877 1880 """return deltaparent of the given revision"""
1878 1881 base = self.index[rev][3]
1879 1882 if base == rev:
1880 1883 return nullrev
1881 1884 elif self._generaldelta:
1882 1885 return base
1883 1886 else:
1884 1887 return rev - 1
1885 1888
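# For example, with general delta the stored base may be any earlier
# revision (typically a parent); without it, the base is always
# ``rev - 1``, and ``nullrev`` signals a full snapshot rather than a delta.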
1886 1889 def issnapshot(self, rev):
1887 1890 """tells whether rev is a snapshot"""
1888 1891 if not self._sparserevlog:
1889 1892 return self.deltaparent(rev) == nullrev
1890 1893 elif util.safehasattr(self.index, 'issnapshot'):
1891 1894 # directly assign the method to cache the testing and access
1892 1895 self.issnapshot = self.index.issnapshot
1893 1896 return self.issnapshot(rev)
1894 1897 if rev == nullrev:
1895 1898 return True
1896 1899 entry = self.index[rev]
1897 1900 base = entry[3]
1898 1901 if base == rev:
1899 1902 return True
1900 1903 if base == nullrev:
1901 1904 return True
1902 1905 p1 = entry[5]
1903 1906 while self.length(p1) == 0:
1904 1907 b = self.deltaparent(p1)
1905 1908 if b == p1:
1906 1909 break
1907 1910 p1 = b
1908 1911 p2 = entry[6]
1909 1912 while self.length(p2) == 0:
1910 1913 b = self.deltaparent(p2)
1911 1914 if b == p2:
1912 1915 break
1913 1916 p2 = b
1914 1917 if base == p1 or base == p2:
1915 1918 return False
1916 1919 return self.issnapshot(base)
1917 1920
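# Put differently (matching the logic above): a revision is a snapshot
# when it is stored as a full text, or, for sparse revlogs, when it is a
# delta whose base is not one of its parents and is itself a snapshot.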
1918 1921 def snapshotdepth(self, rev):
1919 1922 """number of snapshot in the chain before this one"""
1920 1923 if not self.issnapshot(rev):
1921 1924 raise error.ProgrammingError(b'revision %d not a snapshot')
1922 1925 return len(self._deltachain(rev)[0]) - 1
1923 1926
1924 1927 def revdiff(self, rev1, rev2):
1925 1928 """return or calculate a delta between two revisions
1926 1929
1927 1930 The delta calculated is in binary form and is intended to be written to
1928 1931 revlog data directly. So this function needs raw revision data.
1929 1932 """
1930 1933 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1931 1934 return bytes(self._chunk(rev2))
1932 1935
1933 1936 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1934 1937
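# Note on the fast path above: when rev1 is already rev2's delta parent,
# the stored chunk *is* the requested binary delta; otherwise the delta
# is recomputed from both raw texts via mdiff.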
1935 1938 def revision(self, nodeorrev, _df=None):
1936 1939 """return an uncompressed revision of a given node or revision
1937 1940 number.
1938 1941
1939 1942 _df - an existing file handle to read from. (internal-only)
1940 1943 """
1941 1944 return self._revisiondata(nodeorrev, _df)
1942 1945
1943 1946 def sidedata(self, nodeorrev, _df=None):
1944 1947 """a map of extra data related to the changeset but not part of the hash
1945 1948
1946 1949 This function currently returns a dictionary. However, a more
1947 1950 advanced mapping object will likely be used in the future for more
1948 1951 efficient/lazy code.
1949 1952 """
1950 1953 # deal with <nodeorrev> argument type
1951 1954 if isinstance(nodeorrev, int):
1952 1955 rev = nodeorrev
1953 1956 else:
1954 1957 rev = self.rev(nodeorrev)
1955 1958 return self._sidedata(rev)
1956 1959
1957 1960 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1958 1961 # deal with <nodeorrev> argument type
1959 1962 if isinstance(nodeorrev, int):
1960 1963 rev = nodeorrev
1961 1964 node = self.node(rev)
1962 1965 else:
1963 1966 node = nodeorrev
1964 1967 rev = None
1965 1968
1966 1969 # fast path the special `nullid` rev
1967 1970 if node == self.nullid:
1968 1971 return b""
1969 1972
1970 1973 # ``rawtext`` is the text as stored inside the revlog. Might be the
1971 1974 # revision or might need to be processed to retrieve the revision.
1972 1975 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1973 1976
1974 1977 if raw and validated:
1975 1978 # if we don't want to process the raw text and the raw
1976 1979 # text is cached, we can exit early.
1977 1980 return rawtext
1978 1981 if rev is None:
1979 1982 rev = self.rev(node)
1980 1983 # the revlog's flag for this revision
1981 1984 # (usually alters its state or content)
1982 1985 flags = self.flags(rev)
1983 1986
1984 1987 if validated and flags == REVIDX_DEFAULT_FLAGS:
1985 1988 # no extra flags set, no flag processor runs, text = rawtext
1986 1989 return rawtext
1987 1990
1988 1991 if raw:
1989 1992 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1990 1993 text = rawtext
1991 1994 else:
1992 1995 r = flagutil.processflagsread(self, rawtext, flags)
1993 1996 text, validatehash = r
1994 1997 if validatehash:
1995 1998 self.checkhash(text, node, rev=rev)
1996 1999 if not validated:
1997 2000 self._revisioncache = (node, rev, rawtext)
1998 2001
1999 2002 return text
2000 2003
2001 2004 def _rawtext(self, node, rev, _df=None):
2002 2005 """return the possibly unvalidated rawtext for a revision
2003 2006
2004 2007 returns (rev, rawtext, validated)
2005 2008 """
2006 2009
2007 2010 # revision in the cache (could be useful to apply delta)
2008 2011 cachedrev = None
2009 2012 # An intermediate text to apply deltas to
2010 2013 basetext = None
2011 2014
2012 2015 # Check if we have the entry in cache
2013 2016 # The cache entry looks like (node, rev, rawtext)
2014 2017 if self._revisioncache:
2015 2018 if self._revisioncache[0] == node:
2016 2019 return (rev, self._revisioncache[2], True)
2017 2020 cachedrev = self._revisioncache[1]
2018 2021
2019 2022 if rev is None:
2020 2023 rev = self.rev(node)
2021 2024
2022 2025 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2023 2026 if stopped:
2024 2027 basetext = self._revisioncache[2]
2025 2028
2026 2029 # drop cache to save memory, the caller is expected to
2027 2030 # update self._revisioncache after validating the text
2028 2031 self._revisioncache = None
2029 2032
2030 2033 targetsize = None
2031 2034 rawsize = self.index[rev][2]
2032 2035 if 0 <= rawsize:
2033 2036 targetsize = 4 * rawsize
2034 2037
2035 2038 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2036 2039 if basetext is None:
2037 2040 basetext = bytes(bins[0])
2038 2041 bins = bins[1:]
2039 2042
2040 2043 rawtext = mdiff.patches(basetext, bins)
2041 2044 del basetext # let us have a chance to free memory early
2042 2045 return (rev, rawtext, False)
2043 2046
2044 2047 def _sidedata(self, rev):
2045 2048 """Return the sidedata for a given revision number."""
2046 2049 index_entry = self.index[rev]
2047 2050 sidedata_offset = index_entry[8]
2048 2051 sidedata_size = index_entry[9]
2049 2052
2050 2053 if self._inline:
2051 2054 sidedata_offset += self.index.entry_size * (1 + rev)
2052 2055 if sidedata_size == 0:
2053 2056 return {}
2054 2057
2055 2058 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2056 2059 filename = self._sidedatafile
2057 2060 end = self._docket.sidedata_end
2058 2061 offset = sidedata_offset
2059 2062 length = sidedata_size
2060 2063 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2061 2064 raise error.RevlogError(m)
2062 2065
2063 2066 comp_segment = self._segmentfile_sidedata.read_chunk(
2064 2067 sidedata_offset, sidedata_size
2065 2068 )
2066 2069
2067 2070 comp = self.index[rev][11]
2068 2071 if comp == COMP_MODE_PLAIN:
2069 2072 segment = comp_segment
2070 2073 elif comp == COMP_MODE_DEFAULT:
2071 2074 segment = self._decompressor(comp_segment)
2072 2075 elif comp == COMP_MODE_INLINE:
2073 2076 segment = self.decompress(comp_segment)
2074 2077 else:
2075 2078 msg = b'unknown compression mode %d'
2076 2079 msg %= comp
2077 2080 raise error.RevlogError(msg)
2078 2081
2079 2082 sidedata = sidedatautil.deserialize_sidedata(segment)
2080 2083 return sidedata
2081 2084
2082 2085 def rawdata(self, nodeorrev, _df=None):
2083 2086 """return an uncompressed raw data of a given node or revision number.
2084 2087
2085 2088 _df - an existing file handle to read from. (internal-only)
2086 2089 """
2087 2090 return self._revisiondata(nodeorrev, _df, raw=True)
2088 2091
2089 2092 def hash(self, text, p1, p2):
2090 2093 """Compute a node hash.
2091 2094
2092 2095 Available as a function so that subclasses can replace the hash
2093 2096 as needed.
2094 2097 """
2095 2098 return storageutil.hashrevisionsha1(text, p1, p2)
2096 2099
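# The default implementation (storageutil.hashrevisionsha1) computes
# sha1(min(p1, p2) + max(p1, p2) + text); the parents are sorted before
# hashing so the result does not depend on their order.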
2097 2100 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2098 2101 """Check node hash integrity.
2099 2102
2100 2103 Available as a function so that subclasses can extend hash mismatch
2101 2104 behaviors as needed.
2102 2105 """
2103 2106 try:
2104 2107 if p1 is None and p2 is None:
2105 2108 p1, p2 = self.parents(node)
2106 2109 if node != self.hash(text, p1, p2):
2107 2110 # Clear the revision cache on hash failure. The revision cache
2108 2111 # only stores the raw revision and clearing the cache does have
2109 2112 # the side-effect that we won't have a cache hit when the raw
2110 2113 # revision data is accessed. But this case should be rare and
2111 2114 # it is extra work to teach the cache about the hash
2112 2115 # verification state.
2113 2116 if self._revisioncache and self._revisioncache[0] == node:
2114 2117 self._revisioncache = None
2115 2118
2116 2119 revornode = rev
2117 2120 if revornode is None:
2118 2121 revornode = templatefilters.short(hex(node))
2119 2122 raise error.RevlogError(
2120 2123 _(b"integrity check failed on %s:%s")
2121 2124 % (self.display_id, pycompat.bytestr(revornode))
2122 2125 )
2123 2126 except error.RevlogError:
2124 2127 if self._censorable and storageutil.iscensoredtext(text):
2125 2128 raise error.CensoredNodeError(self.display_id, node, text)
2126 2129 raise
2127 2130
2128 2131 @property
2129 2132 def _split_index_file(self):
2130 2133 """the path where to expect the index of an ongoing splitting operation
2131 2134
2132 2135 The file will only exist if a splitting operation is in progress, but
2133 2136 it is always expected at the same location."""
2134 2137 parts = self.radix.split(b'/')
2135 2138 if len(parts) > 1:
2136 2139 # adds a '-s' suffix to the `data/` or `meta/` base
2137 2140 head = parts[0] + b'-s'
2138 2141 mids = parts[1:-1]
2139 2142 tail = parts[-1] + b'.i'
2140 2143 pieces = [head] + mids + [tail]
2141 2144 return b'/'.join(pieces)
2142 2145 else:
2143 2146 # the revlog is stored at the root of the store (changelog or
2144 2147 # manifest), no risk of collision.
2145 2148 return self.radix + b'.i.s'
2146 2149
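# For example (hypothetical radixes), the property above maps:
#   b'data/some/file' -> b'data-s/some/file.i'
#   b'00manifest'     -> b'00manifest.i.s'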
2147 2150 def _enforceinlinesize(self, tr, side_write=True):
2148 2151 """Check if the revlog is too big for inline and convert if so.
2149 2152
2150 2153 This should be called after revisions are added to the revlog. If the
2151 2154 revlog has grown too large to be an inline revlog, it will convert it
2152 2155 to use multiple index and data files.
2153 2156 """
2154 2157 tiprev = len(self) - 1
2155 2158 total_size = self.start(tiprev) + self.length(tiprev)
2156 2159 if not self._inline or total_size < _maxinline:
2157 2160 return
2158 2161
2159 2162 troffset = tr.findoffset(self._indexfile)
2160 2163 if troffset is None:
2161 2164 raise error.RevlogError(
2162 2165 _(b"%s not found in the transaction") % self._indexfile
2163 2166 )
2164 2167 if troffset:
2165 2168 tr.addbackup(self._indexfile, for_offset=True)
2166 2169 tr.add(self._datafile, 0)
2167 2170
2168 2171 existing_handles = False
2169 2172 if self._writinghandles is not None:
2170 2173 existing_handles = True
2171 2174 fp = self._writinghandles[0]
2172 2175 fp.flush()
2173 2176 fp.close()
2174 2177 # We can't use the cached file handle after close(). So prevent
2175 2178 # its usage.
2176 2179 self._writinghandles = None
2177 2180 self._segmentfile.writing_handle = None
2178 2181 # No need to deal with the sidedata writing handle as it is only
2179 2182 # relevant for revlog-v2, which is never inline and so never
2180 2183 # reaches this code
2181 2184 if side_write:
2182 2185 old_index_file_path = self._indexfile
2183 2186 new_index_file_path = self._split_index_file
2184 2187 opener = self.opener
2185 2188 weak_self = weakref.ref(self)
2186 2189
2187 2190 # the "split" index replace the real index when the transaction is finalized
2188 2191 def finalize_callback(tr):
2189 2192 opener.rename(
2190 2193 new_index_file_path,
2191 2194 old_index_file_path,
2192 2195 checkambig=True,
2193 2196 )
2194 2197 maybe_self = weak_self()
2195 2198 if maybe_self is not None:
2196 2199 maybe_self._indexfile = old_index_file_path
2197 2200
2198 2201 def abort_callback(tr):
2199 2202 maybe_self = weak_self()
2200 2203 if maybe_self is not None:
2201 2204 maybe_self._indexfile = old_index_file_path
2202 2205
2203 2206 tr.registertmp(new_index_file_path)
2204 2207 if self.target[1] is not None:
2205 2208 callback_id = b'000-revlog-split-%d-%s' % self.target
2206 2209 else:
2207 2210 callback_id = b'000-revlog-split-%d' % self.target[0]
2208 2211 tr.addfinalize(callback_id, finalize_callback)
2209 2212 tr.addabort(callback_id, abort_callback)
2210 2213
2211 2214 new_dfh = self._datafp(b'w+')
2212 2215 new_dfh.truncate(0) # drop any potentially existing data
2213 2216 try:
2214 2217 with self._indexfp() as read_ifh:
2215 2218 for r in self:
2216 2219 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2217 2220 new_dfh.flush()
2218 2221
2219 2222 if side_write:
2220 2223 self._indexfile = new_index_file_path
2221 2224 with self.__index_new_fp() as fp:
2222 2225 self._format_flags &= ~FLAG_INLINE_DATA
2223 2226 self._inline = False
2224 2227 for i in self:
2225 2228 e = self.index.entry_binary(i)
2226 2229 if i == 0 and self._docket is None:
2227 2230 header = self._format_flags | self._format_version
2228 2231 header = self.index.pack_header(header)
2229 2232 e = header + e
2230 2233 fp.write(e)
2231 2234 if self._docket is not None:
2232 2235 self._docket.index_end = fp.tell()
2233 2236
2234 2237 # If we don't use side-write, the temp file replaces the real
2235 2238 # index when we exit the context manager
2236 2239
2237 2240 nodemaputil.setup_persistent_nodemap(tr, self)
2238 2241 self._segmentfile = randomaccessfile.randomaccessfile(
2239 2242 self.opener,
2240 2243 self._datafile,
2241 2244 self._chunkcachesize,
2242 2245 )
2243 2246
2244 2247 if existing_handles:
2245 2248 # switched from inline to conventional reopen the index
2246 2249 ifh = self.__index_write_fp()
2247 2250 self._writinghandles = (ifh, new_dfh, None)
2248 2251 self._segmentfile.writing_handle = new_dfh
2249 2252 new_dfh = None
2250 2253 # No need to deal with the sidedata writing handle as it is only
2251 2254 # relevant for revlog-v2, which is never inline and so never
2252 2255 # reaches this code
2253 2256 finally:
2254 2257 if new_dfh is not None:
2255 2258 new_dfh.close()
2256 2259
2257 2260 def _nodeduplicatecallback(self, transaction, node):
2258 2261 """called when trying to add a node already stored."""
2259 2262
2260 2263 @contextlib.contextmanager
2261 2264 def reading(self):
2262 2265 """Context manager that keeps data and sidedata files open for reading"""
2263 2266 with self._segmentfile.reading():
2264 2267 with self._segmentfile_sidedata.reading():
2265 2268 yield
2266 2269
2267 2270 @contextlib.contextmanager
2268 2271 def _writing(self, transaction):
2269 2272 if self._trypending:
2270 2273 msg = b'trying to write in a `trypending` revlog: %s'
2271 2274 msg %= self.display_id
2272 2275 raise error.ProgrammingError(msg)
2273 2276 if self._writinghandles is not None:
2274 2277 yield
2275 2278 else:
2276 2279 ifh = dfh = sdfh = None
2277 2280 try:
2278 2281 r = len(self)
2279 2282 # opening the data file.
2280 2283 dsize = 0
2281 2284 if r:
2282 2285 dsize = self.end(r - 1)
2283 2286 dfh = None
2284 2287 if not self._inline:
2285 2288 try:
2286 2289 dfh = self._datafp(b"r+")
2287 2290 if self._docket is None:
2288 2291 dfh.seek(0, os.SEEK_END)
2289 2292 else:
2290 2293 dfh.seek(self._docket.data_end, os.SEEK_SET)
2291 2294 except FileNotFoundError:
2292 2295 dfh = self._datafp(b"w+")
2293 2296 transaction.add(self._datafile, dsize)
2294 2297 if self._sidedatafile is not None:
2295 2298 # revlog-v2 does not inline, help Pytype
2296 2299 assert dfh is not None
2297 2300 try:
2298 2301 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2299 2302 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2300 2303 except FileNotFoundError:
2301 2304 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2302 2305 transaction.add(
2303 2306 self._sidedatafile, self._docket.sidedata_end
2304 2307 )
2305 2308
2306 2309 # opening the index file.
2307 2310 isize = r * self.index.entry_size
2308 2311 ifh = self.__index_write_fp()
2309 2312 if self._inline:
2310 2313 transaction.add(self._indexfile, dsize + isize)
2311 2314 else:
2312 2315 transaction.add(self._indexfile, isize)
2313 2316 # exposing all file handles for writing.
2314 2317 self._writinghandles = (ifh, dfh, sdfh)
2315 2318 self._segmentfile.writing_handle = ifh if self._inline else dfh
2316 2319 self._segmentfile_sidedata.writing_handle = sdfh
2317 2320 yield
2318 2321 if self._docket is not None:
2319 2322 self._write_docket(transaction)
2320 2323 finally:
2321 2324 self._writinghandles = None
2322 2325 self._segmentfile.writing_handle = None
2323 2326 self._segmentfile_sidedata.writing_handle = None
2324 2327 if dfh is not None:
2325 2328 dfh.close()
2326 2329 if sdfh is not None:
2327 2330 sdfh.close()
2328 2331 # closing the index file last to avoid exposing references to
2329 2332 # potentially unflushed data content.
2330 2333 if ifh is not None:
2331 2334 ifh.close()
2332 2335
2333 2336 def _write_docket(self, transaction):
2334 2337 """write the current docket on disk
2335 2338
2336 2339 Exists as a method to help the changelog implement transaction logic
2337 2340
2338 2341 We could also imagine using the same transaction logic for all revlogs
2339 2342 since dockets are cheap."""
2340 2343 self._docket.write(transaction)
2341 2344
2342 2345 def addrevision(
2343 2346 self,
2344 2347 text,
2345 2348 transaction,
2346 2349 link,
2347 2350 p1,
2348 2351 p2,
2349 2352 cachedelta=None,
2350 2353 node=None,
2351 2354 flags=REVIDX_DEFAULT_FLAGS,
2352 2355 deltacomputer=None,
2353 2356 sidedata=None,
2354 2357 ):
2355 2358 """add a revision to the log
2356 2359
2357 2360 text - the revision data to add
2358 2361 transaction - the transaction object used for rollback
2359 2362 link - the linkrev data to add
2360 2363 p1, p2 - the parent nodeids of the revision
2361 2364 cachedelta - an optional precomputed delta
2362 2365 node - nodeid of revision; typically node is not specified, and it is
2363 2366 computed by default as hash(text, p1, p2); however, subclasses might
2364 2367 use a different hashing method (and override checkhash() in that case)
2365 2368 flags - the known flags to set on the revision
2366 2369 deltacomputer - an optional deltacomputer instance shared between
2367 2370 multiple calls
2368 2371 """
2369 2372 if link == nullrev:
2370 2373 raise error.RevlogError(
2371 2374 _(b"attempted to add linkrev -1 to %s") % self.display_id
2372 2375 )
2373 2376
2374 2377 if sidedata is None:
2375 2378 sidedata = {}
2376 2379 elif sidedata and not self.hassidedata:
2377 2380 raise error.ProgrammingError(
2378 2381 _(b"trying to add sidedata to a revlog who don't support them")
2379 2382 )
2380 2383
2381 2384 if flags:
2382 2385 node = node or self.hash(text, p1, p2)
2383 2386
2384 2387 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2385 2388
2386 2389 # If the flag processor modifies the revision data, ignore any provided
2387 2390 # cachedelta.
2388 2391 if rawtext != text:
2389 2392 cachedelta = None
2390 2393
2391 2394 if len(rawtext) > _maxentrysize:
2392 2395 raise error.RevlogError(
2393 2396 _(
2394 2397 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2395 2398 )
2396 2399 % (self.display_id, len(rawtext))
2397 2400 )
2398 2401
2399 2402 node = node or self.hash(rawtext, p1, p2)
2400 2403 rev = self.index.get_rev(node)
2401 2404 if rev is not None:
2402 2405 return rev
2403 2406
2404 2407 if validatehash:
2405 2408 self.checkhash(rawtext, node, p1=p1, p2=p2)
2406 2409
2407 2410 return self.addrawrevision(
2408 2411 rawtext,
2409 2412 transaction,
2410 2413 link,
2411 2414 p1,
2412 2415 p2,
2413 2416 node,
2414 2417 flags,
2415 2418 cachedelta=cachedelta,
2416 2419 deltacomputer=deltacomputer,
2417 2420 sidedata=sidedata,
2418 2421 )
2419 2422
2420 2423 def addrawrevision(
2421 2424 self,
2422 2425 rawtext,
2423 2426 transaction,
2424 2427 link,
2425 2428 p1,
2426 2429 p2,
2427 2430 node,
2428 2431 flags,
2429 2432 cachedelta=None,
2430 2433 deltacomputer=None,
2431 2434 sidedata=None,
2432 2435 ):
2433 2436 """add a raw revision with known flags, node and parents
2434 2437 useful when reusing a revision not stored in this revlog (e.g. received
2435 2438 over the wire, or read from an external bundle).
2436 2439 """
2437 2440 with self._writing(transaction):
2438 2441 return self._addrevision(
2439 2442 node,
2440 2443 rawtext,
2441 2444 transaction,
2442 2445 link,
2443 2446 p1,
2444 2447 p2,
2445 2448 flags,
2446 2449 cachedelta,
2447 2450 deltacomputer=deltacomputer,
2448 2451 sidedata=sidedata,
2449 2452 )
2450 2453
2451 2454 def compress(self, data):
2452 2455 """Generate a possibly-compressed representation of data."""
2453 2456 if not data:
2454 2457 return b'', data
2455 2458
2456 2459 compressed = self._compressor.compress(data)
2457 2460
2458 2461 if compressed:
2459 2462 # The revlog compressor added the header in the returned data.
2460 2463 return b'', compressed
2461 2464
2462 2465 if data[0:1] == b'\0':
2463 2466 return b'', data
2464 2467 return b'u', data
2465 2468
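# Usage sketch (assumed caller): the returned (header, data) pair is
# simply concatenated when written, an empty header meaning the
# compressor embedded its own marker (e.g. zlib output starts with b'x'):
#   header, compressed = rlog.compress(rawchunk)
#   stored = header + compressed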
2466 2469 def decompress(self, data):
2467 2470 """Decompress a revlog chunk.
2468 2471
2469 2472 The chunk is expected to begin with a header identifying the
2470 2473 format type so it can be routed to an appropriate decompressor.
2471 2474 """
2472 2475 if not data:
2473 2476 return data
2474 2477
2475 2478 # Revlogs are read much more frequently than they are written and many
2476 2479 # chunks only take microseconds to decompress, so performance is
2477 2480 # important here.
2478 2481 #
2479 2482 # We can make a few assumptions about revlogs:
2480 2483 #
2481 2484 # 1) the majority of chunks will be compressed (as opposed to inline
2482 2485 # raw data).
2483 2486 # 2) decompressing *any* data will likely be at least 10x slower than
2484 2487 # returning raw inline data.
2485 2488 # 3) we want to prioritize common and officially supported compression
2486 2489 # engines
2487 2490 #
2488 2491 # It follows that we want to optimize for "decompress compressed data
2489 2492 # when encoded with common and officially supported compression engines"
2490 2493 # case over "raw data" and "data encoded by less common or non-official
2491 2494 # compression engines." That is why we have the inline lookup first
2492 2495 # followed by the compengines lookup.
2493 2496 #
2494 2497 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2495 2498 # compressed chunks. And this matters for changelog and manifest reads.
2496 2499 t = data[0:1]
2497 2500
2498 2501 if t == b'x':
2499 2502 try:
2500 2503 return _zlibdecompress(data)
2501 2504 except zlib.error as e:
2502 2505 raise error.RevlogError(
2503 2506 _(b'revlog decompress error: %s')
2504 2507 % stringutil.forcebytestr(e)
2505 2508 )
2506 2509 # '\0' is more common than 'u' so it goes first.
2507 2510 elif t == b'\0':
2508 2511 return data
2509 2512 elif t == b'u':
2510 2513 return util.buffer(data, 1)
2511 2514
2512 2515 compressor = self._get_decompressor(t)
2513 2516
2514 2517 return compressor.decompress(data)
2515 2518
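# Quick reference for the header byte dispatched on above:
#   b'x'  -> zlib-compressed chunk
#   b'\0' -> data stored as-is (raw chunk already starting with NUL)
#   b'u'  -> stored uncompressed, the marker byte is stripped
#   other -> routed to the matching registered compression engine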
2516 2519 def _addrevision(
2517 2520 self,
2518 2521 node,
2519 2522 rawtext,
2520 2523 transaction,
2521 2524 link,
2522 2525 p1,
2523 2526 p2,
2524 2527 flags,
2525 2528 cachedelta,
2526 2529 alwayscache=False,
2527 2530 deltacomputer=None,
2528 2531 sidedata=None,
2529 2532 ):
2530 2533 """internal function to add revisions to the log
2531 2534
2532 2535 see addrevision for argument descriptions.
2533 2536
2534 2537 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2535 2538
2536 2539 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2537 2540 be used.
2538 2541
2539 2542 invariants:
2540 2543 - rawtext is optional (can be None); if not set, cachedelta must be set.
2541 2544 if both are set, they must correspond to each other.
2542 2545 """
2543 2546 if node == self.nullid:
2544 2547 raise error.RevlogError(
2545 2548 _(b"%s: attempt to add null revision") % self.display_id
2546 2549 )
2547 2550 if (
2548 2551 node == self.nodeconstants.wdirid
2549 2552 or node in self.nodeconstants.wdirfilenodeids
2550 2553 ):
2551 2554 raise error.RevlogError(
2552 2555 _(b"%s: attempt to add wdir revision") % self.display_id
2553 2556 )
2554 2557 if self._writinghandles is None:
2555 2558 msg = b'adding revision outside `revlog._writing` context'
2556 2559 raise error.ProgrammingError(msg)
2557 2560
2558 2561 if self._inline:
2559 2562 fh = self._writinghandles[0]
2560 2563 else:
2561 2564 fh = self._writinghandles[1]
2562 2565
2563 2566 btext = [rawtext]
2564 2567
2565 2568 curr = len(self)
2566 2569 prev = curr - 1
2567 2570
2568 2571 offset = self._get_data_offset(prev)
2569 2572
2570 2573 if self._concurrencychecker:
2571 2574 ifh, dfh, sdfh = self._writinghandles
2572 2575 # XXX no checking for the sidedata file
2573 2576 if self._inline:
2574 2577 # offset is "as if" it were in the .d file, so we need to add on
2575 2578 # the size of the entry metadata.
2576 2579 self._concurrencychecker(
2577 2580 ifh, self._indexfile, offset + curr * self.index.entry_size
2578 2581 )
2579 2582 else:
2580 2583 # Entries in the .i are a consistent size.
2581 2584 self._concurrencychecker(
2582 2585 ifh, self._indexfile, curr * self.index.entry_size
2583 2586 )
2584 2587 self._concurrencychecker(dfh, self._datafile, offset)
2585 2588
2586 2589 p1r, p2r = self.rev(p1), self.rev(p2)
2587 2590
2588 2591 # full versions are inserted when the needed deltas
2589 2592 # become comparable to the uncompressed text
2590 2593 if rawtext is None:
2591 2594 # need rawtext size, before changed by flag processors, which is
2592 2595 # the non-raw size. use revlog explicitly to avoid filelog's extra
2593 2596 # logic that might remove metadata size.
2594 2597 textlen = mdiff.patchedsize(
2595 2598 revlog.size(self, cachedelta[0]), cachedelta[1]
2596 2599 )
2597 2600 else:
2598 2601 textlen = len(rawtext)
2599 2602
2600 2603 if deltacomputer is None:
2601 2604 write_debug = None
2602 2605 if self._debug_delta:
2603 2606 write_debug = transaction._report
2604 2607 deltacomputer = deltautil.deltacomputer(
2605 2608 self, write_debug=write_debug
2606 2609 )
2607 2610
2608 2611 if cachedelta is not None and len(cachedelta) == 2:
2609 2612 # If the cached delta has no information about how it should be
2610 2613 # reused, add the default reuse instruction according to the
2611 2614 # revlog's configuration.
2612 2615 if self._generaldelta and self._lazydeltabase:
2613 2616 delta_base_reuse = DELTA_BASE_REUSE_TRY
2614 2617 else:
2615 2618 delta_base_reuse = DELTA_BASE_REUSE_NO
2616 2619 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2617 2620
2618 2621 revinfo = revlogutils.revisioninfo(
2619 2622 node,
2620 2623 p1,
2621 2624 p2,
2622 2625 btext,
2623 2626 textlen,
2624 2627 cachedelta,
2625 2628 flags,
2626 2629 )
2627 2630
2628 2631 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2629 2632
2630 2633 compression_mode = COMP_MODE_INLINE
2631 2634 if self._docket is not None:
2632 2635 default_comp = self._docket.default_compression_header
2633 2636 r = deltautil.delta_compression(default_comp, deltainfo)
2634 2637 compression_mode, deltainfo = r
2635 2638
2636 2639 sidedata_compression_mode = COMP_MODE_INLINE
2637 2640 if sidedata and self.hassidedata:
2638 2641 sidedata_compression_mode = COMP_MODE_PLAIN
2639 2642 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2640 2643 sidedata_offset = self._docket.sidedata_end
2641 2644 h, comp_sidedata = self.compress(serialized_sidedata)
2642 2645 if (
2643 2646 h != b'u'
2644 2647 and comp_sidedata[0:1] != b'\0'
2645 2648 and len(comp_sidedata) < len(serialized_sidedata)
2646 2649 ):
2647 2650 assert not h
2648 2651 if (
2649 2652 comp_sidedata[0:1]
2650 2653 == self._docket.default_compression_header
2651 2654 ):
2652 2655 sidedata_compression_mode = COMP_MODE_DEFAULT
2653 2656 serialized_sidedata = comp_sidedata
2654 2657 else:
2655 2658 sidedata_compression_mode = COMP_MODE_INLINE
2656 2659 serialized_sidedata = comp_sidedata
2657 2660 else:
2658 2661 serialized_sidedata = b""
2659 2662 # Don't store the offset if the sidedata is empty; that way
2660 2663 # we can easily detect empty sidedata, and they will be no different
2661 2664 # from ones we manually add.
2662 2665 sidedata_offset = 0
2663 2666
2664 2667 rank = RANK_UNKNOWN
2665 2668 if self._compute_rank:
2666 2669 if (p1r, p2r) == (nullrev, nullrev):
2667 2670 rank = 1
2668 2671 elif p1r != nullrev and p2r == nullrev:
2669 2672 rank = 1 + self.fast_rank(p1r)
2670 2673 elif p1r == nullrev and p2r != nullrev:
2671 2674 rank = 1 + self.fast_rank(p2r)
2672 2675 else: # merge node
2673 2676 if rustdagop is not None and self.index.rust_ext_compat:
2674 2677 rank = rustdagop.rank(self.index, p1r, p2r)
2675 2678 else:
2676 2679 pmin, pmax = sorted((p1r, p2r))
2677 2680 rank = 1 + self.fast_rank(pmax)
2678 2681 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2679 2682
2680 2683 e = revlogutils.entry(
2681 2684 flags=flags,
2682 2685 data_offset=offset,
2683 2686 data_compressed_length=deltainfo.deltalen,
2684 2687 data_uncompressed_length=textlen,
2685 2688 data_compression_mode=compression_mode,
2686 2689 data_delta_base=deltainfo.base,
2687 2690 link_rev=link,
2688 2691 parent_rev_1=p1r,
2689 2692 parent_rev_2=p2r,
2690 2693 node_id=node,
2691 2694 sidedata_offset=sidedata_offset,
2692 2695 sidedata_compressed_length=len(serialized_sidedata),
2693 2696 sidedata_compression_mode=sidedata_compression_mode,
2694 2697 rank=rank,
2695 2698 )
2696 2699
2697 2700 self.index.append(e)
2698 2701 entry = self.index.entry_binary(curr)
2699 2702 if curr == 0 and self._docket is None:
2700 2703 header = self._format_flags | self._format_version
2701 2704 header = self.index.pack_header(header)
2702 2705 entry = header + entry
2703 2706 self._writeentry(
2704 2707 transaction,
2705 2708 entry,
2706 2709 deltainfo.data,
2707 2710 link,
2708 2711 offset,
2709 2712 serialized_sidedata,
2710 2713 sidedata_offset,
2711 2714 )
2712 2715
2713 2716 rawtext = btext[0]
2714 2717
2715 2718 if alwayscache and rawtext is None:
2716 2719 rawtext = deltacomputer.buildtext(revinfo, fh)
2717 2720
2718 2721 if type(rawtext) == bytes: # only accept immutable objects
2719 2722 self._revisioncache = (node, curr, rawtext)
2720 2723 self._chainbasecache[curr] = deltainfo.chainbase
2721 2724 return curr
2722 2725
2723 2726 def _get_data_offset(self, prev):
2724 2727 """Returns the current offset in the (in-transaction) data file.
2725 2728 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2726 2729 file to store that information: since sidedata can be rewritten to the
2727 2730 end of the data file within a transaction, you can have cases where, for
2728 2731 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2729 2732 to `n - 1`'s sidedata being written after `n`'s data.
2730 2733
2731 2734 TODO cache this in a docket file before getting out of experimental."""
2732 2735 if self._docket is None:
2733 2736 return self.end(prev)
2734 2737 else:
2735 2738 return self._docket.data_end
2736 2739
2737 2740 def _writeentry(
2738 2741 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2739 2742 ):
2740 2743 # Files opened in a+ mode have inconsistent behavior on various
2741 2744 # platforms. Windows requires that a file positioning call be made
2742 2745 # when the file handle transitions between reads and writes. See
2743 2746 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
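# For example, ``rlog.shortest(node)`` might return b'1f2e' when four hex
# digits uniquely identify the node in this revlog (hypothetical value);
# the result is never shorter than ``minlength``.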
2744 2747 # platforms, Python or the platform itself can be buggy. Some versions
2745 2748 # of Solaris have been observed to not append at the end of the file
2746 2749 # if the file was seeked to before the end. See issue4943 for more.
2747 2750 #
2748 2751 # We work around this issue by inserting a seek() before writing.
2749 2752 # Note: This is likely not necessary on Python 3. However, because
2750 2753 # the file handle is reused for reads and may be seeked there, we need
2751 2754 # to be careful before changing this.
2752 2755 if self._writinghandles is None:
2753 2756 msg = b'adding revision outside `revlog._writing` context'
2754 2757 raise error.ProgrammingError(msg)
2755 2758 ifh, dfh, sdfh = self._writinghandles
2756 2759 if self._docket is None:
2757 2760 ifh.seek(0, os.SEEK_END)
2758 2761 else:
2759 2762 ifh.seek(self._docket.index_end, os.SEEK_SET)
2760 2763 if dfh:
2761 2764 if self._docket is None:
2762 2765 dfh.seek(0, os.SEEK_END)
2763 2766 else:
2764 2767 dfh.seek(self._docket.data_end, os.SEEK_SET)
2765 2768 if sdfh:
2766 2769 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2767 2770
2768 2771 curr = len(self) - 1
2769 2772 if not self._inline:
2770 2773 transaction.add(self._datafile, offset)
2771 2774 if self._sidedatafile:
2772 2775 transaction.add(self._sidedatafile, sidedata_offset)
2773 2776 transaction.add(self._indexfile, curr * len(entry))
2774 2777 if data[0]:
2775 2778 dfh.write(data[0])
2776 2779 dfh.write(data[1])
2777 2780 if sidedata:
2778 2781 sdfh.write(sidedata)
2779 2782 ifh.write(entry)
2780 2783 else:
2781 2784 offset += curr * self.index.entry_size
2782 2785 transaction.add(self._indexfile, offset)
2783 2786 ifh.write(entry)
2784 2787 ifh.write(data[0])
2785 2788 ifh.write(data[1])
2786 2789 assert not sidedata
2787 2790 self._enforceinlinesize(transaction)
2788 2791 if self._docket is not None:
2789 2792 # revlog-v2 always has 3 writing handles, help Pytype
2790 2793 wh1 = self._writinghandles[0]
2791 2794 wh2 = self._writinghandles[1]
2792 2795 wh3 = self._writinghandles[2]
2793 2796 assert wh1 is not None
2794 2797 assert wh2 is not None
2795 2798 assert wh3 is not None
2796 2799 self._docket.index_end = wh1.tell()
2797 2800 self._docket.data_end = wh2.tell()
2798 2801 self._docket.sidedata_end = wh3.tell()
2799 2802
2800 2803 nodemaputil.setup_persistent_nodemap(transaction, self)
2801 2804
2802 2805 def addgroup(
2803 2806 self,
2804 2807 deltas,
2805 2808 linkmapper,
2806 2809 transaction,
2807 2810 alwayscache=False,
2808 2811 addrevisioncb=None,
2809 2812 duplicaterevisioncb=None,
2810 2813 debug_info=None,
2811 2814 delta_base_reuse_policy=None,
2812 2815 ):
2813 2816 """
2814 2817 add a delta group
2815 2818
2816 2819 given a set of deltas, add them to the revision log. the
2817 2820 first delta is against its parent, which should be in our
2818 2821 log, the rest are against the previous delta.
2819 2822
2820 2823 If ``addrevisioncb`` is defined, it will be called with arguments of
2821 2824 this revlog and the node that was added.
2822 2825 """
2823 2826
2824 2827 if self._adding_group:
2825 2828 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2826 2829
2827 2830 # read the default delta-base reuse policy from revlog config if the
2828 2831 # group did not specify one.
2829 2832 if delta_base_reuse_policy is None:
2830 2833 if self._generaldelta and self._lazydeltabase:
2831 2834 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2832 2835 else:
2833 2836 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2834 2837
2835 2838 self._adding_group = True
2836 2839 empty = True
2837 2840 try:
2838 2841 with self._writing(transaction):
2839 2842 write_debug = None
2840 2843 if self._debug_delta:
2841 2844 write_debug = transaction._report
2842 2845 deltacomputer = deltautil.deltacomputer(
2843 2846 self,
2844 2847 write_debug=write_debug,
2845 2848 debug_info=debug_info,
2846 2849 )
2847 2850 # loop through our set of deltas
2848 2851 for data in deltas:
2849 2852 (
2850 2853 node,
2851 2854 p1,
2852 2855 p2,
2853 2856 linknode,
2854 2857 deltabase,
2855 2858 delta,
2856 2859 flags,
2857 2860 sidedata,
2858 2861 ) = data
2859 2862 link = linkmapper(linknode)
2860 2863 flags = flags or REVIDX_DEFAULT_FLAGS
2861 2864
2862 2865 rev = self.index.get_rev(node)
2863 2866 if rev is not None:
2864 2867 # this can happen if two branches make the same change
2865 2868 self._nodeduplicatecallback(transaction, rev)
2866 2869 if duplicaterevisioncb:
2867 2870 duplicaterevisioncb(self, rev)
2868 2871 empty = False
2869 2872 continue
2870 2873
2871 2874 for p in (p1, p2):
2872 2875 if not self.index.has_node(p):
2873 2876 raise error.LookupError(
2874 2877 p, self.radix, _(b'unknown parent')
2875 2878 )
2876 2879
2877 2880 if not self.index.has_node(deltabase):
2878 2881 raise error.LookupError(
2879 2882 deltabase, self.display_id, _(b'unknown delta base')
2880 2883 )
2881 2884
2882 2885 baserev = self.rev(deltabase)
2883 2886
2884 2887 if baserev != nullrev and self.iscensored(baserev):
2885 2888 # if base is censored, delta must be full replacement in a
2886 2889 # single patch operation
2887 2890 hlen = struct.calcsize(b">lll")
2888 2891 oldlen = self.rawsize(baserev)
2889 2892 newlen = len(delta) - hlen
2890 2893 if delta[:hlen] != mdiff.replacediffheader(
2891 2894 oldlen, newlen
2892 2895 ):
2893 2896 raise error.CensoredBaseError(
2894 2897 self.display_id, self.node(baserev)
2895 2898 )
2896 2899
2897 2900 if not flags and self._peek_iscensored(baserev, delta):
2898 2901 flags |= REVIDX_ISCENSORED
2899 2902
2900 2903 # We assume consumers of addrevisioncb will want to retrieve
2901 2904 # the added revision, which will require a call to
2902 2905 # revision(). revision() will fast path if there is a cache
2903 2906 # hit. So, we tell _addrevision() to always cache in this case.
2904 2907 # We're only using addgroup() in the context of changegroup
2905 2908 # generation so the revision data can always be handled as raw
2906 2909 # by the flagprocessor.
2907 2910 rev = self._addrevision(
2908 2911 node,
2909 2912 None,
2910 2913 transaction,
2911 2914 link,
2912 2915 p1,
2913 2916 p2,
2914 2917 flags,
2915 2918 (baserev, delta, delta_base_reuse_policy),
2916 2919 alwayscache=alwayscache,
2917 2920 deltacomputer=deltacomputer,
2918 2921 sidedata=sidedata,
2919 2922 )
2920 2923
2921 2924 if addrevisioncb:
2922 2925 addrevisioncb(self, rev)
2923 2926 empty = False
2924 2927 finally:
2925 2928 self._adding_group = False
2926 2929 return not empty
2927 2930
2928 2931 def iscensored(self, rev):
2929 2932 """Check if a file revision is censored."""
2930 2933 if not self._censorable:
2931 2934 return False
2932 2935
2933 2936 return self.flags(rev) & REVIDX_ISCENSORED
2934 2937
2935 2938 def _peek_iscensored(self, baserev, delta):
2936 2939 """Quickly check if a delta produces a censored revision."""
2937 2940 if not self._censorable:
2938 2941 return False
2939 2942
2940 2943 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2941 2944
2942 2945 def getstrippoint(self, minlink):
2943 2946 """find the minimum rev that must be stripped to strip the linkrev
2944 2947
2945 2948 Returns a tuple containing the minimum rev and a set of all revs that
2946 2949 have linkrevs that will be broken by this strip.
2947 2950 """
2948 2951 return storageutil.resolvestripinfo(
2949 2952 minlink,
2950 2953 len(self) - 1,
2951 2954 self.headrevs(),
2952 2955 self.linkrev,
2953 2956 self.parentrevs,
2954 2957 )
2955 2958
2956 2959 def strip(self, minlink, transaction):
2957 2960 """truncate the revlog on the first revision with a linkrev >= minlink
2958 2961
2959 2962 This function is called when we're stripping revision minlink and
2960 2963 its descendants from the repository.
2961 2964
2962 2965 We have to remove all revisions with linkrev >= minlink, because
2963 2966 the equivalent changelog revisions will be renumbered after the
2964 2967 strip.
2965 2968
2966 2969 So we truncate the revlog on the first of these revisions, and
2967 2970 trust that the caller has saved the revisions that shouldn't be
2968 2971 removed and that it'll re-add them after this truncation.
2969 2972 """
2970 2973 if len(self) == 0:
2971 2974 return
2972 2975
2973 2976 rev, _ = self.getstrippoint(minlink)
2974 2977 if rev == len(self):
2975 2978 return
2976 2979
2977 2980 # first truncate the files on disk
2978 2981 data_end = self.start(rev)
2979 2982 if not self._inline:
2980 2983 transaction.add(self._datafile, data_end)
2981 2984 end = rev * self.index.entry_size
2982 2985 else:
2983 2986 end = data_end + (rev * self.index.entry_size)
2984 2987
2985 2988 if self._sidedatafile:
2986 2989 sidedata_end = self.sidedata_cut_off(rev)
2987 2990 transaction.add(self._sidedatafile, sidedata_end)
2988 2991
2989 2992 transaction.add(self._indexfile, end)
2990 2993 if self._docket is not None:
2991 2994 # XXX we could leverage the docket while stripping. However, it is
2992 2995 # not powerful enough at the time of this comment
2993 2996 self._docket.index_end = end
2994 2997 self._docket.data_end = data_end
2995 2998 self._docket.sidedata_end = sidedata_end
2996 2999 self._docket.write(transaction, stripping=True)
2997 3000
2998 3001 # then reset internal state in memory to forget those revisions
2999 3002 self._revisioncache = None
3000 3003 self._chaininfocache = util.lrucachedict(500)
3001 3004 self._segmentfile.clear_cache()
3002 3005 self._segmentfile_sidedata.clear_cache()
3003 3006
3004 3007 del self.index[rev:-1]
3005 3008
3006 3009 def checksize(self):
3007 3010 """Check size of index and data files
3008 3011
3009 3012 return a (dd, di) tuple.
3010 3013 - dd: extra bytes for the "data" file
3011 3014 - di: extra bytes for the "index" file
3012 3015
3013 3016 A healthy revlog will return (0, 0).
3014 3017 """
3015 3018 expected = 0
3016 3019 if len(self):
3017 3020 expected = max(0, self.end(len(self) - 1))
3018 3021
3019 3022 try:
3020 3023 with self._datafp() as f:
3021 3024 f.seek(0, io.SEEK_END)
3022 3025 actual = f.tell()
3023 3026 dd = actual - expected
3024 3027 except FileNotFoundError:
3025 3028 dd = 0
3026 3029
3027 3030 try:
3028 3031 f = self.opener(self._indexfile)
3029 3032 f.seek(0, io.SEEK_END)
3030 3033 actual = f.tell()
3031 3034 f.close()
3032 3035 s = self.index.entry_size
3033 3036 i = max(0, actual // s)
3034 3037 di = actual - (i * s)
3035 3038 if self._inline:
3036 3039 databytes = 0
3037 3040 for r in self:
3038 3041 databytes += max(0, self.length(r))
3039 3042 dd = 0
3040 3043 di = actual - len(self) * s - databytes
3041 3044 except FileNotFoundError:
3042 3045 di = 0
3043 3046
3044 3047 return (dd, di)
3045 3048
3046 3049 def files(self):
3047 3050 res = [self._indexfile]
3048 3051 if self._docket_file is None:
3049 3052 if not self._inline:
3050 3053 res.append(self._datafile)
3051 3054 else:
3052 3055 res.append(self._docket_file)
3053 3056 res.extend(self._docket.old_index_filepaths(include_empty=False))
3054 3057 if self._docket.data_end:
3055 3058 res.append(self._datafile)
3056 3059 res.extend(self._docket.old_data_filepaths(include_empty=False))
3057 3060 if self._docket.sidedata_end:
3058 3061 res.append(self._sidedatafile)
3059 3062 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3060 3063 return res
3061 3064
3062 3065 def emitrevisions(
3063 3066 self,
3064 3067 nodes,
3065 3068 nodesorder=None,
3066 3069 revisiondata=False,
3067 3070 assumehaveparentrevisions=False,
3068 3071 deltamode=repository.CG_DELTAMODE_STD,
3069 3072 sidedata_helpers=None,
3070 3073 debug_info=None,
3071 3074 ):
3072 3075 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3073 3076 raise error.ProgrammingError(
3074 3077 b'unhandled value for nodesorder: %s' % nodesorder
3075 3078 )
3076 3079
3077 3080 if nodesorder is None and not self._generaldelta:
3078 3081 nodesorder = b'storage'
3079 3082
3080 3083 if (
3081 3084 not self._storedeltachains
3082 3085 and deltamode != repository.CG_DELTAMODE_PREV
3083 3086 ):
3084 3087 deltamode = repository.CG_DELTAMODE_FULL
3085 3088
3086 3089 return storageutil.emitrevisions(
3087 3090 self,
3088 3091 nodes,
3089 3092 nodesorder,
3090 3093 revlogrevisiondelta,
3091 3094 deltaparentfn=self.deltaparent,
3092 3095 candeltafn=self.candelta,
3093 3096 rawsizefn=self.rawsize,
3094 3097 revdifffn=self.revdiff,
3095 3098 flagsfn=self.flags,
3096 3099 deltamode=deltamode,
3097 3100 revisiondata=revisiondata,
3098 3101 assumehaveparentrevisions=assumehaveparentrevisions,
3099 3102 sidedata_helpers=sidedata_helpers,
3100 3103 debug_info=debug_info,
3101 3104 )
3102 3105
3103 3106 DELTAREUSEALWAYS = b'always'
3104 3107 DELTAREUSESAMEREVS = b'samerevs'
3105 3108 DELTAREUSENEVER = b'never'
3106 3109
3107 3110 DELTAREUSEFULLADD = b'fulladd'
3108 3111
3109 3112 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3110 3113
3111 3114 def clone(
3112 3115 self,
3113 3116 tr,
3114 3117 destrevlog,
3115 3118 addrevisioncb=None,
3116 3119 deltareuse=DELTAREUSESAMEREVS,
3117 3120 forcedeltabothparents=None,
3118 3121 sidedata_helpers=None,
3119 3122 ):
3120 3123 """Copy this revlog to another, possibly with format changes.
3121 3124
3122 3125 The destination revlog will contain the same revisions and nodes.
3123 3126 However, it may not be bit-for-bit identical due to e.g. delta encoding
3124 3127 differences.
3125 3128
3126 3129 The ``deltareuse`` argument controls how deltas from the existing revlog
3127 3130 are preserved in the destination revlog. The argument can have the
3128 3131 following values:
3129 3132
3130 3133 DELTAREUSEALWAYS
3131 3134 Deltas will always be reused (if possible), even if the destination
3132 3135 revlog would not select the same revisions for the delta. This is the
3133 3136 fastest mode of operation.
3134 3137 DELTAREUSESAMEREVS
3135 3138 Deltas will be reused if the destination revlog would pick the same
3136 3139 revisions for the delta. This mode strikes a balance between speed
3137 3140 and optimization.
3138 3141 DELTAREUSENEVER
3139 3142 Deltas will never be reused. This is the slowest mode of execution.
3140 3143 This mode can be used to recompute deltas (e.g. if the diff/delta
3141 3144 algorithm changes).
3142 3145 DELTAREUSEFULLADD
3143 3146 Revisions will be re-added as if they were new content. This is
3144 3147 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3145 3148 e.g. large file detection and handling.
3146 3149
3147 3150 Delta computation can be slow, so the choice of delta reuse policy can
3148 3151 significantly affect run time.
3149 3152
3150 3153 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3151 3154 the two extremes. Deltas will be reused when they are appropriate, but
3152 3155 recomputed when the destination revlog would pick a better base. This
3153 3156 means that when converting a non-generaldelta revlog to a generaldelta
3154 3157 revlog, deltas will be recomputed if the delta's parent isn't a parent
3155 3158 of the revision.
3156 3159
3157 3160 In addition to the delta policy, the ``forcedeltabothparents``
3158 3161 argument controls whether to force computing deltas against both parents
3159 3162 for merges. When unset, the destination revlog's existing setting is kept. (An illustrative usage sketch follows this method.)
3160 3163
3161 3164 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3162 3165 `sidedata_helpers`.
3163 3166 """
3164 3167 if deltareuse not in self.DELTAREUSEALL:
3165 3168 raise ValueError(
3166 3169 _(b'value for deltareuse invalid: %s') % deltareuse
3167 3170 )
3168 3171
3169 3172 if len(destrevlog):
3170 3173 raise ValueError(_(b'destination revlog is not empty'))
3171 3174
3172 3175 if getattr(self, 'filteredrevs', None):
3173 3176 raise ValueError(_(b'source revlog has filtered revisions'))
3174 3177 if getattr(destrevlog, 'filteredrevs', None):
3175 3178 raise ValueError(_(b'destination revlog has filtered revisions'))
3176 3179
3177 3180 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3178 3181 # if possible.
3179 3182 oldlazydelta = destrevlog._lazydelta
3180 3183 oldlazydeltabase = destrevlog._lazydeltabase
3181 3184 oldamd = destrevlog._deltabothparents
3182 3185
3183 3186 try:
3184 3187 if deltareuse == self.DELTAREUSEALWAYS:
3185 3188 destrevlog._lazydeltabase = True
3186 3189 destrevlog._lazydelta = True
3187 3190 elif deltareuse == self.DELTAREUSESAMEREVS:
3188 3191 destrevlog._lazydeltabase = False
3189 3192 destrevlog._lazydelta = True
3190 3193 elif deltareuse == self.DELTAREUSENEVER:
3191 3194 destrevlog._lazydeltabase = False
3192 3195 destrevlog._lazydelta = False
3193 3196
3194 3197 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3195 3198
3196 3199 self._clone(
3197 3200 tr,
3198 3201 destrevlog,
3199 3202 addrevisioncb,
3200 3203 deltareuse,
3201 3204 forcedeltabothparents,
3202 3205 sidedata_helpers,
3203 3206 )
3204 3207
3205 3208 finally:
3206 3209 destrevlog._lazydelta = oldlazydelta
3207 3210 destrevlog._lazydeltabase = oldlazydeltabase
3208 3211 destrevlog._deltabothparents = oldamd
3209 3212
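    # Illustrative sketch only, not part of the revlog API: how a caller
    # holding a repository object might drive ``clone`` with an explicit
    # delta-reuse policy. ``repo`` and the transaction idiom are
    # assumptions supplied by the caller; only ``clone`` and the
    # DELTAREUSE* constants above come from this class.
    def _example_clone_recomputing_deltas(self, repo, destrevlog):
        """Sketch: copy this revlog into ``destrevlog``, recomputing deltas."""
        with repo.transaction(b'example-clone') as tr:
            self.clone(
                tr,
                destrevlog,
                deltareuse=self.DELTAREUSENEVER,
                forcedeltabothparents=None,
            )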
3210 3213 def _clone(
3211 3214 self,
3212 3215 tr,
3213 3216 destrevlog,
3214 3217 addrevisioncb,
3215 3218 deltareuse,
3216 3219 forcedeltabothparents,
3217 3220 sidedata_helpers,
3218 3221 ):
3219 3222 """perform the core duty of `revlog.clone` after parameter processing"""
3220 3223 write_debug = None
3221 3224 if self._debug_delta:
3222 3225 write_debug = tr._report
3223 3226 deltacomputer = deltautil.deltacomputer(
3224 3227 destrevlog,
3225 3228 write_debug=write_debug,
3226 3229 )
3227 3230 index = self.index
3228 3231 for rev in self:
3229 3232 entry = index[rev]
3230 3233
3231 3234 # Some classes override linkrev to take filtered revs into
3232 3235 # account. Use raw entry from index.
3233 3236 flags = entry[0] & 0xFFFF
3234 3237 linkrev = entry[4]
3235 3238 p1 = index[entry[5]][7]
3236 3239 p2 = index[entry[6]][7]
3237 3240 node = entry[7]
3238 3241
3239 3242 # (Possibly) reuse the delta from the revlog if allowed and
3240 3243 # the revlog chunk is a delta.
3241 3244 cachedelta = None
3242 3245 rawtext = None
3243 3246 if deltareuse == self.DELTAREUSEFULLADD:
3244 3247 text = self._revisiondata(rev)
3245 3248 sidedata = self.sidedata(rev)
3246 3249
3247 3250 if sidedata_helpers is not None:
3248 3251 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3249 3252 self, sidedata_helpers, sidedata, rev
3250 3253 )
3251 3254 flags = flags | new_flags[0] & ~new_flags[1]
3252 3255
3253 3256 destrevlog.addrevision(
3254 3257 text,
3255 3258 tr,
3256 3259 linkrev,
3257 3260 p1,
3258 3261 p2,
3259 3262 cachedelta=cachedelta,
3260 3263 node=node,
3261 3264 flags=flags,
3262 3265 deltacomputer=deltacomputer,
3263 3266 sidedata=sidedata,
3264 3267 )
3265 3268 else:
3266 3269 if destrevlog._lazydelta:
3267 3270 dp = self.deltaparent(rev)
3268 3271 if dp != nullrev:
3269 3272 cachedelta = (dp, bytes(self._chunk(rev)))
3270 3273
3271 3274 sidedata = None
3272 3275 if not cachedelta:
3273 3276 rawtext = self._revisiondata(rev)
3274 3277 sidedata = self.sidedata(rev)
3275 3278 if sidedata is None:
3276 3279 sidedata = self.sidedata(rev)
3277 3280
3278 3281 if sidedata_helpers is not None:
3279 3282 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3280 3283 self, sidedata_helpers, sidedata, rev
3281 3284 )
3282 3285 flags = flags | new_flags[0] & ~new_flags[1]
3283 3286
3284 3287 with destrevlog._writing(tr):
3285 3288 destrevlog._addrevision(
3286 3289 node,
3287 3290 rawtext,
3288 3291 tr,
3289 3292 linkrev,
3290 3293 p1,
3291 3294 p2,
3292 3295 flags,
3293 3296 cachedelta,
3294 3297 deltacomputer=deltacomputer,
3295 3298 sidedata=sidedata,
3296 3299 )
3297 3300
3298 3301 if addrevisioncb:
3299 3302 addrevisioncb(self, rev, node)
3300 3303
3301 3304 def censorrevision(self, tr, censornode, tombstone=b''):
3302 3305 if self._format_version == REVLOGV0:
3303 3306 raise error.RevlogError(
3304 3307 _(b'cannot censor with version %d revlogs')
3305 3308 % self._format_version
3306 3309 )
3307 3310 elif self._format_version == REVLOGV1:
3308 3311 rewrite.v1_censor(self, tr, censornode, tombstone)
3309 3312 else:
3310 3313 rewrite.v2_censor(self, tr, censornode, tombstone)
3311 3314
3312 3315 def verifyintegrity(self, state):
3313 3316 """Verifies the integrity of the revlog.
3314 3317
3315 3318 Yields ``revlogproblem`` instances describing problems that are
3316 3319 found.
3317 3320 """
3318 3321 dd, di = self.checksize()
3319 3322 if dd:
3320 3323 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3321 3324 if di:
3322 3325 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3323 3326
3324 3327 version = self._format_version
3325 3328
3326 3329 # The verifier tells us what version revlog we should be.
3327 3330 if version != state[b'expectedversion']:
3328 3331 yield revlogproblem(
3329 3332 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3330 3333 % (self.display_id, version, state[b'expectedversion'])
3331 3334 )
3332 3335
3333 3336 state[b'skipread'] = set()
3334 3337 state[b'safe_renamed'] = set()
3335 3338
3336 3339 for rev in self:
3337 3340 node = self.node(rev)
3338 3341
3339 3342 # Verify contents. 4 cases to care about:
3340 3343 #
3341 3344 # common: the most common case
3342 3345 # rename: with a rename
3343 3346 # meta: file content starts with b'\1\n', the metadata
3344 3347 # header defined in filelog.py, but without a rename
3345 3348 # ext: content stored externally
3346 3349 #
3347 3350 # More formally, their differences are shown below:
3348 3351 #
3349 3352 # | common | rename | meta | ext
3350 3353 # -------------------------------------------------------
3351 3354 # flags() | 0 | 0 | 0 | not 0
3352 3355 # renamed() | False | True | False | ?
3353 3356 # rawtext[0:2]=='\1\n'| False | True | True | ?
3354 3357 #
3355 3358 # "rawtext" means the raw text stored in revlog data, which
3356 3359 # could be retrieved by "rawdata(rev)". "text"
3357 3360 # mentioned below is "revision(rev)".
3358 3361 #
3359 3362 # There are 3 different lengths stored physically:
3360 3363 # 1. L1: rawsize, stored in revlog index
3361 3364 # 2. L2: len(rawtext), stored in revlog data
3362 3365 # 3. L3: len(text), stored in revlog data if flags==0, or
3363 3366 # possibly somewhere else if flags!=0
3364 3367 #
3365 3368 # L1 should be equal to L2. L3 could be different from them.
3366 3369 # "text" may or may not affect commit hash depending on flag
3367 3370 # processors (see flagutil.addflagprocessor).
3368 3371 #
3369 3372 # | common | rename | meta | ext
3370 3373 # -------------------------------------------------
3371 3374 # rawsize() | L1 | L1 | L1 | L1
3372 3375 # size() | L1 | L2-LM | L1(*) | L1 (?)
3373 3376 # len(rawtext) | L2 | L2 | L2 | L2
3374 3377 # len(text) | L2 | L2 | L2 | L3
3375 3378 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3376 3379 #
3377 3380 # LM: length of metadata, depending on rawtext
3378 3381 # (*): not ideal, see comment in filelog.size
3379 3382 # (?): could be "- len(meta)" if the resolved content has
3380 3383 # rename metadata
3381 3384 #
3382 3385 # Checks needed to be done:
3383 3386 # 1. length check: L1 == L2, in all cases.
3384 3387 # 2. hash check: depending on flag processor, we may need to
3385 3388 # use either "text" (external), or "rawtext" (in revlog).
3386 3389
3387 3390 try:
3388 3391 skipflags = state.get(b'skipflags', 0)
3389 3392 if skipflags:
3390 3393 skipflags &= self.flags(rev)
3391 3394
3392 3395 _verify_revision(self, skipflags, state, node)
3393 3396
3394 3397 l1 = self.rawsize(rev)
3395 3398 l2 = len(self.rawdata(node))
3396 3399
3397 3400 if l1 != l2:
3398 3401 yield revlogproblem(
3399 3402 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3400 3403 node=node,
3401 3404 )
3402 3405
3403 3406 except error.CensoredNodeError:
3404 3407 if state[b'erroroncensored']:
3405 3408 yield revlogproblem(
3406 3409 error=_(b'censored file data'), node=node
3407 3410 )
3408 3411 state[b'skipread'].add(node)
3409 3412 except Exception as e:
3410 3413 yield revlogproblem(
3411 3414 error=_(b'unpacking %s: %s')
3412 3415 % (short(node), stringutil.forcebytestr(e)),
3413 3416 node=node,
3414 3417 )
3415 3418 state[b'skipread'].add(node)
3416 3419
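    # Illustrative sketch of a minimal driver for ``verifyintegrity``; the
    # driver itself is hypothetical. The ``state`` keys shown are the ones
    # the method above actually reads, and it fills in ``skipread`` and
    # ``safe_renamed`` itself.
    def _example_collect_problems(self):
        state = {
            b'expectedversion': self._format_version,
            b'erroroncensored': True,
        }
        return list(self.verifyintegrity(state)), state[b'skipread']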
3417 3420 def storageinfo(
3418 3421 self,
3419 3422 exclusivefiles=False,
3420 3423 sharedfiles=False,
3421 3424 revisionscount=False,
3422 3425 trackedsize=False,
3423 3426 storedsize=False,
3424 3427 ):
3425 3428 d = {}
3426 3429
3427 3430 if exclusivefiles:
3428 3431 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3429 3432 if not self._inline:
3430 3433 d[b'exclusivefiles'].append((self.opener, self._datafile))
3431 3434
3432 3435 if sharedfiles:
3433 3436 d[b'sharedfiles'] = []
3434 3437
3435 3438 if revisionscount:
3436 3439 d[b'revisionscount'] = len(self)
3437 3440
3438 3441 if trackedsize:
3439 3442 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3440 3443
3441 3444 if storedsize:
3442 3445 d[b'storedsize'] = sum(
3443 3446 self.opener.stat(path).st_size for path in self.files()
3444 3447 )
3445 3448
3446 3449 return d
3447 3450
3448 3451 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3449 3452 if not self.hassidedata:
3450 3453 return
3451 3454 # revlog formats with sidedata support do not support inline
3452 3455 assert not self._inline
3453 3456 if not helpers[1] and not helpers[2]:
3454 3457 # Nothing to generate or remove
3455 3458 return
3456 3459
3457 3460 new_entries = []
3458 3461 # append the new sidedata
3459 3462 with self._writing(transaction):
3460 3463 ifh, dfh, sdfh = self._writinghandles
3461 3464 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3462 3465
3463 3466 current_offset = sdfh.tell()
3464 3467 for rev in range(startrev, endrev + 1):
3465 3468 entry = self.index[rev]
3466 3469 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3467 3470 store=self,
3468 3471 sidedata_helpers=helpers,
3469 3472 sidedata={},
3470 3473 rev=rev,
3471 3474 )
3472 3475
3473 3476 serialized_sidedata = sidedatautil.serialize_sidedata(
3474 3477 new_sidedata
3475 3478 )
3476 3479
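                # How this chunk will be stored: empty sidedata keeps the
                # initial COMP_MODE_INLINE marker; non-empty data defaults
                # to plain (uncompressed) storage; the compressed form is
                # kept only when it is genuinely smaller, either without a
                # header (COMP_MODE_DEFAULT, the docket's default engine)
                # or with its engine header inline (COMP_MODE_INLINE).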
3477 3480 sidedata_compression_mode = COMP_MODE_INLINE
3478 3481 if serialized_sidedata and self.hassidedata:
3479 3482 sidedata_compression_mode = COMP_MODE_PLAIN
3480 3483 h, comp_sidedata = self.compress(serialized_sidedata)
3481 3484 if (
3482 3485 h != b'u'
3483 3486 and comp_sidedata[0] != b'\0'
3484 3487 and len(comp_sidedata) < len(serialized_sidedata)
3485 3488 ):
3486 3489 assert not h
3487 3490 if (
3488 3491 comp_sidedata[0]
3489 3492 == self._docket.default_compression_header
3490 3493 ):
3491 3494 sidedata_compression_mode = COMP_MODE_DEFAULT
3492 3495 serialized_sidedata = comp_sidedata
3493 3496 else:
3494 3497 sidedata_compression_mode = COMP_MODE_INLINE
3495 3498 serialized_sidedata = comp_sidedata
3496 3499 if entry[8] != 0 or entry[9] != 0:
3497 3500 # rewriting entries that already have sidedata is not
3498 3501 # supported yet, because it introduces garbage data in the
3499 3502 # revlog.
3500 3503 msg = b"rewriting existing sidedata is not supported yet"
3501 3504 raise error.Abort(msg)
3502 3505
3503 3506 # Apply (potential) flags to add and to remove after running
3504 3507 # the sidedata helpers
3505 3508 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3506 3509 entry_update = (
3507 3510 current_offset,
3508 3511 len(serialized_sidedata),
3509 3512 new_offset_flags,
3510 3513 sidedata_compression_mode,
3511 3514 )
3512 3515
3513 3516 # the sidedata computation might have moved the file cursors around
3514 3517 sdfh.seek(current_offset, os.SEEK_SET)
3515 3518 sdfh.write(serialized_sidedata)
3516 3519 new_entries.append(entry_update)
3517 3520 current_offset += len(serialized_sidedata)
3518 3521 self._docket.sidedata_end = sdfh.tell()
3519 3522
3520 3523 # rewrite the new index entries
3521 3524 ifh.seek(startrev * self.index.entry_size)
3522 3525 for i, e in enumerate(new_entries):
3523 3526 rev = startrev + i
3524 3527 self.index.replace_sidedata_info(rev, *e)
3525 3528 packed = self.index.entry_binary(rev)
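                # revlog setups without a docket embed the format header in
                # the very first index entry, so re-attach it when entry 0
                # is rewritten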
3526 3529 if rev == 0 and self._docket is None:
3527 3530 header = self._format_flags | self._format_version
3528 3531 header = self.index.pack_header(header)
3529 3532 packed = header + packed
3530 3533 ifh.write(packed)
@@ -1,982 +1,994 b''
1 1 #require serve no-reposimplestore no-chg
2 2
3 3 #testcases stream-legacy stream-bundle2-v2 stream-bundle2-v3
4 4
5 5 #if stream-legacy
6 6 $ cat << EOF >> $HGRCPATH
7 7 > [server]
8 8 > bundle2.stream = no
9 9 > EOF
10 10 #endif
11 11 #if stream-bundle2-v3
12 12 $ cat << EOF >> $HGRCPATH
13 13 > [experimental]
14 14 > stream-v3 = yes
15 15 > EOF
16 16 #endif
17 17
18 18 Initialize repository
19 19
20 20 $ hg init server
21 21 $ cd server
22 22 $ sh $TESTDIR/testlib/stream_clone_setup.sh
23 23 adding 00changelog-ab349180a0405010.nd
24 24 adding 00changelog.d
25 25 adding 00changelog.i
26 26 adding 00changelog.n
27 27 adding 00manifest.d
28 28 adding 00manifest.i
29 29 adding container/isam-build-centos7/bazel-coverage-generator-sandboxfs-compatibility-0758e3e4f6057904d44399bd666faba9e7f40686.patch
30 30 adding data/foo.d
31 31 adding data/foo.i
32 32 adding data/foo.n
33 33 adding data/undo.babar
34 34 adding data/undo.d
35 35 adding data/undo.foo.d
36 36 adding data/undo.foo.i
37 37 adding data/undo.foo.n
38 38 adding data/undo.i
39 39 adding data/undo.n
40 40 adding data/undo.py
41 41 adding foo.d
42 42 adding foo.i
43 43 adding foo.n
44 44 adding meta/foo.d
45 45 adding meta/foo.i
46 46 adding meta/foo.n
47 47 adding meta/undo.babar
48 48 adding meta/undo.d
49 49 adding meta/undo.foo.d
50 50 adding meta/undo.foo.i
51 51 adding meta/undo.foo.n
52 52 adding meta/undo.i
53 53 adding meta/undo.n
54 54 adding meta/undo.py
55 55 adding savanah/foo.d
56 56 adding savanah/foo.i
57 57 adding savanah/foo.n
58 58 adding savanah/undo.babar
59 59 adding savanah/undo.d
60 60 adding savanah/undo.foo.d
61 61 adding savanah/undo.foo.i
62 62 adding savanah/undo.foo.n
63 63 adding savanah/undo.i
64 64 adding savanah/undo.n
65 65 adding savanah/undo.py
66 66 adding store/C\xc3\xa9lesteVille_is_a_Capital_City (esc)
67 67 adding store/foo.d
68 68 adding store/foo.i
69 69 adding store/foo.n
70 70 adding store/undo.babar
71 71 adding store/undo.d
72 72 adding store/undo.foo.d
73 73 adding store/undo.foo.i
74 74 adding store/undo.foo.n
75 75 adding store/undo.i
76 76 adding store/undo.n
77 77 adding store/undo.py
78 78 adding undo.babar
79 79 adding undo.d
80 80 adding undo.foo.d
81 81 adding undo.foo.i
82 82 adding undo.foo.n
83 83 adding undo.i
84 84 adding undo.n
85 85 adding undo.py
86 86
87 87 $ hg --config server.uncompressed=false serve -p $HGPORT -d --pid-file=hg.pid
88 88 $ cat hg.pid > $DAEMON_PIDS
89 89 $ cd ..
90 90
91 91 Check local clone
92 92 ==================
93 93
94 94 The logic is close enough to the uncompressed case.
95 95 This is present here to reuse the testing around files with "special" names.
96 96
97 97 $ hg clone server local-clone
98 98 updating to branch default
99 99 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
100 100
101 101 Check that the clone went well
102 102
103 103 $ hg verify -R local-clone -q
104 104
105 105 Check uncompressed
106 106 ==================
107 107
108 108 Cannot stream clone when server.uncompressed is set
109 109
110 110 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=stream_out'
111 111 200 Script output follows
112 112
113 113 1
114 114
115 115 #if stream-legacy
116 116 $ hg debugcapabilities http://localhost:$HGPORT
117 117 Main capabilities:
118 118 batch
119 119 branchmap
120 120 $USUAL_BUNDLE2_CAPS_SERVER$
121 121 changegroupsubset
122 122 compression=$BUNDLE2_COMPRESSIONS$
123 123 getbundle
124 124 httpheader=1024
125 125 httpmediatype=0.1rx,0.1tx,0.2tx
126 126 known
127 127 lookup
128 128 pushkey
129 129 unbundle=HG10GZ,HG10BZ,HG10UN
130 130 unbundlehash
131 131 Bundle2 capabilities:
132 132 HG20
133 133 bookmarks
134 134 changegroup
135 135 01
136 136 02
137 137 03
138 138 checkheads
139 139 related
140 140 digests
141 141 md5
142 142 sha1
143 143 sha512
144 144 error
145 145 abort
146 146 unsupportedcontent
147 147 pushraced
148 148 pushkey
149 149 hgtagsfnodes
150 150 listkeys
151 151 phases
152 152 heads
153 153 pushkey
154 154 remote-changegroup
155 155 http
156 156 https
157 157
158 158 $ hg clone --stream -U http://localhost:$HGPORT server-disabled
159 159 warning: stream clone requested but server has them disabled
160 160 requesting all changes
161 161 adding changesets
162 162 adding manifests
163 163 adding file changes
164 164 added 3 changesets with 1088 changes to 1088 files
165 165 new changesets 96ee1d7354c4:5223b5e3265f
166 166
167 167 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=getbundle' content-type --bodyfile body --hgproto 0.2 --requestheader "x-hgarg-1=bundlecaps=HG20%2Cbundle2%3DHG20%250Abookmarks%250Achangegroup%253D01%252C02%252C03%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Aphases%253Dheads%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=0&common=0000000000000000000000000000000000000000&heads=c17445101a72edac06facd130d14808dfbd5c7c2&stream=1"
168 168 200 Script output follows
169 169 content-type: application/mercurial-0.2
170 170
171 171
172 172 $ f --size body --hexdump --bytes 100
173 173 body: size=140
174 174 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
175 175 0010: 73 0b 45 52 52 4f 52 3a 41 42 4f 52 54 00 00 00 |s.ERROR:ABORT...|
176 176 0020: 00 01 01 07 3c 04 16 6d 65 73 73 61 67 65 73 74 |....<..messagest|
177 177 0030: 72 65 61 6d 20 64 61 74 61 20 72 65 71 75 65 73 |ream data reques|
178 178 0040: 74 65 64 20 62 75 74 20 73 65 72 76 65 72 20 64 |ted but server d|
179 179 0050: 6f 65 73 20 6e 6f 74 20 61 6c 6c 6f 77 20 74 68 |oes not allow th|
180 180 0060: 69 73 20 66 |is f|
181 181
182 182 #endif
183 183 #if stream-bundle2-v2
184 184 $ hg debugcapabilities http://localhost:$HGPORT
185 185 Main capabilities:
186 186 batch
187 187 branchmap
188 188 $USUAL_BUNDLE2_CAPS_SERVER$
189 189 changegroupsubset
190 190 compression=$BUNDLE2_COMPRESSIONS$
191 191 getbundle
192 192 httpheader=1024
193 193 httpmediatype=0.1rx,0.1tx,0.2tx
194 194 known
195 195 lookup
196 196 pushkey
197 197 unbundle=HG10GZ,HG10BZ,HG10UN
198 198 unbundlehash
199 199 Bundle2 capabilities:
200 200 HG20
201 201 bookmarks
202 202 changegroup
203 203 01
204 204 02
205 205 03
206 206 checkheads
207 207 related
208 208 digests
209 209 md5
210 210 sha1
211 211 sha512
212 212 error
213 213 abort
214 214 unsupportedcontent
215 215 pushraced
216 216 pushkey
217 217 hgtagsfnodes
218 218 listkeys
219 219 phases
220 220 heads
221 221 pushkey
222 222 remote-changegroup
223 223 http
224 224 https
225 225
226 226 $ hg clone --stream -U http://localhost:$HGPORT server-disabled
227 227 warning: stream clone requested but server has them disabled
228 228 requesting all changes
229 229 adding changesets
230 230 adding manifests
231 231 adding file changes
232 232 added 3 changesets with 1088 changes to 1088 files
233 233 new changesets 96ee1d7354c4:5223b5e3265f
234 234
235 235 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=getbundle' content-type --bodyfile body --hgproto 0.2 --requestheader "x-hgarg-1=bundlecaps=HG20%2Cbundle2%3DHG20%250Abookmarks%250Achangegroup%253D01%252C02%252C03%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Aphases%253Dheads%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=0&common=0000000000000000000000000000000000000000&heads=c17445101a72edac06facd130d14808dfbd5c7c2&stream=1"
236 236 200 Script output follows
237 237 content-type: application/mercurial-0.2
238 238
239 239
240 240 $ f --size body --hexdump --bytes 100
241 241 body: size=140
242 242 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
243 243 0010: 73 0b 45 52 52 4f 52 3a 41 42 4f 52 54 00 00 00 |s.ERROR:ABORT...|
244 244 0020: 00 01 01 07 3c 04 16 6d 65 73 73 61 67 65 73 74 |....<..messagest|
245 245 0030: 72 65 61 6d 20 64 61 74 61 20 72 65 71 75 65 73 |ream data reques|
246 246 0040: 74 65 64 20 62 75 74 20 73 65 72 76 65 72 20 64 |ted but server d|
247 247 0050: 6f 65 73 20 6e 6f 74 20 61 6c 6c 6f 77 20 74 68 |oes not allow th|
248 248 0060: 69 73 20 66 |is f|
249 249
250 250 #endif
251 251 #if stream-bundle2-v3
252 252 $ hg debugcapabilities http://localhost:$HGPORT
253 253 Main capabilities:
254 254 batch
255 255 branchmap
256 256 $USUAL_BUNDLE2_CAPS_SERVER$
257 257 changegroupsubset
258 258 compression=$BUNDLE2_COMPRESSIONS$
259 259 getbundle
260 260 httpheader=1024
261 261 httpmediatype=0.1rx,0.1tx,0.2tx
262 262 known
263 263 lookup
264 264 pushkey
265 265 unbundle=HG10GZ,HG10BZ,HG10UN
266 266 unbundlehash
267 267 Bundle2 capabilities:
268 268 HG20
269 269 bookmarks
270 270 changegroup
271 271 01
272 272 02
273 273 03
274 274 checkheads
275 275 related
276 276 digests
277 277 md5
278 278 sha1
279 279 sha512
280 280 error
281 281 abort
282 282 unsupportedcontent
283 283 pushraced
284 284 pushkey
285 285 hgtagsfnodes
286 286 listkeys
287 287 phases
288 288 heads
289 289 pushkey
290 290 remote-changegroup
291 291 http
292 292 https
293 293
294 294 $ hg clone --stream -U http://localhost:$HGPORT server-disabled
295 295 warning: stream clone requested but server has them disabled
296 296 requesting all changes
297 297 adding changesets
298 298 adding manifests
299 299 adding file changes
300 300 added 3 changesets with 1088 changes to 1088 files
301 301 new changesets 96ee1d7354c4:5223b5e3265f
302 302
303 303 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=getbundle' content-type --bodyfile body --hgproto 0.2 --requestheader "x-hgarg-1=bundlecaps=HG20%2Cbundle2%3DHG20%250Abookmarks%250Achangegroup%253D01%252C02%252C03%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Aphases%253Dheads%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=0&common=0000000000000000000000000000000000000000&heads=c17445101a72edac06facd130d14808dfbd5c7c2&stream=1"
304 304 200 Script output follows
305 305 content-type: application/mercurial-0.2
306 306
307 307
308 308 $ f --size body --hexdump --bytes 100
309 309 body: size=140
310 310 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
311 311 0010: 73 0b 45 52 52 4f 52 3a 41 42 4f 52 54 00 00 00 |s.ERROR:ABORT...|
312 312 0020: 00 01 01 07 3c 04 16 6d 65 73 73 61 67 65 73 74 |....<..messagest|
313 313 0030: 72 65 61 6d 20 64 61 74 61 20 72 65 71 75 65 73 |ream data reques|
314 314 0040: 74 65 64 20 62 75 74 20 73 65 72 76 65 72 20 64 |ted but server d|
315 315 0050: 6f 65 73 20 6e 6f 74 20 61 6c 6c 6f 77 20 74 68 |oes not allow th|
316 316 0060: 69 73 20 66 |is f|
317 317
318 318 #endif
319 319
320 320 $ killdaemons.py
321 321 $ cd server
322 322 $ hg serve -p $HGPORT -d --pid-file=hg.pid --error errors.txt
323 323 $ cat hg.pid > $DAEMON_PIDS
324 324 $ cd ..
325 325
326 326 Basic clone
327 327
328 328 #if stream-legacy
329 329 $ hg clone --stream -U http://localhost:$HGPORT clone1
330 330 streaming all changes
331 331 1090 files to transfer, 102 KB of data (no-zstd !)
332 332 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
333 333 1090 files to transfer, 98.8 KB of data (zstd !)
334 334 transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !)
335 335 searching for changes
336 336 no changes found
337 337 $ cat server/errors.txt
338 338 #endif
339 339 #if stream-bundle2-v2
340 340 $ hg clone --stream -U http://localhost:$HGPORT clone1
341 341 streaming all changes
342 342 1093 files to transfer, 102 KB of data (no-zstd !)
343 343 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
344 344 1093 files to transfer, 98.9 KB of data (zstd !)
345 345 transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !)
346 346
347 347 $ ls -1 clone1/.hg/cache
348 348 branch2-base
349 349 branch2-immutable
350 350 branch2-served
351 351 branch2-served.hidden
352 352 branch2-visible
353 353 branch2-visible-hidden
354 354 rbc-names-v1
355 355 rbc-revs-v1
356 356 tags2
357 357 tags2-served
358 358 $ cat server/errors.txt
359 359 #endif
360 360 #if stream-bundle2-v3
361 361 $ hg clone --stream -U http://localhost:$HGPORT clone1
362 362 streaming all changes
363 363 1093 entries to transfer
364 364 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
365 365 transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !)
366 366
367 367 $ ls -1 clone1/.hg/cache
368 368 branch2-base
369 369 branch2-immutable
370 370 branch2-served
371 371 branch2-served.hidden
372 372 branch2-visible
373 373 branch2-visible-hidden
374 374 rbc-names-v1
375 375 rbc-revs-v1
376 376 tags2
377 377 tags2-served
378 378 $ cat server/errors.txt
379 379 #endif
380 380
381 381 getbundle requests with stream=1 are uncompressed
382 382
383 383 $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=getbundle' content-type --bodyfile body --hgproto '0.1 0.2 comp=zlib,none' --requestheader "x-hgarg-1=bundlecaps=HG20%2Cbundle2%3DHG20%250Abookmarks%250Achangegroup%253D01%252C02%252C03%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Aphases%253Dheads%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps%250Astream%253Dv2&cg=0&common=0000000000000000000000000000000000000000&heads=c17445101a72edac06facd130d14808dfbd5c7c2&stream=1"
384 384 200 Script output follows
385 385 content-type: application/mercurial-0.2
386 386
387 387
388 388 #if no-zstd no-rust
389 389 $ f --size --hex --bytes 256 body
390 390 body: size=119123
391 391 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
392 392 0010: 62 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 |b.STREAM2.......|
393 393 0020: 06 09 04 0c 26 62 79 74 65 63 6f 75 6e 74 31 30 |....&bytecount10|
394 394 0030: 34 31 31 35 66 69 6c 65 63 6f 75 6e 74 31 30 39 |4115filecount109|
395 395 0040: 33 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |3requirementsgen|
396 396 0050: 65 72 61 6c 64 65 6c 74 61 25 32 43 72 65 76 6c |eraldelta%2Crevl|
397 397 0060: 6f 67 76 31 25 32 43 73 70 61 72 73 65 72 65 76 |ogv1%2Csparserev|
398 398 0070: 6c 6f 67 00 00 80 00 73 08 42 64 61 74 61 2f 30 |log....s.Bdata/0|
399 399 0080: 2e 69 00 03 00 01 00 00 00 00 00 00 00 02 00 00 |.i..............|
400 400 0090: 00 01 00 00 00 00 00 00 00 01 ff ff ff ff ff ff |................|
401 401 00a0: ff ff 80 29 63 a0 49 d3 23 87 bf ce fe 56 67 92 |...)c.I.#....Vg.|
402 402 00b0: 67 2c 69 d1 ec 39 00 00 00 00 00 00 00 00 00 00 |g,i..9..........|
403 403 00c0: 00 00 75 30 73 26 45 64 61 74 61 2f 30 30 63 68 |..u0s&Edata/00ch|
404 404 00d0: 61 6e 67 65 6c 6f 67 2d 61 62 33 34 39 31 38 30 |angelog-ab349180|
405 405 00e0: 61 30 34 30 35 30 31 30 2e 6e 64 2e 69 00 03 00 |a0405010.nd.i...|
406 406 00f0: 01 00 00 00 00 00 00 00 05 00 00 00 04 00 00 00 |................|
407 407 #endif
408 408 #if zstd no-rust
409 409 $ f --size --hex --bytes 256 body
410 410 body: size=116310 (no-bigendian !)
411 411 body: size=116305 (bigendian !)
412 412 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
413 413 0010: 7c 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 ||.STREAM2.......|
414 414 0020: 06 09 04 0c 40 62 79 74 65 63 6f 75 6e 74 31 30 |....@bytecount10|
415 415 0030: 31 32 37 36 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1276filecount109| (no-bigendian !)
416 416 0030: 31 32 37 31 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1271filecount109| (bigendian !)
417 417 0040: 33 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |3requirementsgen|
418 418 0050: 65 72 61 6c 64 65 6c 74 61 25 32 43 72 65 76 6c |eraldelta%2Crevl|
419 419 0060: 6f 67 2d 63 6f 6d 70 72 65 73 73 69 6f 6e 2d 7a |og-compression-z|
420 420 0070: 73 74 64 25 32 43 72 65 76 6c 6f 67 76 31 25 32 |std%2Crevlogv1%2|
421 421 0080: 43 73 70 61 72 73 65 72 65 76 6c 6f 67 00 00 80 |Csparserevlog...|
422 422 0090: 00 73 08 42 64 61 74 61 2f 30 2e 69 00 03 00 01 |.s.Bdata/0.i....|
423 423 00a0: 00 00 00 00 00 00 00 02 00 00 00 01 00 00 00 00 |................|
424 424 00b0: 00 00 00 01 ff ff ff ff ff ff ff ff 80 29 63 a0 |.............)c.|
425 425 00c0: 49 d3 23 87 bf ce fe 56 67 92 67 2c 69 d1 ec 39 |I.#....Vg.g,i..9|
426 426 00d0: 00 00 00 00 00 00 00 00 00 00 00 00 75 30 73 26 |............u0s&|
427 427 00e0: 45 64 61 74 61 2f 30 30 63 68 61 6e 67 65 6c 6f |Edata/00changelo|
428 428 00f0: 67 2d 61 62 33 34 39 31 38 30 61 30 34 30 35 30 |g-ab349180a04050|
429 429 #endif
430 430 #if zstd rust no-dirstate-v2
431 431 $ f --size --hex --bytes 256 body
432 432 body: size=116310
433 433 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
434 434 0010: 7c 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 ||.STREAM2.......|
435 435 0020: 06 09 04 0c 40 62 79 74 65 63 6f 75 6e 74 31 30 |....@bytecount10|
436 436 0030: 31 32 37 36 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1276filecount109|
437 437 0040: 33 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |3requirementsgen|
438 438 0050: 65 72 61 6c 64 65 6c 74 61 25 32 43 72 65 76 6c |eraldelta%2Crevl|
439 439 0060: 6f 67 2d 63 6f 6d 70 72 65 73 73 69 6f 6e 2d 7a |og-compression-z|
440 440 0070: 73 74 64 25 32 43 72 65 76 6c 6f 67 76 31 25 32 |std%2Crevlogv1%2|
441 441 0080: 43 73 70 61 72 73 65 72 65 76 6c 6f 67 00 00 80 |Csparserevlog...|
442 442 0090: 00 73 08 42 64 61 74 61 2f 30 2e 69 00 03 00 01 |.s.Bdata/0.i....|
443 443 00a0: 00 00 00 00 00 00 00 02 00 00 00 01 00 00 00 00 |................|
444 444 00b0: 00 00 00 01 ff ff ff ff ff ff ff ff 80 29 63 a0 |.............)c.|
445 445 00c0: 49 d3 23 87 bf ce fe 56 67 92 67 2c 69 d1 ec 39 |I.#....Vg.g,i..9|
446 446 00d0: 00 00 00 00 00 00 00 00 00 00 00 00 75 30 73 26 |............u0s&|
447 447 00e0: 45 64 61 74 61 2f 30 30 63 68 61 6e 67 65 6c 6f |Edata/00changelo|
448 448 00f0: 67 2d 61 62 33 34 39 31 38 30 61 30 34 30 35 30 |g-ab349180a04050|
449 449 #endif
450 450 #if zstd dirstate-v2
451 451 $ f --size --hex --bytes 256 body
452 452 body: size=109549
453 453 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......|
454 454 0010: c0 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 |..STREAM2.......|
455 455 0020: 05 09 04 0c 85 62 79 74 65 63 6f 75 6e 74 39 35 |.....bytecount95|
456 456 0030: 38 39 37 66 69 6c 65 63 6f 75 6e 74 31 30 33 30 |897filecount1030|
457 457 0040: 72 65 71 75 69 72 65 6d 65 6e 74 73 64 6f 74 65 |requirementsdote|
458 458 0050: 6e 63 6f 64 65 25 32 43 65 78 70 2d 64 69 72 73 |ncode%2Cexp-dirs|
459 459 0060: 74 61 74 65 2d 76 32 25 32 43 66 6e 63 61 63 68 |tate-v2%2Cfncach|
460 460 0070: 65 25 32 43 67 65 6e 65 72 61 6c 64 65 6c 74 61 |e%2Cgeneraldelta|
461 461 0080: 25 32 43 70 65 72 73 69 73 74 65 6e 74 2d 6e 6f |%2Cpersistent-no|
462 462 0090: 64 65 6d 61 70 25 32 43 72 65 76 6c 6f 67 2d 63 |demap%2Crevlog-c|
463 463 00a0: 6f 6d 70 72 65 73 73 69 6f 6e 2d 7a 73 74 64 25 |ompression-zstd%|
464 464 00b0: 32 43 72 65 76 6c 6f 67 76 31 25 32 43 73 70 61 |2Crevlogv1%2Cspa|
465 465 00c0: 72 73 65 72 65 76 6c 6f 67 25 32 43 73 74 6f 72 |rserevlog%2Cstor|
466 466 00d0: 65 00 00 80 00 73 08 42 64 61 74 61 2f 30 2e 69 |e....s.Bdata/0.i|
467 467 00e0: 00 03 00 01 00 00 00 00 00 00 00 02 00 00 00 01 |................|
468 468 00f0: 00 00 00 00 00 00 00 01 ff ff ff ff ff ff ff ff |................|
469 469 #endif
470 470
471 471 --uncompressed is an alias to --stream
472 472
473 473 #if stream-legacy
474 474 $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed
475 475 streaming all changes
476 476 1090 files to transfer, 102 KB of data (no-zstd !)
477 477 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
478 478 1090 files to transfer, 98.8 KB of data (zstd !)
479 479 transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !)
480 480 searching for changes
481 481 no changes found
482 482 #endif
483 483 #if stream-bundle2-v2
484 484 $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed
485 485 streaming all changes
486 486 1093 files to transfer, 102 KB of data (no-zstd !)
487 487 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
488 488 1093 files to transfer, 98.9 KB of data (zstd !)
489 489 transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !)
490 490 #endif
491 491 #if stream-bundle2-v3
492 492 $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed
493 493 streaming all changes
494 494 1093 entries to transfer
495 495 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
496 496 transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !)
497 497 #endif
498 498
499 499 Clone with background file closing enabled
500 500
501 501 #if stream-legacy
502 502 $ hg --debug --config worker.backgroundclose=true --config worker.backgroundcloseminfilecount=1 clone --stream -U http://localhost:$HGPORT clone-background | grep -v adding
503 503 using http://localhost:$HGPORT/
504 504 sending capabilities command
505 505 sending branchmap command
506 506 streaming all changes
507 507 sending stream_out command
508 508 1090 files to transfer, 102 KB of data (no-zstd !)
509 509 1090 files to transfer, 98.8 KB of data (zstd !)
510 510 starting 4 threads for background file closing
511 511 updating the branch cache
512 512 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
513 513 transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !)
514 514 query 1; heads
515 515 sending batch command
516 516 searching for changes
517 517 all remote heads known locally
518 518 no changes found
519 519 sending getbundle command
520 520 bundle2-input-bundle: with-transaction
521 521 bundle2-input-part: "listkeys" (params: 1 mandatory) supported
522 522 bundle2-input-part: "phase-heads" supported
523 523 bundle2-input-part: total payload size 24
524 524 bundle2-input-bundle: 2 parts total
525 525 checking for updated bookmarks
526 526 updating the branch cache
527 527 (sent 5 HTTP requests and * bytes; received * bytes in responses) (glob)
528 528 #endif
529 529 #if stream-bundle2-v2
530 530 $ hg --debug --config worker.backgroundclose=true --config worker.backgroundcloseminfilecount=1 clone --stream -U http://localhost:$HGPORT clone-background | grep -v adding
531 531 using http://localhost:$HGPORT/
532 532 sending capabilities command
533 533 query 1; heads
534 534 sending batch command
535 535 streaming all changes
536 536 sending getbundle command
537 537 bundle2-input-bundle: with-transaction
538 538 bundle2-input-part: "stream2" (params: 3 mandatory) supported
539 539 applying stream bundle
540 540 1093 files to transfer, 102 KB of data (no-zstd !)
541 541 1093 files to transfer, 98.9 KB of data (zstd !)
542 542 starting 4 threads for background file closing
543 543 starting 4 threads for background file closing
544 544 updating the branch cache
545 545 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
546 546 bundle2-input-part: total payload size 118984 (no-zstd !)
547 547 transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !)
548 548 bundle2-input-part: total payload size 116145 (zstd no-bigendian !)
549 549 bundle2-input-part: total payload size 116140 (zstd bigendian !)
550 550 bundle2-input-part: "listkeys" (params: 1 mandatory) supported
551 551 bundle2-input-bundle: 2 parts total
552 552 checking for updated bookmarks
553 553 updating the branch cache
554 554 (sent 3 HTTP requests and * bytes; received * bytes in responses) (glob)
555 555 #endif
556 556 #if stream-bundle2-v3
557 557 $ hg --debug --config worker.backgroundclose=true --config worker.backgroundcloseminfilecount=1 clone --stream -U http://localhost:$HGPORT clone-background | grep -v adding
558 558 using http://localhost:$HGPORT/
559 559 sending capabilities command
560 560 query 1; heads
561 561 sending batch command
562 562 streaming all changes
563 563 sending getbundle command
564 564 bundle2-input-bundle: with-transaction
565 565 bundle2-input-part: "stream3-exp" (params: 1 mandatory) supported
566 566 applying stream bundle
567 567 1093 entries to transfer
568 568 starting 4 threads for background file closing
569 569 starting 4 threads for background file closing
570 570 updating the branch cache
571 571 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
572 572 bundle2-input-part: total payload size 120079 (no-zstd !)
573 573 transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !)
574 574 bundle2-input-part: total payload size 117240 (zstd no-bigendian !)
575 575 bundle2-input-part: total payload size 116138 (zstd bigendian !)
576 576 bundle2-input-part: "listkeys" (params: 1 mandatory) supported
577 577 bundle2-input-bundle: 2 parts total
578 578 checking for updated bookmarks
579 579 updating the branch cache
580 580 (sent 3 HTTP requests and * bytes; received * bytes in responses) (glob)
581 581 #endif
582 582
583 583 Cannot stream clone when there are secret changesets
584 584
585 585 $ hg -R server phase --force --secret -r tip
586 586 $ hg clone --stream -U http://localhost:$HGPORT secret-denied
587 587 warning: stream clone requested but server has them disabled
588 588 requesting all changes
589 589 adding changesets
590 590 adding manifests
591 591 adding file changes
592 592 added 2 changesets with 1025 changes to 1025 files
593 593 new changesets 96ee1d7354c4:c17445101a72
594 594
595 595 $ killdaemons.py
596 596
597 597 Streaming of secrets can be overridden by server config
598 598
599 599 $ cd server
600 600 $ hg serve --config server.uncompressedallowsecret=true -p $HGPORT -d --pid-file=hg.pid
601 601 $ cat hg.pid > $DAEMON_PIDS
602 602 $ cd ..
603 603
604 604 #if stream-legacy
605 605 $ hg clone --stream -U http://localhost:$HGPORT secret-allowed
606 606 streaming all changes
607 607 1090 files to transfer, 102 KB of data (no-zstd !)
608 608 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
609 609 1090 files to transfer, 98.8 KB of data (zstd !)
610 610 transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !)
611 611 searching for changes
612 612 no changes found
613 613 #endif
614 614 #if stream-bundle2-v2
615 615 $ hg clone --stream -U http://localhost:$HGPORT secret-allowed
616 616 streaming all changes
617 617 1093 files to transfer, 102 KB of data (no-zstd !)
618 618 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
619 619 1093 files to transfer, 98.9 KB of data (zstd !)
620 620 transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !)
621 621 #endif
622 622 #if stream-bundle2-v3
623 623 $ hg clone --stream -U http://localhost:$HGPORT secret-allowed
624 624 streaming all changes
625 625 1093 entries to transfer
626 626 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
627 627 transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !)
628 628 #endif
629 629
630 630 $ killdaemons.py
631 631
632 632 Verify interaction between preferuncompressed and secret presence
633 633
634 634 $ cd server
635 635 $ hg serve --config server.preferuncompressed=true -p $HGPORT -d --pid-file=hg.pid
636 636 $ cat hg.pid > $DAEMON_PIDS
637 637 $ cd ..
638 638
639 639 $ hg clone -U http://localhost:$HGPORT preferuncompressed-secret
640 640 requesting all changes
641 641 adding changesets
642 642 adding manifests
643 643 adding file changes
644 644 added 2 changesets with 1025 changes to 1025 files
645 645 new changesets 96ee1d7354c4:c17445101a72
646 646
647 647 $ killdaemons.py
648 648
649 649 Clone not allowed when full bundles disabled and can't serve secrets
650 650
651 651 $ cd server
652 652 $ hg serve --config server.disablefullbundle=true -p $HGPORT -d --pid-file=hg.pid
653 653 $ cat hg.pid > $DAEMON_PIDS
654 654 $ cd ..
655 655
656 656 $ hg clone --stream http://localhost:$HGPORT secret-full-disabled
657 657 warning: stream clone requested but server has them disabled
658 658 requesting all changes
659 659 remote: abort: server has pull-based clones disabled
660 660 abort: pull failed on remote
661 661 (remove --pull if specified or upgrade Mercurial)
662 662 [100]
663 663
664 664 Local stream clone with secrets involved
665 665 (This is just a test of behavior: if you have access to the repo's files,
666 666 there is no security boundary, so it isn't important to prevent a clone here.)
667 667
668 668 $ hg clone -U --stream server local-secret
669 669 warning: stream clone requested but server has them disabled
670 670 requesting all changes
671 671 adding changesets
672 672 adding manifests
673 673 adding file changes
674 674 added 2 changesets with 1025 changes to 1025 files
675 675 new changesets 96ee1d7354c4:c17445101a72
676 676
677 677 Stream clone while repo is changing:
678 678
679 679 $ mkdir changing
680 680 $ cd changing
681 681
682 682 prepare a repo with a small and a big file to cover both code paths in emitrevlogdata
683 683
684 684 $ hg init repo
685 685 $ touch repo/f1
686 686 $ $TESTDIR/seq.py 50000 > repo/f2
687 687 $ hg -R repo ci -Aqm "0"
688 688 $ HG_TEST_STREAM_WALKED_FILE_1="$TESTTMP/sync_file_walked_1"
689 689 $ export HG_TEST_STREAM_WALKED_FILE_1
690 690 $ HG_TEST_STREAM_WALKED_FILE_2="$TESTTMP/sync_file_walked_2"
691 691 $ export HG_TEST_STREAM_WALKED_FILE_2
692 692 $ HG_TEST_STREAM_WALKED_FILE_3="$TESTTMP/sync_file_walked_3"
693 693 $ export HG_TEST_STREAM_WALKED_FILE_3
694 694 # $ cat << EOF >> $HGRCPATH
695 695 # > [hooks]
696 696 # > pre-clone=rm -f "$TESTTMP/sync_file_walked_*"
697 697 # > EOF
698 698 $ hg serve -R repo -p $HGPORT1 -d --error errors.log --pid-file=hg.pid --config extensions.stream_steps="$RUNTESTDIR/testlib/ext-stream-clone-steps.py"
699 699 $ cat hg.pid >> $DAEMON_PIDS
700 700
701 701 clone while modifying the repo between stat'ing files under the write lock
702 702 and actually serving the file content
703 703
704 704 $ (hg clone -q --stream -U http://localhost:$HGPORT1 clone; touch "$HG_TEST_STREAM_WALKED_FILE_3") &
705 705 $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_1
706 706 $ echo >> repo/f1
707 707 $ echo >> repo/f2
708 708 $ hg -R repo ci -m "1" --config ui.timeout.warn=-1
709 709 $ touch $HG_TEST_STREAM_WALKED_FILE_2
710 710 $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_3
711 711 $ hg -R clone id
712 712 000000000000
713 713 $ cat errors.log
714 714 $ cd ..
715 715
716 716 Stream repository with bookmarks
717 717 --------------------------------
718 718
719 719 (revert introduction of secret changeset)
720 720
721 721 $ hg -R server phase --draft 'secret()'
722 722
723 723 add a bookmark
724 724
725 725 $ hg -R server bookmark -r tip some-bookmark
726 726
727 727 clone it
728 728
729 729 #if stream-legacy
730 730 $ hg clone --stream http://localhost:$HGPORT with-bookmarks
731 731 streaming all changes
732 732 1090 files to transfer, 102 KB of data (no-zstd !)
733 733 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
734 734 1090 files to transfer, 98.8 KB of data (zstd !)
735 735 transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !)
736 736 searching for changes
737 737 no changes found
738 738 updating to branch default
739 739 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
740 740 #endif
741 741 #if stream-bundle2-v2
742 742 $ hg clone --stream http://localhost:$HGPORT with-bookmarks
743 743 streaming all changes
744 744 1096 files to transfer, 102 KB of data (no-zstd !)
745 745 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
746 746 1096 files to transfer, 99.1 KB of data (zstd !)
747 747 transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !)
748 748 updating to branch default
749 749 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
750 750 #endif
751 751 #if stream-bundle2-v3
752 752 $ hg clone --stream http://localhost:$HGPORT with-bookmarks
753 753 streaming all changes
754 754 1096 entries to transfer
755 755 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
756 756 transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !)
757 757 updating to branch default
758 758 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
759 759 #endif
760 760 $ hg verify -R with-bookmarks -q
761 761 $ hg -R with-bookmarks bookmarks
762 762 some-bookmark 2:5223b5e3265f
763 763
764 764 Stream repository with phases
765 765 -----------------------------
766 766
767 767 Clone as publishing
768 768
769 769 $ hg -R server phase -r 'all()'
770 770 0: draft
771 771 1: draft
772 772 2: draft
773 773
774 774 #if stream-legacy
775 775 $ hg clone --stream http://localhost:$HGPORT phase-publish
776 776 streaming all changes
777 777 1090 files to transfer, 102 KB of data (no-zstd !)
778 778 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
779 779 1090 files to transfer, 98.8 KB of data (zstd !)
780 780 transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !)
781 781 searching for changes
782 782 no changes found
783 783 updating to branch default
784 784 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
785 785 #endif
786 786 #if stream-bundle2-v2
787 787 $ hg clone --stream http://localhost:$HGPORT phase-publish
788 788 streaming all changes
789 789 1096 files to transfer, 102 KB of data (no-zstd !)
790 790 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
791 791 1096 files to transfer, 99.1 KB of data (zstd !)
792 792 transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !)
793 793 updating to branch default
794 794 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
795 795 #endif
796 796 #if stream-bundle2-v3
797 797 $ hg clone --stream http://localhost:$HGPORT phase-publish
798 798 streaming all changes
799 799 1096 entries to transfer
800 800 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
801 801 transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !)
802 802 updating to branch default
803 803 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
804 804 #endif
805 805 $ hg verify -R phase-publish -q
806 806 $ hg -R phase-publish phase -r 'all()'
807 807 0: public
808 808 1: public
809 809 2: public
810 810
811 811 Clone as non publishing
812 812
813 813 $ cat << EOF >> server/.hg/hgrc
814 814 > [phases]
815 815 > publish = False
816 816 > EOF
817 817 $ killdaemons.py
818 818 $ hg -R server serve -p $HGPORT -d --pid-file=hg.pid
819 819 $ cat hg.pid > $DAEMON_PIDS
820 820
821 821 #if stream-legacy
822 822
823 823 With v1 of the stream protocol, changesets are always cloned as public. This
824 824 makes stream v1 unsuitable for non-publishing repositories.
825 825
826 826 $ hg clone --stream http://localhost:$HGPORT phase-no-publish
827 827 streaming all changes
828 828 1090 files to transfer, 102 KB of data (no-zstd !)
829 829 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
830 830 1090 files to transfer, 98.8 KB of data (zstd !)
831 831 transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !)
832 832 searching for changes
833 833 no changes found
834 834 updating to branch default
835 835 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
836 836 $ hg -R phase-no-publish phase -r 'all()'
837 837 0: public
838 838 1: public
839 839 2: public
840 840 #endif
841 841 #if stream-bundle2-v2
842 842 $ hg clone --stream http://localhost:$HGPORT phase-no-publish
843 843 streaming all changes
844 844 1097 files to transfer, 102 KB of data (no-zstd !)
845 845 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
846 846 1097 files to transfer, 99.1 KB of data (zstd !)
847 847 transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !)
848 848 updating to branch default
849 849 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
850 850 $ hg -R phase-no-publish phase -r 'all()'
851 851 0: draft
852 852 1: draft
853 853 2: draft
854 854 #endif
855 855 #if stream-bundle2-v3
856 856 $ hg clone --stream http://localhost:$HGPORT phase-no-publish
857 857 streaming all changes
858 858 1097 entries to transfer
859 859 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
860 860 transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !)
861 861 updating to branch default
862 862 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved
863 863 $ hg -R phase-no-publish phase -r 'all()'
864 864 0: draft
865 865 1: draft
866 866 2: draft
867 867 #endif
868 868 $ hg verify -R phase-no-publish -q
869 869
870 870 $ killdaemons.py
871 871
872 872 #if stream-legacy
873 873
874 874 With v1 of the stream protocol, changesets are always cloned as public. There
875 875 is no obsolescence marker exchange in stream v1.
876 876
877 877 #endif
878 878 #if stream-bundle2-v2
879 879
880 880 Stream repository with obsolescence
881 881 -----------------------------------
882 882
883 883 Clone non-publishing with obsolescence
884 884
885 885 $ cat >> $HGRCPATH << EOF
886 886 > [experimental]
887 887 > evolution=all
888 888 > EOF
889 889
890 890 $ cd server
891 891 $ echo foo > foo
892 892 $ hg -q commit -m 'about to be pruned'
893 893 $ hg debugobsolete `hg log -r . -T '{node}'` -d '0 0' -u test --record-parents
894 894 1 new obsolescence markers
895 895 obsoleted 1 changesets
896 896 $ hg up null -q
897 897 $ hg log -T '{rev}: {phase}\n'
898 898 2: draft
899 899 1: draft
900 900 0: draft
901 901 $ hg serve -p $HGPORT -d --pid-file=hg.pid
902 902 $ cat hg.pid > $DAEMON_PIDS
903 903 $ cd ..
904 904
905 905 $ hg clone -U --stream http://localhost:$HGPORT with-obsolescence
906 906 streaming all changes
907 907 1098 files to transfer, 102 KB of data (no-zstd !)
908 908 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
909 909 1098 files to transfer, 99.5 KB of data (zstd !)
910 910 transferred 99.5 KB in * seconds (* */sec) (glob) (zstd !)
911 911 $ hg -R with-obsolescence log -T '{rev}: {phase}\n'
912 912 2: draft
913 913 1: draft
914 914 0: draft
915 915 $ hg debugobsolete -R with-obsolescence
916 916 8c206a663911c1f97f2f9d7382e417ae55872cfa 0 {5223b5e3265f0df40bb743da62249413d74ac70f} (Thu Jan 01 00:00:00 1970 +0000) {'user': 'test'}
917 917 $ hg verify -R with-obsolescence -q
918 918
919 919 $ hg clone -U --stream --config experimental.evolution=0 http://localhost:$HGPORT with-obsolescence-no-evolution
920 920 streaming all changes
921 921 remote: abort: server has obsolescence markers, but client cannot receive them via stream clone
922 922 abort: pull failed on remote
923 923 [100]
924 924
925 925 $ killdaemons.py
926 926
927 927 #endif
928 928 #if stream-bundle2-v3
929 929
930 930 Stream repository with obsolescence
931 931 -----------------------------------
932 932
933 933 Clone non-publishing with obsolescence
934 934
935 935 $ cat >> $HGRCPATH << EOF
936 936 > [experimental]
937 937 > evolution=all
938 938 > EOF
939 939
940 940 $ cd server
941 941 $ echo foo > foo
942 942 $ hg -q commit -m 'about to be pruned'
943 943 $ hg debugobsolete `hg log -r . -T '{node}'` -d '0 0' -u test --record-parents
944 944 1 new obsolescence markers
945 945 obsoleted 1 changesets
946 946 $ hg up null -q
947 947 $ hg log -T '{rev}: {phase}\n'
948 948 2: draft
949 949 1: draft
950 950 0: draft
951 951 $ hg serve -p $HGPORT -d --pid-file=hg.pid
952 952 $ cat hg.pid > $DAEMON_PIDS
953 953 $ cd ..
954 954
955 955 $ hg clone -U --stream http://localhost:$HGPORT with-obsolescence
956 956 streaming all changes
957 957 1098 entries to transfer
958 958 transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !)
959 959 transferred 99.5 KB in * seconds (* */sec) (glob) (zstd !)
960 960 $ hg -R with-obsolescence log -T '{rev}: {phase}\n'
961 961 2: draft
962 962 1: draft
963 963 0: draft
964 964 $ hg debugobsolete -R with-obsolescence
965 965 8c206a663911c1f97f2f9d7382e417ae55872cfa 0 {5223b5e3265f0df40bb743da62249413d74ac70f} (Thu Jan 01 00:00:00 1970 +0000) {'user': 'test'}
966 966 $ hg verify -R with-obsolescence -q
967 967
968 968 $ hg clone -U --stream --config experimental.evolution=0 http://localhost:$HGPORT with-obsolescence-no-evolution
969 969 streaming all changes
970 970 remote: abort: server has obsolescence markers, but client cannot receive them via stream clone
971 971 abort: pull failed on remote
972 972 [100]
973 973
974 974 $ killdaemons.py
975 975
976 976 #endif
977 977
978 978 Cloning a repo with no requirements doesn't produce an obscure error
979 979
980 980 $ mkdir -p empty-repo/.hg
981 981 $ hg clone -q --stream ssh://user@dummy/empty-repo empty-repo2
982 982 $ hg --cwd empty-repo2 verify -q
983
984 Cloning a repo with an empty manifestlog doesn't produce a confusing error
985
986 $ rm -r empty-repo; hg init empty-repo
987 $ (cd empty-repo; touch x; hg commit -Am empty; hg debugstrip -r 0) > /dev/null
988 $ hg clone -q --stream ssh://user@dummy/empty-repo empty-repo3
989 $ hg --cwd empty-repo3 verify -q 2>&1 | grep -v warning
990 [1]
991
992 The warnings filtered out here are about zero-length 'orphan' data files.
993 Those are harmless, so filtering them out is fine.
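
The crash class exercised above comes from code that assumes a revlog always
has at least one revision. A minimal sketch of the kind of guard such code
needs (illustrative only; the names below are generic, not the actual patched
function):

  def stream_data_size(rl):
      # An empty revlog has no last revision whose end offset could be
      # measured, so report zero instead of indexing past the end.
      if len(rl) == 0:
          return 0
      return rl.end(len(rl) - 1)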
994