stream-clone: implement dedicated `get_streams` method for revlog...
marmoute -
r51533:9caa860d default
@@ -1,3410 +1,3478 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 DELTA_BASE_REUSE_NO,
43 43 DELTA_BASE_REUSE_TRY,
44 44 ENTRY_RANK,
45 45 FEATURES_BY_VERSION,
46 46 FLAG_GENERALDELTA,
47 47 FLAG_INLINE_DATA,
48 48 INDEX_HEADER,
49 49 KIND_CHANGELOG,
50 50 KIND_FILELOG,
51 51 RANK_UNKNOWN,
52 52 REVLOGV0,
53 53 REVLOGV1,
54 54 REVLOGV1_FLAGS,
55 55 REVLOGV2,
56 56 REVLOGV2_FLAGS,
57 57 REVLOG_DEFAULT_FLAGS,
58 58 REVLOG_DEFAULT_FORMAT,
59 59 REVLOG_DEFAULT_VERSION,
60 60 SUPPORTED_FLAGS,
61 61 )
62 62 from .revlogutils.flagutil import (
63 63 REVIDX_DEFAULT_FLAGS,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 REVIDX_FLAGS_ORDER,
67 67 REVIDX_HASCOPIESINFO,
68 68 REVIDX_ISCENSORED,
69 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 70 )
71 71 from .thirdparty import attr
72 72 from . import (
73 73 ancestor,
74 74 dagop,
75 75 error,
76 76 mdiff,
77 77 policy,
78 78 pycompat,
79 79 revlogutils,
80 80 templatefilters,
81 81 util,
82 82 )
83 83 from .interfaces import (
84 84 repository,
85 85 util as interfaceutil,
86 86 )
87 87 from .revlogutils import (
88 88 deltas as deltautil,
89 89 docket as docketutil,
90 90 flagutil,
91 91 nodemap as nodemaputil,
92 92 randomaccessfile,
93 93 revlogv0,
94 94 rewrite,
95 95 sidedata as sidedatautil,
96 96 )
97 97 from .utils import (
98 98 storageutil,
99 99 stringutil,
100 100 )
101 101
102 102 # bare usage of all the names to prevent pyflakes complaints about unused imports
103 103 # We need these names available in the module for extensions.
104 104
105 105 REVLOGV0
106 106 REVLOGV1
107 107 REVLOGV2
108 108 CHANGELOGV2
109 109 FLAG_INLINE_DATA
110 110 FLAG_GENERALDELTA
111 111 REVLOG_DEFAULT_FLAGS
112 112 REVLOG_DEFAULT_FORMAT
113 113 REVLOG_DEFAULT_VERSION
114 114 REVLOGV1_FLAGS
115 115 REVLOGV2_FLAGS
116 116 REVIDX_ISCENSORED
117 117 REVIDX_ELLIPSIS
118 118 REVIDX_HASCOPIESINFO
119 119 REVIDX_EXTSTORED
120 120 REVIDX_DEFAULT_FLAGS
121 121 REVIDX_FLAGS_ORDER
122 122 REVIDX_RAWTEXT_CHANGING_FLAGS
123 123
124 124 parsers = policy.importmod('parsers')
125 125 rustancestor = policy.importrust('ancestor')
126 126 rustdagop = policy.importrust('dagop')
127 127 rustrevlog = policy.importrust('revlog')
128 128
129 129 # Aliased for performance.
130 130 _zlibdecompress = zlib.decompress
131 131
132 132 # max size of inline data embedded into a revlog
133 133 _maxinline = 131072
134 134
135 135 # Flag processors for REVIDX_ELLIPSIS.
136 136 def ellipsisreadprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsiswriteprocessor(rl, text):
141 141 return text, False
142 142
143 143
144 144 def ellipsisrawprocessor(rl, text):
145 145 return False
146 146
147 147
148 148 ellipsisprocessor = (
149 149 ellipsisreadprocessor,
150 150 ellipsiswriteprocessor,
151 151 ellipsisrawprocessor,
152 152 )
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider the "pure" python implementation to be "fast", because
168 168 # people using pure don't really have performance considerations (and a
169 169 # wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @interfaceutil.implementer(repository.irevisiondelta)
176 176 @attr.s(slots=True)
177 177 class revlogrevisiondelta:
178 178 node = attr.ib()
179 179 p1node = attr.ib()
180 180 p2node = attr.ib()
181 181 basenode = attr.ib()
182 182 flags = attr.ib()
183 183 baserevisionsize = attr.ib()
184 184 revision = attr.ib()
185 185 delta = attr.ib()
186 186 sidedata = attr.ib()
187 187 protocol_flags = attr.ib()
188 188 linknode = attr.ib(default=None)
189 189
190 190
191 191 @interfaceutil.implementer(repository.iverifyproblem)
192 192 @attr.s(frozen=True)
193 193 class revlogproblem:
194 194 warning = attr.ib(default=None)
195 195 error = attr.ib(default=None)
196 196 node = attr.ib(default=None)
197 197
198 198
199 199 def parse_index_v1(data, inline):
200 200 # call the C implementation to parse the index data
201 201 index, cache = parsers.parse_index2(data, inline)
202 202 return index, cache
203 203
204 204
205 205 def parse_index_v2(data, inline):
206 206 # call the C implementation to parse the index data
207 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 208 return index, cache
209 209
210 210
211 211 def parse_index_cl_v2(data, inline):
212 212 # call the C implementation to parse the index data
213 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 214 return index, cache
215 215
216 216
217 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
218 218
219 219 def parse_index_v1_nodemap(data, inline):
220 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 221 return index, cache
222 222
223 223
224 224 else:
225 225 parse_index_v1_nodemap = None
226 226
227 227
228 228 def parse_index_v1_mixed(data, inline):
229 229 index, cache = parse_index_v1(data, inline)
230 230 return rustrevlog.MixedIndex(index), cache
231 231
232 232
233 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 234 # signed integer)
235 235 _maxentrysize = 0x7FFFFFFF
236 236
237 237 FILE_TOO_SHORT_MSG = _(
238 238 b'cannot read from revlog %s;'
239 239 b' expected %d bytes from offset %d, data size is %d'
240 240 )
241 241
242 242 hexdigits = b'0123456789abcdefABCDEF'
243 243
244 244
245 245 class revlog:
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
290 290
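As a rough illustration of the API sketched in this docstring, here is a minimal, hypothetical way to open such a revlog and read one revision; real callers go through the changelog/filelog wrappers and obtain `opener` from a vfs, so the constructor arguments below are placeholders only:

    # illustrative sketch, not part of this change
    rl = revlog(opener, target=(KIND_FILELOG, b'foo.txt'), radix=b'data/foo.txt')
    tip_node = rl.tip()           # node id of the last revision in the index
    text = rl.revision(tip_node)  # delta chain is resolved internally, O(size of text)
    assert len(rl) == rl.tiprev() + 1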
291 291 _flagserrorclass = error.RevlogError
292 292
293 293 def __init__(
294 294 self,
295 295 opener,
296 296 target,
297 297 radix,
298 298 postfix=None, # only exists for `tmpcensored` now
299 299 checkambig=False,
300 300 mmaplargeindex=False,
301 301 censorable=False,
302 302 upperboundcomp=None,
303 303 persistentnodemap=False,
304 304 concurrencychecker=None,
305 305 trypending=False,
306 306 try_split=False,
307 307 canonical_parent_order=True,
308 308 ):
309 309 """
310 310 create a revlog object
311 311
312 312 opener is a function that abstracts the file opening operation
313 313 and can be used to implement COW semantics or the like.
314 314
315 315 `target`: a (KIND, ID) tuple that identifies the content stored in
316 316 this revlog. It helps the rest of the code understand what the revlog
317 317 is about without having to resort to heuristics and index filename
318 318 analysis. Note that this must be reliably set by normal code, but
319 319 test, debug, or performance measurement code might not set it to an
320 320 accurate value.
321 321 """
322 322 self.upperboundcomp = upperboundcomp
323 323
324 324 self.radix = radix
325 325
326 326 self._docket_file = None
327 327 self._indexfile = None
328 328 self._datafile = None
329 329 self._sidedatafile = None
330 330 self._nodemap_file = None
331 331 self.postfix = postfix
332 332 self._trypending = trypending
333 333 self._try_split = try_split
334 334 self.opener = opener
335 335 if persistentnodemap:
336 336 self._nodemap_file = nodemaputil.get_nodemap_file(self)
337 337
338 338 assert target[0] in ALL_KINDS
339 339 assert len(target) == 2
340 340 self.target = target
341 341 # When True, indexfile is opened with checkambig=True at writing, to
342 342 # avoid file stat ambiguity.
343 343 self._checkambig = checkambig
344 344 self._mmaplargeindex = mmaplargeindex
345 345 self._censorable = censorable
346 346 # 3-tuple of (node, rev, text) for a raw revision.
347 347 self._revisioncache = None
348 348 # Maps rev to chain base rev.
349 349 self._chainbasecache = util.lrucachedict(100)
350 350 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
351 351 self._chunkcache = (0, b'')
352 352 # How much data to read and cache into the raw revlog data cache.
353 353 self._chunkcachesize = 65536
354 354 self._maxchainlen = None
355 355 self._deltabothparents = True
356 356 self._candidate_group_chunk_size = 0
357 357 self._debug_delta = False
358 358 self.index = None
359 359 self._docket = None
360 360 self._nodemap_docket = None
361 361 # Mapping of partial identifiers to full nodes.
362 362 self._pcache = {}
363 363 # Mapping of revision integer to full node.
364 364 self._compengine = b'zlib'
365 365 self._compengineopts = {}
366 366 self._maxdeltachainspan = -1
367 367 self._withsparseread = False
368 368 self._sparserevlog = False
369 369 self.hassidedata = False
370 370 self._srdensitythreshold = 0.50
371 371 self._srmingapsize = 262144
372 372
373 373 # other optional features
374 374
375 375 # might remove rank configuration once the computation has no impact
376 376 self._compute_rank = False
377 377
378 378 # Make copy of flag processors so each revlog instance can support
379 379 # custom flags.
380 380 self._flagprocessors = dict(flagutil.flagprocessors)
381 381
382 382 # 3-tuple of file handles being used for active writing.
383 383 self._writinghandles = None
384 384 # prevent nesting of addgroup
385 385 self._adding_group = None
386 386
387 387 self._loadindex()
388 388
389 389 self._concurrencychecker = concurrencychecker
390 390
391 391 # parent order is supposed to be semantically irrelevant, so we
392 392 # normally re-sort parents to ensure that the first parent is non-null,
393 393 # if there is a non-null parent at all.
394 394 # filelog abuses the parent order as a flag to mark some instances of
395 395 # meta-encoded files, so allow it to disable this behavior.
396 396 self.canonical_parent_order = canonical_parent_order
397 397
398 398 def _init_opts(self):
399 399 """process options (from above/config) to setup associated default revlog mode
400 400
401 401 These values might be affected when actually reading on-disk information.
402 402
403 403 The relevant values are returned for use in _loadindex().
404 404
405 405 * newversionflags:
406 406 version header to use if we need to create a new revlog
407 407
408 408 * mmapindexthreshold:
409 409 minimal index size at which to start using mmap
410 410
411 411 * force_nodemap:
412 412 force the usage of a "development" version of the nodemap code
413 413 """
414 414 mmapindexthreshold = None
415 415 opts = self.opener.options
416 416
417 417 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
418 418 new_header = CHANGELOGV2
419 419 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
420 420 elif b'revlogv2' in opts:
421 421 new_header = REVLOGV2
422 422 elif b'revlogv1' in opts:
423 423 new_header = REVLOGV1 | FLAG_INLINE_DATA
424 424 if b'generaldelta' in opts:
425 425 new_header |= FLAG_GENERALDELTA
426 426 elif b'revlogv0' in self.opener.options:
427 427 new_header = REVLOGV0
428 428 else:
429 429 new_header = REVLOG_DEFAULT_VERSION
430 430
431 431 if b'chunkcachesize' in opts:
432 432 self._chunkcachesize = opts[b'chunkcachesize']
433 433 if b'maxchainlen' in opts:
434 434 self._maxchainlen = opts[b'maxchainlen']
435 435 if b'deltabothparents' in opts:
436 436 self._deltabothparents = opts[b'deltabothparents']
437 437 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
438 438 if dps_cgds:
439 439 self._candidate_group_chunk_size = dps_cgds
440 440 self._lazydelta = bool(opts.get(b'lazydelta', True))
441 441 self._lazydeltabase = False
442 442 if self._lazydelta:
443 443 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
444 444 if b'debug-delta' in opts:
445 445 self._debug_delta = opts[b'debug-delta']
446 446 if b'compengine' in opts:
447 447 self._compengine = opts[b'compengine']
448 448 if b'zlib.level' in opts:
449 449 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
450 450 if b'zstd.level' in opts:
451 451 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
452 452 if b'maxdeltachainspan' in opts:
453 453 self._maxdeltachainspan = opts[b'maxdeltachainspan']
454 454 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
455 455 mmapindexthreshold = opts[b'mmapindexthreshold']
456 456 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
457 457 withsparseread = bool(opts.get(b'with-sparse-read', False))
458 458 # sparse-revlog forces sparse-read
459 459 self._withsparseread = self._sparserevlog or withsparseread
460 460 if b'sparse-read-density-threshold' in opts:
461 461 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
462 462 if b'sparse-read-min-gap-size' in opts:
463 463 self._srmingapsize = opts[b'sparse-read-min-gap-size']
464 464 if opts.get(b'enableellipsis'):
465 465 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
466 466
467 467 # revlog v0 doesn't have flag processors
468 468 for flag, processor in opts.get(b'flagprocessors', {}).items():
469 469 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
470 470
471 471 if self._chunkcachesize <= 0:
472 472 raise error.RevlogError(
473 473 _(b'revlog chunk cache size %r is not greater than 0')
474 474 % self._chunkcachesize
475 475 )
476 476 elif self._chunkcachesize & (self._chunkcachesize - 1):
477 477 raise error.RevlogError(
478 478 _(b'revlog chunk cache size %r is not a power of 2')
479 479 % self._chunkcachesize
480 480 )
481 481 force_nodemap = opts.get(b'devel-force-nodemap', False)
482 482 return new_header, mmapindexthreshold, force_nodemap
483 483
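For reference, a hedged sketch of what the opener options consumed above could look like for a v1-format repository; the real dictionary is assembled elsewhere (localrepo/config code), so the values below are illustrative only:

    # illustrative options; the keys match those read by _init_opts()
    opener.options = {
        b'revlogv1': True,               # new_header = REVLOGV1 | FLAG_INLINE_DATA
        b'generaldelta': True,           # adds FLAG_GENERALDELTA to new_header
        b'sparse-revlog': True,          # also forces sparse reads
        b'chunkcachesize': 65536,        # must be a power of two and > 0
        b'mmapindexthreshold': 1000000,  # only honored when mmaplargeindex is set
    }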
484 484 def _get_data(self, filepath, mmap_threshold, size=None):
485 485 """return a file content with or without mmap
486 486
487 487 If the file is missing return the empty string"""
488 488 try:
489 489 with self.opener(filepath) as fp:
490 490 if mmap_threshold is not None:
491 491 file_size = self.opener.fstat(fp).st_size
492 492 if file_size >= mmap_threshold:
493 493 if size is not None:
494 494 # avoid potential mmap crash
495 495 size = min(file_size, size)
496 496 # TODO: should .close() to release resources without
497 497 # relying on Python GC
498 498 if size is None:
499 499 return util.buffer(util.mmapread(fp))
500 500 else:
501 501 return util.buffer(util.mmapread(fp, size))
502 502 if size is None:
503 503 return fp.read()
504 504 else:
505 505 return fp.read(size)
506 506 except FileNotFoundError:
507 507 return b''
508 508
509 def get_streams(self, max_linkrev):
510 n = len(self)
511 index = self.index
512 while n > 0:
513 linkrev = index[n - 1][4]
514 if linkrev < max_linkrev:
515 break
516 # note: this loop will rarely go through multiple iterations, since
517 # it only traverses commits created during the current streaming
518 # pull operation.
519 #
520 # If this becomes a problem, using a binary search should cap the
521 # runtime of this.
522 n = n - 1
523 if n == 0:
524 # no data to send
525 return []
526 index_size = n * index.entry_size
527 data_size = self.end(n - 1)
528
529 # XXX we might have been split (or stripped) since the object was
530 # initialized. We need to close this race too, by having a way to
531 # pre-open the files we feed to the revlog and to never close them before
532 # we are done streaming.
533
534 if self._inline:
535
536 def get_stream():
537 with self._indexfp() as fp:
538 yield None
539 size = index_size + data_size
540 if size <= 65536:
541 yield fp.read(size)
542 else:
543 yield from util.filechunkiter(fp, limit=size)
544
545 inline_stream = get_stream()
546 next(inline_stream)
547 return [
548 (self._indexfile, inline_stream, index_size + data_size),
549 ]
550 else:
551
552 def get_index_stream():
553 with self._indexfp() as fp:
554 yield None
555 if index_size <= 65536:
556 yield fp.read(index_size)
557 else:
558 yield from util.filechunkiter(fp, limit=index_size)
559
560 def get_data_stream():
561 with self._datafp() as fp:
562 yield None
563 if data_size <= 65536:
564 yield fp.read(data_size)
565 else:
566 yield from util.filechunkiter(fp, limit=data_size)
567
568 index_stream = get_index_stream()
569 next(index_stream)
570 data_stream = get_data_stream()
571 next(data_stream)
572 return [
573 (self._datafile, data_stream, data_size),
574 (self._indexfile, index_stream, index_size),
575 ]
576
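To make the shape of the return value concrete: `get_streams` hands back a list of `(store filename, chunk generator, byte count)` triples, one entry for an inline revlog and two (data file, then index file) otherwise. A minimal, hypothetical consumer could look like this (`rl`, `max_linkrev` and `write_store_file` are placeholders, not part of this change):

    # drain the streams produced by the new method; illustrative only
    for name, stream, size in rl.get_streams(max_linkrev):
        received = 0
        for chunk in stream:          # the priming next() was already done above,
            received += len(chunk)    # so only real data chunks are yielded here
            write_store_file(name, chunk)
        assert received == size       # `size` is the exact number of bytes streamed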
509 577 def _loadindex(self, docket=None):
510 578
511 579 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
512 580
513 581 if self.postfix is not None:
514 582 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
515 583 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
516 584 entry_point = b'%s.i.a' % self.radix
517 585 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
518 586 entry_point = b'%s.i.s' % self.radix
519 587 else:
520 588 entry_point = b'%s.i' % self.radix
521 589
522 590 if docket is not None:
523 591 self._docket = docket
524 592 self._docket_file = entry_point
525 593 else:
526 594 self._initempty = True
527 595 entry_data = self._get_data(entry_point, mmapindexthreshold)
528 596 if len(entry_data) > 0:
529 597 header = INDEX_HEADER.unpack(entry_data[:4])[0]
530 598 self._initempty = False
531 599 else:
532 600 header = new_header
533 601
534 602 self._format_flags = header & ~0xFFFF
535 603 self._format_version = header & 0xFFFF
536 604
537 605 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
538 606 if supported_flags is None:
539 607 msg = _(b'unknown version (%d) in revlog %s')
540 608 msg %= (self._format_version, self.display_id)
541 609 raise error.RevlogError(msg)
542 610 elif self._format_flags & ~supported_flags:
543 611 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
544 612 display_flag = self._format_flags >> 16
545 613 msg %= (display_flag, self._format_version, self.display_id)
546 614 raise error.RevlogError(msg)
547 615
548 616 features = FEATURES_BY_VERSION[self._format_version]
549 617 self._inline = features[b'inline'](self._format_flags)
550 618 self._generaldelta = features[b'generaldelta'](self._format_flags)
551 619 self.hassidedata = features[b'sidedata']
552 620
553 621 if not features[b'docket']:
554 622 self._indexfile = entry_point
555 623 index_data = entry_data
556 624 else:
557 625 self._docket_file = entry_point
558 626 if self._initempty:
559 627 self._docket = docketutil.default_docket(self, header)
560 628 else:
561 629 self._docket = docketutil.parse_docket(
562 630 self, entry_data, use_pending=self._trypending
563 631 )
564 632
565 633 if self._docket is not None:
566 634 self._indexfile = self._docket.index_filepath()
567 635 index_data = b''
568 636 index_size = self._docket.index_end
569 637 if index_size > 0:
570 638 index_data = self._get_data(
571 639 self._indexfile, mmapindexthreshold, size=index_size
572 640 )
573 641 if len(index_data) < index_size:
574 642 msg = _(b'too few index data for %s: got %d, expected %d')
575 643 msg %= (self.display_id, len(index_data), index_size)
576 644 raise error.RevlogError(msg)
577 645
578 646 self._inline = False
579 647 # generaldelta implied by version 2 revlogs.
580 648 self._generaldelta = True
581 649 # the logic for persistent nodemap will be dealt with within the
582 650 # main docket, so disable it for now.
583 651 self._nodemap_file = None
584 652
585 653 if self._docket is not None:
586 654 self._datafile = self._docket.data_filepath()
587 655 self._sidedatafile = self._docket.sidedata_filepath()
588 656 elif self.postfix is None:
589 657 self._datafile = b'%s.d' % self.radix
590 658 else:
591 659 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
592 660
593 661 self.nodeconstants = sha1nodeconstants
594 662 self.nullid = self.nodeconstants.nullid
595 663
596 664 # sparse-revlog can't be on without general-delta (issue6056)
597 665 if not self._generaldelta:
598 666 self._sparserevlog = False
599 667
600 668 self._storedeltachains = True
601 669
602 670 devel_nodemap = (
603 671 self._nodemap_file
604 672 and force_nodemap
605 673 and parse_index_v1_nodemap is not None
606 674 )
607 675
608 676 use_rust_index = False
609 677 if rustrevlog is not None:
610 678 if self._nodemap_file is not None:
611 679 use_rust_index = True
612 680 else:
613 681 use_rust_index = self.opener.options.get(b'rust.index')
614 682
615 683 self._parse_index = parse_index_v1
616 684 if self._format_version == REVLOGV0:
617 685 self._parse_index = revlogv0.parse_index_v0
618 686 elif self._format_version == REVLOGV2:
619 687 self._parse_index = parse_index_v2
620 688 elif self._format_version == CHANGELOGV2:
621 689 self._parse_index = parse_index_cl_v2
622 690 elif devel_nodemap:
623 691 self._parse_index = parse_index_v1_nodemap
624 692 elif use_rust_index:
625 693 self._parse_index = parse_index_v1_mixed
626 694 try:
627 695 d = self._parse_index(index_data, self._inline)
628 696 index, chunkcache = d
629 697 use_nodemap = (
630 698 not self._inline
631 699 and self._nodemap_file is not None
632 700 and util.safehasattr(index, 'update_nodemap_data')
633 701 )
634 702 if use_nodemap:
635 703 nodemap_data = nodemaputil.persisted_data(self)
636 704 if nodemap_data is not None:
637 705 docket = nodemap_data[0]
638 706 if (
639 707 len(d[0]) > docket.tip_rev
640 708 and d[0][docket.tip_rev][7] == docket.tip_node
641 709 ):
642 710 # no changelog tampering
643 711 self._nodemap_docket = docket
644 712 index.update_nodemap_data(*nodemap_data)
645 713 except (ValueError, IndexError):
646 714 raise error.RevlogError(
647 715 _(b"index %s is corrupted") % self.display_id
648 716 )
649 717 self.index = index
650 718 self._segmentfile = randomaccessfile.randomaccessfile(
651 719 self.opener,
652 720 (self._indexfile if self._inline else self._datafile),
653 721 self._chunkcachesize,
654 722 chunkcache,
655 723 )
656 724 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
657 725 self.opener,
658 726 self._sidedatafile,
659 727 self._chunkcachesize,
660 728 )
661 729 # revnum -> (chain-length, sum-delta-length)
662 730 self._chaininfocache = util.lrucachedict(500)
663 731 # revlog header -> revlog compressor
664 732 self._decompressors = {}
665 733
666 734 def get_revlog(self):
667 735 """simple function to mirror API of other not-really-revlog API"""
668 736 return self
669 737
670 738 @util.propertycache
671 739 def revlog_kind(self):
672 740 return self.target[0]
673 741
674 742 @util.propertycache
675 743 def display_id(self):
676 744 """The public facing "ID" of the revlog that we use in message"""
677 745 if self.revlog_kind == KIND_FILELOG:
678 746 # Reference the file without the "data/" prefix, so it is familiar
679 747 # to the user.
680 748 return self.target[1]
681 749 else:
682 750 return self.radix
683 751
684 752 def _get_decompressor(self, t):
685 753 try:
686 754 compressor = self._decompressors[t]
687 755 except KeyError:
688 756 try:
689 757 engine = util.compengines.forrevlogheader(t)
690 758 compressor = engine.revlogcompressor(self._compengineopts)
691 759 self._decompressors[t] = compressor
692 760 except KeyError:
693 761 raise error.RevlogError(
694 762 _(b'unknown compression type %s') % binascii.hexlify(t)
695 763 )
696 764 return compressor
697 765
698 766 @util.propertycache
699 767 def _compressor(self):
700 768 engine = util.compengines[self._compengine]
701 769 return engine.revlogcompressor(self._compengineopts)
702 770
703 771 @util.propertycache
704 772 def _decompressor(self):
705 773 """the default decompressor"""
706 774 if self._docket is None:
707 775 return None
708 776 t = self._docket.default_compression_header
709 777 c = self._get_decompressor(t)
710 778 return c.decompress
711 779
712 780 def _indexfp(self):
713 781 """file object for the revlog's index file"""
714 782 return self.opener(self._indexfile, mode=b"r")
715 783
716 784 def __index_write_fp(self):
717 785 # You should not use this directly; use `_writing` instead
718 786 try:
719 787 f = self.opener(
720 788 self._indexfile, mode=b"r+", checkambig=self._checkambig
721 789 )
722 790 if self._docket is None:
723 791 f.seek(0, os.SEEK_END)
724 792 else:
725 793 f.seek(self._docket.index_end, os.SEEK_SET)
726 794 return f
727 795 except FileNotFoundError:
728 796 return self.opener(
729 797 self._indexfile, mode=b"w+", checkambig=self._checkambig
730 798 )
731 799
732 800 def __index_new_fp(self):
733 801 # You should not use this unless you are upgrading from inline revlog
734 802 return self.opener(
735 803 self._indexfile,
736 804 mode=b"w",
737 805 checkambig=self._checkambig,
738 806 atomictemp=True,
739 807 )
740 808
741 809 def _datafp(self, mode=b'r'):
742 810 """file object for the revlog's data file"""
743 811 return self.opener(self._datafile, mode=mode)
744 812
745 813 @contextlib.contextmanager
746 814 def _sidedatareadfp(self):
747 815 """file object suitable to read sidedata"""
748 816 if self._writinghandles:
749 817 yield self._writinghandles[2]
750 818 else:
751 819 with self.opener(self._sidedatafile) as fp:
752 820 yield fp
753 821
754 822 def tiprev(self):
755 823 return len(self.index) - 1
756 824
757 825 def tip(self):
758 826 return self.node(self.tiprev())
759 827
760 828 def __contains__(self, rev):
761 829 return 0 <= rev < len(self)
762 830
763 831 def __len__(self):
764 832 return len(self.index)
765 833
766 834 def __iter__(self):
767 835 return iter(range(len(self)))
768 836
769 837 def revs(self, start=0, stop=None):
770 838 """iterate over all rev in this revlog (from start to stop)"""
771 839 return storageutil.iterrevs(len(self), start=start, stop=stop)
772 840
773 841 def hasnode(self, node):
774 842 try:
775 843 self.rev(node)
776 844 return True
777 845 except KeyError:
778 846 return False
779 847
780 848 def candelta(self, baserev, rev):
781 849 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 850 # Disable delta if either rev requires a content-changing flag
784 852 # processor (ex. LFS). This is because such a flag processor can alter
785 853 # the rawtext content that the delta will be based on, and two clients
786 854 # could have the same revlog node with different flags (i.e. different
786 854 # rawtext contents) and the delta could be incompatible.
787 855 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
788 856 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
789 857 ):
790 858 return False
791 859 return True
792 860
793 861 def update_caches(self, transaction):
794 862 if self._nodemap_file is not None:
795 863 if transaction is None:
796 864 nodemaputil.update_persistent_nodemap(self)
797 865 else:
798 866 nodemaputil.setup_persistent_nodemap(transaction, self)
799 867
800 868 def clearcaches(self):
801 869 self._revisioncache = None
802 870 self._chainbasecache.clear()
803 871 self._segmentfile.clear_cache()
804 872 self._segmentfile_sidedata.clear_cache()
805 873 self._pcache = {}
806 874 self._nodemap_docket = None
807 875 self.index.clearcaches()
808 876 # The python code is the one responsible for validating the docket, so we
809 877 # end up having to refresh it here.
810 878 use_nodemap = (
811 879 not self._inline
812 880 and self._nodemap_file is not None
813 881 and util.safehasattr(self.index, 'update_nodemap_data')
814 882 )
815 883 if use_nodemap:
816 884 nodemap_data = nodemaputil.persisted_data(self)
817 885 if nodemap_data is not None:
818 886 self._nodemap_docket = nodemap_data[0]
819 887 self.index.update_nodemap_data(*nodemap_data)
820 888
821 889 def rev(self, node):
822 890 try:
823 891 return self.index.rev(node)
824 892 except TypeError:
825 893 raise
826 894 except error.RevlogError:
827 895 # parsers.c radix tree lookup failed
828 896 if (
829 897 node == self.nodeconstants.wdirid
830 898 or node in self.nodeconstants.wdirfilenodeids
831 899 ):
832 900 raise error.WdirUnsupported
833 901 raise error.LookupError(node, self.display_id, _(b'no node'))
834 902
835 903 # Accessors for index entries.
836 904
837 905 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
838 906 # are flags.
839 907 def start(self, rev):
840 908 return int(self.index[rev][0] >> 16)
841 909
842 910 def sidedata_cut_off(self, rev):
843 911 sd_cut_off = self.index[rev][8]
844 912 if sd_cut_off != 0:
845 913 return sd_cut_off
846 914 # This is some annoying dance, because entries without sidedata
847 915 # currently use 0 as their offset. (instead of previous-offset +
848 916 # previous-size)
849 917 #
850 918 # We should reconsider this sidedata → 0 sidedata_offset policy.
851 919 # In the meantime, we need this.
852 920 while 0 <= rev:
853 921 e = self.index[rev]
854 922 if e[9] != 0:
855 923 return e[8] + e[9]
856 924 rev -= 1
857 925 return 0
858 926
859 927 def flags(self, rev):
860 928 return self.index[rev][0] & 0xFFFF
861 929
862 930 def length(self, rev):
863 931 return self.index[rev][1]
864 932
865 933 def sidedata_length(self, rev):
866 934 if not self.hassidedata:
867 935 return 0
868 936 return self.index[rev][9]
869 937
870 938 def rawsize(self, rev):
871 939 """return the length of the uncompressed text for a given revision"""
872 940 l = self.index[rev][2]
873 941 if l >= 0:
874 942 return l
875 943
876 944 t = self.rawdata(rev)
877 945 return len(t)
878 946
879 947 def size(self, rev):
880 948 """length of non-raw text (processed by a "read" flag processor)"""
881 949 # fast path: if no "read" flag processor could change the content,
882 950 # size is rawsize. note: ELLIPSIS is known to not change the content.
883 951 flags = self.flags(rev)
884 952 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
885 953 return self.rawsize(rev)
886 954
887 955 return len(self.revision(rev))
888 956
889 957 def fast_rank(self, rev):
890 958 """Return the rank of a revision if already known, or None otherwise.
891 959
892 960 The rank of a revision is the size of the sub-graph it defines as a
893 961 head. Equivalently, the rank of a revision `r` is the size of the set
894 962 `ancestors(r)`, `r` included.
895 963
896 964 This method returns the rank retrieved from the revlog in constant
897 965 time. It makes no attempt at computing unknown values for versions of
898 966 the revlog which do not persist the rank.
899 967 """
900 968 rank = self.index[rev][ENTRY_RANK]
901 969 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
902 970 return None
903 971 if rev == nullrev:
904 972 return 0 # convention
905 973 return rank
906 974
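As a slow cross-check of what the stored rank means (illustrative only, and far more expensive than the constant-time lookup above; `rl` and `rev` are placeholders):

    rank = rl.fast_rank(rev)
    if rank is not None:
        # the rank is the number of ancestors of `rev`, `rev` included,
        # which the lazy ancestor iterator can also count (slowly)
        assert rank == len(list(rl.ancestors([rev], inclusive=True)))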
907 975 def chainbase(self, rev):
908 976 base = self._chainbasecache.get(rev)
909 977 if base is not None:
910 978 return base
911 979
912 980 index = self.index
913 981 iterrev = rev
914 982 base = index[iterrev][3]
915 983 while base != iterrev:
916 984 iterrev = base
917 985 base = index[iterrev][3]
918 986
919 987 self._chainbasecache[rev] = base
920 988 return base
921 989
922 990 def linkrev(self, rev):
923 991 return self.index[rev][4]
924 992
925 993 def parentrevs(self, rev):
926 994 try:
927 995 entry = self.index[rev]
928 996 except IndexError:
929 997 if rev == wdirrev:
930 998 raise error.WdirUnsupported
931 999 raise
932 1000
933 1001 if self.canonical_parent_order and entry[5] == nullrev:
934 1002 return entry[6], entry[5]
935 1003 else:
936 1004 return entry[5], entry[6]
937 1005
938 1006 # fast parentrevs(rev) where rev isn't filtered
939 1007 _uncheckedparentrevs = parentrevs
940 1008
941 1009 def node(self, rev):
942 1010 try:
943 1011 return self.index[rev][7]
944 1012 except IndexError:
945 1013 if rev == wdirrev:
946 1014 raise error.WdirUnsupported
947 1015 raise
948 1016
949 1017 # Derived from index values.
950 1018
951 1019 def end(self, rev):
952 1020 return self.start(rev) + self.length(rev)
953 1021
954 1022 def parents(self, node):
955 1023 i = self.index
956 1024 d = i[self.rev(node)]
957 1025 # inline node() to avoid function call overhead
958 1026 if self.canonical_parent_order and d[5] == self.nullid:
959 1027 return i[d[6]][7], i[d[5]][7]
960 1028 else:
961 1029 return i[d[5]][7], i[d[6]][7]
962 1030
963 1031 def chainlen(self, rev):
964 1032 return self._chaininfo(rev)[0]
965 1033
966 1034 def _chaininfo(self, rev):
967 1035 chaininfocache = self._chaininfocache
968 1036 if rev in chaininfocache:
969 1037 return chaininfocache[rev]
970 1038 index = self.index
971 1039 generaldelta = self._generaldelta
972 1040 iterrev = rev
973 1041 e = index[iterrev]
974 1042 clen = 0
975 1043 compresseddeltalen = 0
976 1044 while iterrev != e[3]:
977 1045 clen += 1
978 1046 compresseddeltalen += e[1]
979 1047 if generaldelta:
980 1048 iterrev = e[3]
981 1049 else:
982 1050 iterrev -= 1
983 1051 if iterrev in chaininfocache:
984 1052 t = chaininfocache[iterrev]
985 1053 clen += t[0]
986 1054 compresseddeltalen += t[1]
987 1055 break
988 1056 e = index[iterrev]
989 1057 else:
990 1058 # Add text length of base since decompressing that also takes
991 1059 # work. For cache hits the length is already included.
992 1060 compresseddeltalen += e[1]
993 1061 r = (clen, compresseddeltalen)
994 1062 chaininfocache[rev] = r
995 1063 return r
996 1064
997 1065 def _deltachain(self, rev, stoprev=None):
998 1066 """Obtain the delta chain for a revision.
999 1067
1000 1068 ``stoprev`` specifies a revision to stop at. If not specified, we
1001 1069 stop at the base of the chain.
1002 1070
1003 1071 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1004 1072 revs in ascending order and ``stopped`` is a bool indicating whether
1005 1073 ``stoprev`` was hit.
1006 1074 """
1007 1075 # Try C implementation.
1008 1076 try:
1009 1077 return self.index.deltachain(rev, stoprev, self._generaldelta)
1010 1078 except AttributeError:
1011 1079 pass
1012 1080
1013 1081 chain = []
1014 1082
1015 1083 # Alias to prevent attribute lookup in tight loop.
1016 1084 index = self.index
1017 1085 generaldelta = self._generaldelta
1018 1086
1019 1087 iterrev = rev
1020 1088 e = index[iterrev]
1021 1089 while iterrev != e[3] and iterrev != stoprev:
1022 1090 chain.append(iterrev)
1023 1091 if generaldelta:
1024 1092 iterrev = e[3]
1025 1093 else:
1026 1094 iterrev -= 1
1027 1095 e = index[iterrev]
1028 1096
1029 1097 if iterrev == stoprev:
1030 1098 stopped = True
1031 1099 else:
1032 1100 chain.append(iterrev)
1033 1101 stopped = False
1034 1102
1035 1103 chain.reverse()
1036 1104 return chain, stopped
1037 1105
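A short sketch relating the chain returned above to the other chain helpers in this class (illustrative assertions, assuming no `stoprev` and an unfiltered revlog; `_deltachain` is a private helper):

    chain, stopped = rl._deltachain(rev)
    assert chain[-1] == rev                    # the requested revision ends the chain
    assert not stopped and chain[0] == rl.chainbase(rev)   # full snapshot at the base
    assert len(chain) == rl.chainlen(rev) + 1  # chainlen counts deltas, not the base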
1038 1106 def ancestors(self, revs, stoprev=0, inclusive=False):
1039 1107 """Generate the ancestors of 'revs' in reverse revision order.
1040 1108 Does not generate revs lower than stoprev.
1041 1109
1042 1110 See the documentation for ancestor.lazyancestors for more details."""
1043 1111
1044 1112 # first, make sure start revisions aren't filtered
1045 1113 revs = list(revs)
1046 1114 checkrev = self.node
1047 1115 for r in revs:
1048 1116 checkrev(r)
1049 1117 # and we're sure ancestors aren't filtered as well
1050 1118
1051 1119 if rustancestor is not None and self.index.rust_ext_compat:
1052 1120 lazyancestors = rustancestor.LazyAncestors
1053 1121 arg = self.index
1054 1122 else:
1055 1123 lazyancestors = ancestor.lazyancestors
1056 1124 arg = self._uncheckedparentrevs
1057 1125 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1058 1126
1059 1127 def descendants(self, revs):
1060 1128 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1061 1129
1062 1130 def findcommonmissing(self, common=None, heads=None):
1063 1131 """Return a tuple of the ancestors of common and the ancestors of heads
1064 1132 that are not ancestors of common. In revset terminology, we return the
1065 1133 tuple:
1066 1134
1067 1135 ::common, (::heads) - (::common)
1068 1136
1069 1137 The list is sorted by revision number, meaning it is
1070 1138 topologically sorted.
1071 1139
1072 1140 'heads' and 'common' are both lists of node IDs. If heads is
1073 1141 not supplied, uses all of the revlog's heads. If common is not
1074 1142 supplied, uses nullid."""
1075 1143 if common is None:
1076 1144 common = [self.nullid]
1077 1145 if heads is None:
1078 1146 heads = self.heads()
1079 1147
1080 1148 common = [self.rev(n) for n in common]
1081 1149 heads = [self.rev(n) for n in heads]
1082 1150
1083 1151 # we want the ancestors, but inclusive
1084 1152 class lazyset:
1085 1153 def __init__(self, lazyvalues):
1086 1154 self.addedvalues = set()
1087 1155 self.lazyvalues = lazyvalues
1088 1156
1089 1157 def __contains__(self, value):
1090 1158 return value in self.addedvalues or value in self.lazyvalues
1091 1159
1092 1160 def __iter__(self):
1093 1161 added = self.addedvalues
1094 1162 for r in added:
1095 1163 yield r
1096 1164 for r in self.lazyvalues:
1097 1165 if r not in added:
1098 1166 yield r
1099 1167
1100 1168 def add(self, value):
1101 1169 self.addedvalues.add(value)
1102 1170
1103 1171 def update(self, values):
1104 1172 self.addedvalues.update(values)
1105 1173
1106 1174 has = lazyset(self.ancestors(common))
1107 1175 has.add(nullrev)
1108 1176 has.update(common)
1109 1177
1110 1178 # take all ancestors from heads that aren't in has
1111 1179 missing = set()
1112 1180 visit = collections.deque(r for r in heads if r not in has)
1113 1181 while visit:
1114 1182 r = visit.popleft()
1115 1183 if r in missing:
1116 1184 continue
1117 1185 else:
1118 1186 missing.add(r)
1119 1187 for p in self.parentrevs(r):
1120 1188 if p not in has:
1121 1189 visit.append(p)
1122 1190 missing = list(missing)
1123 1191 missing.sort()
1124 1192 return has, [self.node(miss) for miss in missing]
1125 1193
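Sketch of how this helper is typically used during discovery (`local_heads` and `remote_heads` are placeholder node lists):

    has, missing = rl.findcommonmissing(common=remote_heads, heads=local_heads)
    # `has` answers "is this rev an ancestor of common?" lazily; `missing`
    # lists the nodes only reachable from `heads`, in topological order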
1126 1194 def incrementalmissingrevs(self, common=None):
1127 1195 """Return an object that can be used to incrementally compute the
1128 1196 revision numbers of the ancestors of arbitrary sets that are not
1129 1197 ancestors of common. This is an ancestor.incrementalmissingancestors
1130 1198 object.
1131 1199
1132 1200 'common' is a list of revision numbers. If common is not supplied, uses
1133 1201 nullrev.
1134 1202 """
1135 1203 if common is None:
1136 1204 common = [nullrev]
1137 1205
1138 1206 if rustancestor is not None and self.index.rust_ext_compat:
1139 1207 return rustancestor.MissingAncestors(self.index, common)
1140 1208 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1141 1209
1142 1210 def findmissingrevs(self, common=None, heads=None):
1143 1211 """Return the revision numbers of the ancestors of heads that
1144 1212 are not ancestors of common.
1145 1213
1146 1214 More specifically, return a list of revision numbers corresponding to
1147 1215 nodes N such that every N satisfies the following constraints:
1148 1216
1149 1217 1. N is an ancestor of some node in 'heads'
1150 1218 2. N is not an ancestor of any node in 'common'
1151 1219
1152 1220 The list is sorted by revision number, meaning it is
1153 1221 topologically sorted.
1154 1222
1155 1223 'heads' and 'common' are both lists of revision numbers. If heads is
1156 1224 not supplied, uses all of the revlog's heads. If common is not
1157 1225 supplied, uses nullid."""
1158 1226 if common is None:
1159 1227 common = [nullrev]
1160 1228 if heads is None:
1161 1229 heads = self.headrevs()
1162 1230
1163 1231 inc = self.incrementalmissingrevs(common=common)
1164 1232 return inc.missingancestors(heads)
1165 1233
1166 1234 def findmissing(self, common=None, heads=None):
1167 1235 """Return the ancestors of heads that are not ancestors of common.
1168 1236
1169 1237 More specifically, return a list of nodes N such that every N
1170 1238 satisfies the following constraints:
1171 1239
1172 1240 1. N is an ancestor of some node in 'heads'
1173 1241 2. N is not an ancestor of any node in 'common'
1174 1242
1175 1243 The list is sorted by revision number, meaning it is
1176 1244 topologically sorted.
1177 1245
1178 1246 'heads' and 'common' are both lists of node IDs. If heads is
1179 1247 not supplied, uses all of the revlog's heads. If common is not
1180 1248 supplied, uses nullid."""
1181 1249 if common is None:
1182 1250 common = [self.nullid]
1183 1251 if heads is None:
1184 1252 heads = self.heads()
1185 1253
1186 1254 common = [self.rev(n) for n in common]
1187 1255 heads = [self.rev(n) for n in heads]
1188 1256
1189 1257 inc = self.incrementalmissingrevs(common=common)
1190 1258 return [self.node(r) for r in inc.missingancestors(heads)]
1191 1259
1192 1260 def nodesbetween(self, roots=None, heads=None):
1193 1261 """Return a topological path from 'roots' to 'heads'.
1194 1262
1195 1263 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1196 1264 topologically sorted list of all nodes N that satisfy both of
1197 1265 these constraints:
1198 1266
1199 1267 1. N is a descendant of some node in 'roots'
1200 1268 2. N is an ancestor of some node in 'heads'
1201 1269
1202 1270 Every node is considered to be both a descendant and an ancestor
1203 1271 of itself, so every reachable node in 'roots' and 'heads' will be
1204 1272 included in 'nodes'.
1205 1273
1206 1274 'outroots' is the list of reachable nodes in 'roots', i.e., the
1207 1275 subset of 'roots' that is returned in 'nodes'. Likewise,
1208 1276 'outheads' is the subset of 'heads' that is also in 'nodes'.
1209 1277
1210 1278 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1211 1279 unspecified, uses nullid as the only root. If 'heads' is
1212 1280 unspecified, uses list of all of the revlog's heads."""
1213 1281 nonodes = ([], [], [])
1214 1282 if roots is not None:
1215 1283 roots = list(roots)
1216 1284 if not roots:
1217 1285 return nonodes
1218 1286 lowestrev = min([self.rev(n) for n in roots])
1219 1287 else:
1220 1288 roots = [self.nullid] # Everybody's a descendant of nullid
1221 1289 lowestrev = nullrev
1222 1290 if (lowestrev == nullrev) and (heads is None):
1223 1291 # We want _all_ the nodes!
1224 1292 return (
1225 1293 [self.node(r) for r in self],
1226 1294 [self.nullid],
1227 1295 list(self.heads()),
1228 1296 )
1229 1297 if heads is None:
1230 1298 # All nodes are ancestors, so the latest ancestor is the last
1231 1299 # node.
1232 1300 highestrev = len(self) - 1
1233 1301 # Set ancestors to None to signal that every node is an ancestor.
1234 1302 ancestors = None
1235 1303 # Set heads to an empty dictionary for later discovery of heads
1236 1304 heads = {}
1237 1305 else:
1238 1306 heads = list(heads)
1239 1307 if not heads:
1240 1308 return nonodes
1241 1309 ancestors = set()
1242 1310 # Turn heads into a dictionary so we can remove 'fake' heads.
1243 1311 # Also, later we will be using it to filter out the heads we can't
1244 1312 # find from roots.
1245 1313 heads = dict.fromkeys(heads, False)
1246 1314 # Start at the top and keep marking parents until we're done.
1247 1315 nodestotag = set(heads)
1248 1316 # Remember where the top was so we can use it as a limit later.
1249 1317 highestrev = max([self.rev(n) for n in nodestotag])
1250 1318 while nodestotag:
1251 1319 # grab a node to tag
1252 1320 n = nodestotag.pop()
1253 1321 # Never tag nullid
1254 1322 if n == self.nullid:
1255 1323 continue
1256 1324 # A node's revision number represents its place in a
1257 1325 # topologically sorted list of nodes.
1258 1326 r = self.rev(n)
1259 1327 if r >= lowestrev:
1260 1328 if n not in ancestors:
1261 1329 # If we are possibly a descendant of one of the roots
1262 1330 # and we haven't already been marked as an ancestor
1263 1331 ancestors.add(n) # Mark as ancestor
1264 1332 # Add non-nullid parents to list of nodes to tag.
1265 1333 nodestotag.update(
1266 1334 [p for p in self.parents(n) if p != self.nullid]
1267 1335 )
1268 1336 elif n in heads: # We've seen it before, is it a fake head?
1269 1337 # So it is, real heads should not be the ancestors of
1270 1338 # any other heads.
1271 1339 heads.pop(n)
1272 1340 if not ancestors:
1273 1341 return nonodes
1274 1342 # Now that we have our set of ancestors, we want to remove any
1275 1343 # roots that are not ancestors.
1276 1344
1277 1345 # If one of the roots was nullid, everything is included anyway.
1278 1346 if lowestrev > nullrev:
1279 1347 # But, since we weren't, let's recompute the lowest rev to not
1280 1348 # include roots that aren't ancestors.
1281 1349
1282 1350 # Filter out roots that aren't ancestors of heads
1283 1351 roots = [root for root in roots if root in ancestors]
1284 1352 # Recompute the lowest revision
1285 1353 if roots:
1286 1354 lowestrev = min([self.rev(root) for root in roots])
1287 1355 else:
1288 1356 # No more roots? Return empty list
1289 1357 return nonodes
1290 1358 else:
1291 1359 # We are descending from nullid, and don't need to care about
1292 1360 # any other roots.
1293 1361 lowestrev = nullrev
1294 1362 roots = [self.nullid]
1295 1363 # Transform our roots list into a set.
1296 1364 descendants = set(roots)
1297 1365 # Also, keep the original roots so we can filter out roots that aren't
1298 1366 # 'real' roots (i.e. are descended from other roots).
1299 1367 roots = descendants.copy()
1300 1368 # Our topologically sorted list of output nodes.
1301 1369 orderedout = []
1302 1370 # Don't start at nullid since we don't want nullid in our output list,
1303 1371 # and if nullid shows up in descendants, empty parents will look like
1304 1372 # they're descendants.
1305 1373 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1306 1374 n = self.node(r)
1307 1375 isdescendant = False
1308 1376 if lowestrev == nullrev: # Everybody is a descendant of nullid
1309 1377 isdescendant = True
1310 1378 elif n in descendants:
1311 1379 # n is already a descendant
1312 1380 isdescendant = True
1313 1381 # This check only needs to be done here because all the roots
1314 1382 # will start being marked as descendants before the loop.
1315 1383 if n in roots:
1316 1384 # If n was a root, check if it's a 'real' root.
1317 1385 p = tuple(self.parents(n))
1318 1386 # If any of its parents are descendants, it's not a root.
1319 1387 if (p[0] in descendants) or (p[1] in descendants):
1320 1388 roots.remove(n)
1321 1389 else:
1322 1390 p = tuple(self.parents(n))
1323 1391 # A node is a descendant if either of its parents are
1324 1392 # descendants. (We seeded the descendants set with the roots
1325 1393 # up there, remember?)
1326 1394 if (p[0] in descendants) or (p[1] in descendants):
1327 1395 descendants.add(n)
1328 1396 isdescendant = True
1329 1397 if isdescendant and ((ancestors is None) or (n in ancestors)):
1330 1398 # Only include nodes that are both descendants and ancestors.
1331 1399 orderedout.append(n)
1332 1400 if (ancestors is not None) and (n in heads):
1333 1401 # We're trying to figure out which heads are reachable
1334 1402 # from roots.
1335 1403 # Mark this head as having been reached
1336 1404 heads[n] = True
1337 1405 elif ancestors is None:
1338 1406 # Otherwise, we're trying to discover the heads.
1339 1407 # Assume this is a head because if it isn't, the next step
1340 1408 # will eventually remove it.
1341 1409 heads[n] = True
1342 1410 # But, obviously its parents aren't.
1343 1411 for p in self.parents(n):
1344 1412 heads.pop(p, None)
1345 1413 heads = [head for head, flag in heads.items() if flag]
1346 1414 roots = list(roots)
1347 1415 assert orderedout
1348 1416 assert roots
1349 1417 assert heads
1350 1418 return (orderedout, roots, heads)
1351 1419
1352 1420 def headrevs(self, revs=None):
1353 1421 if revs is None:
1354 1422 try:
1355 1423 return self.index.headrevs()
1356 1424 except AttributeError:
1357 1425 return self._headrevs()
1358 1426 if rustdagop is not None and self.index.rust_ext_compat:
1359 1427 return rustdagop.headrevs(self.index, revs)
1360 1428 return dagop.headrevs(revs, self._uncheckedparentrevs)
1361 1429
1362 1430 def computephases(self, roots):
1363 1431 return self.index.computephasesmapsets(roots)
1364 1432
1365 1433 def _headrevs(self):
1366 1434 count = len(self)
1367 1435 if not count:
1368 1436 return [nullrev]
1369 1437 # we won't iterate over filtered revs, so nobody is a head at the start
1370 1438 ishead = [0] * (count + 1)
1371 1439 index = self.index
1372 1440 for r in self:
1373 1441 ishead[r] = 1 # I may be a head
1374 1442 e = index[r]
1375 1443 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1376 1444 return [r for r, val in enumerate(ishead) if val]
1377 1445
1378 1446 def heads(self, start=None, stop=None):
1379 1447 """return the list of all nodes that have no children
1380 1448
1381 1449 if start is specified, only heads that are descendants of
1382 1450 start will be returned
1383 1451 if stop is specified, it will consider all the revs from stop
1384 1452 as if they had no children
1385 1453 """
1386 1454 if start is None and stop is None:
1387 1455 if not len(self):
1388 1456 return [self.nullid]
1389 1457 return [self.node(r) for r in self.headrevs()]
1390 1458
1391 1459 if start is None:
1392 1460 start = nullrev
1393 1461 else:
1394 1462 start = self.rev(start)
1395 1463
1396 1464 stoprevs = {self.rev(n) for n in stop or []}
1397 1465
1398 1466 revs = dagop.headrevssubset(
1399 1467 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1400 1468 )
1401 1469
1402 1470 return [self.node(rev) for rev in revs]
1403 1471
1404 1472 def children(self, node):
1405 1473 """find the children of a given node"""
1406 1474 c = []
1407 1475 p = self.rev(node)
1408 1476 for r in self.revs(start=p + 1):
1409 1477 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1410 1478 if prevs:
1411 1479 for pr in prevs:
1412 1480 if pr == p:
1413 1481 c.append(self.node(r))
1414 1482 elif p == nullrev:
1415 1483 c.append(self.node(r))
1416 1484 return c
1417 1485
1418 1486 def commonancestorsheads(self, a, b):
1419 1487 """calculate all the heads of the common ancestors of nodes a and b"""
1420 1488 a, b = self.rev(a), self.rev(b)
1421 1489 ancs = self._commonancestorsheads(a, b)
1422 1490 return pycompat.maplist(self.node, ancs)
1423 1491
1424 1492 def _commonancestorsheads(self, *revs):
1425 1493 """calculate all the heads of the common ancestors of revs"""
1426 1494 try:
1427 1495 ancs = self.index.commonancestorsheads(*revs)
1428 1496 except (AttributeError, OverflowError): # C implementation failed
1429 1497 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1430 1498 return ancs
1431 1499
1432 1500 def isancestor(self, a, b):
1433 1501 """return True if node a is an ancestor of node b
1434 1502
1435 1503 A revision is considered an ancestor of itself."""
1436 1504 a, b = self.rev(a), self.rev(b)
1437 1505 return self.isancestorrev(a, b)
1438 1506
1439 1507 def isancestorrev(self, a, b):
1440 1508 """return True if revision a is an ancestor of revision b
1441 1509
1442 1510 A revision is considered an ancestor of itself.
1443 1511
1444 1512 The implementation of this is trivial but the use of
1445 1513 reachableroots is not."""
1446 1514 if a == nullrev:
1447 1515 return True
1448 1516 elif a == b:
1449 1517 return True
1450 1518 elif a > b:
1451 1519 return False
1452 1520 return bool(self.reachableroots(a, [b], [a], includepath=False))
1453 1521
1454 1522 def reachableroots(self, minroot, heads, roots, includepath=False):
1455 1523 """return (heads(::(<roots> and <roots>::<heads>)))
1456 1524
1457 1525 If includepath is True, return (<roots>::<heads>)."""
1458 1526 try:
1459 1527 return self.index.reachableroots2(
1460 1528 minroot, heads, roots, includepath
1461 1529 )
1462 1530 except AttributeError:
1463 1531 return dagop._reachablerootspure(
1464 1532 self.parentrevs, minroot, roots, heads, includepath
1465 1533 )
1466 1534
1467 1535 def ancestor(self, a, b):
1468 1536 """calculate the "best" common ancestor of nodes a and b"""
1469 1537
1470 1538 a, b = self.rev(a), self.rev(b)
1471 1539 try:
1472 1540 ancs = self.index.ancestors(a, b)
1473 1541 except (AttributeError, OverflowError):
1474 1542 ancs = ancestor.ancestors(self.parentrevs, a, b)
1475 1543 if ancs:
1476 1544 # choose a consistent winner when there's a tie
1477 1545 return min(map(self.node, ancs))
1478 1546 return self.nullid
1479 1547
1480 1548 def _match(self, id):
1481 1549 if isinstance(id, int):
1482 1550 # rev
1483 1551 return self.node(id)
1484 1552 if len(id) == self.nodeconstants.nodelen:
1485 1553 # possibly a binary node
1486 1554 # odds of a binary node being all hex in ASCII are 1 in 10**25
1487 1555 try:
1488 1556 node = id
1489 1557 self.rev(node) # quick search the index
1490 1558 return node
1491 1559 except error.LookupError:
1492 1560 pass # may be partial hex id
1493 1561 try:
1494 1562 # str(rev)
1495 1563 rev = int(id)
1496 1564 if b"%d" % rev != id:
1497 1565 raise ValueError
1498 1566 if rev < 0:
1499 1567 rev = len(self) + rev
1500 1568 if rev < 0 or rev >= len(self):
1501 1569 raise ValueError
1502 1570 return self.node(rev)
1503 1571 except (ValueError, OverflowError):
1504 1572 pass
1505 1573 if len(id) == 2 * self.nodeconstants.nodelen:
1506 1574 try:
1507 1575 # a full hex nodeid?
1508 1576 node = bin(id)
1509 1577 self.rev(node)
1510 1578 return node
1511 1579 except (binascii.Error, error.LookupError):
1512 1580 pass
1513 1581
1514 1582 def _partialmatch(self, id):
1515 1583 # we don't care about wdirfilenodeids as they should always be full hashes
1516 1584 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1517 1585 ambiguous = False
1518 1586 try:
1519 1587 partial = self.index.partialmatch(id)
1520 1588 if partial and self.hasnode(partial):
1521 1589 if maybewdir:
1522 1590 # single 'ff...' match in radix tree, ambiguous with wdir
1523 1591 ambiguous = True
1524 1592 else:
1525 1593 return partial
1526 1594 elif maybewdir:
1527 1595 # no 'ff...' match in radix tree, wdir identified
1528 1596 raise error.WdirUnsupported
1529 1597 else:
1530 1598 return None
1531 1599 except error.RevlogError:
1532 1600 # parsers.c radix tree lookup gave multiple matches
1533 1601 # fast path: for unfiltered changelog, radix tree is accurate
1534 1602 if not getattr(self, 'filteredrevs', None):
1535 1603 ambiguous = True
1536 1604 # fall through to slow path that filters hidden revisions
1537 1605 except (AttributeError, ValueError):
1538 1606 # we are pure python, or key is not hex
1539 1607 pass
1540 1608 if ambiguous:
1541 1609 raise error.AmbiguousPrefixLookupError(
1542 1610 id, self.display_id, _(b'ambiguous identifier')
1543 1611 )
1544 1612
1545 1613 if id in self._pcache:
1546 1614 return self._pcache[id]
1547 1615
1548 1616 if len(id) <= 40:
1549 1617 # hex(node)[:...]
1550 1618 l = len(id) // 2 * 2 # grab an even number of digits
1551 1619 try:
1552 1620 # we're dropping the last digit, so let's check that it's hex,
1553 1621 # to avoid the expensive computation below if it's not
1554 1622 if len(id) % 2 > 0:
1555 1623 if not (id[-1] in hexdigits):
1556 1624 return None
1557 1625 prefix = bin(id[:l])
1558 1626 except binascii.Error:
1559 1627 pass
1560 1628 else:
1561 1629 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1562 1630 nl = [
1563 1631 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1564 1632 ]
1565 1633 if self.nodeconstants.nullhex.startswith(id):
1566 1634 nl.append(self.nullid)
1567 1635 if len(nl) > 0:
1568 1636 if len(nl) == 1 and not maybewdir:
1569 1637 self._pcache[id] = nl[0]
1570 1638 return nl[0]
1571 1639 raise error.AmbiguousPrefixLookupError(
1572 1640 id, self.display_id, _(b'ambiguous identifier')
1573 1641 )
1574 1642 if maybewdir:
1575 1643 raise error.WdirUnsupported
1576 1644 return None
1577 1645
1578 1646 def lookup(self, id):
1579 1647 """locate a node based on:
1580 1648 - revision number or str(revision number)
1581 1649 - nodeid or subset of hex nodeid
1582 1650 """
1583 1651 n = self._match(id)
1584 1652 if n is not None:
1585 1653 return n
1586 1654 n = self._partialmatch(id)
1587 1655 if n:
1588 1656 return n
1589 1657
1590 1658 raise error.LookupError(id, self.display_id, _(b'no match found'))
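# Illustrative sketch (not part of this change): the identifier forms that
# lookup() accepts, assuming `rl` is an open revlog with at least 11 revisions
# and the hex prefix shown is hypothetical:
#
#     rl.lookup(10)          # integer revision number
#     rl.lookup(b'10')       # str(rev) as bytes
#     rl.lookup(b'1a2b3c')   # unambiguous hex node prefix; may raise
#                            # LookupError or AmbiguousPrefixLookupError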
1591 1659
1592 1660 def shortest(self, node, minlength=1):
1593 1661 """Find the shortest unambiguous prefix that matches node."""
1594 1662
1595 1663 def isvalid(prefix):
1596 1664 try:
1597 1665 matchednode = self._partialmatch(prefix)
1598 1666 except error.AmbiguousPrefixLookupError:
1599 1667 return False
1600 1668 except error.WdirUnsupported:
1601 1669 # single 'ff...' match
1602 1670 return True
1603 1671 if matchednode is None:
1604 1672 raise error.LookupError(node, self.display_id, _(b'no node'))
1605 1673 return True
1606 1674
1607 1675 def maybewdir(prefix):
1608 1676 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1609 1677
1610 1678 hexnode = hex(node)
1611 1679
1612 1680 def disambiguate(hexnode, minlength):
1613 1681 """Disambiguate against wdirid."""
1614 1682 for length in range(minlength, len(hexnode) + 1):
1615 1683 prefix = hexnode[:length]
1616 1684 if not maybewdir(prefix):
1617 1685 return prefix
1618 1686
1619 1687 if not getattr(self, 'filteredrevs', None):
1620 1688 try:
1621 1689 length = max(self.index.shortest(node), minlength)
1622 1690 return disambiguate(hexnode, length)
1623 1691 except error.RevlogError:
1624 1692 if node != self.nodeconstants.wdirid:
1625 1693 raise error.LookupError(
1626 1694 node, self.display_id, _(b'no node')
1627 1695 )
1628 1696 except AttributeError:
1629 1697 # Fall through to pure code
1630 1698 pass
1631 1699
1632 1700 if node == self.nodeconstants.wdirid:
1633 1701 for length in range(minlength, len(hexnode) + 1):
1634 1702 prefix = hexnode[:length]
1635 1703 if isvalid(prefix):
1636 1704 return prefix
1637 1705
1638 1706 for length in range(minlength, len(hexnode) + 1):
1639 1707 prefix = hexnode[:length]
1640 1708 if isvalid(prefix):
1641 1709 return disambiguate(hexnode, length)
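# Illustrative sketch (not part of this change): shortest() is the inverse of
# a prefix lookup, assuming `node` is a binary node stored in `rl`:
#
#     prefix = rl.shortest(node, minlength=4)   # e.g. b'1a2b'
#     assert rl.lookup(prefix) == node          # the prefix is unambiguous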
1642 1710
1643 1711 def cmp(self, node, text):
1644 1712 """compare text with a given file revision
1645 1713
1646 1714 returns True if text is different than what is stored.
1647 1715 """
1648 1716 p1, p2 = self.parents(node)
1649 1717 return storageutil.hashrevisionsha1(text, p1, p2) != node
1650 1718
1651 1719 def _getsegmentforrevs(self, startrev, endrev, df=None):
1652 1720 """Obtain a segment of raw data corresponding to a range of revisions.
1653 1721
1654 1722 Accepts the start and end revisions and an optional already-open
1655 1723 file handle to be used for reading. If the file handle is read, its
1656 1724 seek position will not be preserved.
1657 1725
1658 1726 Requests for data may be satisfied by a cache.
1659 1727
1660 1728 Returns a 2-tuple of (offset, data) for the requested range of
1661 1729 revisions. Offset is the integer offset from the beginning of the
1662 1730 revlog and data is a str or buffer of the raw byte data.
1663 1731
1664 1732 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1665 1733 to determine where each revision's data begins and ends.
1666 1734 """
1667 1735 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1668 1736 # (functions are expensive).
1669 1737 index = self.index
1670 1738 istart = index[startrev]
1671 1739 start = int(istart[0] >> 16)
1672 1740 if startrev == endrev:
1673 1741 end = start + istart[1]
1674 1742 else:
1675 1743 iend = index[endrev]
1676 1744 end = int(iend[0] >> 16) + iend[1]
1677 1745
1678 1746 if self._inline:
1679 1747 start += (startrev + 1) * self.index.entry_size
1680 1748 end += (endrev + 1) * self.index.entry_size
1681 1749 length = end - start
1682 1750
1683 1751 return start, self._segmentfile.read_chunk(start, length, df)
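# Illustrative sketch (not part of this change): carving per-revision chunks
# out of the raw segment, for a non-inline revlog `rl` (the inline case needs
# the index-entry adjustment performed in _chunks() below):
#
#     offset, data = rl._getsegmentforrevs(lo, hi)
#     for rev in range(lo, hi + 1):
#         begin = rl.start(rev) - offset
#         chunk = data[begin : begin + rl.length(rev)]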
1684 1752
1685 1753 def _chunk(self, rev, df=None):
1686 1754 """Obtain a single decompressed chunk for a revision.
1687 1755
1688 1756 Accepts an integer revision and an optional already-open file handle
1689 1757 to be used for reading. If used, the seek position of the file will not
1690 1758 be preserved.
1691 1759
1692 1760 Returns a str holding uncompressed data for the requested revision.
1693 1761 """
1694 1762 compression_mode = self.index[rev][10]
1695 1763 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1696 1764 if compression_mode == COMP_MODE_PLAIN:
1697 1765 return data
1698 1766 elif compression_mode == COMP_MODE_DEFAULT:
1699 1767 return self._decompressor(data)
1700 1768 elif compression_mode == COMP_MODE_INLINE:
1701 1769 return self.decompress(data)
1702 1770 else:
1703 1771 msg = b'unknown compression mode %d'
1704 1772 msg %= compression_mode
1705 1773 raise error.RevlogError(msg)
1706 1774
1707 1775 def _chunks(self, revs, df=None, targetsize=None):
1708 1776 """Obtain decompressed chunks for the specified revisions.
1709 1777
1710 1778 Accepts an iterable of numeric revisions that are assumed to be in
1711 1779 ascending order. Also accepts an optional already-open file handle
1712 1780 to be used for reading. If used, the seek position of the file will
1713 1781 not be preserved.
1714 1782
1715 1783 This function is similar to calling ``self._chunk()`` multiple times,
1716 1784 but is faster.
1717 1785
1718 1786 Returns a list with decompressed data for each requested revision.
1719 1787 """
1720 1788 if not revs:
1721 1789 return []
1722 1790 start = self.start
1723 1791 length = self.length
1724 1792 inline = self._inline
1725 1793 iosize = self.index.entry_size
1726 1794 buffer = util.buffer
1727 1795
1728 1796 l = []
1729 1797 ladd = l.append
1730 1798
1731 1799 if not self._withsparseread:
1732 1800 slicedchunks = (revs,)
1733 1801 else:
1734 1802 slicedchunks = deltautil.slicechunk(
1735 1803 self, revs, targetsize=targetsize
1736 1804 )
1737 1805
1738 1806 for revschunk in slicedchunks:
1739 1807 firstrev = revschunk[0]
1740 1808 # Skip trailing revisions with empty diff
1741 1809 for lastrev in revschunk[::-1]:
1742 1810 if length(lastrev) != 0:
1743 1811 break
1744 1812
1745 1813 try:
1746 1814 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1747 1815 except OverflowError:
1748 1816 # issue4215 - we can't cache a run of chunks greater than
1749 1817 # 2G on Windows
1750 1818 return [self._chunk(rev, df=df) for rev in revschunk]
1751 1819
1752 1820 decomp = self.decompress
1753 1821 # self._decompressor might be None, but will not be used in that case
1754 1822 def_decomp = self._decompressor
1755 1823 for rev in revschunk:
1756 1824 chunkstart = start(rev)
1757 1825 if inline:
1758 1826 chunkstart += (rev + 1) * iosize
1759 1827 chunklength = length(rev)
1760 1828 comp_mode = self.index[rev][10]
1761 1829 c = buffer(data, chunkstart - offset, chunklength)
1762 1830 if comp_mode == COMP_MODE_PLAIN:
1763 1831 ladd(c)
1764 1832 elif comp_mode == COMP_MODE_INLINE:
1765 1833 ladd(decomp(c))
1766 1834 elif comp_mode == COMP_MODE_DEFAULT:
1767 1835 ladd(def_decomp(c))
1768 1836 else:
1769 1837 msg = b'unknown compression mode %d'
1770 1838 msg %= comp_mode
1771 1839 raise error.RevlogError(msg)
1772 1840
1773 1841 return l
1774 1842
1775 1843 def deltaparent(self, rev):
1776 1844 """return deltaparent of the given revision"""
1777 1845 base = self.index[rev][3]
1778 1846 if base == rev:
1779 1847 return nullrev
1780 1848 elif self._generaldelta:
1781 1849 return base
1782 1850 else:
1783 1851 return rev - 1
1784 1852
1785 1853 def issnapshot(self, rev):
1786 1854 """tells whether rev is a snapshot"""
1787 1855 if not self._sparserevlog:
1788 1856 return self.deltaparent(rev) == nullrev
1789 1857 elif util.safehasattr(self.index, 'issnapshot'):
1790 1858 # directly assign the method to cache the testing and access
1791 1859 self.issnapshot = self.index.issnapshot
1792 1860 return self.issnapshot(rev)
1793 1861 if rev == nullrev:
1794 1862 return True
1795 1863 entry = self.index[rev]
1796 1864 base = entry[3]
1797 1865 if base == rev:
1798 1866 return True
1799 1867 if base == nullrev:
1800 1868 return True
1801 1869 p1 = entry[5]
1802 1870 while self.length(p1) == 0:
1803 1871 b = self.deltaparent(p1)
1804 1872 if b == p1:
1805 1873 break
1806 1874 p1 = b
1807 1875 p2 = entry[6]
1808 1876 while self.length(p2) == 0:
1809 1877 b = self.deltaparent(p2)
1810 1878 if b == p2:
1811 1879 break
1812 1880 p2 = b
1813 1881 if base == p1 or base == p2:
1814 1882 return False
1815 1883 return self.issnapshot(base)
1816 1884
1817 1885 def snapshotdepth(self, rev):
1818 1886 """number of snapshot in the chain before this one"""
1819 1887 if not self.issnapshot(rev):
1820 1888 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1821 1889 return len(self._deltachain(rev)[0]) - 1
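# Illustrative sketch (not part of this change): inspecting the delta
# structure of a revision, assuming `rl` is a sparse revlog and `rev` valid:
#
#     base = rl.deltaparent(rev)         # nullrev means a full snapshot
#     if rl.issnapshot(rev):
#         depth = rl.snapshotdepth(rev)  # snapshots below it in the chain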
1822 1890
1823 1891 def revdiff(self, rev1, rev2):
1824 1892 """return or calculate a delta between two revisions
1825 1893
1826 1894 The delta calculated is in binary form and is intended to be written to
1827 1895 revlog data directly. So this function needs raw revision data.
1828 1896 """
1829 1897 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1830 1898 return bytes(self._chunk(rev2))
1831 1899
1832 1900 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
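# Illustrative sketch (not part of this change): revdiff() works on raw
# revision data, so for rev1 != nullrev the delta patches one raw text into
# the other:
#
#     delta = rl.revdiff(rev1, rev2)
#     assert mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)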
1833 1901
1834 1902 def revision(self, nodeorrev, _df=None):
1835 1903 """return an uncompressed revision of a given node or revision
1836 1904 number.
1837 1905
1838 1906 _df - an existing file handle to read from. (internal-only)
1839 1907 """
1840 1908 return self._revisiondata(nodeorrev, _df)
1841 1909
1842 1910 def sidedata(self, nodeorrev, _df=None):
1843 1911 """a map of extra data related to the changeset but not part of the hash
1844 1912
1845 1913 This function currently returns a dictionary. However, a more advanced
1846 1914 mapping object will likely be used in the future for more
1847 1915 efficient/lazy code.
1848 1916 """
1849 1917 # deal with <nodeorrev> argument type
1850 1918 if isinstance(nodeorrev, int):
1851 1919 rev = nodeorrev
1852 1920 else:
1853 1921 rev = self.rev(nodeorrev)
1854 1922 return self._sidedata(rev)
1855 1923
1856 1924 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1857 1925 # deal with <nodeorrev> argument type
1858 1926 if isinstance(nodeorrev, int):
1859 1927 rev = nodeorrev
1860 1928 node = self.node(rev)
1861 1929 else:
1862 1930 node = nodeorrev
1863 1931 rev = None
1864 1932
1865 1933 # fast path the special `nullid` rev
1866 1934 if node == self.nullid:
1867 1935 return b""
1868 1936
1869 1937 # ``rawtext`` is the text as stored inside the revlog. Might be the
1870 1938 # revision or might need to be processed to retrieve the revision.
1871 1939 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1872 1940
1873 1941 if raw and validated:
1874 1942 # if we don't want to process the raw text and that raw
1875 1943 # text is cached, we can exit early.
1876 1944 return rawtext
1877 1945 if rev is None:
1878 1946 rev = self.rev(node)
1879 1947 # the revlog's flag for this revision
1880 1948 # (usually alter its state or content)
1881 1949 flags = self.flags(rev)
1882 1950
1883 1951 if validated and flags == REVIDX_DEFAULT_FLAGS:
1884 1952 # no extra flags set, no flag processor runs, text = rawtext
1885 1953 return rawtext
1886 1954
1887 1955 if raw:
1888 1956 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1889 1957 text = rawtext
1890 1958 else:
1891 1959 r = flagutil.processflagsread(self, rawtext, flags)
1892 1960 text, validatehash = r
1893 1961 if validatehash:
1894 1962 self.checkhash(text, node, rev=rev)
1895 1963 if not validated:
1896 1964 self._revisioncache = (node, rev, rawtext)
1897 1965
1898 1966 return text
1899 1967
1900 1968 def _rawtext(self, node, rev, _df=None):
1901 1969 """return the possibly unvalidated rawtext for a revision
1902 1970
1903 1971 returns (rev, rawtext, validated)
1904 1972 """
1905 1973
1906 1974 # revision in the cache (could be useful to apply delta)
1907 1975 cachedrev = None
1908 1976 # An intermediate text to apply deltas to
1909 1977 basetext = None
1910 1978
1911 1979 # Check if we have the entry in cache
1912 1980 # The cache entry looks like (node, rev, rawtext)
1913 1981 if self._revisioncache:
1914 1982 if self._revisioncache[0] == node:
1915 1983 return (rev, self._revisioncache[2], True)
1916 1984 cachedrev = self._revisioncache[1]
1917 1985
1918 1986 if rev is None:
1919 1987 rev = self.rev(node)
1920 1988
1921 1989 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1922 1990 if stopped:
1923 1991 basetext = self._revisioncache[2]
1924 1992
1925 1993 # drop cache to save memory, the caller is expected to
1926 1994 # update self._revisioncache after validating the text
1927 1995 self._revisioncache = None
1928 1996
1929 1997 targetsize = None
1930 1998 rawsize = self.index[rev][2]
1931 1999 if 0 <= rawsize:
1932 2000 targetsize = 4 * rawsize
1933 2001
1934 2002 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1935 2003 if basetext is None:
1936 2004 basetext = bytes(bins[0])
1937 2005 bins = bins[1:]
1938 2006
1939 2007 rawtext = mdiff.patches(basetext, bins)
1940 2008 del basetext # let us have a chance to free memory early
1941 2009 return (rev, rawtext, False)
1942 2010
1943 2011 def _sidedata(self, rev):
1944 2012 """Return the sidedata for a given revision number."""
1945 2013 index_entry = self.index[rev]
1946 2014 sidedata_offset = index_entry[8]
1947 2015 sidedata_size = index_entry[9]
1948 2016
1949 2017 if self._inline:
1950 2018 sidedata_offset += self.index.entry_size * (1 + rev)
1951 2019 if sidedata_size == 0:
1952 2020 return {}
1953 2021
1954 2022 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1955 2023 filename = self._sidedatafile
1956 2024 end = self._docket.sidedata_end
1957 2025 offset = sidedata_offset
1958 2026 length = sidedata_size
1959 2027 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1960 2028 raise error.RevlogError(m)
1961 2029
1962 2030 comp_segment = self._segmentfile_sidedata.read_chunk(
1963 2031 sidedata_offset, sidedata_size
1964 2032 )
1965 2033
1966 2034 comp = self.index[rev][11]
1967 2035 if comp == COMP_MODE_PLAIN:
1968 2036 segment = comp_segment
1969 2037 elif comp == COMP_MODE_DEFAULT:
1970 2038 segment = self._decompressor(comp_segment)
1971 2039 elif comp == COMP_MODE_INLINE:
1972 2040 segment = self.decompress(comp_segment)
1973 2041 else:
1974 2042 msg = b'unknown compression mode %d'
1975 2043 msg %= comp
1976 2044 raise error.RevlogError(msg)
1977 2045
1978 2046 sidedata = sidedatautil.deserialize_sidedata(segment)
1979 2047 return sidedata
1980 2048
1981 2049 def rawdata(self, nodeorrev, _df=None):
1982 2050 """return an uncompressed raw data of a given node or revision number.
1983 2051
1984 2052 _df - an existing file handle to read from. (internal-only)
1985 2053 """
1986 2054 return self._revisiondata(nodeorrev, _df, raw=True)
1987 2055
1988 2056 def hash(self, text, p1, p2):
1989 2057 """Compute a node hash.
1990 2058
1991 2059 Available as a function so that subclasses can replace the hash
1992 2060 as needed.
1993 2061 """
1994 2062 return storageutil.hashrevisionsha1(text, p1, p2)
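# Illustrative sketch (not part of this change): a node is the hash of the raw
# text and its parent nodes, which is exactly what cmp() re-checks for a node
# already stored in `rl`:
#
#     assert rl.hash(text, p1, p2) == node   # how `node` was derived
#     assert not rl.cmp(node, text)          # text matches what is stored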
1995 2063
1996 2064 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1997 2065 """Check node hash integrity.
1998 2066
1999 2067 Available as a function so that subclasses can extend hash mismatch
2000 2068 behaviors as needed.
2001 2069 """
2002 2070 try:
2003 2071 if p1 is None and p2 is None:
2004 2072 p1, p2 = self.parents(node)
2005 2073 if node != self.hash(text, p1, p2):
2006 2074 # Clear the revision cache on hash failure. The revision cache
2007 2075 # only stores the raw revision and clearing the cache does have
2008 2076 # the side-effect that we won't have a cache hit when the raw
2009 2077 # revision data is accessed. But this case should be rare and
2010 2078 # it is extra work to teach the cache about the hash
2011 2079 # verification state.
2012 2080 if self._revisioncache and self._revisioncache[0] == node:
2013 2081 self._revisioncache = None
2014 2082
2015 2083 revornode = rev
2016 2084 if revornode is None:
2017 2085 revornode = templatefilters.short(hex(node))
2018 2086 raise error.RevlogError(
2019 2087 _(b"integrity check failed on %s:%s")
2020 2088 % (self.display_id, pycompat.bytestr(revornode))
2021 2089 )
2022 2090 except error.RevlogError:
2023 2091 if self._censorable and storageutil.iscensoredtext(text):
2024 2092 raise error.CensoredNodeError(self.display_id, node, text)
2025 2093 raise
2026 2094
2027 2095 def _enforceinlinesize(self, tr, side_write=True):
2028 2096 """Check if the revlog is too big for inline and convert if so.
2029 2097
2030 2098 This should be called after revisions are added to the revlog. If the
2031 2099 revlog has grown too large to be an inline revlog, it will convert it
2032 2100 to use multiple index and data files.
2033 2101 """
2034 2102 tiprev = len(self) - 1
2035 2103 total_size = self.start(tiprev) + self.length(tiprev)
2036 2104 if not self._inline or total_size < _maxinline:
2037 2105 return
2038 2106
2039 2107 troffset = tr.findoffset(self._indexfile)
2040 2108 if troffset is None:
2041 2109 raise error.RevlogError(
2042 2110 _(b"%s not found in the transaction") % self._indexfile
2043 2111 )
2044 2112 if troffset:
2045 2113 tr.addbackup(self._indexfile, for_offset=True)
2046 2114 tr.add(self._datafile, 0)
2047 2115
2048 2116 existing_handles = False
2049 2117 if self._writinghandles is not None:
2050 2118 existing_handles = True
2051 2119 fp = self._writinghandles[0]
2052 2120 fp.flush()
2053 2121 fp.close()
2054 2122 # We can't use the cached file handle after close(). So prevent
2055 2123 # its usage.
2056 2124 self._writinghandles = None
2057 2125 self._segmentfile.writing_handle = None
2058 2126 # No need to deal with sidedata writing handle as it is only
2059 2127 # relevant with revlog-v2 which is never inline, not reaching
2060 2128 # this code
2061 2129 if side_write:
2062 2130 old_index_file_path = self._indexfile
2063 2131 new_index_file_path = self._indexfile + b'.s'
2064 2132 opener = self.opener
2065 2133 weak_self = weakref.ref(self)
2066 2134
2067 2135 # the "split" index replace the real index when the transaction is finalized
2068 2136 def finalize_callback(tr):
2069 2137 opener.rename(
2070 2138 new_index_file_path,
2071 2139 old_index_file_path,
2072 2140 checkambig=True,
2073 2141 )
2074 2142 maybe_self = weak_self()
2075 2143 if maybe_self is not None:
2076 2144 maybe_self._indexfile = old_index_file_path
2077 2145
2078 2146 def abort_callback(tr):
2079 2147 maybe_self = weak_self()
2080 2148 if maybe_self is not None:
2081 2149 maybe_self._indexfile = old_index_file_path
2082 2150
2083 2151 tr.registertmp(new_index_file_path)
2084 2152 if self.target[1] is not None:
2085 2153 callback_id = b'000-revlog-split-%d-%s' % self.target
2086 2154 else:
2087 2155 callback_id = b'000-revlog-split-%d' % self.target[0]
2088 2156 tr.addfinalize(callback_id, finalize_callback)
2089 2157 tr.addabort(callback_id, abort_callback)
2090 2158
2091 2159 new_dfh = self._datafp(b'w+')
2092 2160 new_dfh.truncate(0) # drop any potentially existing data
2093 2161 try:
2094 2162 with self._indexfp() as read_ifh:
2095 2163 for r in self:
2096 2164 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2097 2165 new_dfh.flush()
2098 2166
2099 2167 if side_write:
2100 2168 self._indexfile = new_index_file_path
2101 2169 with self.__index_new_fp() as fp:
2102 2170 self._format_flags &= ~FLAG_INLINE_DATA
2103 2171 self._inline = False
2104 2172 for i in self:
2105 2173 e = self.index.entry_binary(i)
2106 2174 if i == 0 and self._docket is None:
2107 2175 header = self._format_flags | self._format_version
2108 2176 header = self.index.pack_header(header)
2109 2177 e = header + e
2110 2178 fp.write(e)
2111 2179 if self._docket is not None:
2112 2180 self._docket.index_end = fp.tell()
2113 2181
2114 2182 # If we don't use side-write, the temp file replaces the real
2115 2183 # index when we exit the context manager
2116 2184
2117 2185 nodemaputil.setup_persistent_nodemap(tr, self)
2118 2186 self._segmentfile = randomaccessfile.randomaccessfile(
2119 2187 self.opener,
2120 2188 self._datafile,
2121 2189 self._chunkcachesize,
2122 2190 )
2123 2191
2124 2192 if existing_handles:
2125 2193 # switched from inline to conventional; reopen the index
2126 2194 ifh = self.__index_write_fp()
2127 2195 self._writinghandles = (ifh, new_dfh, None)
2128 2196 self._segmentfile.writing_handle = new_dfh
2129 2197 new_dfh = None
2130 2198 # No need to deal with sidedata writing handle as it is only
2131 2199 # relevant with revlog-v2 which is never inline, not reaching
2132 2200 # this code
2133 2201 finally:
2134 2202 if new_dfh is not None:
2135 2203 new_dfh.close()
2136 2204
2137 2205 def _nodeduplicatecallback(self, transaction, node):
2138 2206 """called when trying to add a node already stored."""
2139 2207
2140 2208 @contextlib.contextmanager
2141 2209 def reading(self):
2142 2210 """Context manager that keeps data and sidedata files open for reading"""
2143 2211 with self._segmentfile.reading():
2144 2212 with self._segmentfile_sidedata.reading():
2145 2213 yield
2146 2214
2147 2215 @contextlib.contextmanager
2148 2216 def _writing(self, transaction):
2149 2217 if self._trypending:
2150 2218 msg = b'try to write in a `trypending` revlog: %s'
2151 2219 msg %= self.display_id
2152 2220 raise error.ProgrammingError(msg)
2153 2221 if self._writinghandles is not None:
2154 2222 yield
2155 2223 else:
2156 2224 ifh = dfh = sdfh = None
2157 2225 try:
2158 2226 r = len(self)
2159 2227 # opening the data file.
2160 2228 dsize = 0
2161 2229 if r:
2162 2230 dsize = self.end(r - 1)
2163 2231 dfh = None
2164 2232 if not self._inline:
2165 2233 try:
2166 2234 dfh = self._datafp(b"r+")
2167 2235 if self._docket is None:
2168 2236 dfh.seek(0, os.SEEK_END)
2169 2237 else:
2170 2238 dfh.seek(self._docket.data_end, os.SEEK_SET)
2171 2239 except FileNotFoundError:
2172 2240 dfh = self._datafp(b"w+")
2173 2241 transaction.add(self._datafile, dsize)
2174 2242 if self._sidedatafile is not None:
2175 2243 # revlog-v2 does not inline, help Pytype
2176 2244 assert dfh is not None
2177 2245 try:
2178 2246 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2179 2247 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2180 2248 except FileNotFoundError:
2181 2249 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2182 2250 transaction.add(
2183 2251 self._sidedatafile, self._docket.sidedata_end
2184 2252 )
2185 2253
2186 2254 # opening the index file.
2187 2255 isize = r * self.index.entry_size
2188 2256 ifh = self.__index_write_fp()
2189 2257 if self._inline:
2190 2258 transaction.add(self._indexfile, dsize + isize)
2191 2259 else:
2192 2260 transaction.add(self._indexfile, isize)
2193 2261 # exposing all file handle for writing.
2194 2262 self._writinghandles = (ifh, dfh, sdfh)
2195 2263 self._segmentfile.writing_handle = ifh if self._inline else dfh
2196 2264 self._segmentfile_sidedata.writing_handle = sdfh
2197 2265 yield
2198 2266 if self._docket is not None:
2199 2267 self._write_docket(transaction)
2200 2268 finally:
2201 2269 self._writinghandles = None
2202 2270 self._segmentfile.writing_handle = None
2203 2271 self._segmentfile_sidedata.writing_handle = None
2204 2272 if dfh is not None:
2205 2273 dfh.close()
2206 2274 if sdfh is not None:
2207 2275 sdfh.close()
2208 2276 # closing the index file last to avoid exposing references to
2209 2277 # potentially unflushed data content.
2210 2278 if ifh is not None:
2211 2279 ifh.close()
2212 2280
2213 2281 def _write_docket(self, transaction):
2214 2282 """write the current docket on disk
2215 2283
2216 2284 Exists as a method to help the changelog implement transaction logic
2217 2285 
2218 2286 We could also imagine using the same transaction logic for all revlogs
2219 2287 since dockets are cheap."""
2220 2288 self._docket.write(transaction)
2221 2289
2222 2290 def addrevision(
2223 2291 self,
2224 2292 text,
2225 2293 transaction,
2226 2294 link,
2227 2295 p1,
2228 2296 p2,
2229 2297 cachedelta=None,
2230 2298 node=None,
2231 2299 flags=REVIDX_DEFAULT_FLAGS,
2232 2300 deltacomputer=None,
2233 2301 sidedata=None,
2234 2302 ):
2235 2303 """add a revision to the log
2236 2304
2237 2305 text - the revision data to add
2238 2306 transaction - the transaction object used for rollback
2239 2307 link - the linkrev data to add
2240 2308 p1, p2 - the parent nodeids of the revision
2241 2309 cachedelta - an optional precomputed delta
2242 2310 node - nodeid of revision; typically node is not specified, and it is
2243 2311 computed by default as hash(text, p1, p2), however subclasses might
2244 2312 use a different hashing method (and override checkhash() in that case)
2245 2313 flags - the known flags to set on the revision
2246 2314 deltacomputer - an optional deltacomputer instance shared between
2247 2315 multiple calls
2248 2316 """
2249 2317 if link == nullrev:
2250 2318 raise error.RevlogError(
2251 2319 _(b"attempted to add linkrev -1 to %s") % self.display_id
2252 2320 )
2253 2321
2254 2322 if sidedata is None:
2255 2323 sidedata = {}
2256 2324 elif sidedata and not self.hassidedata:
2257 2325 raise error.ProgrammingError(
2258 2326 _(b"trying to add sidedata to a revlog who don't support them")
2259 2327 )
2260 2328
2261 2329 if flags:
2262 2330 node = node or self.hash(text, p1, p2)
2263 2331
2264 2332 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2265 2333
2266 2334 # If the flag processor modifies the revision data, ignore any provided
2267 2335 # cachedelta.
2268 2336 if rawtext != text:
2269 2337 cachedelta = None
2270 2338
2271 2339 if len(rawtext) > _maxentrysize:
2272 2340 raise error.RevlogError(
2273 2341 _(
2274 2342 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2275 2343 )
2276 2344 % (self.display_id, len(rawtext))
2277 2345 )
2278 2346
2279 2347 node = node or self.hash(rawtext, p1, p2)
2280 2348 rev = self.index.get_rev(node)
2281 2349 if rev is not None:
2282 2350 return rev
2283 2351
2284 2352 if validatehash:
2285 2353 self.checkhash(rawtext, node, p1=p1, p2=p2)
2286 2354
2287 2355 return self.addrawrevision(
2288 2356 rawtext,
2289 2357 transaction,
2290 2358 link,
2291 2359 p1,
2292 2360 p2,
2293 2361 node,
2294 2362 flags,
2295 2363 cachedelta=cachedelta,
2296 2364 deltacomputer=deltacomputer,
2297 2365 sidedata=sidedata,
2298 2366 )
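# Illustrative sketch (not part of this change): adding a revision, assuming
# `rl` is writable, `tr` is an open transaction, `linkrev` is the linked
# changelog revision, and `p1`/`p2` are parent nodes already in the revlog
# (nullid for a root revision):
#
#     rev = rl.addrevision(b'file content', tr, linkrev, p1, p2)
#     node = rl.node(rev)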
2299 2367
2300 2368 def addrawrevision(
2301 2369 self,
2302 2370 rawtext,
2303 2371 transaction,
2304 2372 link,
2305 2373 p1,
2306 2374 p2,
2307 2375 node,
2308 2376 flags,
2309 2377 cachedelta=None,
2310 2378 deltacomputer=None,
2311 2379 sidedata=None,
2312 2380 ):
2313 2381 """add a raw revision with known flags, node and parents
2314 2382 useful when reusing a revision not stored in this revlog (ex: received
2315 2383 over wire, or read from an external bundle).
2316 2384 """
2317 2385 with self._writing(transaction):
2318 2386 return self._addrevision(
2319 2387 node,
2320 2388 rawtext,
2321 2389 transaction,
2322 2390 link,
2323 2391 p1,
2324 2392 p2,
2325 2393 flags,
2326 2394 cachedelta,
2327 2395 deltacomputer=deltacomputer,
2328 2396 sidedata=sidedata,
2329 2397 )
2330 2398
2331 2399 def compress(self, data):
2332 2400 """Generate a possibly-compressed representation of data."""
2333 2401 if not data:
2334 2402 return b'', data
2335 2403
2336 2404 compressed = self._compressor.compress(data)
2337 2405
2338 2406 if compressed:
2339 2407 # The revlog compressor added the header in the returned data.
2340 2408 return b'', compressed
2341 2409
2342 2410 if data[0:1] == b'\0':
2343 2411 return b'', data
2344 2412 return b'u', data
2345 2413
2346 2414 def decompress(self, data):
2347 2415 """Decompress a revlog chunk.
2348 2416
2349 2417 The chunk is expected to begin with a header identifying the
2350 2418 format type so it can be routed to an appropriate decompressor.
2351 2419 """
2352 2420 if not data:
2353 2421 return data
2354 2422
2355 2423 # Revlogs are read much more frequently than they are written and many
2356 2424 # chunks only take microseconds to decompress, so performance is
2357 2425 # important here.
2358 2426 #
2359 2427 # We can make a few assumptions about revlogs:
2360 2428 #
2361 2429 # 1) the majority of chunks will be compressed (as opposed to inline
2362 2430 # raw data).
2363 2431 # 2) decompressing *any* data will likely be at least 10x slower than
2364 2432 # returning raw inline data.
2365 2433 # 3) we want to prioritize common and officially supported compression
2366 2434 # engines
2367 2435 #
2368 2436 # It follows that we want to optimize for "decompress compressed data
2369 2437 # when encoded with common and officially supported compression engines"
2370 2438 # case over "raw data" and "data encoded by less common or non-official
2371 2439 # compression engines." That is why we have the inline lookup first
2372 2440 # followed by the compengines lookup.
2373 2441 #
2374 2442 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2375 2443 # compressed chunks. And this matters for changelog and manifest reads.
2376 2444 t = data[0:1]
2377 2445
2378 2446 if t == b'x':
2379 2447 try:
2380 2448 return _zlibdecompress(data)
2381 2449 except zlib.error as e:
2382 2450 raise error.RevlogError(
2383 2451 _(b'revlog decompress error: %s')
2384 2452 % stringutil.forcebytestr(e)
2385 2453 )
2386 2454 # '\0' is more common than 'u' so it goes first.
2387 2455 elif t == b'\0':
2388 2456 return data
2389 2457 elif t == b'u':
2390 2458 return util.buffer(data, 1)
2391 2459
2392 2460 compressor = self._get_decompressor(t)
2393 2461
2394 2462 return compressor.decompress(data)
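# Illustrative sketch (not part of this change): compress() and decompress()
# round-trip through the one-byte header convention described above
# (b'u' = stored uncompressed, b'\0' = raw data, anything else = engine header):
#
#     header, packed = rl.compress(data)
#     assert bytes(rl.decompress(header + packed)) == data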
2395 2463
2396 2464 def _addrevision(
2397 2465 self,
2398 2466 node,
2399 2467 rawtext,
2400 2468 transaction,
2401 2469 link,
2402 2470 p1,
2403 2471 p2,
2404 2472 flags,
2405 2473 cachedelta,
2406 2474 alwayscache=False,
2407 2475 deltacomputer=None,
2408 2476 sidedata=None,
2409 2477 ):
2410 2478 """internal function to add revisions to the log
2411 2479
2412 2480 see addrevision for argument descriptions.
2413 2481
2414 2482 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2415 2483
2416 2484 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2417 2485 be used.
2418 2486
2419 2487 invariants:
2420 2488 - rawtext is optional (can be None); if not set, cachedelta must be set.
2421 2489 if both are set, they must correspond to each other.
2422 2490 """
2423 2491 if node == self.nullid:
2424 2492 raise error.RevlogError(
2425 2493 _(b"%s: attempt to add null revision") % self.display_id
2426 2494 )
2427 2495 if (
2428 2496 node == self.nodeconstants.wdirid
2429 2497 or node in self.nodeconstants.wdirfilenodeids
2430 2498 ):
2431 2499 raise error.RevlogError(
2432 2500 _(b"%s: attempt to add wdir revision") % self.display_id
2433 2501 )
2434 2502 if self._writinghandles is None:
2435 2503 msg = b'adding revision outside `revlog._writing` context'
2436 2504 raise error.ProgrammingError(msg)
2437 2505
2438 2506 if self._inline:
2439 2507 fh = self._writinghandles[0]
2440 2508 else:
2441 2509 fh = self._writinghandles[1]
2442 2510
2443 2511 btext = [rawtext]
2444 2512
2445 2513 curr = len(self)
2446 2514 prev = curr - 1
2447 2515
2448 2516 offset = self._get_data_offset(prev)
2449 2517
2450 2518 if self._concurrencychecker:
2451 2519 ifh, dfh, sdfh = self._writinghandles
2452 2520 # XXX no checking for the sidedata file
2453 2521 if self._inline:
2454 2522 # offset is "as if" it were in the .d file, so we need to add on
2455 2523 # the size of the entry metadata.
2456 2524 self._concurrencychecker(
2457 2525 ifh, self._indexfile, offset + curr * self.index.entry_size
2458 2526 )
2459 2527 else:
2460 2528 # Entries in the .i are a consistent size.
2461 2529 self._concurrencychecker(
2462 2530 ifh, self._indexfile, curr * self.index.entry_size
2463 2531 )
2464 2532 self._concurrencychecker(dfh, self._datafile, offset)
2465 2533
2466 2534 p1r, p2r = self.rev(p1), self.rev(p2)
2467 2535
2468 2536 # full versions are inserted when the needed deltas
2469 2537 # become comparable to the uncompressed text
2470 2538 if rawtext is None:
2471 2539 # need rawtext size, before changed by flag processors, which is
2472 2540 # the non-raw size. use revlog explicitly to avoid filelog's extra
2473 2541 # logic that might remove metadata size.
2474 2542 textlen = mdiff.patchedsize(
2475 2543 revlog.size(self, cachedelta[0]), cachedelta[1]
2476 2544 )
2477 2545 else:
2478 2546 textlen = len(rawtext)
2479 2547
2480 2548 if deltacomputer is None:
2481 2549 write_debug = None
2482 2550 if self._debug_delta:
2483 2551 write_debug = transaction._report
2484 2552 deltacomputer = deltautil.deltacomputer(
2485 2553 self, write_debug=write_debug
2486 2554 )
2487 2555
2488 2556 if cachedelta is not None and len(cachedelta) == 2:
2489 2557 # If the cached delta has no information about how it should be
2490 2558 # reused, add the default reuse instruction according to the
2491 2559 # revlog's configuration.
2492 2560 if self._generaldelta and self._lazydeltabase:
2493 2561 delta_base_reuse = DELTA_BASE_REUSE_TRY
2494 2562 else:
2495 2563 delta_base_reuse = DELTA_BASE_REUSE_NO
2496 2564 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2497 2565
2498 2566 revinfo = revlogutils.revisioninfo(
2499 2567 node,
2500 2568 p1,
2501 2569 p2,
2502 2570 btext,
2503 2571 textlen,
2504 2572 cachedelta,
2505 2573 flags,
2506 2574 )
2507 2575
2508 2576 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2509 2577
2510 2578 compression_mode = COMP_MODE_INLINE
2511 2579 if self._docket is not None:
2512 2580 default_comp = self._docket.default_compression_header
2513 2581 r = deltautil.delta_compression(default_comp, deltainfo)
2514 2582 compression_mode, deltainfo = r
2515 2583
2516 2584 sidedata_compression_mode = COMP_MODE_INLINE
2517 2585 if sidedata and self.hassidedata:
2518 2586 sidedata_compression_mode = COMP_MODE_PLAIN
2519 2587 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2520 2588 sidedata_offset = self._docket.sidedata_end
2521 2589 h, comp_sidedata = self.compress(serialized_sidedata)
2522 2590 if (
2523 2591 h != b'u'
2524 2592 and comp_sidedata[0:1] != b'\0'
2525 2593 and len(comp_sidedata) < len(serialized_sidedata)
2526 2594 ):
2527 2595 assert not h
2528 2596 if (
2529 2597 comp_sidedata[0:1]
2530 2598 == self._docket.default_compression_header
2531 2599 ):
2532 2600 sidedata_compression_mode = COMP_MODE_DEFAULT
2533 2601 serialized_sidedata = comp_sidedata
2534 2602 else:
2535 2603 sidedata_compression_mode = COMP_MODE_INLINE
2536 2604 serialized_sidedata = comp_sidedata
2537 2605 else:
2538 2606 serialized_sidedata = b""
2539 2607 # Don't store the offset if the sidedata is empty; that way
2540 2608 # we can easily detect empty sidedata, and it will be no different
2541 2609 # from the ones we manually add.
2542 2610 sidedata_offset = 0
2543 2611
2544 2612 rank = RANK_UNKNOWN
2545 2613 if self._compute_rank:
2546 2614 if (p1r, p2r) == (nullrev, nullrev):
2547 2615 rank = 1
2548 2616 elif p1r != nullrev and p2r == nullrev:
2549 2617 rank = 1 + self.fast_rank(p1r)
2550 2618 elif p1r == nullrev and p2r != nullrev:
2551 2619 rank = 1 + self.fast_rank(p2r)
2552 2620 else: # merge node
2553 2621 if rustdagop is not None and self.index.rust_ext_compat:
2554 2622 rank = rustdagop.rank(self.index, p1r, p2r)
2555 2623 else:
2556 2624 pmin, pmax = sorted((p1r, p2r))
2557 2625 rank = 1 + self.fast_rank(pmax)
2558 2626 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2559 2627
2560 2628 e = revlogutils.entry(
2561 2629 flags=flags,
2562 2630 data_offset=offset,
2563 2631 data_compressed_length=deltainfo.deltalen,
2564 2632 data_uncompressed_length=textlen,
2565 2633 data_compression_mode=compression_mode,
2566 2634 data_delta_base=deltainfo.base,
2567 2635 link_rev=link,
2568 2636 parent_rev_1=p1r,
2569 2637 parent_rev_2=p2r,
2570 2638 node_id=node,
2571 2639 sidedata_offset=sidedata_offset,
2572 2640 sidedata_compressed_length=len(serialized_sidedata),
2573 2641 sidedata_compression_mode=sidedata_compression_mode,
2574 2642 rank=rank,
2575 2643 )
2576 2644
2577 2645 self.index.append(e)
2578 2646 entry = self.index.entry_binary(curr)
2579 2647 if curr == 0 and self._docket is None:
2580 2648 header = self._format_flags | self._format_version
2581 2649 header = self.index.pack_header(header)
2582 2650 entry = header + entry
2583 2651 self._writeentry(
2584 2652 transaction,
2585 2653 entry,
2586 2654 deltainfo.data,
2587 2655 link,
2588 2656 offset,
2589 2657 serialized_sidedata,
2590 2658 sidedata_offset,
2591 2659 )
2592 2660
2593 2661 rawtext = btext[0]
2594 2662
2595 2663 if alwayscache and rawtext is None:
2596 2664 rawtext = deltacomputer.buildtext(revinfo, fh)
2597 2665
2598 2666 if type(rawtext) == bytes: # only accept immutable objects
2599 2667 self._revisioncache = (node, curr, rawtext)
2600 2668 self._chainbasecache[curr] = deltainfo.chainbase
2601 2669 return curr
2602 2670
2603 2671 def _get_data_offset(self, prev):
2604 2672 """Returns the current offset in the (in-transaction) data file.
2605 2673 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2606 2674 file to store that information: since sidedata can be rewritten to the
2607 2675 end of the data file within a transaction, you can have cases where, for
2608 2676 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2609 2677 to `n - 1`'s sidedata being written after `n`'s data.
2610 2678
2611 2679 TODO cache this in a docket file before getting out of experimental."""
2612 2680 if self._docket is None:
2613 2681 return self.end(prev)
2614 2682 else:
2615 2683 return self._docket.data_end
2616 2684
2617 2685 def _writeentry(
2618 2686 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2619 2687 ):
2620 2688 # Files opened in a+ mode have inconsistent behavior on various
2621 2689 # platforms. Windows requires that a file positioning call be made
2622 2690 # when the file handle transitions between reads and writes. See
2623 2691 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2624 2692 # platforms, Python or the platform itself can be buggy. Some versions
2625 2693 # of Solaris have been observed to not append at the end of the file
2626 2694 # if the file was seeked to before the end. See issue4943 for more.
2627 2695 #
2628 2696 # We work around this issue by inserting a seek() before writing.
2629 2697 # Note: This is likely not necessary on Python 3. However, because
2630 2698 # the file handle is reused for reads and may be seeked there, we need
2631 2699 # to be careful before changing this.
2632 2700 if self._writinghandles is None:
2633 2701 msg = b'adding revision outside `revlog._writing` context'
2634 2702 raise error.ProgrammingError(msg)
2635 2703 ifh, dfh, sdfh = self._writinghandles
2636 2704 if self._docket is None:
2637 2705 ifh.seek(0, os.SEEK_END)
2638 2706 else:
2639 2707 ifh.seek(self._docket.index_end, os.SEEK_SET)
2640 2708 if dfh:
2641 2709 if self._docket is None:
2642 2710 dfh.seek(0, os.SEEK_END)
2643 2711 else:
2644 2712 dfh.seek(self._docket.data_end, os.SEEK_SET)
2645 2713 if sdfh:
2646 2714 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2647 2715
2648 2716 curr = len(self) - 1
2649 2717 if not self._inline:
2650 2718 transaction.add(self._datafile, offset)
2651 2719 if self._sidedatafile:
2652 2720 transaction.add(self._sidedatafile, sidedata_offset)
2653 2721 transaction.add(self._indexfile, curr * len(entry))
2654 2722 if data[0]:
2655 2723 dfh.write(data[0])
2656 2724 dfh.write(data[1])
2657 2725 if sidedata:
2658 2726 sdfh.write(sidedata)
2659 2727 ifh.write(entry)
2660 2728 else:
2661 2729 offset += curr * self.index.entry_size
2662 2730 transaction.add(self._indexfile, offset)
2663 2731 ifh.write(entry)
2664 2732 ifh.write(data[0])
2665 2733 ifh.write(data[1])
2666 2734 assert not sidedata
2667 2735 self._enforceinlinesize(transaction)
2668 2736 if self._docket is not None:
2669 2737 # revlog-v2 always has 3 writing handles, help Pytype
2670 2738 wh1 = self._writinghandles[0]
2671 2739 wh2 = self._writinghandles[1]
2672 2740 wh3 = self._writinghandles[2]
2673 2741 assert wh1 is not None
2674 2742 assert wh2 is not None
2675 2743 assert wh3 is not None
2676 2744 self._docket.index_end = wh1.tell()
2677 2745 self._docket.data_end = wh2.tell()
2678 2746 self._docket.sidedata_end = wh3.tell()
2679 2747
2680 2748 nodemaputil.setup_persistent_nodemap(transaction, self)
2681 2749
2682 2750 def addgroup(
2683 2751 self,
2684 2752 deltas,
2685 2753 linkmapper,
2686 2754 transaction,
2687 2755 alwayscache=False,
2688 2756 addrevisioncb=None,
2689 2757 duplicaterevisioncb=None,
2690 2758 debug_info=None,
2691 2759 delta_base_reuse_policy=None,
2692 2760 ):
2693 2761 """
2694 2762 add a delta group
2695 2763
2696 2764 given a set of deltas, add them to the revision log. the
2697 2765 first delta is against its parent, which should be in our
2698 2766 log, the rest are against the previous delta.
2699 2767
2700 2768 If ``addrevisioncb`` is defined, it will be called with arguments of
2701 2769 this revlog and the node that was added.
2702 2770 """
2703 2771
2704 2772 if self._adding_group:
2705 2773 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2706 2774
2707 2775 # read the default delta-base reuse policy from revlog config if the
2708 2776 # group did not specify one.
2709 2777 if delta_base_reuse_policy is None:
2710 2778 if self._generaldelta and self._lazydeltabase:
2711 2779 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2712 2780 else:
2713 2781 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2714 2782
2715 2783 self._adding_group = True
2716 2784 empty = True
2717 2785 try:
2718 2786 with self._writing(transaction):
2719 2787 write_debug = None
2720 2788 if self._debug_delta:
2721 2789 write_debug = transaction._report
2722 2790 deltacomputer = deltautil.deltacomputer(
2723 2791 self,
2724 2792 write_debug=write_debug,
2725 2793 debug_info=debug_info,
2726 2794 )
2727 2795 # loop through our set of deltas
2728 2796 for data in deltas:
2729 2797 (
2730 2798 node,
2731 2799 p1,
2732 2800 p2,
2733 2801 linknode,
2734 2802 deltabase,
2735 2803 delta,
2736 2804 flags,
2737 2805 sidedata,
2738 2806 ) = data
2739 2807 link = linkmapper(linknode)
2740 2808 flags = flags or REVIDX_DEFAULT_FLAGS
2741 2809
2742 2810 rev = self.index.get_rev(node)
2743 2811 if rev is not None:
2744 2812 # this can happen if two branches make the same change
2745 2813 self._nodeduplicatecallback(transaction, rev)
2746 2814 if duplicaterevisioncb:
2747 2815 duplicaterevisioncb(self, rev)
2748 2816 empty = False
2749 2817 continue
2750 2818
2751 2819 for p in (p1, p2):
2752 2820 if not self.index.has_node(p):
2753 2821 raise error.LookupError(
2754 2822 p, self.radix, _(b'unknown parent')
2755 2823 )
2756 2824
2757 2825 if not self.index.has_node(deltabase):
2758 2826 raise error.LookupError(
2759 2827 deltabase, self.display_id, _(b'unknown delta base')
2760 2828 )
2761 2829
2762 2830 baserev = self.rev(deltabase)
2763 2831
2764 2832 if baserev != nullrev and self.iscensored(baserev):
2765 2833 # if base is censored, delta must be full replacement in a
2766 2834 # single patch operation
2767 2835 hlen = struct.calcsize(b">lll")
2768 2836 oldlen = self.rawsize(baserev)
2769 2837 newlen = len(delta) - hlen
2770 2838 if delta[:hlen] != mdiff.replacediffheader(
2771 2839 oldlen, newlen
2772 2840 ):
2773 2841 raise error.CensoredBaseError(
2774 2842 self.display_id, self.node(baserev)
2775 2843 )
2776 2844
2777 2845 if not flags and self._peek_iscensored(baserev, delta):
2778 2846 flags |= REVIDX_ISCENSORED
2779 2847
2780 2848 # We assume consumers of addrevisioncb will want to retrieve
2781 2849 # the added revision, which will require a call to
2782 2850 # revision(). revision() will fast path if there is a cache
2783 2851 # hit. So, we tell _addrevision() to always cache in this case.
2784 2852 # We're only using addgroup() in the context of changegroup
2785 2853 # generation so the revision data can always be handled as raw
2786 2854 # by the flagprocessor.
2787 2855 rev = self._addrevision(
2788 2856 node,
2789 2857 None,
2790 2858 transaction,
2791 2859 link,
2792 2860 p1,
2793 2861 p2,
2794 2862 flags,
2795 2863 (baserev, delta, delta_base_reuse_policy),
2796 2864 alwayscache=alwayscache,
2797 2865 deltacomputer=deltacomputer,
2798 2866 sidedata=sidedata,
2799 2867 )
2800 2868
2801 2869 if addrevisioncb:
2802 2870 addrevisioncb(self, rev)
2803 2871 empty = False
2804 2872 finally:
2805 2873 self._adding_group = False
2806 2874 return not empty
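# Illustrative sketch (not part of this change): the shape of the entries
# consumed by addgroup(), matching the unpacking above; every name below is
# hypothetical:
#
#     deltas = [
#         (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
#     ]
#     rl.addgroup(deltas, linkmapper, tr)    # linkmapper(linknode) -> linkrev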
2807 2875
2808 2876 def iscensored(self, rev):
2809 2877 """Check if a file revision is censored."""
2810 2878 if not self._censorable:
2811 2879 return False
2812 2880
2813 2881 return self.flags(rev) & REVIDX_ISCENSORED
2814 2882
2815 2883 def _peek_iscensored(self, baserev, delta):
2816 2884 """Quickly check if a delta produces a censored revision."""
2817 2885 if not self._censorable:
2818 2886 return False
2819 2887
2820 2888 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2821 2889
2822 2890 def getstrippoint(self, minlink):
2823 2891 """find the minimum rev that must be stripped to strip the linkrev
2824 2892
2825 2893 Returns a tuple containing the minimum rev and a set of all revs that
2826 2894 have linkrevs that will be broken by this strip.
2827 2895 """
2828 2896 return storageutil.resolvestripinfo(
2829 2897 minlink,
2830 2898 len(self) - 1,
2831 2899 self.headrevs(),
2832 2900 self.linkrev,
2833 2901 self.parentrevs,
2834 2902 )
2835 2903
2836 2904 def strip(self, minlink, transaction):
2837 2905 """truncate the revlog on the first revision with a linkrev >= minlink
2838 2906
2839 2907 This function is called when we're stripping revision minlink and
2840 2908 its descendants from the repository.
2841 2909
2842 2910 We have to remove all revisions with linkrev >= minlink, because
2843 2911 the equivalent changelog revisions will be renumbered after the
2844 2912 strip.
2845 2913
2846 2914 So we truncate the revlog on the first of these revisions, and
2847 2915 trust that the caller has saved the revisions that shouldn't be
2848 2916 removed and that it'll re-add them after this truncation.
2849 2917 """
2850 2918 if len(self) == 0:
2851 2919 return
2852 2920
2853 2921 rev, _ = self.getstrippoint(minlink)
2854 2922 if rev == len(self):
2855 2923 return
2856 2924
2857 2925 # first truncate the files on disk
2858 2926 data_end = self.start(rev)
2859 2927 if not self._inline:
2860 2928 transaction.add(self._datafile, data_end)
2861 2929 end = rev * self.index.entry_size
2862 2930 else:
2863 2931 end = data_end + (rev * self.index.entry_size)
2864 2932
2865 2933 if self._sidedatafile:
2866 2934 sidedata_end = self.sidedata_cut_off(rev)
2867 2935 transaction.add(self._sidedatafile, sidedata_end)
2868 2936
2869 2937 transaction.add(self._indexfile, end)
2870 2938 if self._docket is not None:
2871 2939 # XXX we could leverage the docket while stripping. However, it is
2872 2940 # not powerful enough at the time of this comment
2873 2941 self._docket.index_end = end
2874 2942 self._docket.data_end = data_end
2875 2943 self._docket.sidedata_end = sidedata_end
2876 2944 self._docket.write(transaction, stripping=True)
2877 2945
2878 2946 # then reset internal state in memory to forget those revisions
2879 2947 self._revisioncache = None
2880 2948 self._chaininfocache = util.lrucachedict(500)
2881 2949 self._segmentfile.clear_cache()
2882 2950 self._segmentfile_sidedata.clear_cache()
2883 2951
2884 2952 del self.index[rev:-1]
2885 2953
2886 2954 def checksize(self):
2887 2955 """Check size of index and data files
2888 2956
2889 2957 return a (dd, di) tuple.
2890 2958 - dd: extra bytes for the "data" file
2891 2959 - di: extra bytes for the "index" file
2892 2960
2893 2961 A healthy revlog will return (0, 0).
2894 2962 """
2895 2963 expected = 0
2896 2964 if len(self):
2897 2965 expected = max(0, self.end(len(self) - 1))
2898 2966
2899 2967 try:
2900 2968 with self._datafp() as f:
2901 2969 f.seek(0, io.SEEK_END)
2902 2970 actual = f.tell()
2903 2971 dd = actual - expected
2904 2972 except FileNotFoundError:
2905 2973 dd = 0
2906 2974
2907 2975 try:
2908 2976 f = self.opener(self._indexfile)
2909 2977 f.seek(0, io.SEEK_END)
2910 2978 actual = f.tell()
2911 2979 f.close()
2912 2980 s = self.index.entry_size
2913 2981 i = max(0, actual // s)
2914 2982 di = actual - (i * s)
2915 2983 if self._inline:
2916 2984 databytes = 0
2917 2985 for r in self:
2918 2986 databytes += max(0, self.length(r))
2919 2987 dd = 0
2920 2988 di = actual - len(self) * s - databytes
2921 2989 except FileNotFoundError:
2922 2990 di = 0
2923 2991
2924 2992 return (dd, di)
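# Illustrative sketch (not part of this change): interpreting checksize(), as
# done by verifyintegrity() below:
#
#     dd, di = rl.checksize()
#     if dd or di:
#         print('%d / %d extra bytes in data / index file' % (dd, di))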
2925 2993
2926 2994 def files(self):
2927 2995 res = [self._indexfile]
2928 2996 if self._docket_file is None:
2929 2997 if not self._inline:
2930 2998 res.append(self._datafile)
2931 2999 else:
2932 3000 res.append(self._docket_file)
2933 3001 res.extend(self._docket.old_index_filepaths(include_empty=False))
2934 3002 if self._docket.data_end:
2935 3003 res.append(self._datafile)
2936 3004 res.extend(self._docket.old_data_filepaths(include_empty=False))
2937 3005 if self._docket.sidedata_end:
2938 3006 res.append(self._sidedatafile)
2939 3007 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2940 3008 return res
2941 3009
2942 3010 def emitrevisions(
2943 3011 self,
2944 3012 nodes,
2945 3013 nodesorder=None,
2946 3014 revisiondata=False,
2947 3015 assumehaveparentrevisions=False,
2948 3016 deltamode=repository.CG_DELTAMODE_STD,
2949 3017 sidedata_helpers=None,
2950 3018 debug_info=None,
2951 3019 ):
2952 3020 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2953 3021 raise error.ProgrammingError(
2954 3022 b'unhandled value for nodesorder: %s' % nodesorder
2955 3023 )
2956 3024
2957 3025 if nodesorder is None and not self._generaldelta:
2958 3026 nodesorder = b'storage'
2959 3027
2960 3028 if (
2961 3029 not self._storedeltachains
2962 3030 and deltamode != repository.CG_DELTAMODE_PREV
2963 3031 ):
2964 3032 deltamode = repository.CG_DELTAMODE_FULL
2965 3033
2966 3034 return storageutil.emitrevisions(
2967 3035 self,
2968 3036 nodes,
2969 3037 nodesorder,
2970 3038 revlogrevisiondelta,
2971 3039 deltaparentfn=self.deltaparent,
2972 3040 candeltafn=self.candelta,
2973 3041 rawsizefn=self.rawsize,
2974 3042 revdifffn=self.revdiff,
2975 3043 flagsfn=self.flags,
2976 3044 deltamode=deltamode,
2977 3045 revisiondata=revisiondata,
2978 3046 assumehaveparentrevisions=assumehaveparentrevisions,
2979 3047 sidedata_helpers=sidedata_helpers,
2980 3048 debug_info=debug_info,
2981 3049 )
2982 3050
2983 3051 DELTAREUSEALWAYS = b'always'
2984 3052 DELTAREUSESAMEREVS = b'samerevs'
2985 3053 DELTAREUSENEVER = b'never'
2986 3054
2987 3055 DELTAREUSEFULLADD = b'fulladd'
2988 3056
2989 3057 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2990 3058
2991 3059 def clone(
2992 3060 self,
2993 3061 tr,
2994 3062 destrevlog,
2995 3063 addrevisioncb=None,
2996 3064 deltareuse=DELTAREUSESAMEREVS,
2997 3065 forcedeltabothparents=None,
2998 3066 sidedata_helpers=None,
2999 3067 ):
3000 3068 """Copy this revlog to another, possibly with format changes.
3001 3069
3002 3070 The destination revlog will contain the same revisions and nodes.
3003 3071 However, it may not be bit-for-bit identical due to e.g. delta encoding
3004 3072 differences.
3005 3073
3006 3074 The ``deltareuse`` argument controls how deltas from the existing revlog
3007 3075 are preserved in the destination revlog. The argument can have the
3008 3076 following values:
3009 3077
3010 3078 DELTAREUSEALWAYS
3011 3079 Deltas will always be reused (if possible), even if the destination
3012 3080 revlog would not select the same revisions for the delta. This is the
3013 3081 fastest mode of operation.
3014 3082 DELTAREUSESAMEREVS
3015 3083 Deltas will be reused if the destination revlog would pick the same
3016 3084 revisions for the delta. This mode strikes a balance between speed
3017 3085 and optimization.
3018 3086 DELTAREUSENEVER
3019 3087 Deltas will never be reused. This is the slowest mode of execution.
3020 3088 This mode can be used to recompute deltas (e.g. if the diff/delta
3021 3089 algorithm changes).
3022 3090 DELTAREUSEFULLADD
3023 3091 Revisions will be re-added as if they were new content. This is
3024 3092 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3025 3093 e.g. large file detection and handling.
3026 3094
3027 3095 Delta computation can be slow, so the choice of delta reuse policy can
3028 3096 significantly affect run time.
3029 3097
3030 3098 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3031 3099 two extremes. Deltas will be reused if they are appropriate. But if the
3032 3100 destination could choose a better delta base, it will do so. This means if you
3033 3101 are converting a non-generaldelta revlog to a generaldelta revlog,
3034 3102 deltas will be recomputed if the delta's parent isn't a parent of the
3035 3103 revision.
3036 3104
3037 3105 In addition to the delta policy, the ``forcedeltabothparents``
3038 3106 argument controls whether to force computing deltas against both parents
3039 3107 for merges. If unset, the destination revlog's current setting is kept.
3040 3108
3041 3109 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3042 3110 `sidedata_helpers`.
3043 3111 """
3044 3112 if deltareuse not in self.DELTAREUSEALL:
3045 3113 raise ValueError(
3046 3114 _(b'value for deltareuse invalid: %s') % deltareuse
3047 3115 )
3048 3116
3049 3117 if len(destrevlog):
3050 3118 raise ValueError(_(b'destination revlog is not empty'))
3051 3119
3052 3120 if getattr(self, 'filteredrevs', None):
3053 3121 raise ValueError(_(b'source revlog has filtered revisions'))
3054 3122 if getattr(destrevlog, 'filteredrevs', None):
3055 3123 raise ValueError(_(b'destination revlog has filtered revisions'))
3056 3124
3057 3125 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3058 3126 # if possible.
3059 3127 oldlazydelta = destrevlog._lazydelta
3060 3128 oldlazydeltabase = destrevlog._lazydeltabase
3061 3129 oldamd = destrevlog._deltabothparents
3062 3130
3063 3131 try:
3064 3132 if deltareuse == self.DELTAREUSEALWAYS:
3065 3133 destrevlog._lazydeltabase = True
3066 3134 destrevlog._lazydelta = True
3067 3135 elif deltareuse == self.DELTAREUSESAMEREVS:
3068 3136 destrevlog._lazydeltabase = False
3069 3137 destrevlog._lazydelta = True
3070 3138 elif deltareuse == self.DELTAREUSENEVER:
3071 3139 destrevlog._lazydeltabase = False
3072 3140 destrevlog._lazydelta = False
3073 3141
3074 3142 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3075 3143
3076 3144 self._clone(
3077 3145 tr,
3078 3146 destrevlog,
3079 3147 addrevisioncb,
3080 3148 deltareuse,
3081 3149 forcedeltabothparents,
3082 3150 sidedata_helpers,
3083 3151 )
3084 3152
3085 3153 finally:
3086 3154 destrevlog._lazydelta = oldlazydelta
3087 3155 destrevlog._lazydeltabase = oldlazydeltabase
3088 3156 destrevlog._deltabothparents = oldamd
3089 3157
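(Illustrative aside, not part of this change: a caller that already holds an open transaction, a populated source revlog and an empty destination revlog might drive ``clone`` as sketched below. The ``src_rl``, ``dst_rl`` and ``tr`` names are placeholders for objects provided by the surrounding code.)

    def copy_revlog(src_rl, dst_rl, tr):
        # Reuse deltas only when the destination revlog would pick the same
        # delta base; per the docstring above this balances speed and size.
        src_rl.clone(
            tr,
            dst_rl,
            addrevisioncb=None,  # or a callback taking (revlog, rev, node)
            deltareuse=src_rl.DELTAREUSESAMEREVS,
        )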
3090 3158 def _clone(
3091 3159 self,
3092 3160 tr,
3093 3161 destrevlog,
3094 3162 addrevisioncb,
3095 3163 deltareuse,
3096 3164 forcedeltabothparents,
3097 3165 sidedata_helpers,
3098 3166 ):
3099 3167 """perform the core duty of `revlog.clone` after parameter processing"""
3100 3168 write_debug = None
3101 3169 if self._debug_delta:
3102 3170 write_debug = tr._report
3103 3171 deltacomputer = deltautil.deltacomputer(
3104 3172 destrevlog,
3105 3173 write_debug=write_debug,
3106 3174 )
3107 3175 index = self.index
3108 3176 for rev in self:
3109 3177 entry = index[rev]
3110 3178
3111 3179 # Some classes override linkrev to take filtered revs into
3112 3180 # account. Use raw entry from index.
3113 3181 flags = entry[0] & 0xFFFF
3114 3182 linkrev = entry[4]
3115 3183 p1 = index[entry[5]][7]
3116 3184 p2 = index[entry[6]][7]
3117 3185 node = entry[7]
3118 3186
3119 3187 # (Possibly) reuse the delta from the revlog if allowed and
3120 3188 # the revlog chunk is a delta.
3121 3189 cachedelta = None
3122 3190 rawtext = None
3123 3191 if deltareuse == self.DELTAREUSEFULLADD:
3124 3192 text = self._revisiondata(rev)
3125 3193 sidedata = self.sidedata(rev)
3126 3194
3127 3195 if sidedata_helpers is not None:
3128 3196 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3129 3197 self, sidedata_helpers, sidedata, rev
3130 3198 )
3131 3199 flags = flags | new_flags[0] & ~new_flags[1]
3132 3200
3133 3201 destrevlog.addrevision(
3134 3202 text,
3135 3203 tr,
3136 3204 linkrev,
3137 3205 p1,
3138 3206 p2,
3139 3207 cachedelta=cachedelta,
3140 3208 node=node,
3141 3209 flags=flags,
3142 3210 deltacomputer=deltacomputer,
3143 3211 sidedata=sidedata,
3144 3212 )
3145 3213 else:
3146 3214 if destrevlog._lazydelta:
3147 3215 dp = self.deltaparent(rev)
3148 3216 if dp != nullrev:
3149 3217 cachedelta = (dp, bytes(self._chunk(rev)))
3150 3218
3151 3219 sidedata = None
3152 3220 if not cachedelta:
3153 3221 rawtext = self._revisiondata(rev)
3154 3222 sidedata = self.sidedata(rev)
3155 3223 if sidedata is None:
3156 3224 sidedata = self.sidedata(rev)
3157 3225
3158 3226 if sidedata_helpers is not None:
3159 3227 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3160 3228 self, sidedata_helpers, sidedata, rev
3161 3229 )
3162 3230 flags = flags | new_flags[0] & ~new_flags[1]
3163 3231
3164 3232 with destrevlog._writing(tr):
3165 3233 destrevlog._addrevision(
3166 3234 node,
3167 3235 rawtext,
3168 3236 tr,
3169 3237 linkrev,
3170 3238 p1,
3171 3239 p2,
3172 3240 flags,
3173 3241 cachedelta,
3174 3242 deltacomputer=deltacomputer,
3175 3243 sidedata=sidedata,
3176 3244 )
3177 3245
3178 3246 if addrevisioncb:
3179 3247 addrevisioncb(self, rev, node)
3180 3248
3181 3249 def censorrevision(self, tr, censornode, tombstone=b''):
3182 3250 if self._format_version == REVLOGV0:
3183 3251 raise error.RevlogError(
3184 3252 _(b'cannot censor with version %d revlogs')
3185 3253 % self._format_version
3186 3254 )
3187 3255 elif self._format_version == REVLOGV1:
3188 3256 rewrite.v1_censor(self, tr, censornode, tombstone)
3189 3257 else:
3190 3258 rewrite.v2_censor(self, tr, censornode, tombstone)
3191 3259
3192 3260 def verifyintegrity(self, state):
3193 3261 """Verifies the integrity of the revlog.
3194 3262
3195 3263 Yields ``revlogproblem`` instances describing problems that are
3196 3264 found.
3197 3265 """
3198 3266 dd, di = self.checksize()
3199 3267 if dd:
3200 3268 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3201 3269 if di:
3202 3270 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3203 3271
3204 3272 version = self._format_version
3205 3273
3206 3274 # The verifier tells us what version revlog we should be.
3207 3275 if version != state[b'expectedversion']:
3208 3276 yield revlogproblem(
3209 3277 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3210 3278 % (self.display_id, version, state[b'expectedversion'])
3211 3279 )
3212 3280
3213 3281 state[b'skipread'] = set()
3214 3282 state[b'safe_renamed'] = set()
3215 3283
3216 3284 for rev in self:
3217 3285 node = self.node(rev)
3218 3286
3219 3287 # Verify contents. 4 cases to care about:
3220 3288 #
3221 3289 # common: the most common case
3222 3290 # rename: with a rename
3223 3291 # meta: file content starts with b'\1\n', the metadata
3224 3292 # header defined in filelog.py, but without a rename
3225 3293 # ext: content stored externally
3226 3294 #
3227 3295 # More formally, their differences are shown below:
3228 3296 #
3229 3297 # | common | rename | meta | ext
3230 3298 # -------------------------------------------------------
3231 3299 # flags() | 0 | 0 | 0 | not 0
3232 3300 # renamed() | False | True | False | ?
3233 3301 # rawtext[0:2]=='\1\n'| False | True | True | ?
3234 3302 #
3235 3303 # "rawtext" means the raw text stored in revlog data, which
3236 3304 # could be retrieved by "rawdata(rev)". "text"
3237 3305 # mentioned below is "revision(rev)".
3238 3306 #
3239 3307 # There are 3 different lengths stored physically:
3240 3308 # 1. L1: rawsize, stored in revlog index
3241 3309 # 2. L2: len(rawtext), stored in revlog data
3242 3310 # 3. L3: len(text), stored in revlog data if flags==0, or
3243 3311 # possibly somewhere else if flags!=0
3244 3312 #
3245 3313 # L1 should be equal to L2. L3 could be different from them.
3246 3314 # "text" may or may not affect commit hash depending on flag
3247 3315 # processors (see flagutil.addflagprocessor).
3248 3316 #
3249 3317 # | common | rename | meta | ext
3250 3318 # -------------------------------------------------
3251 3319 # rawsize() | L1 | L1 | L1 | L1
3252 3320 # size() | L1 | L2-LM | L1(*) | L1 (?)
3253 3321 # len(rawtext) | L2 | L2 | L2 | L2
3254 3322 # len(text) | L2 | L2 | L2 | L3
3255 3323 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3256 3324 #
3257 3325 # LM: length of metadata, depending on rawtext
3258 3326 # (*): not ideal, see comment in filelog.size
3259 3327 # (?): could be "- len(meta)" if the resolved content has
3260 3328 # rename metadata
3261 3329 #
3262 3330 # Checks needed to be done:
3263 3331 # 1. length check: L1 == L2, in all cases.
3264 3332 # 2. hash check: depending on flag processor, we may need to
3265 3333 # use either "text" (external), or "rawtext" (in revlog).
3266 3334
3267 3335 try:
3268 3336 skipflags = state.get(b'skipflags', 0)
3269 3337 if skipflags:
3270 3338 skipflags &= self.flags(rev)
3271 3339
3272 3340 _verify_revision(self, skipflags, state, node)
3273 3341
3274 3342 l1 = self.rawsize(rev)
3275 3343 l2 = len(self.rawdata(node))
3276 3344
3277 3345 if l1 != l2:
3278 3346 yield revlogproblem(
3279 3347 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3280 3348 node=node,
3281 3349 )
3282 3350
3283 3351 except error.CensoredNodeError:
3284 3352 if state[b'erroroncensored']:
3285 3353 yield revlogproblem(
3286 3354 error=_(b'censored file data'), node=node
3287 3355 )
3288 3356 state[b'skipread'].add(node)
3289 3357 except Exception as e:
3290 3358 yield revlogproblem(
3291 3359 error=_(b'unpacking %s: %s')
3292 3360 % (short(node), stringutil.forcebytestr(e)),
3293 3361 node=node,
3294 3362 )
3295 3363 state[b'skipread'].add(node)
3296 3364
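(Illustrative aside: how a verifier might consume the problems yielded by ``verifyintegrity``. The ``state`` keys and the ``revlogproblem`` attributes used here are the ones read and produced in the method above; ``rl`` and ``ui`` are placeholders for a revlog and a ui object.)

    def report_problems(rl, ui, expected_version):
        state = {
            b'expectedversion': expected_version,
            b'erroroncensored': True,
        }
        for problem in rl.verifyintegrity(state):
            if problem.warning:
                ui.warn(problem.warning + b'\n')
            elif problem.error:
                ui.warn(b'error: %s\n' % problem.error)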
3297 3365 def storageinfo(
3298 3366 self,
3299 3367 exclusivefiles=False,
3300 3368 sharedfiles=False,
3301 3369 revisionscount=False,
3302 3370 trackedsize=False,
3303 3371 storedsize=False,
3304 3372 ):
3305 3373 d = {}
3306 3374
3307 3375 if exclusivefiles:
3308 3376 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3309 3377 if not self._inline:
3310 3378 d[b'exclusivefiles'].append((self.opener, self._datafile))
3311 3379
3312 3380 if sharedfiles:
3313 3381 d[b'sharedfiles'] = []
3314 3382
3315 3383 if revisionscount:
3316 3384 d[b'revisionscount'] = len(self)
3317 3385
3318 3386 if trackedsize:
3319 3387 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3320 3388
3321 3389 if storedsize:
3322 3390 d[b'storedsize'] = sum(
3323 3391 self.opener.stat(path).st_size for path in self.files()
3324 3392 )
3325 3393
3326 3394 return d
3327 3395
3328 3396 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3329 3397 if not self.hassidedata:
3330 3398 return
3331 3399 # revlog formats with sidedata support do not support inline data
3332 3400 assert not self._inline
3333 3401 if not helpers[1] and not helpers[2]:
3334 3402 # Nothing to generate or remove
3335 3403 return
3336 3404
3337 3405 new_entries = []
3338 3406 # append the new sidedata
3339 3407 with self._writing(transaction):
3340 3408 ifh, dfh, sdfh = self._writinghandles
3341 3409 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3342 3410
3343 3411 current_offset = sdfh.tell()
3344 3412 for rev in range(startrev, endrev + 1):
3345 3413 entry = self.index[rev]
3346 3414 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3347 3415 store=self,
3348 3416 sidedata_helpers=helpers,
3349 3417 sidedata={},
3350 3418 rev=rev,
3351 3419 )
3352 3420
3353 3421 serialized_sidedata = sidedatautil.serialize_sidedata(
3354 3422 new_sidedata
3355 3423 )
3356 3424
3357 3425 sidedata_compression_mode = COMP_MODE_INLINE
3358 3426 if serialized_sidedata and self.hassidedata:
3359 3427 sidedata_compression_mode = COMP_MODE_PLAIN
3360 3428 h, comp_sidedata = self.compress(serialized_sidedata)
3361 3429 if (
3362 3430 h != b'u'
3363 3431 and comp_sidedata[0] != b'\0'
3364 3432 and len(comp_sidedata) < len(serialized_sidedata)
3365 3433 ):
3366 3434 assert not h
3367 3435 if (
3368 3436 comp_sidedata[0]
3369 3437 == self._docket.default_compression_header
3370 3438 ):
3371 3439 sidedata_compression_mode = COMP_MODE_DEFAULT
3372 3440 serialized_sidedata = comp_sidedata
3373 3441 else:
3374 3442 sidedata_compression_mode = COMP_MODE_INLINE
3375 3443 serialized_sidedata = comp_sidedata
3376 3444 if entry[8] != 0 or entry[9] != 0:
3377 3445 # rewriting entries that already have sidedata is not
3378 3446 # supported yet, because it introduces garbage data in the
3379 3447 # revlog.
3380 3448 msg = b"rewriting existing sidedata is not supported yet"
3381 3449 raise error.Abort(msg)
3382 3450
3383 3451 # Apply (potential) flags to add and to remove after running
3384 3452 # the sidedata helpers
3385 3453 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3386 3454 entry_update = (
3387 3455 current_offset,
3388 3456 len(serialized_sidedata),
3389 3457 new_offset_flags,
3390 3458 sidedata_compression_mode,
3391 3459 )
3392 3460
3393 3461 # the sidedata computation might have moved the file cursors around
3394 3462 sdfh.seek(current_offset, os.SEEK_SET)
3395 3463 sdfh.write(serialized_sidedata)
3396 3464 new_entries.append(entry_update)
3397 3465 current_offset += len(serialized_sidedata)
3398 3466 self._docket.sidedata_end = sdfh.tell()
3399 3467
3400 3468 # rewrite the new index entries
3401 3469 ifh.seek(startrev * self.index.entry_size)
3402 3470 for i, e in enumerate(new_entries):
3403 3471 rev = startrev + i
3404 3472 self.index.replace_sidedata_info(rev, *e)
3405 3473 packed = self.index.entry_binary(rev)
3406 3474 if rev == 0 and self._docket is None:
3407 3475 header = self._format_flags | self._format_version
3408 3476 header = self.index.pack_header(header)
3409 3477 packed = header + packed
3410 3478 ifh.write(packed)
@@ -1,1159 +1,1216 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator, List
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 filelog,
23 23 manifest,
24 24 policy,
25 25 pycompat,
26 26 util,
27 27 vfs as vfsmod,
28 28 )
29 29 from .utils import hashutil
30 30
31 31 parsers = policy.importmod('parsers')
32 32 # how many bytes should be read from fncache in one read
33 33 # This is done to prevent loading large fncache files into memory
34 34 fncache_chunksize = 10 ** 6
35 35
36 36
37 37 def _match_tracked_entry(entry, matcher):
38 38 """parses a fncache entry and returns whether the entry is tracking a path
39 39 matched by matcher or not.
40 40
41 41 If matcher is None, returns True"""
42 42
43 43 if matcher is None:
44 44 return True
45 45 if entry.is_filelog:
46 46 return matcher(entry.target_id)
47 47 elif entry.is_manifestlog:
48 48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50 50
51 51
52 52 # This avoids a collision between a file named foo and a dir named
53 53 # foo.i or foo.d
54 54 def _encodedir(path):
55 55 """
56 56 >>> _encodedir(b'data/foo.i')
57 57 'data/foo.i'
58 58 >>> _encodedir(b'data/foo.i/bla.i')
59 59 'data/foo.i.hg/bla.i'
60 60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 61 'data/foo.i.hg.hg/bla.i'
62 62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 64 """
65 65 return (
66 66 path.replace(b".hg/", b".hg.hg/")
67 67 .replace(b".i/", b".i.hg/")
68 68 .replace(b".d/", b".d.hg/")
69 69 )
70 70
71 71
72 72 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 73
74 74
75 75 def decodedir(path):
76 76 """
77 77 >>> decodedir(b'data/foo.i')
78 78 'data/foo.i'
79 79 >>> decodedir(b'data/foo.i.hg/bla.i')
80 80 'data/foo.i/bla.i'
81 81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 82 'data/foo.i.hg/bla.i'
83 83 """
84 84 if b".hg/" not in path:
85 85 return path
86 86 return (
87 87 path.replace(b".d.hg/", b".d/")
88 88 .replace(b".i.hg/", b".i/")
89 89 .replace(b".hg.hg/", b".hg/")
90 90 )
91 91
92 92
93 93 def _reserved():
94 94 """characters that are problematic for filesystems
95 95
96 96 * ascii escapes (0..31)
97 97 * ascii hi (126..255)
98 98 * windows specials
99 99
100 100 these characters will be escaped by the encode functions
101 101 """
102 102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 103 for x in range(32):
104 104 yield x
105 105 for x in range(126, 256):
106 106 yield x
107 107 for x in winreserved:
108 108 yield x
109 109
110 110
111 111 def _buildencodefun():
112 112 """
113 113 >>> enc, dec = _buildencodefun()
114 114
115 115 >>> enc(b'nothing/special.txt')
116 116 'nothing/special.txt'
117 117 >>> dec(b'nothing/special.txt')
118 118 'nothing/special.txt'
119 119
120 120 >>> enc(b'HELLO')
121 121 '_h_e_l_l_o'
122 122 >>> dec(b'_h_e_l_l_o')
123 123 'HELLO'
124 124
125 125 >>> enc(b'hello:world?')
126 126 'hello~3aworld~3f'
127 127 >>> dec(b'hello~3aworld~3f')
128 128 'hello:world?'
129 129
130 130 >>> enc(b'the\\x07quick\\xADshot')
131 131 'the~07quick~adshot'
132 132 >>> dec(b'the~07quick~adshot')
133 133 'the\\x07quick\\xadshot'
134 134 """
135 135 e = b'_'
136 136 xchr = pycompat.bytechr
137 137 asciistr = list(map(xchr, range(127)))
138 138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139 139
140 140 cmap = {x: x for x in asciistr}
141 141 for x in _reserved():
142 142 cmap[xchr(x)] = b"~%02x" % x
143 143 for x in capitals + [ord(e)]:
144 144 cmap[xchr(x)] = e + xchr(x).lower()
145 145
146 146 dmap = {}
147 147 for k, v in cmap.items():
148 148 dmap[v] = k
149 149
150 150 def decode(s):
151 151 i = 0
152 152 while i < len(s):
153 153 for l in range(1, 4):
154 154 try:
155 155 yield dmap[s[i : i + l]]
156 156 i += l
157 157 break
158 158 except KeyError:
159 159 pass
160 160 else:
161 161 raise KeyError
162 162
163 163 return (
164 164 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
165 165 lambda s: b''.join(list(decode(s))),
166 166 )
167 167
168 168
169 169 _encodefname, _decodefname = _buildencodefun()
170 170
171 171
172 172 def encodefilename(s):
173 173 """
174 174 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
175 175 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
176 176 """
177 177 return _encodefname(encodedir(s))
178 178
179 179
180 180 def decodefilename(s):
181 181 """
182 182 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
183 183 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
184 184 """
185 185 return decodedir(_decodefname(s))
186 186
187 187
188 188 def _buildlowerencodefun():
189 189 """
190 190 >>> f = _buildlowerencodefun()
191 191 >>> f(b'nothing/special.txt')
192 192 'nothing/special.txt'
193 193 >>> f(b'HELLO')
194 194 'hello'
195 195 >>> f(b'hello:world?')
196 196 'hello~3aworld~3f'
197 197 >>> f(b'the\\x07quick\\xADshot')
198 198 'the~07quick~adshot'
199 199 """
200 200 xchr = pycompat.bytechr
201 201 cmap = {xchr(x): xchr(x) for x in range(127)}
202 202 for x in _reserved():
203 203 cmap[xchr(x)] = b"~%02x" % x
204 204 for x in range(ord(b"A"), ord(b"Z") + 1):
205 205 cmap[xchr(x)] = xchr(x).lower()
206 206
207 207 def lowerencode(s):
208 208 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
209 209
210 210 return lowerencode
211 211
212 212
213 213 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214 214
215 215 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 216 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 217 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 218
219 219
220 220 def _auxencode(path, dotencode):
221 221 """
222 222 Encodes filenames containing names reserved by Windows or which end in
223 223 period or space. Does not touch other single reserved characters c.
224 224 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 225 Additionally encodes space or period at the beginning, if dotencode is
226 226 True. Parameter path is assumed to be all lowercase.
227 227 A segment only needs encoding if a reserved name appears as a
228 228 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 229 doesn't need encoding.
230 230
231 231 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 232 >>> _auxencode(s.split(b'/'), True)
233 233 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 234 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 235 >>> _auxencode(s.split(b'/'), False)
236 236 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 237 >>> _auxencode([b'foo. '], True)
238 238 ['foo.~20']
239 239 >>> _auxencode([b' .foo'], True)
240 240 ['~20.foo']
241 241 """
242 242 for i, n in enumerate(path):
243 243 if not n:
244 244 continue
245 245 if dotencode and n[0] in b'. ':
246 246 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 247 path[i] = n
248 248 else:
249 249 l = n.find(b'.')
250 250 if l == -1:
251 251 l = len(n)
252 252 if (l == 3 and n[:3] in _winres3) or (
253 253 l == 4
254 254 and n[3:4] <= b'9'
255 255 and n[3:4] >= b'1'
256 256 and n[:3] in _winres4
257 257 ):
258 258 # encode third letter ('aux' -> 'au~78')
259 259 ec = b"~%02x" % ord(n[2:3])
260 260 n = n[0:2] + ec + n[3:]
261 261 path[i] = n
262 262 if n[-1] in b'. ':
263 263 # encode last period or space ('foo...' -> 'foo..~2e')
264 264 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 265 return path
266 266
267 267
268 268 _maxstorepathlen = 120
269 269 _dirprefixlen = 8
270 270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 271
272 272
273 273 def _hashencode(path, dotencode):
274 274 digest = hex(hashutil.sha1(path).digest())
275 275 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
276 276 parts = _auxencode(le, dotencode)
277 277 basename = parts[-1]
278 278 _root, ext = os.path.splitext(basename)
279 279 sdirs = []
280 280 sdirslen = 0
281 281 for p in parts[:-1]:
282 282 d = p[:_dirprefixlen]
283 283 if d[-1] in b'. ':
284 284 # Windows can't access dirs ending in period or space
285 285 d = d[:-1] + b'_'
286 286 if sdirslen == 0:
287 287 t = len(d)
288 288 else:
289 289 t = sdirslen + 1 + len(d)
290 290 if t > _maxshortdirslen:
291 291 break
292 292 sdirs.append(d)
293 293 sdirslen = t
294 294 dirs = b'/'.join(sdirs)
295 295 if len(dirs) > 0:
296 296 dirs += b'/'
297 297 res = b'dh/' + dirs + digest + ext
298 298 spaceleft = _maxstorepathlen - len(res)
299 299 if spaceleft > 0:
300 300 filler = basename[:spaceleft]
301 301 res = b'dh/' + dirs + filler + digest + ext
302 302 return res
303 303
304 304
305 305 def _hybridencode(path, dotencode):
306 306 """encodes path with a length limit
307 307
308 308 Encodes all paths that begin with 'data/', according to the following.
309 309
310 310 Default encoding (reversible):
311 311
312 312 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
313 313 characters are encoded as '~xx', where xx is the two digit hex code
314 314 of the character (see encodefilename).
315 315 Relevant path components consisting of Windows reserved filenames are
316 316 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
317 317
318 318 Hashed encoding (not reversible):
319 319
320 320 If the default-encoded path is longer than _maxstorepathlen, a
321 321 non-reversible hybrid hashing of the path is done instead.
322 322 This encoding uses up to _dirprefixlen characters of all directory
323 323 levels of the lowerencoded path, but not more levels than can fit into
324 324 _maxshortdirslen.
325 325 Then follows the filler followed by the sha digest of the full path.
326 326 The filler is the beginning of the basename of the lowerencoded path
327 327 (the basename is everything after the last path separator). The filler
328 328 is as long as possible, filling in characters from the basename until
329 329 the encoded path has _maxstorepathlen characters (or all chars of the
330 330 basename have been taken).
331 331 The extension (e.g. '.i' or '.d') is preserved.
332 332
333 333 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
334 334 encoding was used.
335 335 """
336 336 path = encodedir(path)
337 337 ef = _encodefname(path).split(b'/')
338 338 res = b'/'.join(_auxencode(ef, dotencode))
339 339 if len(res) > _maxstorepathlen:
340 340 res = _hashencode(path, dotencode)
341 341 return res
342 342
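(Illustrative aside: the behaviour described in the docstring above, assuming the constants defined in this module.)

    # Short paths keep the reversible default encoding, e.g.:
    #   _hybridencode(b'data/HELLO/world.txt.i', True)
    #   == b'data/_h_e_l_l_o/world.txt.i'
    # Paths whose default encoding exceeds _maxstorepathlen (120 bytes)
    # fall back to the non-reversible hashed 'dh/' form:
    hashed = _hybridencode(b'data/' + b'x' * 200 + b'.i', True)
    assert hashed.startswith(b'dh/') and len(hashed) <= _maxstorepathlen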
343 343
344 344 def _pathencode(path):
345 345 de = encodedir(path)
346 346 if len(path) > _maxstorepathlen:
347 347 return _hashencode(de, True)
348 348 ef = _encodefname(de).split(b'/')
349 349 res = b'/'.join(_auxencode(ef, True))
350 350 if len(res) > _maxstorepathlen:
351 351 return _hashencode(de, True)
352 352 return res
353 353
354 354
355 355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 356
357 357
358 358 def _plainhybridencode(f):
359 359 return _hybridencode(f, False)
360 360
361 361
362 362 def _calcmode(vfs):
363 363 try:
364 364 # files in .hg/ will be created using this mode
365 365 mode = vfs.stat().st_mode
366 366 # avoid some useless chmods
367 367 if (0o777 & ~util.umask) == (0o777 & mode):
368 368 mode = None
369 369 except OSError:
370 370 mode = None
371 371 return mode
372 372
373 373
374 374 _data = [
375 375 b'bookmarks',
376 376 b'narrowspec',
377 377 b'data',
378 378 b'meta',
379 379 b'00manifest.d',
380 380 b'00manifest.i',
381 381 b'00changelog.d',
382 382 b'00changelog.i',
383 383 b'phaseroots',
384 384 b'obsstore',
385 385 b'requires',
386 386 ]
387 387
388 388 REVLOG_FILES_MAIN_EXT = (b'.i',)
389 389 REVLOG_FILES_OTHER_EXT = (
390 390 b'.idx',
391 391 b'.d',
392 392 b'.dat',
393 393 b'.n',
394 394 b'.nd',
395 395 b'.sda',
396 396 )
397 397 # file extension that also use a `-SOMELONGIDHASH.ext` form
398 398 REVLOG_FILES_LONG_EXT = (
399 399 b'.nd',
400 400 b'.idx',
401 401 b'.dat',
402 402 b'.sda',
403 403 )
404 404 # files that are "volatile" and might change between listing and streaming
405 405 #
406 406 # note: the ".nd" files are nodemap data and won't "change" but they might be
407 407 # deleted.
408 408 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
409 409
410 410 # some exception to the above matching
411 411 #
412 412 # XXX This is currently not in use because of issue6542
413 413 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414 414
415 415
416 416 def is_revlog(f, kind, st):
417 417 if kind != stat.S_IFREG:
418 418 return None
419 419 return revlog_type(f)
420 420
421 421
422 422 def revlog_type(f):
423 423 # XXX we need to filter `undo.` files created by the transaction here; however,
424 424 # being naive about it also filters revlogs for `undo.*` files, leading to
425 425 # issue6542. So we no longer use EXCLUDED.
426 426 if f.endswith(REVLOG_FILES_MAIN_EXT):
427 427 return FILEFLAGS_REVLOG_MAIN
428 428 elif f.endswith(REVLOG_FILES_OTHER_EXT):
429 429 t = FILETYPE_FILELOG_OTHER
430 430 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
431 431 t |= FILEFLAGS_VOLATILE
432 432 return t
433 433 return None
434 434
435 435
436 436 # the file is part of changelog data
437 437 FILEFLAGS_CHANGELOG = 1 << 13
438 438 # the file is part of manifest data
439 439 FILEFLAGS_MANIFESTLOG = 1 << 12
440 440 # the file is part of filelog data
441 441 FILEFLAGS_FILELOG = 1 << 11
442 442 # files that are not directly part of a revlog
443 443 FILEFLAGS_OTHER = 1 << 10
444 444
445 445 # the main entry point for a revlog
446 446 FILEFLAGS_REVLOG_MAIN = 1 << 1
447 447 # a secondary file for a revlog
448 448 FILEFLAGS_REVLOG_OTHER = 1 << 0
449 449
450 450 # files that are "volatile" and might change between listing and streaming
451 451 FILEFLAGS_VOLATILE = 1 << 20
452 452
453 453 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
454 454 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
455 455 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
456 456 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
457 457 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
458 458 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
459 459 FILETYPE_OTHER = FILEFLAGS_OTHER
460 460
461 461
462 462 @attr.s(slots=True)
463 463 class StoreFile:
464 464 """a file matching a store entry"""
465 465
466 466 unencoded_path = attr.ib()
467 467 _file_size = attr.ib(default=None)
468 468 is_volatile = attr.ib(default=False)
469 469
470 470 def file_size(self, vfs):
471 471 if self._file_size is None:
472 472 if vfs is None:
473 473 msg = b"calling vfs-less file_size without prior call: %s"
474 474 msg %= self.unencoded_path
475 475 raise error.ProgrammingError(msg)
476 476 try:
477 477 self._file_size = vfs.stat(self.unencoded_path).st_size
478 478 except FileNotFoundError:
479 479 self._file_size = 0
480 480 return self._file_size
481 481
482 482 def get_stream(self, vfs, copies):
483 483 """return data "stream" information for this file
484 484
485 485 (unencoded_file_path, content_iterator, content_size)
486 486 """
487 487 size = self.file_size(None)
488 488
489 489 def get_stream():
490 490 actual_path = copies[vfs.join(self.unencoded_path)]
491 491 with open(actual_path, 'rb') as fp:
492 492 yield None # ready to stream
493 493 if size <= 65536:
494 494 yield fp.read(size)
495 495 else:
496 496 yield from util.filechunkiter(fp, limit=size)
497 497
498 498 s = get_stream()
499 499 next(s)
500 500 return (self.unencoded_path, s, size)
501 501
502 502
503 503 @attr.s(slots=True, init=False)
504 504 class BaseStoreEntry:
505 505 """An entry in the store
506 506
507 507 This is returned by `store.walk` and represents some data in the store."""
508 508
509 509 def files(self) -> List[StoreFile]:
510 510 raise NotImplementedError
511 511
512 def get_streams(self, vfs, copies=None):
512 def get_streams(
513 self,
514 repo=None,
515 vfs=None,
516 copies=None,
517 max_changeset=None,
518 ):
513 519 """return a list of data stream associated to files for this entry
514 520
515 521 return [(unencoded_file_path, content_iterator, content_size), …]
516 522 """
517 523 assert vfs is not None
518 524 return [f.get_stream(vfs, copies) for f in self.files()]
519 525
520 526
521 527 @attr.s(slots=True, init=False)
522 528 class SimpleStoreEntry(BaseStoreEntry):
523 529 """A generic entry in the store"""
524 530
525 531 is_revlog = False
526 532
527 533 _entry_path = attr.ib()
528 534 _is_volatile = attr.ib(default=False)
529 535 _file_size = attr.ib(default=None)
530 536 _files = attr.ib(default=None)
531 537
532 538 def __init__(
533 539 self,
534 540 entry_path,
535 541 is_volatile=False,
536 542 file_size=None,
537 543 ):
538 544 super().__init__()
539 545 self._entry_path = entry_path
540 546 self._is_volatile = is_volatile
541 547 self._file_size = file_size
542 548 self._files = None
543 549
544 550 def files(self) -> List[StoreFile]:
545 551 if self._files is None:
546 552 self._files = [
547 553 StoreFile(
548 554 unencoded_path=self._entry_path,
549 555 file_size=self._file_size,
550 556 is_volatile=self._is_volatile,
551 557 )
552 558 ]
553 559 return self._files
554 560
555 561
556 562 @attr.s(slots=True, init=False)
557 563 class RevlogStoreEntry(BaseStoreEntry):
558 564 """A revlog entry in the store"""
559 565
560 566 is_revlog = True
561 567
562 568 revlog_type = attr.ib(default=None)
563 569 target_id = attr.ib(default=None)
564 570 _path_prefix = attr.ib(default=None)
565 571 _details = attr.ib(default=None)
566 572 _files = attr.ib(default=None)
567 573
568 574 def __init__(
569 575 self,
570 576 revlog_type,
571 577 path_prefix,
572 578 target_id,
573 579 details,
574 580 ):
575 581 super().__init__()
576 582 self.revlog_type = revlog_type
577 583 self.target_id = target_id
578 584 self._path_prefix = path_prefix
579 585 assert b'.i' in details, (path_prefix, details)
580 586 self._details = details
581 587 self._files = None
582 588
583 589 @property
584 590 def is_changelog(self):
585 591 return self.revlog_type & FILEFLAGS_CHANGELOG
586 592
587 593 @property
588 594 def is_manifestlog(self):
589 595 return self.revlog_type & FILEFLAGS_MANIFESTLOG
590 596
591 597 @property
592 598 def is_filelog(self):
593 599 return self.revlog_type & FILEFLAGS_FILELOG
594 600
595 601 def main_file_path(self):
596 602 """unencoded path of the main revlog file"""
597 603 return self._path_prefix + b'.i'
598 604
599 605 def files(self) -> List[StoreFile]:
600 606 if self._files is None:
601 607 self._files = []
602 608 for ext in sorted(self._details, key=_ext_key):
603 609 path = self._path_prefix + ext
604 610 data = self._details[ext]
605 611 self._files.append(StoreFile(unencoded_path=path, **data))
606 612 return self._files
607 613
614 def get_streams(
615 self,
616 repo=None,
617 vfs=None,
618 copies=None,
619 max_changeset=None,
620 ):
621 if repo is None or max_changeset is None:
622 return super().get_streams(
623 repo=repo,
624 vfs=vfs,
625 copies=copies,
626 max_changeset=max_changeset,
627 )
628 if any(k.endswith(b'.idx') for k in self._details.keys()):
629 # This uses revlog-v2; ignore it for now
630 return super().get_streams(
631 repo=repo,
632 vfs=vfs,
633 copies=copies,
634 max_changeset=max_changeset,
635 )
636 name_to_ext = {}
637 for ext in self._details.keys():
638 name_to_ext[self._path_prefix + ext] = ext
639 name_to_size = {}
640 for f in self.files():
641 name_to_size[f.unencoded_path] = f.file_size(None)
642 stream = [
643 f.get_stream(vfs, copies)
644 for f in self.files()
645 if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
646 ]
647
648 rl = self.get_revlog_instance(repo).get_revlog()
649 rl_stream = rl.get_streams(max_changeset)
650 for name, s, size in rl_stream:
651 if name_to_size.get(name, 0) != size:
652 msg = _(b"expected %d bytes but %d provided for %s")
653 msg %= name_to_size.get(name, 0), size, name
654 raise error.Abort(msg)
655 stream.extend(rl_stream)
656 files = self.files()
657 assert len(stream) == len(files), (
658 stream,
659 files,
660 self._path_prefix,
661 self.target_id,
662 )
663 return stream
664
608 665 def get_revlog_instance(self, repo):
609 666 """Obtain a revlog instance from this store entry
610 667
611 668 An instance of the appropriate class is returned.
612 669 """
613 670 if self.is_changelog:
614 671 return changelog.changelog(repo.svfs)
615 672 elif self.is_manifestlog:
616 673 mandir = self.target_id
617 674 return manifest.manifestrevlog(
618 675 repo.nodeconstants, repo.svfs, tree=mandir
619 676 )
620 677 else:
621 678 return filelog.filelog(repo.svfs, self.target_id)
622 679
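(Illustrative aside, not part of this change: how a stream-clone producer might consume the ``get_streams`` API added above. ``repo``, ``copies`` and ``max_linkrev`` are placeholders: a local repository, a mapping from vfs-joined paths to stable copies of volatile files prepared under the lock, and the highest changelog revision to expose.)

    def emit_entry(repo, entry, copies, max_linkrev):
        streams = entry.get_streams(
            repo=repo,
            vfs=repo.svfs,
            copies=copies,
            max_changeset=max_linkrev,
        )
        for name, chunks, size in streams:
            yield (name, size)  # per-file header information
            for chunk in chunks:
                yield chunk  # raw file content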
623 680
624 681 def _gather_revlog(files_data):
625 682 """group files per revlog prefix
626 683
627 684 This returns a two-level nested dict. The top-level key is the revlog prefix
628 685 without extension; the second level maps every file "suffix" that was
629 686 seen for this revlog to arbitrary file data.
630 687 """
631 688 revlogs = collections.defaultdict(dict)
632 689 for u, value in files_data:
633 690 name, ext = _split_revlog_ext(u)
634 691 revlogs[name][ext] = value
635 692 return sorted(revlogs.items())
636 693
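    # Illustrative example of the grouping performed above (the values are
    # arbitrary file data supplied by the caller):
    #
    #   _gather_revlog([
    #       (b'data/foo.txt.i', 1),
    #       (b'data/foo.txt.d', 2),
    #       (b'data/bar.txt.i', 3),
    #   ])
    #   == [
    #       (b'data/bar.txt', {b'.i': 3}),
    #       (b'data/foo.txt', {b'.i': 1, b'.d': 2}),
    #   ]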
637 694
638 695 def _split_revlog_ext(filename):
639 696 """split the revlog file prefix from the variable extension"""
640 697 if filename.endswith(REVLOG_FILES_LONG_EXT):
641 698 char = b'-'
642 699 else:
643 700 char = b'.'
644 701 idx = filename.rfind(char)
645 702 return filename[:idx], filename[idx:]
646 703
647 704
648 705 def _ext_key(ext):
649 706 """a key to order revlog suffix
650 707
651 708 important to issue .i after other entry."""
652 709 # the only important part of this order is to keep the `.i` last.
653 710 if ext.endswith(b'.n'):
654 711 return (0, ext)
655 712 elif ext.endswith(b'.nd'):
656 713 return (10, ext)
657 714 elif ext.endswith(b'.d'):
658 715 return (20, ext)
659 716 elif ext.endswith(b'.i'):
660 717 return (50, ext)
661 718 else:
662 719 return (40, ext)
663 720
664 721
665 722 class basicstore:
666 723 '''base class for local repository stores'''
667 724
668 725 def __init__(self, path, vfstype):
669 726 vfs = vfstype(path)
670 727 self.path = vfs.base
671 728 self.createmode = _calcmode(vfs)
672 729 vfs.createmode = self.createmode
673 730 self.rawvfs = vfs
674 731 self.vfs = vfsmod.filtervfs(vfs, encodedir)
675 732 self.opener = self.vfs
676 733
677 734 def join(self, f):
678 735 return self.path + b'/' + encodedir(f)
679 736
680 737 def _walk(self, relpath, recurse, undecodable=None):
681 738 '''yields (revlog_type, unencoded, size)'''
682 739 path = self.path
683 740 if relpath:
684 741 path += b'/' + relpath
685 742 striplen = len(self.path) + 1
686 743 l = []
687 744 if self.rawvfs.isdir(path):
688 745 visit = [path]
689 746 readdir = self.rawvfs.readdir
690 747 while visit:
691 748 p = visit.pop()
692 749 for f, kind, st in readdir(p, stat=True):
693 750 fp = p + b'/' + f
694 751 rl_type = is_revlog(f, kind, st)
695 752 if rl_type is not None:
696 753 n = util.pconvert(fp[striplen:])
697 754 l.append((decodedir(n), (rl_type, st.st_size)))
698 755 elif kind == stat.S_IFDIR and recurse:
699 756 visit.append(fp)
700 757
701 758 l.sort()
702 759 return l
703 760
704 761 def changelog(self, trypending, concurrencychecker=None):
705 762 return changelog.changelog(
706 763 self.vfs,
707 764 trypending=trypending,
708 765 concurrencychecker=concurrencychecker,
709 766 )
710 767
711 768 def manifestlog(self, repo, storenarrowmatch):
712 769 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
713 770 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
714 771
715 772 def data_entries(
716 773 self, matcher=None, undecodable=None
717 774 ) -> Generator[BaseStoreEntry, None, None]:
718 775 """Like walk, but excluding the changelog and root manifest.
719 776
720 777 When [undecodable] is None, revlog names that can't be
721 778 decoded cause an exception. When it is provided, it should
722 779 be a list and the filenames that can't be decoded are added
723 780 to it instead. This is very rarely needed."""
724 781 dirs = [
725 782 (b'data', FILEFLAGS_FILELOG, False),
726 783 (b'meta', FILEFLAGS_MANIFESTLOG, True),
727 784 ]
728 785 for base_dir, rl_type, strip_filename in dirs:
729 786 files = self._walk(base_dir, True, undecodable=undecodable)
730 787 files = (f for f in files if f[1][0] is not None)
731 788 for revlog, details in _gather_revlog(files):
732 789 file_details = {}
733 790 revlog_target_id = revlog.split(b'/', 1)[1]
734 791 if strip_filename and b'/' in revlog:
735 792 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
736 793 revlog_target_id += b'/'
737 794 for ext, (t, s) in sorted(details.items()):
738 795 file_details[ext] = {
739 796 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
740 797 'file_size': s,
741 798 }
742 799 yield RevlogStoreEntry(
743 800 path_prefix=revlog,
744 801 revlog_type=rl_type,
745 802 target_id=revlog_target_id,
746 803 details=file_details,
747 804 )
748 805
749 806 def top_entries(
750 807 self, phase=False, obsolescence=False
751 808 ) -> Generator[BaseStoreEntry, None, None]:
752 809 if phase and self.vfs.exists(b'phaseroots'):
753 810 yield SimpleStoreEntry(
754 811 entry_path=b'phaseroots',
755 812 is_volatile=True,
756 813 )
757 814
758 815 if obsolescence and self.vfs.exists(b'obsstore'):
759 816 # XXX if we had the file size it could be non-volatile
760 817 yield SimpleStoreEntry(
761 818 entry_path=b'obsstore',
762 819 is_volatile=True,
763 820 )
764 821
765 822 files = reversed(self._walk(b'', False))
766 823
767 824 changelogs = collections.defaultdict(dict)
768 825 manifestlogs = collections.defaultdict(dict)
769 826
770 827 for u, (t, s) in files:
771 828 if u.startswith(b'00changelog'):
772 829 name, ext = _split_revlog_ext(u)
773 830 changelogs[name][ext] = (t, s)
774 831 elif u.startswith(b'00manifest'):
775 832 name, ext = _split_revlog_ext(u)
776 833 manifestlogs[name][ext] = (t, s)
777 834 else:
778 835 yield SimpleStoreEntry(
779 836 entry_path=u,
780 837 is_volatile=bool(t & FILEFLAGS_VOLATILE),
781 838 file_size=s,
782 839 )
783 840 # yield manifest before changelog
784 841 top_rl = [
785 842 (manifestlogs, FILEFLAGS_MANIFESTLOG),
786 843 (changelogs, FILEFLAGS_CHANGELOG),
787 844 ]
788 845 assert len(manifestlogs) <= 1
789 846 assert len(changelogs) <= 1
790 847 for data, revlog_type in top_rl:
791 848 for revlog, details in sorted(data.items()):
792 849 file_details = {}
793 850 for ext, (t, s) in details.items():
794 851 file_details[ext] = {
795 852 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
796 853 'file_size': s,
797 854 }
798 855 yield RevlogStoreEntry(
799 856 path_prefix=revlog,
800 857 revlog_type=revlog_type,
801 858 target_id=b'',
802 859 details=file_details,
803 860 )
804 861
805 862 def walk(
806 863 self, matcher=None, phase=False, obsolescence=False
807 864 ) -> Generator[BaseStoreEntry, None, None]:
808 865 """return files related to data storage (ie: revlogs)
809 866
810 867 yields instance from BaseStoreEntry subclasses
811 868
812 869 if a matcher is passed, storage files of only those tracked paths
813 870 are passed with matches the matcher
814 871 """
815 872 # yield data files first
816 873 for x in self.data_entries(matcher):
817 874 yield x
818 875 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
819 876 yield x
820 877
821 878 def copylist(self):
822 879 return _data
823 880
824 881 def write(self, tr):
825 882 pass
826 883
827 884 def invalidatecaches(self):
828 885 pass
829 886
830 887 def markremoved(self, fn):
831 888 pass
832 889
833 890 def __contains__(self, path):
834 891 '''Checks if the store contains path'''
835 892 path = b"/".join((b"data", path))
836 893 # file?
837 894 if self.vfs.exists(path + b".i"):
838 895 return True
839 896 # dir?
840 897 if not path.endswith(b"/"):
841 898 path = path + b"/"
842 899 return self.vfs.exists(path)
843 900
844 901
845 902 class encodedstore(basicstore):
846 903 def __init__(self, path, vfstype):
847 904 vfs = vfstype(path + b'/store')
848 905 self.path = vfs.base
849 906 self.createmode = _calcmode(vfs)
850 907 vfs.createmode = self.createmode
851 908 self.rawvfs = vfs
852 909 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
853 910 self.opener = self.vfs
854 911
855 912 def _walk(self, relpath, recurse, undecodable=None):
856 913 old = super()._walk(relpath, recurse)
857 914 new = []
858 915 for f1, value in old:
859 916 try:
860 917 f2 = decodefilename(f1)
861 918 except KeyError:
862 919 if undecodable is None:
863 920 msg = _(b'undecodable revlog name %s') % f1
864 921 raise error.StorageError(msg)
865 922 else:
866 923 undecodable.append(f1)
867 924 continue
868 925 new.append((f2, value))
869 926 return new
870 927
871 928 def data_entries(
872 929 self, matcher=None, undecodable=None
873 930 ) -> Generator[BaseStoreEntry, None, None]:
874 931 entries = super(encodedstore, self).data_entries(
875 932 undecodable=undecodable
876 933 )
877 934 for entry in entries:
878 935 if _match_tracked_entry(entry, matcher):
879 936 yield entry
880 937
881 938 def join(self, f):
882 939 return self.path + b'/' + encodefilename(f)
883 940
884 941 def copylist(self):
885 942 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
886 943
887 944
888 945 class fncache:
889 946 # the filename used to be partially encoded
890 947 # hence the encodedir/decodedir dance
891 948 def __init__(self, vfs):
892 949 self.vfs = vfs
893 950 self._ignores = set()
894 951 self.entries = None
895 952 self._dirty = False
896 953 # set of new additions to fncache
897 954 self.addls = set()
898 955
899 956 def ensureloaded(self, warn=None):
900 957 """read the fncache file if not already read.
901 958
902 959 If the file on disk is corrupted, raise. If warn is provided,
903 960 warn and keep going instead."""
904 961 if self.entries is None:
905 962 self._load(warn)
906 963
907 964 def _load(self, warn=None):
908 965 '''fill the entries from the fncache file'''
909 966 self._dirty = False
910 967 try:
911 968 fp = self.vfs(b'fncache', mode=b'rb')
912 969 except IOError:
913 970 # skip nonexistent file
914 971 self.entries = set()
915 972 return
916 973
917 974 self.entries = set()
918 975 chunk = b''
919 976 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
920 977 chunk += c
921 978 try:
922 979 p = chunk.rindex(b'\n')
923 980 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
924 981 chunk = chunk[p + 1 :]
925 982 except ValueError:
926 983 # substring '\n' not found, maybe the entry is bigger than the
927 984 # chunksize, so let's keep iterating
928 985 pass
929 986
930 987 if chunk:
931 988 msg = _(b"fncache does not ends with a newline")
932 989 if warn:
933 990 warn(msg + b'\n')
934 991 else:
935 992 raise error.Abort(
936 993 msg,
937 994 hint=_(
938 995 b"use 'hg debugrebuildfncache' to "
939 996 b"rebuild the fncache"
940 997 ),
941 998 )
942 999 self._checkentries(fp, warn)
943 1000 fp.close()
944 1001
945 1002 def _checkentries(self, fp, warn):
946 1003 """make sure there is no empty string in entries"""
947 1004 if b'' in self.entries:
948 1005 fp.seek(0)
949 1006 for n, line in enumerate(fp):
950 1007 if not line.rstrip(b'\n'):
951 1008 t = _(b'invalid entry in fncache, line %d') % (n + 1)
952 1009 if warn:
953 1010 warn(t + b'\n')
954 1011 else:
955 1012 raise error.Abort(t)
956 1013
957 1014 def write(self, tr):
958 1015 if self._dirty:
959 1016 assert self.entries is not None
960 1017 self.entries = self.entries | self.addls
961 1018 self.addls = set()
962 1019 tr.addbackup(b'fncache')
963 1020 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
964 1021 if self.entries:
965 1022 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
966 1023 fp.close()
967 1024 self._dirty = False
968 1025 if self.addls:
969 1026 # if we have just new entries, let's append them to the fncache
970 1027 tr.addbackup(b'fncache')
971 1028 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
972 1029 if self.addls:
973 1030 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
974 1031 fp.close()
975 1032 self.entries = None
976 1033 self.addls = set()
977 1034
978 1035 def addignore(self, fn):
979 1036 self._ignores.add(fn)
980 1037
981 1038 def add(self, fn):
982 1039 if fn in self._ignores:
983 1040 return
984 1041 if self.entries is None:
985 1042 self._load()
986 1043 if fn not in self.entries:
987 1044 self.addls.add(fn)
988 1045
989 1046 def remove(self, fn):
990 1047 if self.entries is None:
991 1048 self._load()
992 1049 if fn in self.addls:
993 1050 self.addls.remove(fn)
994 1051 return
995 1052 try:
996 1053 self.entries.remove(fn)
997 1054 self._dirty = True
998 1055 except KeyError:
999 1056 pass
1000 1057
1001 1058 def __contains__(self, fn):
1002 1059 if fn in self.addls:
1003 1060 return True
1004 1061 if self.entries is None:
1005 1062 self._load()
1006 1063 return fn in self.entries
1007 1064
1008 1065 def __iter__(self):
1009 1066 if self.entries is None:
1010 1067 self._load()
1011 1068 return iter(self.entries | self.addls)
1012 1069
1013 1070
1014 1071 class _fncachevfs(vfsmod.proxyvfs):
1015 1072 def __init__(self, vfs, fnc, encode):
1016 1073 vfsmod.proxyvfs.__init__(self, vfs)
1017 1074 self.fncache = fnc
1018 1075 self.encode = encode
1019 1076
1020 1077 def __call__(self, path, mode=b'r', *args, **kw):
1021 1078 encoded = self.encode(path)
1022 1079 if (
1023 1080 mode not in (b'r', b'rb')
1024 1081 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1025 1082 and revlog_type(path) is not None
1026 1083 ):
1027 1084 # do not trigger a fncache load when adding a file that already is
1028 1085 # known to exist.
1029 1086 notload = self.fncache.entries is None and self.vfs.exists(encoded)
1030 1087 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
1031 1088 # when appending to an existing file, if the file has size zero,
1032 1089 # it should be considered as missing. Such zero-size files are
1033 1090 # the result of truncation when a transaction is aborted.
1034 1091 notload = False
1035 1092 if not notload:
1036 1093 self.fncache.add(path)
1037 1094 return self.vfs(encoded, mode, *args, **kw)
1038 1095
1039 1096 def join(self, path):
1040 1097 if path:
1041 1098 return self.vfs.join(self.encode(path))
1042 1099 else:
1043 1100 return self.vfs.join(path)
1044 1101
1045 1102 def register_file(self, path):
1046 1103 """generic hook point to lets fncache steer its stew"""
1047 1104 if path.startswith(b'data/') or path.startswith(b'meta/'):
1048 1105 self.fncache.add(path)
1049 1106
1050 1107
1051 1108 class fncachestore(basicstore):
1052 1109 def __init__(self, path, vfstype, dotencode):
1053 1110 if dotencode:
1054 1111 encode = _pathencode
1055 1112 else:
1056 1113 encode = _plainhybridencode
1057 1114 self.encode = encode
1058 1115 vfs = vfstype(path + b'/store')
1059 1116 self.path = vfs.base
1060 1117 self.pathsep = self.path + b'/'
1061 1118 self.createmode = _calcmode(vfs)
1062 1119 vfs.createmode = self.createmode
1063 1120 self.rawvfs = vfs
1064 1121 fnc = fncache(vfs)
1065 1122 self.fncache = fnc
1066 1123 self.vfs = _fncachevfs(vfs, fnc, encode)
1067 1124 self.opener = self.vfs
1068 1125
1069 1126 def join(self, f):
1070 1127 return self.pathsep + self.encode(f)
1071 1128
1072 1129 def getsize(self, path):
1073 1130 return self.rawvfs.stat(path).st_size
1074 1131
1075 1132 def data_entries(
1076 1133 self, matcher=None, undecodable=None
1077 1134 ) -> Generator[BaseStoreEntry, None, None]:
1078 1135 files = ((f, revlog_type(f)) for f in self.fncache)
1079 1136 # Note: all files in fncache should be revlog related; however, the
1080 1137 # fncache might contain such files added by previous versions of
1081 1138 # Mercurial.
1082 1139 files = (f for f in files if f[1] is not None)
1083 1140 by_revlog = _gather_revlog(files)
1084 1141 for revlog, details in by_revlog:
1085 1142 file_details = {}
1086 1143 if revlog.startswith(b'data/'):
1087 1144 rl_type = FILEFLAGS_FILELOG
1088 1145 revlog_target_id = revlog.split(b'/', 1)[1]
1089 1146 elif revlog.startswith(b'meta/'):
1090 1147 rl_type = FILEFLAGS_MANIFESTLOG
1091 1148 # drop the initial directory and the `00manifest` file part
1092 1149 tmp = revlog.split(b'/', 1)[1]
1093 1150 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1094 1151 else:
1095 1152 # unreachable
1096 1153 assert False, revlog
1097 1154 for ext, t in details.items():
1098 1155 file_details[ext] = {
1099 1156 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1100 1157 }
1101 1158 entry = RevlogStoreEntry(
1102 1159 path_prefix=revlog,
1103 1160 revlog_type=rl_type,
1104 1161 target_id=revlog_target_id,
1105 1162 details=file_details,
1106 1163 )
1107 1164 if _match_tracked_entry(entry, matcher):
1108 1165 yield entry
1109 1166
1110 1167 def copylist(self):
1111 1168 d = (
1112 1169 b'bookmarks',
1113 1170 b'narrowspec',
1114 1171 b'data',
1115 1172 b'meta',
1116 1173 b'dh',
1117 1174 b'fncache',
1118 1175 b'phaseroots',
1119 1176 b'obsstore',
1120 1177 b'00manifest.d',
1121 1178 b'00manifest.i',
1122 1179 b'00changelog.d',
1123 1180 b'00changelog.i',
1124 1181 b'requires',
1125 1182 )
1126 1183 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1127 1184
1128 1185 def write(self, tr):
1129 1186 self.fncache.write(tr)
1130 1187
1131 1188 def invalidatecaches(self):
1132 1189 self.fncache.entries = None
1133 1190 self.fncache.addls = set()
1134 1191
1135 1192 def markremoved(self, fn):
1136 1193 self.fncache.remove(fn)
1137 1194
1138 1195 def _exists(self, f):
1139 1196 ef = self.encode(f)
1140 1197 try:
1141 1198 self.getsize(ef)
1142 1199 return True
1143 1200 except FileNotFoundError:
1144 1201 return False
1145 1202
1146 1203 def __contains__(self, path):
1147 1204 '''Checks if the store contains path'''
1148 1205 path = b"/".join((b"data", path))
1149 1206 # check for files (exact match)
1150 1207 e = path + b'.i'
1151 1208 if e in self.fncache and self._exists(e):
1152 1209 return True
1153 1210 # now check for directories (prefix match)
1154 1211 if not path.endswith(b'/'):
1155 1212 path += b'/'
1156 1213 for e in self.fncache:
1157 1214 if e.startswith(path) and self._exists(e):
1158 1215 return True
1159 1216 return False
@@ -1,966 +1,970 b''
1 1 # streamclone.py - producing and consuming streaming repository data
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import contextlib
10 10 import os
11 11 import struct
12 12
13 13 from .i18n import _
14 14 from .interfaces import repository
15 15 from . import (
16 16 bookmarks,
17 17 bundle2 as bundle2mod,
18 18 cacheutil,
19 19 error,
20 20 narrowspec,
21 21 phases,
22 22 pycompat,
23 23 requirements as requirementsmod,
24 24 scmutil,
25 25 store,
26 26 transaction,
27 27 util,
28 28 )
29 29 from .revlogutils import (
30 30 nodemap,
31 31 )
32 32
33 33
34 34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
35 35 """determine the final set of requirement for a new stream clone
36 36
37 37 this method combines the "default" requirements that a new repository would
38 38 use with the constraints we get from the stream clone content. We keep local
39 39 configuration choices when possible.
40 40 """
41 41 requirements = set(default_requirements)
42 42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
43 43 requirements.update(streamed_requirements)
44 44 return requirements
45 45
46 46
47 47 def streamed_requirements(repo):
48 48 """the set of requirement the new clone will have to support
49 49
50 50 This is used for advertising the stream options and to generate the actual
51 51 stream content."""
52 52 requiredformats = (
53 53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
54 54 )
55 55 return requiredformats
56 56
57 57
58 58 def canperformstreamclone(pullop, bundle2=False):
59 59 """Whether it is possible to perform a streaming clone as part of pull.
60 60
61 61 ``bundle2`` will cause the function to consider stream clone through
62 62 bundle2 and only through bundle2.
63 63
64 64 Returns a tuple of (supported, requirements). ``supported`` is True if
65 65 streaming clone is supported and False otherwise. ``requirements`` is
66 66 a set of repo requirements from the remote, or ``None`` if stream clone
67 67 isn't supported.
68 68 """
69 69 repo = pullop.repo
70 70 remote = pullop.remote
71 71
72 72 # should we consider streaming clone at all ?
73 73 streamrequested = pullop.streamclonerequested
74 74 # If we don't have a preference, let the server decide for us. This
75 75 # likely only comes into play in LANs.
76 76 if streamrequested is None:
77 77 # The server can advertise whether to prefer streaming clone.
78 78 streamrequested = remote.capable(b'stream-preferred')
79 79 if not streamrequested:
80 80 return False, None
81 81
82 82 # Streaming clone only works on an empty destination repository
83 83 if len(repo):
84 84 return False, None
85 85
86 86 # Streaming clone only works if all data is being requested.
87 87 if pullop.heads:
88 88 return False, None
89 89
90 90 bundle2supported = False
91 91 if pullop.canusebundle2:
92 92 local_caps = bundle2mod.getrepocaps(repo, role=b'client')
93 93 local_supported = set(local_caps.get(b'stream', []))
94 94 remote_supported = set(pullop.remotebundle2caps.get(b'stream', []))
95 95 bundle2supported = bool(local_supported & remote_supported)
96 96 # else
97 97 # Server doesn't support bundle2 stream clone or doesn't support
98 98 # the versions we support. Fall back and possibly allow legacy.
99 99
100 100 # Ensures legacy code path uses available bundle2.
101 101 if bundle2supported and not bundle2:
102 102 return False, None
103 103 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
104 104 elif bundle2 and not bundle2supported:
105 105 return False, None
106 106
107 107 # In order for stream clone to work, the client has to support all the
108 108 # requirements advertised by the server.
109 109 #
110 110 # The server advertises its requirements via the "stream" and "streamreqs"
111 111 # capability. "stream" (a value-less capability) is advertised if and only
112 112 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
113 113 # is advertised and contains a comma-delimited list of requirements.
114 114 requirements = set()
115 115 if remote.capable(b'stream'):
116 116 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
117 117 else:
118 118 streamreqs = remote.capable(b'streamreqs')
119 119 # This is weird and shouldn't happen with modern servers.
120 120 if not streamreqs:
121 121 pullop.repo.ui.warn(
122 122 _(
123 123 b'warning: stream clone requested but server has them '
124 124 b'disabled\n'
125 125 )
126 126 )
127 127 return False, None
128 128
129 129 streamreqs = set(streamreqs.split(b','))
130 130 # Server requires something we don't support. Bail.
131 131 missingreqs = streamreqs - repo.supported
132 132 if missingreqs:
133 133 pullop.repo.ui.warn(
134 134 _(
135 135 b'warning: stream clone requested but client is missing '
136 136 b'requirements: %s\n'
137 137 )
138 138 % b', '.join(sorted(missingreqs))
139 139 )
140 140 pullop.repo.ui.warn(
141 141 _(
142 142 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
143 143 b'for more information)\n'
144 144 )
145 145 )
146 146 return False, None
147 147 requirements = streamreqs
148 148
149 149 return True, requirements
150 150
151 151
152 152 def maybeperformlegacystreamclone(pullop):
153 153 """Possibly perform a legacy stream clone operation.
154 154
155 155 Legacy stream clones are performed as part of pull but before all other
156 156 operations.
157 157
158 158 A legacy stream clone will not be performed if a bundle2 stream clone is
159 159 supported.
160 160 """
161 161 from . import localrepo
162 162
163 163 supported, requirements = canperformstreamclone(pullop)
164 164
165 165 if not supported:
166 166 return
167 167
168 168 repo = pullop.repo
169 169 remote = pullop.remote
170 170
171 171 # Save remote branchmap. We will use it later to speed up branchcache
172 172 # creation.
173 173 rbranchmap = None
174 174 if remote.capable(b'branchmap'):
175 175 with remote.commandexecutor() as e:
176 176 rbranchmap = e.callcommand(b'branchmap', {}).result()
177 177
178 178 repo.ui.status(_(b'streaming all changes\n'))
179 179
180 180 with remote.commandexecutor() as e:
181 181 fp = e.callcommand(b'stream_out', {}).result()
182 182
183 183 # TODO strictly speaking, this code should all be inside the context
184 184 # manager because the context manager is supposed to ensure all wire state
185 185 # is flushed when exiting. But the legacy peers don't do this, so it
186 186 # doesn't matter.
187 187 l = fp.readline()
188 188 try:
189 189 resp = int(l)
190 190 except ValueError:
191 191 raise error.ResponseError(
192 192 _(b'unexpected response from remote server:'), l
193 193 )
194 194 if resp == 1:
195 195 raise error.Abort(_(b'operation forbidden by server'))
196 196 elif resp == 2:
197 197 raise error.Abort(_(b'locking the remote repository failed'))
198 198 elif resp != 0:
199 199 raise error.Abort(_(b'the server sent an unknown error code'))
200 200
201 201 l = fp.readline()
202 202 try:
203 203 filecount, bytecount = map(int, l.split(b' ', 1))
204 204 except (ValueError, TypeError):
205 205 raise error.ResponseError(
206 206 _(b'unexpected response from remote server:'), l
207 207 )
208 208
209 209 with repo.lock():
210 210 consumev1(repo, fp, filecount, bytecount)
211 211 repo.requirements = new_stream_clone_requirements(
212 212 repo.requirements,
213 213 requirements,
214 214 )
215 215 repo.svfs.options = localrepo.resolvestorevfsoptions(
216 216 repo.ui, repo.requirements, repo.features
217 217 )
218 218 scmutil.writereporequirements(repo)
219 219 nodemap.post_stream_cleanup(repo)
220 220
221 221 if rbranchmap:
222 222 repo._branchcaches.replace(repo, rbranchmap)
223 223
224 224 repo.invalidate()
225 225
226 226
227 227 def allowservergeneration(repo):
228 228 """Whether streaming clones are allowed from the server."""
229 229 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
230 230 return False
231 231
232 232 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
233 233 return False
234 234
235 235 # The way stream clone works makes it impossible to hide secret changesets.
236 236 # So don't allow this by default.
237 237 secret = phases.hassecret(repo)
238 238 if secret:
239 239 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
240 240
241 241 return True
242 242
243 243
244 244 # This is its own function so extensions can override it.
245 245 def _walkstreamfiles(repo, matcher=None, phase=False, obsolescence=False):
246 246 return repo.store.walk(matcher, phase=phase, obsolescence=obsolescence)
247 247
248 248
249 249 def generatev1(repo):
250 250 """Emit content for version 1 of a streaming clone.
251 251
252 252 This returns a 3-tuple of (file count, byte size, data iterator).
253 253
254 254 The data iterator consists of N entries for each file being transferred.
255 255 Each file entry starts as a line with the file name and integer size
256 256 delimited by a null byte.
257 257
258 258 The raw file data follows. Following the raw file data is the next file
259 259 entry, or EOF.
260 260
261 261 When used on the wire protocol, an additional line indicating protocol
262 262 success will be prepended to the stream. This function is not responsible
263 263 for adding it.
264 264
265 265 This function will obtain a repository lock to ensure a consistent view of
266 266 the store is captured. It therefore may raise LockError.
267 267 """
268 268 entries = []
269 269 total_bytes = 0
270 270 # Get consistent snapshot of repo, lock during scan.
271 271 with repo.lock():
272 272 repo.ui.debug(b'scanning\n')
273 273 for entry in _walkstreamfiles(repo):
274 274 for f in entry.files():
275 275 file_size = f.file_size(repo.store.vfs)
276 276 if file_size:
277 277 entries.append((f.unencoded_path, file_size))
278 278 total_bytes += file_size
279 279 _test_sync_point_walk_1(repo)
280 280 _test_sync_point_walk_2(repo)
281 281
282 282 repo.ui.debug(
283 283 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
284 284 )
285 285
286 286 svfs = repo.svfs
287 287 debugflag = repo.ui.debugflag
288 288
289 289 def emitrevlogdata():
290 290 for name, size in entries:
291 291 if debugflag:
292 292 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
293 293 # partially encode name over the wire for backwards compat
294 294 yield b'%s\0%d\n' % (store.encodedir(name), size)
295 295 # auditing at this stage is both pointless (paths are already
296 296 # trusted by the local repo) and expensive
297 297 with svfs(name, b'rb', auditpath=False) as fp:
298 298 if size <= 65536:
299 299 yield fp.read(size)
300 300 else:
301 301 for chunk in util.filechunkiter(fp, limit=size):
302 302 yield chunk
303 303
304 304 return len(entries), total_bytes, emitrevlogdata()
305 305
306 306
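To make the entry layout documented above concrete, here is a minimal, hypothetical reader for the stream produced by ``generatev1()``; the real, fully featured consumer is ``consumev1()`` further below.

# Hypothetical sketch: read back the "<name>\0<size>\n" + raw data layout.
# Note that `name` is still store-encoded here; consumev1() decodes it with
# store.decodedir() before writing the file.
def _iter_v1_entries(fp, filecount):
    for _ in range(filecount):
        header = fp.readline()
        name, size = header.rstrip(b'\n').split(b'\0', 1)
        yield name, fp.read(int(size))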
307 307 def generatev1wireproto(repo):
308 308 """Emit content for version 1 of streaming clone suitable for the wire.
309 309
310 310 This is the data output from ``generatev1()`` with 2 header lines. The
311 311 first line indicates overall success. The 2nd contains the file count and
312 312 byte size of payload.
313 313
314 314 The success line contains "0" for success, "1" for stream generation not
315 315 allowed, and "2" for error locking the repository (possibly indicating
316 316 a permissions error for the server process).
317 317 """
318 318 if not allowservergeneration(repo):
319 319 yield b'1\n'
320 320 return
321 321
322 322 try:
323 323 filecount, bytecount, it = generatev1(repo)
324 324 except error.LockError:
325 325 yield b'2\n'
326 326 return
327 327
328 328 # Indicates successful response.
329 329 yield b'0\n'
330 330 yield b'%d %d\n' % (filecount, bytecount)
331 331 for chunk in it:
332 332 yield chunk
333 333
334 334
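For illustration, a client reading this wire format has to consume the two header lines before the ``generatev1()`` payload; the in-tree handling lives in ``maybeperformlegacystreamclone()`` above, condensed here into a hypothetical helper.

# Hypothetical sketch: interpret the status line, then the
# "<filecount> <bytecount>" line, mirroring the error codes documented above.
def _read_v1_wire_header(fp):
    status = int(fp.readline())
    if status == 1:
        raise error.Abort(_(b'operation forbidden by server'))
    elif status == 2:
        raise error.Abort(_(b'locking the remote repository failed'))
    elif status != 0:
        raise error.Abort(_(b'the server sent an unknown error code'))
    filecount, bytecount = map(int, fp.readline().split(b' ', 1))
    return filecount, bytecount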
335 335 def generatebundlev1(repo, compression=b'UN'):
336 336 """Emit content for version 1 of a stream clone bundle.
337 337
338 338 The first 4 bytes of the output ("HGS1") denote this as stream clone
339 339 bundle version 1.
340 340
341 341 The next 2 bytes indicate the compression type. Only "UN" is currently
342 342 supported.
343 343
344 344 The next 16 bytes are two 64-bit big endian unsigned integers indicating
345 345 file count and byte count, respectively.
346 346
347 347 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
348 348 of the requirements string, including a trailing \0. The following N bytes
349 349 are the requirements string, which is ASCII containing a comma-delimited
350 350 list of repo requirements that are needed to support the data.
351 351
352 352 The remaining content is the output of ``generatev1()`` (which may be
353 353 compressed in the future).
354 354
355 355 Returns a tuple of (requirements, data generator).
356 356 """
357 357 if compression != b'UN':
358 358 raise ValueError(b'we do not support the compression argument yet')
359 359
360 360 requirements = streamed_requirements(repo)
361 361 requires = b','.join(sorted(requirements))
362 362
363 363 def gen():
364 364 yield b'HGS1'
365 365 yield compression
366 366
367 367 filecount, bytecount, it = generatev1(repo)
368 368 repo.ui.status(
369 369 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
370 370 )
371 371
372 372 yield struct.pack(b'>QQ', filecount, bytecount)
373 373 yield struct.pack(b'>H', len(requires) + 1)
374 374 yield requires + b'\0'
375 375
376 376 # This is where we'll add compression in the future.
377 377 assert compression == b'UN'
378 378
379 379 progress = repo.ui.makeprogress(
380 380 _(b'bundle'), total=bytecount, unit=_(b'bytes')
381 381 )
382 382 progress.update(0)
383 383
384 384 for chunk in it:
385 385 progress.increment(step=len(chunk))
386 386 yield chunk
387 387
388 388 progress.complete()
389 389
390 390 return requirements, gen()
391 391
392 392
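As a usage illustration, the generator returned by ``generatebundlev1()`` can simply be drained into a file to produce a stream clone bundle on disk, similar in spirit to the ``debugcreatestreamclonebundle`` debug command. The helper below is a hypothetical sketch, not part of this changeset.

# Hypothetical sketch: write a v1 stream clone bundle to `path`.
def _write_stream_bundle_v1(repo, path):
    requirements, gen = generatebundlev1(repo)
    with open(path, 'wb') as fh:
        for chunk in gen:
            fh.write(chunk)
    return requirements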
393 393 def consumev1(repo, fp, filecount, bytecount):
394 394 """Apply the contents from version 1 of a streaming clone file handle.
395 395
396 396 This takes the output from "stream_out" and applies it to the specified
397 397 repository.
398 398
399 399 Like "stream_out," the status line added by the wire protocol is not
400 400 handled by this function.
401 401 """
402 402 with repo.lock():
403 403 repo.ui.status(
404 404 _(b'%d files to transfer, %s of data\n')
405 405 % (filecount, util.bytecount(bytecount))
406 406 )
407 407 progress = repo.ui.makeprogress(
408 408 _(b'clone'), total=bytecount, unit=_(b'bytes')
409 409 )
410 410 progress.update(0)
411 411 start = util.timer()
412 412
413 413 # TODO: get rid of (potential) inconsistency
414 414 #
415 415 # If transaction is started and any @filecache property is
416 416 # changed at this point, it causes inconsistency between
417 417 # in-memory cached property and streamclone-ed file on the
418 418 # disk. Nested transaction prevents transaction scope "clone"
419 419 # below from writing in-memory changes out at the end of it,
420 420 # even though in-memory changes are discarded at the end of it
421 421 # regardless of transaction nesting.
422 422 #
423 423 # But transaction nesting can't be simply prohibited, because
424 424 # nesting occurs also in ordinary case (e.g. enabling
425 425 # clonebundles).
426 426
427 427 with repo.transaction(b'clone'):
428 428 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
429 429 for i in range(filecount):
430 430 # XXX doesn't support '\n' or '\r' in filenames
431 431 l = fp.readline()
432 432 try:
433 433 name, size = l.split(b'\0', 1)
434 434 size = int(size)
435 435 except (ValueError, TypeError):
436 436 raise error.ResponseError(
437 437 _(b'unexpected response from remote server:'), l
438 438 )
439 439 if repo.ui.debugflag:
440 440 repo.ui.debug(
441 441 b'adding %s (%s)\n' % (name, util.bytecount(size))
442 442 )
443 443 # for backwards compat, name was partially encoded
444 444 path = store.decodedir(name)
445 445 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
446 446 for chunk in util.filechunkiter(fp, limit=size):
447 447 progress.increment(step=len(chunk))
448 448 ofp.write(chunk)
449 449
450 450 # force @filecache properties to be reloaded from
451 451 # streamclone-ed file at next access
452 452 repo.invalidate(clearfilecache=True)
453 453
454 454 elapsed = util.timer() - start
455 455 if elapsed <= 0:
456 456 elapsed = 0.001
457 457 progress.complete()
458 458 repo.ui.status(
459 459 _(b'transferred %s in %.1f seconds (%s/sec)\n')
460 460 % (
461 461 util.bytecount(bytecount),
462 462 elapsed,
463 463 util.bytecount(bytecount / elapsed),
464 464 )
465 465 )
466 466
467 467
468 468 def readbundle1header(fp):
469 469 compression = fp.read(2)
470 470 if compression != b'UN':
471 471 raise error.Abort(
472 472 _(
473 473 b'only uncompressed stream clone bundles are '
474 474 b'supported; got %s'
475 475 )
476 476 % compression
477 477 )
478 478
479 479 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
480 480 requireslen = struct.unpack(b'>H', fp.read(2))[0]
481 481 requires = fp.read(requireslen)
482 482
483 483 if not requires.endswith(b'\0'):
484 484 raise error.Abort(
485 485 _(
486 486 b'malformed stream clone bundle: '
487 487 b'requirements not properly encoded'
488 488 )
489 489 )
490 490
491 491 requirements = set(requires.rstrip(b'\0').split(b','))
492 492
493 493 return filecount, bytecount, requirements
494 494
495 495
496 496 def applybundlev1(repo, fp):
497 497 """Apply the content from a stream clone bundle version 1.
498 498
499 499 We assume the 4 byte header has been read and validated and the file handle
500 500 is at the 2 byte compression identifier.
501 501 """
502 502 if len(repo):
503 503 raise error.Abort(
504 504 _(b'cannot apply stream clone bundle on non-empty repo')
505 505 )
506 506
507 507 filecount, bytecount, requirements = readbundle1header(fp)
508 508 missingreqs = requirements - repo.supported
509 509 if missingreqs:
510 510 raise error.Abort(
511 511 _(b'unable to apply stream clone: unsupported format: %s')
512 512 % b', '.join(sorted(missingreqs))
513 513 )
514 514
515 515 consumev1(repo, fp, filecount, bytecount)
516 516 nodemap.post_stream_cleanup(repo)
517 517
518 518
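A hypothetical end-to-end usage of the helpers above: validate the 4-byte magic yourself, then let ``readbundle1header()`` and the ``applybundlev1()``-style checks do the rest. This sketch is illustrative only and not part of the changeset.

# Hypothetical sketch: apply an on-disk "HGS1" bundle to an empty repo.
def _apply_stream_bundle_file(repo, path):
    with open(path, 'rb') as fp:
        if fp.read(4) != b'HGS1':
            raise error.Abort(_(b'not a stream clone bundle'))
        # readbundle1header() expects the file handle to sit at the
        # 2-byte compression identifier, as documented above.
        filecount, bytecount, requirements = readbundle1header(fp)
        missingreqs = requirements - repo.supported
        if missingreqs:
            raise error.Abort(
                _(b'unsupported format: %s') % b', '.join(sorted(missingreqs))
            )
        consumev1(repo, fp, filecount, bytecount)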
519 519 class streamcloneapplier:
520 520 """Class to manage applying streaming clone bundles.
521 521
522 522 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
523 523 readers to perform bundle type-specific functionality.
524 524 """
525 525
526 526 def __init__(self, fh):
527 527 self._fh = fh
528 528
529 529 def apply(self, repo):
530 530 return applybundlev1(repo, self._fh)
531 531
532 532
533 533 # type of file to stream
534 534 _fileappend = 0 # append only file
535 535 _filefull = 1 # full snapshot file
536 536
537 537 # Source of the file
538 538 _srcstore = b's' # store (svfs)
539 539 _srccache = b'c' # cache (cache)
540 540
541 541 # This is its own function so extensions can override it.
542 542 def _walkstreamfullstorefiles(repo):
543 543 """list snapshot file from the store"""
544 544 fnames = []
545 545 if not repo.publishing():
546 546 fnames.append(b'phaseroots')
547 547 return fnames
548 548
549 549
550 550 def _filterfull(entry, copy, vfsmap):
551 551 """actually copy the snapshot files"""
552 552 src, name, ftype, data = entry
553 553 if ftype != _filefull:
554 554 return entry
555 555 return (src, name, ftype, copy(vfsmap[src].join(name)))
556 556
557 557
558 558 class TempCopyManager:
559 559 """Manage temporary backup of volatile file during stream clone
560 560
561 561 This should be used as a Python context manager; the copies will be
562 562 discarded when exiting the context.
563 563 
564 564 A copy can be done by calling the object on the real path (encoded full
565 565 path).
566 566 
567 567 The backup path can be retrieved using the __getitem__ protocol, obj[path].
568 568 On files without a backup, it returns the path unmodified (equivalent to
569 569 `dict.get(x, x)`).
570 570 """
571 571
572 572 def __init__(self):
573 573 self._copies = None
574 574 self._dst_dir = None
575 575
576 576 def __enter__(self):
577 577 if self._copies is not None:
578 578 msg = "Copies context already open"
579 579 raise error.ProgrammingError(msg)
580 580 self._copies = {}
581 581 self._dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
582 582 return self
583 583
584 584 def __call__(self, src):
585 585 """create a backup of the file at src"""
586 586 prefix = os.path.basename(src)
587 587 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
588 588 os.close(fd)
589 589 self._copies[src] = dst
590 590 util.copyfiles(src, dst, hardlink=True)
591 591 return dst
592 592
593 593 def __getitem__(self, src):
594 594 """return the path to a valid version of `src`
595 595
596 596 If the file has no backup, the path of the file is returned
597 597 unmodified."""
598 598 return self._copies.get(src, src)
599 599
600 600 def __exit__(self, *args, **kwars):
601 601 """discard all backups"""
602 602 for tmp in self._copies.values():
603 603 util.tryunlink(tmp)
604 604 util.tryrmdir(self._dst_dir)
605 605 self._copies = None
606 606 self._dst_dir = None
607 607
608 608
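A hypothetical usage example of ``TempCopyManager``, matching the docstring above (the paths are made up for illustration):

# Hypothetical usage: back up one volatile file, then resolve paths through
# __getitem__, which falls back to the original path when no backup exists.
# All backups are discarded when the context exits.
with TempCopyManager() as copies:
    copies(b'/repo/.hg/store/phaseroots')
    backed_up = copies[b'/repo/.hg/store/phaseroots']     # temporary copy
    untouched = copies[b'/repo/.hg/store/00changelog.i']  # returned as-is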
609 609 def _makemap(repo):
610 610 """make a (src -> vfs) map for the repo"""
611 611 vfsmap = {
612 612 _srcstore: repo.svfs,
613 613 _srccache: repo.cachevfs,
614 614 }
615 615 # we keep repo.vfs out of the map on purpose, there are too many dangers there
616 616 # (eg: .hg/hgrc)
617 617 assert repo.vfs not in vfsmap.values()
618 618
619 619 return vfsmap
620 620
621 621
622 622 def _emit2(repo, entries):
623 623 """actually emit the stream bundle"""
624 624 vfsmap = _makemap(repo)
625 625 # we keep repo.vfs out of the map on purpose, there are too many dangers there
626 626 # (eg: .hg/hgrc),
627 627 #
628 628 # this assert is duplicated (from _makemap) as an author might think this is
629 629 # fine, while it really is not.
630 630 if repo.vfs in vfsmap.values():
631 631 raise error.ProgrammingError(
632 632 b'repo.vfs must not be added to vfsmap for security reasons'
633 633 )
634 634
635 635 # resolve each vfs key into the corresponding vfs object
636 636 entries = [(vfs_key, vfsmap[vfs_key], e) for (vfs_key, e) in entries]
637 637
638 max_linkrev = len(repo)
638 639 file_count = totalfilesize = 0
639 640 # record the expected size of every file
640 641 for k, vfs, e in entries:
641 642 for f in e.files():
642 643 file_count += 1
643 644 totalfilesize += f.file_size(vfs)
644 645
645 646 progress = repo.ui.makeprogress(
646 647 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
647 648 )
648 649 progress.update(0)
649 650 with TempCopyManager() as copy, progress:
650 651 # create a copy of volatile files
651 652 for k, vfs, e in entries:
652 653 for f in e.files():
653 654 if f.is_volatile:
654 655 copy(vfs.join(f.unencoded_path))
655 656 # the first yield releases the lock on the repository
656 657 yield file_count, totalfilesize
657 658 totalbytecount = 0
658 659
659 660 for src, vfs, e in entries:
660 for name, stream, size in e.get_streams(vfs, copies=copy):
661 entry_streams = e.get_streams(
662 repo=repo, vfs=vfs, copies=copy, max_changeset=max_linkrev
663 )
664 for name, stream, size in entry_streams:
661 665 yield src
662 666 yield util.uvarintencode(len(name))
663 667 yield util.uvarintencode(size)
664 668 yield name
665 669 bytecount = 0
666 670 for chunk in stream:
667 671 bytecount += len(chunk)
668 672 totalbytecount += len(chunk)
669 673 progress.update(totalbytecount)
670 674 yield chunk
671 675 if bytecount != size:
672 676 # Would most likely be caused by a race due to `hg
673 677 # strip` or a revlog split
674 678 msg = _(
675 679 b'clone could only read %d bytes from %s, but '
676 680 b'expected %d bytes'
677 681 )
678 682 raise error.Abort(msg % (bytecount, name, size))
679 683
680 684
681 685 def _test_sync_point_walk_1(repo):
682 686 """a function for synchronisation during tests"""
683 687
684 688
685 689 def _test_sync_point_walk_2(repo):
686 690 """a function for synchronisation during tests"""
687 691
688 692
689 693 def _entries_walk(repo, includes, excludes, includeobsmarkers):
690 694 """emit a seris of files information useful to clone a repo
691 695
692 696 return (vfs-key, entry) iterator
693 697
694 698 Where `entry` is StoreEntry. (used even for cache entries)
695 699 """
696 700 assert repo._currentlock(repo._lockref) is not None
697 701
698 702 matcher = None
699 703 if includes or excludes:
700 704 matcher = narrowspec.match(repo.root, includes, excludes)
701 705
702 706 phase = not repo.publishing()
703 707 entries = _walkstreamfiles(
704 708 repo,
705 709 matcher,
706 710 phase=phase,
707 711 obsolescence=includeobsmarkers,
708 712 )
709 713 for entry in entries:
710 714 yield (_srcstore, entry)
711 715
712 716 for name in cacheutil.cachetocopy(repo):
713 717 if repo.cachevfs.exists(name):
714 718 # not really a StoreEntry, but close enough
715 719 entry = store.SimpleStoreEntry(
716 720 entry_path=name,
717 721 is_volatile=True,
718 722 )
719 723 yield (_srccache, entry)
720 724
721 725
722 726 def generatev2(repo, includes, excludes, includeobsmarkers):
723 727 """Emit content for version 2 of a streaming clone.
724 728
725 729 the data stream consists of the following entries:
726 730 1) A char representing the file destination (eg: store or cache)
727 731 2) A varint containing the length of the filename
728 732 3) A varint containing the length of file data
729 733 4) N bytes containing the filename (the internal, store-agnostic form)
730 734 5) N bytes containing the file data
731 735
732 736 Returns a 3-tuple of (file count, file size, data iterator).
733 737 """
734 738
735 739 with repo.lock():
736 740
737 741 repo.ui.debug(b'scanning\n')
738 742
739 743 entries = _entries_walk(
740 744 repo,
741 745 includes=includes,
742 746 excludes=excludes,
743 747 includeobsmarkers=includeobsmarkers,
744 748 )
745 749
746 750 chunks = _emit2(repo, entries)
747 751 first = next(chunks)
748 752 file_count, total_file_size = first
749 753 _test_sync_point_walk_1(repo)
750 754 _test_sync_point_walk_2(repo)
751 755
752 756 return file_count, total_file_size, chunks
753 757
754 758
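The five-part entry layout listed in the ``generatev2()`` docstring can be decoded as in the condensed, hypothetical sketch below; ``consumev2()`` further down is the real consumer and additionally routes each file to the right vfs.

# Hypothetical sketch: decode one v2 stream entry.
def _read_one_v2_entry(fp):
    src = util.readexactly(fp, 1)            # 1) destination ('s' or 'c')
    namelen = util.uvarintdecodestream(fp)   # 2) filename length
    datalen = util.uvarintdecodestream(fp)   # 3) file data length
    name = util.readexactly(fp, namelen)     # 4) filename
    data = util.readexactly(fp, datalen)     # 5) file data
    return src, name, data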
755 759 def generatev3(repo, includes, excludes, includeobsmarkers):
756 760 return generatev2(repo, includes, excludes, includeobsmarkers)
757 761
758 762
759 763 @contextlib.contextmanager
760 764 def nested(*ctxs):
761 765 this = ctxs[0]
762 766 rest = ctxs[1:]
763 767 with this:
764 768 if rest:
765 769 with nested(*rest):
766 770 yield
767 771 else:
768 772 yield
769 773
770 774
771 775 def consumev2(repo, fp, filecount, filesize):
772 776 """Apply the contents from a version 2 streaming clone.
773 777
774 778 Data is read from an object that only needs to provide a ``read(size)``
775 779 method.
776 780 """
777 781 with repo.lock():
778 782 repo.ui.status(
779 783 _(b'%d files to transfer, %s of data\n')
780 784 % (filecount, util.bytecount(filesize))
781 785 )
782 786
783 787 start = util.timer()
784 788 progress = repo.ui.makeprogress(
785 789 _(b'clone'), total=filesize, unit=_(b'bytes')
786 790 )
787 791 progress.update(0)
788 792
789 793 vfsmap = _makemap(repo)
790 794 # we keep repo.vfs out of the map on purpose, there are too many dangers
791 795 # there (eg: .hg/hgrc),
792 796 #
793 797 # this assert is duplicated (from _makemap) as an author might think this
794 798 # is fine, while it really is not.
795 799 if repo.vfs in vfsmap.values():
796 800 raise error.ProgrammingError(
797 801 b'repo.vfs must not be added to vfsmap for security reasons'
798 802 )
799 803
800 804 with repo.transaction(b'clone'):
801 805 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
802 806 with nested(*ctxs):
803 807 for i in range(filecount):
804 808 src = util.readexactly(fp, 1)
805 809 vfs = vfsmap[src]
806 810 namelen = util.uvarintdecodestream(fp)
807 811 datalen = util.uvarintdecodestream(fp)
808 812
809 813 name = util.readexactly(fp, namelen)
810 814
811 815 if repo.ui.debugflag:
812 816 repo.ui.debug(
813 817 b'adding [%s] %s (%s)\n'
814 818 % (src, name, util.bytecount(datalen))
815 819 )
816 820
817 821 with vfs(name, b'w') as ofp:
818 822 for chunk in util.filechunkiter(fp, limit=datalen):
819 823 progress.increment(step=len(chunk))
820 824 ofp.write(chunk)
821 825
822 826 # force @filecache properties to be reloaded from
823 827 # streamclone-ed file at next access
824 828 repo.invalidate(clearfilecache=True)
825 829
826 830 elapsed = util.timer() - start
827 831 if elapsed <= 0:
828 832 elapsed = 0.001
829 833 repo.ui.status(
830 834 _(b'transferred %s in %.1f seconds (%s/sec)\n')
831 835 % (
832 836 util.bytecount(progress.pos),
833 837 elapsed,
834 838 util.bytecount(progress.pos / elapsed),
835 839 )
836 840 )
837 841 progress.complete()
838 842
839 843
840 844 def applybundlev2(repo, fp, filecount, filesize, requirements):
841 845 from . import localrepo
842 846
843 847 missingreqs = [r for r in requirements if r not in repo.supported]
844 848 if missingreqs:
845 849 raise error.Abort(
846 850 _(b'unable to apply stream clone: unsupported format: %s')
847 851 % b', '.join(sorted(missingreqs))
848 852 )
849 853
850 854 consumev2(repo, fp, filecount, filesize)
851 855
852 856 repo.requirements = new_stream_clone_requirements(
853 857 repo.requirements,
854 858 requirements,
855 859 )
856 860 repo.svfs.options = localrepo.resolvestorevfsoptions(
857 861 repo.ui, repo.requirements, repo.features
858 862 )
859 863 scmutil.writereporequirements(repo)
860 864 nodemap.post_stream_cleanup(repo)
861 865
862 866
863 867 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
864 868 hardlink = [True]
865 869
866 870 def copy_used():
867 871 hardlink[0] = False
868 872 progress.topic = _(b'copying')
869 873
870 874 for k, path in entries:
871 875 src_vfs = src_vfs_map[k]
872 876 dst_vfs = dst_vfs_map[k]
873 877 src_path = src_vfs.join(path)
874 878 dst_path = dst_vfs.join(path)
875 879 # We cannot use dirname and makedirs of dst_vfs here because the store
876 880 # encoding confuses them. See issue 6581 for details.
877 881 dirname = os.path.dirname(dst_path)
878 882 if not os.path.exists(dirname):
879 883 util.makedirs(dirname)
880 884 dst_vfs.register_file(path)
881 885 # XXX we could use the #nb_bytes argument.
882 886 util.copyfile(
883 887 src_path,
884 888 dst_path,
885 889 hardlink=hardlink[0],
886 890 no_hardlink_cb=copy_used,
887 891 check_fs_hardlink=False,
888 892 )
889 893 progress.increment()
890 894 return hardlink[0]
891 895
892 896
893 897 def local_copy(src_repo, dest_repo):
894 898 """copy all content from one local repository to another
895 899
896 900 This is useful for local clone"""
897 901 src_store_requirements = {
898 902 r
899 903 for r in src_repo.requirements
900 904 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
901 905 }
902 906 dest_store_requirements = {
903 907 r
904 908 for r in dest_repo.requirements
905 909 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
906 910 }
907 911 assert src_store_requirements == dest_store_requirements
908 912
909 913 with dest_repo.lock():
910 914 with src_repo.lock():
911 915
912 916 # bookmarks are not integrated into the streaming as they might use
913 917 # `repo.vfs`, and there is too much sensitive data accessible
914 918 # through `repo.vfs` to expose it to streaming clone.
915 919 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
916 920 srcbookmarks = src_book_vfs.join(b'bookmarks')
917 921 bm_count = 0
918 922 if os.path.exists(srcbookmarks):
919 923 bm_count = 1
920 924
921 925 entries = _entries_walk(
922 926 src_repo,
923 927 includes=None,
924 928 excludes=None,
925 929 includeobsmarkers=True,
926 930 )
927 931 entries = list(entries)
928 932 src_vfs_map = _makemap(src_repo)
929 933 dest_vfs_map = _makemap(dest_repo)
930 934 total_files = sum(len(e[1].files()) for e in entries) + bm_count
931 935 progress = src_repo.ui.makeprogress(
932 936 topic=_(b'linking'),
933 937 total=total_files,
934 938 unit=_(b'files'),
935 939 )
936 940 # copy files
937 941 #
938 942 # We could copy the full file while the source repository is locked
939 943 # and the other one without the lock. However, in the linking case,
940 944 # this would also require checks that nobody is appending any data
941 945 # to the files while we do the clone, so this is not done yet. We
942 946 # could do this blindly when copying files.
943 947 files = [
944 948 (vfs_key, f.unencoded_path)
945 949 for vfs_key, e in entries
946 950 for f in e.files()
947 951 ]
948 952 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
949 953
950 954 # copy bookmarks over
951 955 if bm_count:
952 956 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
953 957 dstbookmarks = dst_book_vfs.join(b'bookmarks')
954 958 util.copyfile(srcbookmarks, dstbookmarks)
955 959 progress.complete()
956 960 if hardlink:
957 961 msg = b'linked %d files\n'
958 962 else:
959 963 msg = b'copied %d files\n'
960 964 src_repo.ui.debug(msg % total_files)
961 965
962 966 with dest_repo.transaction(b"localclone") as tr:
963 967 dest_repo.store.write(tr)
964 968
965 969 # clean up transaction files as they do not make sense
966 970 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
@@ -1,150 +1,150 b''
1 1 Test stream cloning while a revlog split happens
2 2 ------------------------------------------------
3 3
4 4 #testcases stream-bundle2-v2 stream-bundle2-v3
5 5
6 6 #if stream-bundle2-v3
7 7 $ cat << EOF >> $HGRCPATH
8 8 > [experimental]
9 9 > stream-v3 = yes
10 10 > EOF
11 11 #endif
12 12
13 13 setup a repository for tests
14 14 ----------------------------
15 15
16 16 $ cat >> $HGRCPATH << EOF
17 17 > [format]
18 18 > # skip compression to make it easy to trigger a split
19 19 > revlog-compression=none
20 20 > EOF
21 21
22 22 $ hg init server
23 23 $ cd server
24 24 $ file="some-file"
25 25 $ printf '%20d' '1' > $file
26 26 $ hg commit -Aqma
27 27 $ printf '%1024d' '1' > $file
28 28 $ hg commit -Aqmb
29 29 $ printf '%20d' '1' > $file
30 30 $ hg commit -Aqmc
31 31
32 32 check the revlog is inline
33 33
34 34 $ f -s .hg/store/data/some-file*
35 35 .hg/store/data/some-file.i: size=1259
36 36 $ hg debug-revlog-index some-file
37 37 rev linkrev nodeid p1-nodeid p2-nodeid
38 38 0 0 ed70cecbc103 000000000000 000000000000
39 39 1 1 7241018db64c ed70cecbc103 000000000000
40 40 2 2 fa1120531cc1 7241018db64c 000000000000
41 41 $ cd ..
42 42
43 43 setup synchronisation file
44 44
45 45 $ HG_TEST_STREAM_WALKED_FILE_1="$TESTTMP/sync_file_walked_1"
46 46 $ export HG_TEST_STREAM_WALKED_FILE_1
47 47 $ HG_TEST_STREAM_WALKED_FILE_2="$TESTTMP/sync_file_walked_2"
48 48 $ export HG_TEST_STREAM_WALKED_FILE_2
49 49 $ HG_TEST_STREAM_WALKED_FILE_3="$TESTTMP/sync_file_walked_3"
50 50 $ export HG_TEST_STREAM_WALKED_FILE_3
51 51
52 52
53 53 Test stream-clone raced by a revlog-split
54 54 =========================================
55 55
56 56 Test stream-clone where the file is split right after the lock section is done
57 57
58 58 Start the server
59 59
60 60 $ hg serve -R server \
61 61 > -p $HGPORT1 -d --error errors.log --pid-file=hg.pid \
62 62 > --config extensions.stream_steps="$RUNTESTDIR/testlib/ext-stream-clone-steps.py"
63 63 $ cat hg.pid >> $DAEMON_PIDS
64 64
65 65 Start a client doing a streaming clone
66 66
67 67 $ (hg clone -q --stream -U http://localhost:$HGPORT1 clone-while-split > client.log 2>&1; touch "$HG_TEST_STREAM_WALKED_FILE_3") &
68 68
69 69 Wait for the server to be done collecting data
70 70
71 71 $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_1
72 72
73 73 trigger a split
74 74
75 75 $ dd if=/dev/zero of=server/$file bs=1k count=128 > /dev/null 2>&1
76 76 $ hg -R server ci -m "triggering a split" --config ui.timeout.warn=-1
77 77
78 78 unlock the stream generation
79 79
80 80 $ touch $HG_TEST_STREAM_WALKED_FILE_2
81 81
82 82 wait for the client to be done cloning.
83 83
84 84 $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_3
85 85
86 86 Check everything is fine
87 87
88 88 $ cat client.log
89 remote: abort: unexpected error: clone could only read 256 bytes from data/some-file.i, but expected 1259 bytes (known-bad-output !)
89 remote: abort: unexpected error: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !)
90 90 abort: pull failed on remote (known-bad-output !)
91 91 $ tail -2 errors.log
92 mercurial.error.Abort: clone could only read 256 bytes from data/some-file.i, but expected 1259 bytes (known-bad-output !)
92 mercurial.error.Abort: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !)
93 93 (known-bad-output !)
94 94 $ hg -R clone-while-split verify
95 95 checking changesets (missing-correct-output !)
96 96 checking manifests (missing-correct-output !)
97 97 crosschecking files in changesets and manifests (missing-correct-output !)
98 98 checking files (missing-correct-output !)
99 99 checking dirstate (missing-correct-output !)
100 100 checked 3 changesets with 3 changes to 1 files (missing-correct-output !)
101 101 abort: repository clone-while-split not found (known-bad-output !)
102 102 [255]
103 103 $ hg -R clone-while-split tip
104 104 changeset: 2:dbd9854c38a6 (missing-correct-output !)
105 105 tag: tip (missing-correct-output !)
106 106 user: test (missing-correct-output !)
107 107 date: Thu Jan 01 00:00:00 1970 +0000 (missing-correct-output !)
108 108 summary: c (missing-correct-output !)
109 109 (missing-correct-output !)
110 110 abort: repository clone-while-split not found (known-bad-output !)
111 111 [255]
112 112 $ hg -R clone-while-split debug-revlog-index some-file
113 113 rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !)
114 114 0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !)
115 115 1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !)
116 116 2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !)
117 117 abort: repository clone-while-split not found (known-bad-output !)
118 118 [255]
119 119
120 120 subsequent pull works
121 121
122 122 $ hg -R clone-while-split pull
123 123 pulling from http://localhost:$HGPORT1/ (missing-correct-output !)
124 124 searching for changes (missing-correct-output !)
125 125 adding changesets (missing-correct-output !)
126 126 adding manifests (missing-correct-output !)
127 127 adding file changes (missing-correct-output !)
128 128 added 1 changesets with 1 changes to 1 files (missing-correct-output !)
129 129 new changesets df05c6cb1406 (missing-correct-output !)
130 130 (run 'hg update' to get a working copy) (missing-correct-output !)
131 131 abort: repository clone-while-split not found (known-bad-output !)
132 132 [255]
133 133
134 134 $ hg -R clone-while-split debug-revlog-index some-file
135 135 rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !)
136 136 0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !)
137 137 1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !)
138 138 2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !)
139 139 3 3 a631378adaa3 fa1120531cc1 000000000000 (missing-correct-output !)
140 140 abort: repository clone-while-split not found (known-bad-output !)
141 141 [255]
142 142 $ hg -R clone-while-split verify
143 143 checking changesets (missing-correct-output !)
144 144 checking manifests (missing-correct-output !)
145 145 crosschecking files in changesets and manifests (missing-correct-output !)
146 146 checking files (missing-correct-output !)
147 147 checking dirstate (missing-correct-output !)
148 148 checked 4 changesets with 4 changes to 1 files (missing-correct-output !)
149 149 abort: repository clone-while-split not found (known-bad-output !)
150 150 [255]