revlog: replace the old `revlog_kind` approach with the new `target` one...
marmoute -
r47839:64cd1496 default
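The substance of this change: `sidedata_helpers[1]` (the second element of the tuple returned by `get_sidedata_helpers()`) is now keyed by the revlog "target" kind constants from `mercurial.revlogutils.constants` instead of ad-hoc byte strings. A condensed before/after sketch of the checks touched by this diff (all names appear verbatim in the hunk below):

    from mercurial.revlogutils import constants as revlog_constants

    # before: keyed by plain byte strings
    if sidedata_helpers and b'manifest' in sidedata_helpers[1]:
        ...

    # after: keyed by the revlog kind ("target") constants
    if sidedata_helpers:
        if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:
            ...

The same substitution is made for `KIND_FILELOG` and `KIND_CHANGELOG` in the filelog and changelog branches further down.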
@@ -1,1936 +1,1939 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35 from .revlogutils import sidedata as sidedatamod
36 from .revlogutils import constants as revlog_constants
36 37
37 38 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
38 39 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
39 40 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
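# Editor's note (illustration, not part of the file): the fixed-size headers
# above unpack to 80, 100 and 102 bytes respectively; cg2 adds the delta base
# node to the cg1 layout, and cg3 (reused by cg4) appends a 2-byte big-endian
# flags field:
#
#   struct.Struct(b"20s20s20s20s").size        # 80:  node, p1, p2, linknode
#   struct.Struct(b"20s20s20s20s20s").size     # 100: + delta base node
#   struct.Struct(b">20s20s20s20s20sH").size   # 102: + flags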
40 41
41 42 LFS_REQUIREMENT = b'lfs'
42 43
43 44 readexactly = util.readexactly
44 45
45 46
46 47 def getchunk(stream):
47 48 """return the next chunk from stream as a string"""
48 49 d = readexactly(stream, 4)
49 50 l = struct.unpack(b">l", d)[0]
50 51 if l <= 4:
51 52 if l:
52 53 raise error.Abort(_(b"invalid chunk length %d") % l)
53 54 return b""
54 55 return readexactly(stream, l - 4)
55 56
56 57
57 58 def chunkheader(length):
58 59 """return a changegroup chunk header (string)"""
59 60 return struct.pack(b">l", length + 4)
60 61
61 62
62 63 def closechunk():
63 64 """return a changegroup chunk header (string) for a zero-length chunk"""
64 65 return struct.pack(b">l", 0)
65 66
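# Editor's note (illustration, not part of the file): the framing helpers
# above use a big-endian 4-byte length that counts itself, so a chunk's
# payload is `length - 4` bytes and a bare zero length closes a group:
#
#   chunkheader(10)   # == struct.pack(b">l", 14) == b'\x00\x00\x00\x0e'
#   closechunk()      # == struct.pack(b">l", 0)  == b'\x00\x00\x00\x00'
#
# getchunk() above rejects lengths 1-4 as invalid when reading the stream back.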
66 67
67 68 def _fileheader(path):
68 69 """Obtain a changegroup chunk header for a named path."""
69 70 return chunkheader(len(path)) + path
70 71
71 72
72 73 def writechunks(ui, chunks, filename, vfs=None):
73 74 """Write chunks to a file and return its filename.
74 75
75 76 The stream is assumed to be a bundle file.
76 77 Existing files will not be overwritten.
77 78 If no filename is specified, a temporary file is created.
78 79 """
79 80 fh = None
80 81 cleanup = None
81 82 try:
82 83 if filename:
83 84 if vfs:
84 85 fh = vfs.open(filename, b"wb")
85 86 else:
86 87 # Increase default buffer size because default is usually
87 88 # small (4k is common on Linux).
88 89 fh = open(filename, b"wb", 131072)
89 90 else:
90 91 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
91 92 fh = os.fdopen(fd, "wb")
92 93 cleanup = filename
93 94 for c in chunks:
94 95 fh.write(c)
95 96 cleanup = None
96 97 return filename
97 98 finally:
98 99 if fh is not None:
99 100 fh.close()
100 101 if cleanup is not None:
101 102 if filename and vfs:
102 103 vfs.unlink(cleanup)
103 104 else:
104 105 os.unlink(cleanup)
105 106
106 107
107 108 class cg1unpacker(object):
108 109 """Unpacker for cg1 changegroup streams.
109 110
110 111 A changegroup unpacker handles the framing of the revision data in
111 112 the wire format. Most consumers will want to use the apply()
112 113 method to add the changes from the changegroup to a repository.
113 114
114 115 If you're forwarding a changegroup unmodified to another consumer,
115 116 use getchunks(), which returns an iterator of changegroup
116 117 chunks. This is mostly useful for cases where you need to know the
117 118 data stream has ended by observing the end of the changegroup.
118 119
119 120 deltachunk() is useful only if you're applying delta data. Most
120 121 consumers should prefer apply() instead.
121 122
122 123 A few other public methods exist. Those are used only for
123 124 bundlerepo and some debug commands - their use is discouraged.
124 125 """
125 126
126 127 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
127 128 deltaheadersize = deltaheader.size
128 129 version = b'01'
129 130 _grouplistcount = 1 # One list of files after the manifests
130 131
131 132 def __init__(self, fh, alg, extras=None):
132 133 if alg is None:
133 134 alg = b'UN'
134 135 if alg not in util.compengines.supportedbundletypes:
135 136 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
136 137 if alg == b'BZ':
137 138 alg = b'_truncatedBZ'
138 139
139 140 compengine = util.compengines.forbundletype(alg)
140 141 self._stream = compengine.decompressorreader(fh)
141 142 self._type = alg
142 143 self.extras = extras or {}
143 144 self.callback = None
144 145
145 146 # These methods (compressed, read, seek, tell) all appear to only
146 147 # be used by bundlerepo, but it's a little hard to tell.
147 148 def compressed(self):
148 149 return self._type is not None and self._type != b'UN'
149 150
150 151 def read(self, l):
151 152 return self._stream.read(l)
152 153
153 154 def seek(self, pos):
154 155 return self._stream.seek(pos)
155 156
156 157 def tell(self):
157 158 return self._stream.tell()
158 159
159 160 def close(self):
160 161 return self._stream.close()
161 162
162 163 def _chunklength(self):
163 164 d = readexactly(self._stream, 4)
164 165 l = struct.unpack(b">l", d)[0]
165 166 if l <= 4:
166 167 if l:
167 168 raise error.Abort(_(b"invalid chunk length %d") % l)
168 169 return 0
169 170 if self.callback:
170 171 self.callback()
171 172 return l - 4
172 173
173 174 def changelogheader(self):
174 175 """v10 does not have a changelog header chunk"""
175 176 return {}
176 177
177 178 def manifestheader(self):
178 179 """v10 does not have a manifest header chunk"""
179 180 return {}
180 181
181 182 def filelogheader(self):
182 183 """return the header of the filelogs chunk, v10 only has the filename"""
183 184 l = self._chunklength()
184 185 if not l:
185 186 return {}
186 187 fname = readexactly(self._stream, l)
187 188 return {b'filename': fname}
188 189
189 190 def _deltaheader(self, headertuple, prevnode):
190 191 node, p1, p2, cs = headertuple
191 192 if prevnode is None:
192 193 deltabase = p1
193 194 else:
194 195 deltabase = prevnode
195 196 flags = 0
196 197 return node, p1, p2, deltabase, cs, flags
197 198
198 199 def deltachunk(self, prevnode):
199 200 l = self._chunklength()
200 201 if not l:
201 202 return {}
202 203 headerdata = readexactly(self._stream, self.deltaheadersize)
203 204 header = self.deltaheader.unpack(headerdata)
204 205 delta = readexactly(self._stream, l - self.deltaheadersize)
205 206 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
206 207 # cg4 forward-compat
207 208 sidedata = {}
208 209 return (node, p1, p2, cs, deltabase, delta, flags, sidedata)
209 210
210 211 def getchunks(self):
211 212 """returns all the chunks contained in the bundle
212 213
213 214 Used when you need to forward the binary stream to a file or another
214 215 network API. To do so, it parses the changegroup data, otherwise it would
215 216 block in the case of sshrepo because it doesn't know the end of the stream.
216 217 """
217 218 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
218 219 # and a list of filelogs. For changegroup 3, we expect 4 parts:
219 220 # changelog, manifestlog, a list of tree manifestlogs, and a list of
220 221 # filelogs.
221 222 #
222 223 # Changelog and manifestlog parts are terminated with empty chunks. The
223 224 # tree and file parts are a list of entry sections. Each entry section
224 225 # is a series of chunks terminating in an empty chunk. The list of these
225 226 # entry sections is terminated in yet another empty chunk, so we know
226 227 # we've reached the end of the tree/file list when we reach an empty
227 228 # chunk that was preceded by no non-empty chunks.
228 229
229 230 parts = 0
230 231 while parts < 2 + self._grouplistcount:
231 232 noentries = True
232 233 while True:
233 234 chunk = getchunk(self)
234 235 if not chunk:
235 236 # The first two empty chunks represent the end of the
236 237 # changelog and the manifestlog portions. The remaining
237 238 # empty chunks represent either A) the end of individual
238 239 # tree or file entries in the file list, or B) the end of
239 240 # the entire list. It's the end of the entire list if there
240 241 # were no entries (i.e. noentries is True).
241 242 if parts < 2:
242 243 parts += 1
243 244 elif noentries:
244 245 parts += 1
245 246 break
246 247 noentries = False
247 248 yield chunkheader(len(chunk))
248 249 pos = 0
249 250 while pos < len(chunk):
250 251 next = pos + 2 ** 20
251 252 yield chunk[pos:next]
252 253 pos = next
253 254 yield closechunk()
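# Editor's note (layout sketch, not part of the file): in terms of the framing
# above, the stream consumed by getchunks() for cg1/cg2 looks like
#
#   <changelog deltas> <close>
#   <manifest deltas>  <close>
#   [ <file header> <file deltas> <close> ]*  <close>
#
# cg3/cg4 insert one more such list (the tree manifest groups) between the
# manifests and the files, which is why their _grouplistcount is 2.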
254 255
255 256 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
256 257 self.callback = prog.increment
257 258 # no need to check for empty manifest group here:
258 259 # if the result of the merge of 1 and 2 is the same in 3 and 4,
259 260 # no new manifest will be created and the manifest group will
260 261 # be empty during the pull
261 262 self.manifestheader()
262 263 deltas = self.deltaiter()
263 264 storage = repo.manifestlog.getstorage(b'')
264 265 storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
265 266 prog.complete()
266 267 self.callback = None
267 268
268 269 def apply(
269 270 self,
270 271 repo,
271 272 tr,
272 273 srctype,
273 274 url,
274 275 targetphase=phases.draft,
275 276 expectedtotal=None,
276 277 sidedata_categories=None,
277 278 ):
278 279 """Add the changegroup returned by source.read() to this repo.
279 280 srctype is a string like 'push', 'pull', or 'unbundle'. url is
280 281 the URL of the repo where this changegroup is coming from.
281 282
282 283 Return an integer summarizing the change to this repo:
283 284 - nothing changed or no source: 0
284 285 - more heads than before: 1+added heads (2..n)
285 286 - fewer heads than before: -1-removed heads (-2..-n)
286 287 - number of heads stays the same: 1
287 288
288 289 `sidedata_categories` is an optional set of the sidedata categories
289 290 wanted by the remote.
290 291 """
291 292 repo = repo.unfiltered()
292 293
293 294 # Only useful if we're adding sidedata categories. If both peers have
294 295 # the same categories, then we simply don't do anything.
295 296 if self.version == b'04' and srctype == b'pull':
296 297 sidedata_helpers = get_sidedata_helpers(
297 298 repo,
298 299 sidedata_categories or set(),
299 300 pull=True,
300 301 )
301 302 else:
302 303 sidedata_helpers = None
303 304
304 305 def csmap(x):
305 306 repo.ui.debug(b"add changeset %s\n" % short(x))
306 307 return len(cl)
307 308
308 309 def revmap(x):
309 310 return cl.rev(x)
310 311
311 312 try:
312 313 # The transaction may already carry source information. In this
313 314 # case we use the top level data. We overwrite the argument
314 315 # because we need to use the top level value (if they exist)
315 316 # in this function.
316 317 srctype = tr.hookargs.setdefault(b'source', srctype)
317 318 tr.hookargs.setdefault(b'url', url)
318 319 repo.hook(
319 320 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
320 321 )
321 322
322 323 # write changelog data to temp files so concurrent readers
323 324 # will not see an inconsistent view
324 325 cl = repo.changelog
325 326 cl.delayupdate(tr)
326 327 oldheads = set(cl.heads())
327 328
328 329 trp = weakref.proxy(tr)
329 330 # pull off the changeset group
330 331 repo.ui.status(_(b"adding changesets\n"))
331 332 clstart = len(cl)
332 333 progress = repo.ui.makeprogress(
333 334 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
334 335 )
335 336 self.callback = progress.increment
336 337
337 338 efilesset = set()
338 339 duprevs = []
339 340
340 341 def ondupchangelog(cl, rev):
341 342 if rev < clstart:
342 343 duprevs.append(rev)
343 344
344 345 def onchangelog(cl, rev):
345 346 ctx = cl.changelogrevision(rev)
346 347 efilesset.update(ctx.files)
347 348 repo.register_changeset(rev, ctx)
348 349
349 350 self.changelogheader()
350 351 deltas = self.deltaiter()
351 352 if not cl.addgroup(
352 353 deltas,
353 354 csmap,
354 355 trp,
355 356 alwayscache=True,
356 357 addrevisioncb=onchangelog,
357 358 duplicaterevisioncb=ondupchangelog,
358 359 ):
359 360 repo.ui.develwarn(
360 361 b'applied empty changelog from changegroup',
361 362 config=b'warn-empty-changegroup',
362 363 )
363 364 efiles = len(efilesset)
364 365 clend = len(cl)
365 366 changesets = clend - clstart
366 367 progress.complete()
367 368 del deltas
368 369 # TODO Python 2.7 removal
369 370 # del efilesset
370 371 efilesset = None
371 372 self.callback = None
372 373
373 374 # Keep track of the (non-changelog) revlogs we've updated and their
374 375 # range of new revisions for sidedata rewrite.
375 376 # TODO do something more efficient than keeping the reference to
376 377 # the revlogs, especially memory-wise.
377 378 touched_manifests = {}
378 379 touched_filelogs = {}
379 380
380 381 # pull off the manifest group
381 382 repo.ui.status(_(b"adding manifests\n"))
382 383 # We know that we'll never have more manifests than we had
383 384 # changesets.
384 385 progress = repo.ui.makeprogress(
385 386 _(b'manifests'), unit=_(b'chunks'), total=changesets
386 387 )
387 388 on_manifest_rev = None
388 if sidedata_helpers and b'manifest' in sidedata_helpers[1]:
389 if sidedata_helpers:
390 if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:
389 391
390 392 def on_manifest_rev(manifest, rev):
391 393 range = touched_manifests.get(manifest)
392 394 if not range:
393 395 touched_manifests[manifest] = (rev, rev)
394 396 else:
395 397 assert rev == range[1] + 1
396 398 touched_manifests[manifest] = (range[0], rev)
397 399
398 400 self._unpackmanifests(
399 401 repo,
400 402 revmap,
401 403 trp,
402 404 progress,
403 405 addrevisioncb=on_manifest_rev,
404 406 )
405 407
406 408 needfiles = {}
407 409 if repo.ui.configbool(b'server', b'validate'):
408 410 cl = repo.changelog
409 411 ml = repo.manifestlog
410 412 # validate incoming csets have their manifests
411 413 for cset in pycompat.xrange(clstart, clend):
412 414 mfnode = cl.changelogrevision(cset).manifest
413 415 mfest = ml[mfnode].readdelta()
414 416 # store file nodes we must see
415 417 for f, n in pycompat.iteritems(mfest):
416 418 needfiles.setdefault(f, set()).add(n)
417 419
418 420 on_filelog_rev = None
419 if sidedata_helpers and b'filelog' in sidedata_helpers[1]:
421 if sidedata_helpers:
422 if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:
420 423
421 424 def on_filelog_rev(filelog, rev):
422 425 range = touched_filelogs.get(filelog)
423 426 if not range:
424 427 touched_filelogs[filelog] = (rev, rev)
425 428 else:
426 429 assert rev == range[1] + 1
427 430 touched_filelogs[filelog] = (range[0], rev)
428 431
429 432 # process the files
430 433 repo.ui.status(_(b"adding file changes\n"))
431 434 newrevs, newfiles = _addchangegroupfiles(
432 435 repo,
433 436 self,
434 437 revmap,
435 438 trp,
436 439 efiles,
437 440 needfiles,
438 441 addrevisioncb=on_filelog_rev,
439 442 )
440 443
441 444 if sidedata_helpers:
442 if b'changelog' in sidedata_helpers[1]:
445 if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
443 446 cl.rewrite_sidedata(sidedata_helpers, clstart, clend - 1)
444 447 for mf, (startrev, endrev) in touched_manifests.items():
445 448 mf.rewrite_sidedata(sidedata_helpers, startrev, endrev)
446 449 for fl, (startrev, endrev) in touched_filelogs.items():
447 450 fl.rewrite_sidedata(sidedata_helpers, startrev, endrev)
448 451
449 452 # making sure the value exists
450 453 tr.changes.setdefault(b'changegroup-count-changesets', 0)
451 454 tr.changes.setdefault(b'changegroup-count-revisions', 0)
452 455 tr.changes.setdefault(b'changegroup-count-files', 0)
453 456 tr.changes.setdefault(b'changegroup-count-heads', 0)
454 457
455 458 # some code uses bundle operations for internal purposes. They usually
456 459 # set `ui.quiet` to do this outside of user sight. Since the report
457 460 # of such operations now happens at the end of the transaction,
458 461 # ui.quiet has no direct effect on the output.
459 462 #
460 463 # To preserve this intent we use an inelegant hack: we fail to report
461 464 # the change if `quiet` is set. We should probably move to
462 465 # something better, but this is a good first step to allow the "end
463 466 # of transaction report" to pass tests.
464 467 if not repo.ui.quiet:
465 468 tr.changes[b'changegroup-count-changesets'] += changesets
466 469 tr.changes[b'changegroup-count-revisions'] += newrevs
467 470 tr.changes[b'changegroup-count-files'] += newfiles
468 471
469 472 deltaheads = 0
470 473 if oldheads:
471 474 heads = cl.heads()
472 475 deltaheads += len(heads) - len(oldheads)
473 476 for h in heads:
474 477 if h not in oldheads and repo[h].closesbranch():
475 478 deltaheads -= 1
476 479
477 480 # see previous comment about checking ui.quiet
478 481 if not repo.ui.quiet:
479 482 tr.changes[b'changegroup-count-heads'] += deltaheads
480 483 repo.invalidatevolatilesets()
481 484
482 485 if changesets > 0:
483 486 if b'node' not in tr.hookargs:
484 487 tr.hookargs[b'node'] = hex(cl.node(clstart))
485 488 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
486 489 hookargs = dict(tr.hookargs)
487 490 else:
488 491 hookargs = dict(tr.hookargs)
489 492 hookargs[b'node'] = hex(cl.node(clstart))
490 493 hookargs[b'node_last'] = hex(cl.node(clend - 1))
491 494 repo.hook(
492 495 b'pretxnchangegroup',
493 496 throw=True,
494 497 **pycompat.strkwargs(hookargs)
495 498 )
496 499
497 500 added = pycompat.xrange(clstart, clend)
498 501 phaseall = None
499 502 if srctype in (b'push', b'serve'):
500 503 # Old servers can not push the boundary themselves.
501 504 # New servers won't push the boundary if changeset already
502 505 # exists locally as secret
503 506 #
504 507 # We should not use `added` here but the list of all changes in
505 508 # the bundle
506 509 if repo.publishing():
507 510 targetphase = phaseall = phases.public
508 511 else:
509 512 # closer target phase computation
510 513
511 514 # Those changesets have been pushed from the
512 515 # outside, their phases are going to be pushed
513 516 # alongside. Therefore `targetphase` is
514 517 # ignored.
515 518 targetphase = phaseall = phases.draft
516 519 if added:
517 520 phases.registernew(repo, tr, targetphase, added)
518 521 if phaseall is not None:
519 522 if duprevs:
520 523 duprevs.extend(added)
521 524 else:
522 525 duprevs = added
523 526 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
524 527 duprevs = []
525 528
526 529 if changesets > 0:
527 530
528 531 def runhooks(unused_success):
529 532 # These hooks run when the lock releases, not when the
530 533 # transaction closes. So it's possible for the changelog
531 534 # to have changed since we last saw it.
532 535 if clstart >= len(repo):
533 536 return
534 537
535 538 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
536 539
537 540 for rev in added:
538 541 args = hookargs.copy()
539 542 args[b'node'] = hex(cl.node(rev))
540 543 del args[b'node_last']
541 544 repo.hook(b"incoming", **pycompat.strkwargs(args))
542 545
543 546 newheads = [h for h in repo.heads() if h not in oldheads]
544 547 repo.ui.log(
545 548 b"incoming",
546 549 b"%d incoming changes - new heads: %s\n",
547 550 len(added),
548 551 b', '.join([hex(c[:6]) for c in newheads]),
549 552 )
550 553
551 554 tr.addpostclose(
552 555 b'changegroup-runhooks-%020i' % clstart,
553 556 lambda tr: repo._afterlock(runhooks),
554 557 )
555 558 finally:
556 559 repo.ui.flush()
557 560 # never return 0 here:
558 561 if deltaheads < 0:
559 562 ret = deltaheads - 1
560 563 else:
561 564 ret = deltaheads + 1
562 565 return ret
563 566
564 567 def deltaiter(self):
565 568 """
566 569 returns an iterator of the deltas in this changegroup
567 570
568 571 Useful for passing to the underlying storage system to be stored.
569 572 """
570 573 chain = None
571 574 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
572 575 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata)
573 576 yield chunkdata
574 577 chain = chunkdata[0]
575 578
576 579
577 580 class cg2unpacker(cg1unpacker):
578 581 """Unpacker for cg2 streams.
579 582
580 583 cg2 streams add support for generaldelta, so the delta header
581 584 format is slightly different. All other features about the data
582 585 remain the same.
583 586 """
584 587
585 588 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
586 589 deltaheadersize = deltaheader.size
587 590 version = b'02'
588 591
589 592 def _deltaheader(self, headertuple, prevnode):
590 593 node, p1, p2, deltabase, cs = headertuple
591 594 flags = 0
592 595 return node, p1, p2, deltabase, cs, flags
593 596
594 597
595 598 class cg3unpacker(cg2unpacker):
596 599 """Unpacker for cg3 streams.
597 600
598 601 cg3 streams add support for exchanging treemanifests and revlog
599 602 flags. It adds the revlog flags to the delta header and an empty chunk
600 603 separating manifests and files.
601 604 """
602 605
603 606 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
604 607 deltaheadersize = deltaheader.size
605 608 version = b'03'
606 609 _grouplistcount = 2 # One list of manifests and one list of files
607 610
608 611 def _deltaheader(self, headertuple, prevnode):
609 612 node, p1, p2, deltabase, cs, flags = headertuple
610 613 return node, p1, p2, deltabase, cs, flags
611 614
612 615 def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
613 616 super(cg3unpacker, self)._unpackmanifests(
614 617 repo, revmap, trp, prog, addrevisioncb=addrevisioncb
615 618 )
616 619 for chunkdata in iter(self.filelogheader, {}):
617 620 # If we get here, there are directory manifests in the changegroup
618 621 d = chunkdata[b"filename"]
619 622 repo.ui.debug(b"adding %s revisions\n" % d)
620 623 deltas = self.deltaiter()
621 624 if not repo.manifestlog.getstorage(d).addgroup(
622 625 deltas, revmap, trp, addrevisioncb=addrevisioncb
623 626 ):
624 627 raise error.Abort(_(b"received dir revlog group is empty"))
625 628
626 629
627 630 class cg4unpacker(cg3unpacker):
628 631 """Unpacker for cg4 streams.
629 632
630 633 cg4 streams add support for exchanging sidedata.
631 634 """
632 635
633 636 version = b'04'
634 637
635 638 def deltachunk(self, prevnode):
636 639 res = super(cg4unpacker, self).deltachunk(prevnode)
637 640 if not res:
638 641 return res
639 642
640 643 (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res
641 644
642 645 sidedata_raw = getchunk(self._stream)
643 646 sidedata = {}
644 647 if len(sidedata_raw) > 0:
645 648 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
646 649
647 650 return node, p1, p2, cs, deltabase, delta, flags, sidedata
648 651
649 652
650 653 class headerlessfixup(object):
651 654 def __init__(self, fh, h):
652 655 self._h = h
653 656 self._fh = fh
654 657
655 658 def read(self, n):
656 659 if self._h:
657 660 d, self._h = self._h[:n], self._h[n:]
658 661 if len(d) < n:
659 662 d += readexactly(self._fh, n - len(d))
660 663 return d
661 664 return readexactly(self._fh, n)
662 665
663 666
664 667 def _revisiondeltatochunks(repo, delta, headerfn):
665 668 """Serialize a revisiondelta to changegroup chunks."""
666 669
667 670 # The captured revision delta may be encoded as a delta against
668 671 # a base revision or as a full revision. The changegroup format
669 672 # requires that everything on the wire be deltas. So for full
670 673 # revisions, we need to invent a header that says to rewrite
671 674 # data.
672 675
673 676 if delta.delta is not None:
674 677 prefix, data = b'', delta.delta
675 678 elif delta.basenode == repo.nullid:
676 679 data = delta.revision
677 680 prefix = mdiff.trivialdiffheader(len(data))
678 681 else:
679 682 data = delta.revision
680 683 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
681 684
682 685 meta = headerfn(delta)
683 686
684 687 yield chunkheader(len(meta) + len(prefix) + len(data))
685 688 yield meta
686 689 if prefix:
687 690 yield prefix
688 691 yield data
689 692
690 693 sidedata = delta.sidedata
691 694 if sidedata is not None:
692 695 # Need a separate chunk for sidedata to be able to differentiate
693 696 # "raw delta" length and sidedata length
694 697 yield chunkheader(len(sidedata))
695 698 yield sidedata
696 699
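# Editor's note (sketch, not part of the file): one revision emitted by
# _revisiondeltatochunks() is therefore laid out on the wire as
#
#   chunkheader(len(meta) + len(prefix) + len(data))
#   meta              # delta header built by headerfn (a _CHANGEGROUPV*
#                     # struct in the stock packers)
#   prefix + data     # the delta itself, or a full text wrapped in a
#                     # trivial/replace diff header
#   chunkheader(len(sidedata)) + sidedata     # cg4 only, separate chunk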
697 700
698 701 def _sortnodesellipsis(store, nodes, cl, lookup):
699 702 """Sort nodes for changegroup generation."""
700 703 # Ellipses serving mode.
701 704 #
702 705 # In a perfect world, we'd generate better ellipsis-ified graphs
703 706 # for non-changelog revlogs. In practice, we haven't started doing
704 707 # that yet, so the resulting DAGs for the manifestlog and filelogs
705 708 # are actually full of bogus parentage on all the ellipsis
706 709 # nodes. This has the side effect that, while the contents are
707 710 # correct, the individual DAGs might be completely out of whack in
708 711 # a case like 882681bc3166 and its ancestors (back about 10
709 712 # revisions or so) in the main hg repo.
710 713 #
711 714 # The one invariant we *know* holds is that the new (potentially
712 715 # bogus) DAG shape will be valid if we order the nodes in the
713 716 # order that they're introduced in dramatis personae by the
714 717 # changelog, so what we do is we sort the non-changelog histories
715 718 # by the order in which they are used by the changelog.
716 719 key = lambda n: cl.rev(lookup(n))
717 720 return sorted(nodes, key=key)
718 721
719 722
720 723 def _resolvenarrowrevisioninfo(
721 724 cl,
722 725 store,
723 726 ischangelog,
724 727 rev,
725 728 linkrev,
726 729 linknode,
727 730 clrevtolocalrev,
728 731 fullclnodes,
729 732 precomputedellipsis,
730 733 ):
731 734 linkparents = precomputedellipsis[linkrev]
732 735
733 736 def local(clrev):
734 737 """Turn a changelog revnum into a local revnum.
735 738
736 739 The ellipsis dag is stored as revnums on the changelog,
737 740 but when we're producing ellipsis entries for
738 741 non-changelog revlogs, we need to turn those numbers into
739 742 something local. This does that for us, and during the
740 743 changelog sending phase will also expand the stored
741 744 mappings as needed.
742 745 """
743 746 if clrev == nullrev:
744 747 return nullrev
745 748
746 749 if ischangelog:
747 750 return clrev
748 751
749 752 # Walk the ellipsis-ized changelog breadth-first looking for a
750 753 # change that has been linked from the current revlog.
751 754 #
752 755 # For a flat manifest revlog only a single step should be necessary
753 756 # as all relevant changelog entries are relevant to the flat
754 757 # manifest.
755 758 #
756 759 # For a filelog or tree manifest dirlog however not every changelog
757 760 # entry will have been relevant, so we need to skip some changelog
758 761 # nodes even after ellipsis-izing.
759 762 walk = [clrev]
760 763 while walk:
761 764 p = walk[0]
762 765 walk = walk[1:]
763 766 if p in clrevtolocalrev:
764 767 return clrevtolocalrev[p]
765 768 elif p in fullclnodes:
766 769 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
767 770 elif p in precomputedellipsis:
768 771 walk.extend(
769 772 [pp for pp in precomputedellipsis[p] if pp != nullrev]
770 773 )
771 774 else:
772 775 # In this case, we've got an ellipsis with parents
773 776 # outside the current bundle (likely an
774 777 # incremental pull). We "know" that we can use the
775 778 # value of this same revlog at whatever revision
776 779 # is pointed to by linknode. "Know" is in scare
777 780 # quotes because I haven't done enough examination
778 781 # of edge cases to convince myself this is really
779 782 # a fact - it works for all the (admittedly
780 783 # thorough) cases in our testsuite, but I would be
781 784 # somewhat unsurprised to find a case in the wild
782 785 # where this breaks down a bit. That said, I don't
783 786 # know if it would hurt anything.
784 787 for i in pycompat.xrange(rev, 0, -1):
785 788 if store.linkrev(i) == clrev:
786 789 return i
787 790 # We failed to resolve a parent for this node, so
788 791 # we crash the changegroup construction.
789 792 raise error.Abort(
790 793 b"unable to resolve parent while packing '%s' %r"
791 794 b' for changeset %r' % (store.indexfile, rev, clrev)
792 795 )
793 796
794 797 return nullrev
795 798
796 799 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
797 800 p1, p2 = nullrev, nullrev
798 801 elif len(linkparents) == 1:
799 802 (p1,) = sorted(local(p) for p in linkparents)
800 803 p2 = nullrev
801 804 else:
802 805 p1, p2 = sorted(local(p) for p in linkparents)
803 806
804 807 p1node, p2node = store.node(p1), store.node(p2)
805 808
806 809 return p1node, p2node, linknode
807 810
808 811
809 812 def deltagroup(
810 813 repo,
811 814 store,
812 815 nodes,
813 816 ischangelog,
814 817 lookup,
815 818 forcedeltaparentprev,
816 819 topic=None,
817 820 ellipses=False,
818 821 clrevtolocalrev=None,
819 822 fullclnodes=None,
820 823 precomputedellipsis=None,
821 824 sidedata_helpers=None,
822 825 ):
823 826 """Calculate deltas for a set of revisions.
824 827
825 828 Is a generator of ``revisiondelta`` instances.
826 829
827 830 If topic is not None, progress detail will be generated using this
828 831 topic name (e.g. changesets, manifests, etc).
829 832
830 833 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
831 834 """
832 835 if not nodes:
833 836 return
834 837
835 838 cl = repo.changelog
836 839
837 840 if ischangelog:
838 841 # `hg log` shows changesets in storage order. To preserve order
839 842 # across clones, send out changesets in storage order.
840 843 nodesorder = b'storage'
841 844 elif ellipses:
842 845 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
843 846 nodesorder = b'nodes'
844 847 else:
845 848 nodesorder = None
846 849
847 850 # Perform ellipses filtering and revision massaging. We do this before
848 851 # emitrevisions() because a) filtering out revisions creates less work
849 852 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
850 853 # assumptions about delta choices and we would possibly send a delta
851 854 # referencing a missing base revision.
852 855 #
853 856 # Also, calling lookup() has side-effects with regards to populating
854 857 # data structures. If we don't call lookup() for each node or if we call
855 858 # lookup() after the first pass through each node, things can break -
856 859 # possibly intermittently depending on the python hash seed! For that
857 860 # reason, we store a mapping of all linknodes during the initial node
858 861 # pass rather than use lookup() on the output side.
859 862 if ellipses:
860 863 filtered = []
861 864 adjustedparents = {}
862 865 linknodes = {}
863 866
864 867 for node in nodes:
865 868 rev = store.rev(node)
866 869 linknode = lookup(node)
867 870 linkrev = cl.rev(linknode)
868 871 clrevtolocalrev[linkrev] = rev
869 872
870 873 # If linknode is in fullclnodes, it means the corresponding
871 874 # changeset was a full changeset and is being sent unaltered.
872 875 if linknode in fullclnodes:
873 876 linknodes[node] = linknode
874 877
875 878 # If the corresponding changeset wasn't in the set computed
876 879 # as relevant to us, it should be dropped outright.
877 880 elif linkrev not in precomputedellipsis:
878 881 continue
879 882
880 883 else:
881 884 # We could probably do this later and avoid the dict
882 885 # holding state. But it likely doesn't matter.
883 886 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
884 887 cl,
885 888 store,
886 889 ischangelog,
887 890 rev,
888 891 linkrev,
889 892 linknode,
890 893 clrevtolocalrev,
891 894 fullclnodes,
892 895 precomputedellipsis,
893 896 )
894 897
895 898 adjustedparents[node] = (p1node, p2node)
896 899 linknodes[node] = linknode
897 900
898 901 filtered.append(node)
899 902
900 903 nodes = filtered
901 904
902 905 # We expect the first pass to be fast, so we only engage the progress
903 906 # meter for constructing the revision deltas.
904 907 progress = None
905 908 if topic is not None:
906 909 progress = repo.ui.makeprogress(
907 910 topic, unit=_(b'chunks'), total=len(nodes)
908 911 )
909 912
910 913 configtarget = repo.ui.config(b'devel', b'bundle.delta')
911 914 if configtarget not in (b'', b'p1', b'full'):
912 915 msg = _(b"""config "devel.bundle.delta" has unknown value: %s""")
913 916 repo.ui.warn(msg % configtarget)
914 917
915 918 deltamode = repository.CG_DELTAMODE_STD
916 919 if forcedeltaparentprev:
917 920 deltamode = repository.CG_DELTAMODE_PREV
918 921 elif configtarget == b'p1':
919 922 deltamode = repository.CG_DELTAMODE_P1
920 923 elif configtarget == b'full':
921 924 deltamode = repository.CG_DELTAMODE_FULL
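# Editor's note: `devel.bundle.delta`, read above, is a developer config knob;
# a hypothetical invocation such as
#   hg bundle --all --config devel.bundle.delta=full bundle.hg
# selects CG_DELTAMODE_FULL (full revisions instead of deltas), `p1` selects
# deltas against the first parent, and the empty default keeps the standard
# delta selection.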
922 925
923 926 revisions = store.emitrevisions(
924 927 nodes,
925 928 nodesorder=nodesorder,
926 929 revisiondata=True,
927 930 assumehaveparentrevisions=not ellipses,
928 931 deltamode=deltamode,
929 932 sidedata_helpers=sidedata_helpers,
930 933 )
931 934
932 935 for i, revision in enumerate(revisions):
933 936 if progress:
934 937 progress.update(i + 1)
935 938
936 939 if ellipses:
937 940 linknode = linknodes[revision.node]
938 941
939 942 if revision.node in adjustedparents:
940 943 p1node, p2node = adjustedparents[revision.node]
941 944 revision.p1node = p1node
942 945 revision.p2node = p2node
943 946 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
944 947
945 948 else:
946 949 linknode = lookup(revision.node)
947 950
948 951 revision.linknode = linknode
949 952 yield revision
950 953
951 954 if progress:
952 955 progress.complete()
953 956
954 957
955 958 class cgpacker(object):
956 959 def __init__(
957 960 self,
958 961 repo,
959 962 oldmatcher,
960 963 matcher,
961 964 version,
962 965 builddeltaheader,
963 966 manifestsend,
964 967 forcedeltaparentprev=False,
965 968 bundlecaps=None,
966 969 ellipses=False,
967 970 shallow=False,
968 971 ellipsisroots=None,
969 972 fullnodes=None,
970 973 remote_sidedata=None,
971 974 ):
972 975 """Given a source repo, construct a bundler.
973 976
974 977 oldmatcher is a matcher that matches on files the client already has.
975 978 These will not be included in the changegroup.
976 979
977 980 matcher is a matcher that matches on files to include in the
978 981 changegroup. Used to facilitate sparse changegroups.
979 982
980 983 forcedeltaparentprev indicates whether delta parents must be against
981 984 the previous revision in a delta group. This should only be used for
982 985 compatibility with changegroup version 1.
983 986
984 987 builddeltaheader is a callable that constructs the header for a group
985 988 delta.
986 989
987 990 manifestsend is a chunk to send after manifests have been fully emitted.
988 991
989 992 ellipses indicates whether ellipsis serving mode is enabled.
990 993
991 994 bundlecaps is optional and can be used to specify the set of
992 995 capabilities which can be used to build the bundle. While bundlecaps is
993 996 unused in core Mercurial, extensions rely on this feature to communicate
994 997 capabilities to customize the changegroup packer.
995 998
996 999 shallow indicates whether shallow data might be sent. The packer may
997 1000 need to pack file contents not introduced by the changes being packed.
998 1001
999 1002 fullnodes is the set of changelog nodes which should not be ellipsis
1000 1003 nodes. We store this rather than the set of nodes that should be
1001 1004 ellipsis because for very large histories we expect this to be
1002 1005 significantly smaller.
1003 1006
1004 1007 remote_sidedata is the set of sidedata categories wanted by the remote.
1005 1008 """
1006 1009 assert oldmatcher
1007 1010 assert matcher
1008 1011 self._oldmatcher = oldmatcher
1009 1012 self._matcher = matcher
1010 1013
1011 1014 self.version = version
1012 1015 self._forcedeltaparentprev = forcedeltaparentprev
1013 1016 self._builddeltaheader = builddeltaheader
1014 1017 self._manifestsend = manifestsend
1015 1018 self._ellipses = ellipses
1016 1019
1017 1020 # Set of capabilities we can use to build the bundle.
1018 1021 if bundlecaps is None:
1019 1022 bundlecaps = set()
1020 1023 self._bundlecaps = bundlecaps
1021 1024 if remote_sidedata is None:
1022 1025 remote_sidedata = set()
1023 1026 self._remote_sidedata = remote_sidedata
1024 1027 self._isshallow = shallow
1025 1028 self._fullclnodes = fullnodes
1026 1029
1027 1030 # Maps ellipsis revs to their roots at the changelog level.
1028 1031 self._precomputedellipsis = ellipsisroots
1029 1032
1030 1033 self._repo = repo
1031 1034
1032 1035 if self._repo.ui.verbose and not self._repo.ui.debugflag:
1033 1036 self._verbosenote = self._repo.ui.note
1034 1037 else:
1035 1038 self._verbosenote = lambda s: None
1036 1039
1037 1040 def generate(
1038 1041 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
1039 1042 ):
1040 1043 """Yield a sequence of changegroup byte chunks.
1041 1044 If changelog is False, changelog data won't be added to changegroup
1042 1045 """
1043 1046
1044 1047 repo = self._repo
1045 1048 cl = repo.changelog
1046 1049
1047 1050 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
1048 1051 size = 0
1049 1052
1050 1053 sidedata_helpers = None
1051 1054 if self.version == b'04':
1052 1055 remote_sidedata = self._remote_sidedata
1053 1056 if source == b'strip':
1054 1057 # We're our own remote when stripping, get the no-op helpers
1055 1058 # TODO a better approach would be for the strip bundle to
1056 1059 # correctly advertise its sidedata categories directly.
1057 1060 remote_sidedata = repo._wanted_sidedata
1058 1061 sidedata_helpers = get_sidedata_helpers(repo, remote_sidedata)
1059 1062
1060 1063 clstate, deltas = self._generatechangelog(
1061 1064 cl,
1062 1065 clnodes,
1063 1066 generate=changelog,
1064 1067 sidedata_helpers=sidedata_helpers,
1065 1068 )
1066 1069 for delta in deltas:
1067 1070 for chunk in _revisiondeltatochunks(
1068 1071 self._repo, delta, self._builddeltaheader
1069 1072 ):
1070 1073 size += len(chunk)
1071 1074 yield chunk
1072 1075
1073 1076 close = closechunk()
1074 1077 size += len(close)
1075 1078 yield closechunk()
1076 1079
1077 1080 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1078 1081
1079 1082 clrevorder = clstate[b'clrevorder']
1080 1083 manifests = clstate[b'manifests']
1081 1084 changedfiles = clstate[b'changedfiles']
1082 1085
1083 1086 # We need to make sure that the linkrev in the changegroup refers to
1084 1087 # the first changeset that introduced the manifest or file revision.
1085 1088 # The fastpath is usually safer than the slowpath, because the filelogs
1086 1089 # are walked in revlog order.
1087 1090 #
1088 1091 # When taking the slowpath when the manifest revlog uses generaldelta,
1089 1092 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1090 1093 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1091 1094 #
1092 1095 # When taking the fastpath, we are only vulnerable to reordering
1093 1096 # of the changelog itself. The changelog never uses generaldelta and is
1094 1097 # never reordered. To handle this case, we simply take the slowpath,
1095 1098 # which already has the 'clrevorder' logic. This was also fixed in
1096 1099 # cc0ff93d0c0c.
1097 1100
1098 1101 # Treemanifests don't work correctly with fastpathlinkrev
1099 1102 # either, because we don't discover which directory nodes to
1100 1103 # send along with files. This could probably be fixed.
1101 1104 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1102 1105
1103 1106 fnodes = {} # needed file nodes
1104 1107
1105 1108 size = 0
1106 1109 it = self.generatemanifests(
1107 1110 commonrevs,
1108 1111 clrevorder,
1109 1112 fastpathlinkrev,
1110 1113 manifests,
1111 1114 fnodes,
1112 1115 source,
1113 1116 clstate[b'clrevtomanifestrev'],
1114 1117 sidedata_helpers=sidedata_helpers,
1115 1118 )
1116 1119
1117 1120 for tree, deltas in it:
1118 1121 if tree:
1119 1122 assert self.version in (b'03', b'04')
1120 1123 chunk = _fileheader(tree)
1121 1124 size += len(chunk)
1122 1125 yield chunk
1123 1126
1124 1127 for delta in deltas:
1125 1128 chunks = _revisiondeltatochunks(
1126 1129 self._repo, delta, self._builddeltaheader
1127 1130 )
1128 1131 for chunk in chunks:
1129 1132 size += len(chunk)
1130 1133 yield chunk
1131 1134
1132 1135 close = closechunk()
1133 1136 size += len(close)
1134 1137 yield close
1135 1138
1136 1139 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1137 1140 yield self._manifestsend
1138 1141
1139 1142 mfdicts = None
1140 1143 if self._ellipses and self._isshallow:
1141 1144 mfdicts = [
1142 1145 (repo.manifestlog[n].read(), lr)
1143 1146 for (n, lr) in pycompat.iteritems(manifests)
1144 1147 ]
1145 1148
1146 1149 manifests.clear()
1147 1150 clrevs = {cl.rev(x) for x in clnodes}
1148 1151
1149 1152 it = self.generatefiles(
1150 1153 changedfiles,
1151 1154 commonrevs,
1152 1155 source,
1153 1156 mfdicts,
1154 1157 fastpathlinkrev,
1155 1158 fnodes,
1156 1159 clrevs,
1157 1160 sidedata_helpers=sidedata_helpers,
1158 1161 )
1159 1162
1160 1163 for path, deltas in it:
1161 1164 h = _fileheader(path)
1162 1165 size = len(h)
1163 1166 yield h
1164 1167
1165 1168 for delta in deltas:
1166 1169 chunks = _revisiondeltatochunks(
1167 1170 self._repo, delta, self._builddeltaheader
1168 1171 )
1169 1172 for chunk in chunks:
1170 1173 size += len(chunk)
1171 1174 yield chunk
1172 1175
1173 1176 close = closechunk()
1174 1177 size += len(close)
1175 1178 yield close
1176 1179
1177 1180 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1178 1181
1179 1182 yield closechunk()
1180 1183
1181 1184 if clnodes:
1182 1185 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1183 1186
1184 1187 def _generatechangelog(
1185 1188 self, cl, nodes, generate=True, sidedata_helpers=None
1186 1189 ):
1187 1190 """Generate data for changelog chunks.
1188 1191
1189 1192 Returns a 2-tuple of a dict containing state and an iterable of
1190 1193 byte chunks. The state will not be fully populated until the
1191 1194 chunk stream has been fully consumed.
1192 1195
1193 1196 if generate is False, the state will be fully populated and no chunk
1194 1197 stream will be yielded
1195 1198
1196 1199 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1197 1200 """
1198 1201 clrevorder = {}
1199 1202 manifests = {}
1200 1203 mfl = self._repo.manifestlog
1201 1204 changedfiles = set()
1202 1205 clrevtomanifestrev = {}
1203 1206
1204 1207 state = {
1205 1208 b'clrevorder': clrevorder,
1206 1209 b'manifests': manifests,
1207 1210 b'changedfiles': changedfiles,
1208 1211 b'clrevtomanifestrev': clrevtomanifestrev,
1209 1212 }
1210 1213
1211 1214 if not (generate or self._ellipses):
1212 1215 # sort the nodes in storage order
1213 1216 nodes = sorted(nodes, key=cl.rev)
1214 1217 for node in nodes:
1215 1218 c = cl.changelogrevision(node)
1216 1219 clrevorder[node] = len(clrevorder)
1217 1220 # record the first changeset introducing this manifest version
1218 1221 manifests.setdefault(c.manifest, node)
1219 1222 # Record a complete list of potentially-changed files in
1220 1223 # this manifest.
1221 1224 changedfiles.update(c.files)
1222 1225
1223 1226 return state, ()
1224 1227
1225 1228 # Callback for the changelog, used to collect changed files and
1226 1229 # manifest nodes.
1227 1230 # Returns the linkrev node (identity in the changelog case).
1228 1231 def lookupcl(x):
1229 1232 c = cl.changelogrevision(x)
1230 1233 clrevorder[x] = len(clrevorder)
1231 1234
1232 1235 if self._ellipses:
1233 1236 # Only update manifests if x is going to be sent. Otherwise we
1234 1237 # end up with bogus linkrevs specified for manifests and
1235 1238 # we skip some manifest nodes that we should otherwise
1236 1239 # have sent.
1237 1240 if (
1238 1241 x in self._fullclnodes
1239 1242 or cl.rev(x) in self._precomputedellipsis
1240 1243 ):
1241 1244
1242 1245 manifestnode = c.manifest
1243 1246 # Record the first changeset introducing this manifest
1244 1247 # version.
1245 1248 manifests.setdefault(manifestnode, x)
1246 1249 # Set this narrow-specific dict so we have the lowest
1247 1250 # manifest revnum to look up for this cl revnum. (Part of
1248 1251 # mapping changelog ellipsis parents to manifest ellipsis
1249 1252 # parents)
1250 1253 clrevtomanifestrev.setdefault(
1251 1254 cl.rev(x), mfl.rev(manifestnode)
1252 1255 )
1253 1256 # We can't trust the changed files list in the changeset if the
1254 1257 # client requested a shallow clone.
1255 1258 if self._isshallow:
1256 1259 changedfiles.update(mfl[c.manifest].read().keys())
1257 1260 else:
1258 1261 changedfiles.update(c.files)
1259 1262 else:
1260 1263 # record the first changeset introducing this manifest version
1261 1264 manifests.setdefault(c.manifest, x)
1262 1265 # Record a complete list of potentially-changed files in
1263 1266 # this manifest.
1264 1267 changedfiles.update(c.files)
1265 1268
1266 1269 return x
1267 1270
1268 1271 gen = deltagroup(
1269 1272 self._repo,
1270 1273 cl,
1271 1274 nodes,
1272 1275 True,
1273 1276 lookupcl,
1274 1277 self._forcedeltaparentprev,
1275 1278 ellipses=self._ellipses,
1276 1279 topic=_(b'changesets'),
1277 1280 clrevtolocalrev={},
1278 1281 fullclnodes=self._fullclnodes,
1279 1282 precomputedellipsis=self._precomputedellipsis,
1280 1283 sidedata_helpers=sidedata_helpers,
1281 1284 )
1282 1285
1283 1286 return state, gen
1284 1287
1285 1288 def generatemanifests(
1286 1289 self,
1287 1290 commonrevs,
1288 1291 clrevorder,
1289 1292 fastpathlinkrev,
1290 1293 manifests,
1291 1294 fnodes,
1292 1295 source,
1293 1296 clrevtolocalrev,
1294 1297 sidedata_helpers=None,
1295 1298 ):
1296 1299 """Returns an iterator of changegroup chunks containing manifests.
1297 1300
1298 1301 `source` is unused here, but is used by extensions like remotefilelog to
1299 1302 change what is sent based in pulls vs pushes, etc.
1300 1303
1301 1304 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1302 1305 """
1303 1306 repo = self._repo
1304 1307 mfl = repo.manifestlog
1305 1308 tmfnodes = {b'': manifests}
1306 1309
1307 1310 # Callback for the manifest, used to collect linkrevs for filelog
1308 1311 # revisions.
1309 1312 # Returns the linkrev node (collected in lookupcl).
1310 1313 def makelookupmflinknode(tree, nodes):
1311 1314 if fastpathlinkrev:
1312 1315 assert not tree
1313 1316
1314 1317 # pytype: disable=unsupported-operands
1315 1318 return manifests.__getitem__
1316 1319 # pytype: enable=unsupported-operands
1317 1320
1318 1321 def lookupmflinknode(x):
1319 1322 """Callback for looking up the linknode for manifests.
1320 1323
1321 1324 Returns the linkrev node for the specified manifest.
1322 1325
1323 1326 SIDE EFFECT:
1324 1327
1325 1328 1) fclnodes gets populated with the list of relevant
1326 1329 file nodes if we're not using fastpathlinkrev
1327 1330 2) When treemanifests are in use, collects treemanifest nodes
1328 1331 to send
1329 1332
1330 1333 Note that this means manifests must be completely sent to
1331 1334 the client before you can trust the list of files and
1332 1335 treemanifests to send.
1333 1336 """
1334 1337 clnode = nodes[x]
1335 1338 mdata = mfl.get(tree, x).readfast(shallow=True)
1336 1339 for p, n, fl in mdata.iterentries():
1337 1340 if fl == b't': # subdirectory manifest
1338 1341 subtree = tree + p + b'/'
1339 1342 tmfclnodes = tmfnodes.setdefault(subtree, {})
1340 1343 tmfclnode = tmfclnodes.setdefault(n, clnode)
1341 1344 if clrevorder[clnode] < clrevorder[tmfclnode]:
1342 1345 tmfclnodes[n] = clnode
1343 1346 else:
1344 1347 f = tree + p
1345 1348 fclnodes = fnodes.setdefault(f, {})
1346 1349 fclnode = fclnodes.setdefault(n, clnode)
1347 1350 if clrevorder[clnode] < clrevorder[fclnode]:
1348 1351 fclnodes[n] = clnode
1349 1352 return clnode
1350 1353
1351 1354 return lookupmflinknode
1352 1355
1353 1356 while tmfnodes:
1354 1357 tree, nodes = tmfnodes.popitem()
1355 1358
1356 1359 should_visit = self._matcher.visitdir(tree[:-1])
1357 1360 if tree and not should_visit:
1358 1361 continue
1359 1362
1360 1363 store = mfl.getstorage(tree)
1361 1364
1362 1365 if not should_visit:
1363 1366 # No nodes to send because this directory is out of
1364 1367 # the client's view of the repository (probably
1365 1368 # because of narrow clones). Do this even for the root
1366 1369 # directory (tree=='')
1367 1370 prunednodes = []
1368 1371 else:
1369 1372 # Avoid sending any manifest nodes we can prove the
1370 1373 # client already has by checking linkrevs. See the
1371 1374 # related comment in generatefiles().
1372 1375 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1373 1376
1374 1377 if tree and not prunednodes:
1375 1378 continue
1376 1379
1377 1380 lookupfn = makelookupmflinknode(tree, nodes)
1378 1381
1379 1382 deltas = deltagroup(
1380 1383 self._repo,
1381 1384 store,
1382 1385 prunednodes,
1383 1386 False,
1384 1387 lookupfn,
1385 1388 self._forcedeltaparentprev,
1386 1389 ellipses=self._ellipses,
1387 1390 topic=_(b'manifests'),
1388 1391 clrevtolocalrev=clrevtolocalrev,
1389 1392 fullclnodes=self._fullclnodes,
1390 1393 precomputedellipsis=self._precomputedellipsis,
1391 1394 sidedata_helpers=sidedata_helpers,
1392 1395 )
1393 1396
1394 1397 if not self._oldmatcher.visitdir(store.tree[:-1]):
1395 1398 yield tree, deltas
1396 1399 else:
1397 1400 # 'deltas' is a generator and we need to consume it even if
1398 1401 # we are not going to send it because a side-effect is that
1399 1402 # it updates tmfnodes (via lookupfn)
1400 1403 for d in deltas:
1401 1404 pass
1402 1405 if not tree:
1403 1406 yield tree, []
1404 1407
1405 1408 def _prunemanifests(self, store, nodes, commonrevs):
1406 1409 if not self._ellipses:
1407 1410 # In the non-ellipses case and for large repositories, it is better
1408 1411 # to avoid calling store.rev and store.linkrev on a lot of
1409 1412 # nodes than to send some extra data
1410 1413 return nodes.copy()
1411 1414 # This is split out as a separate method to allow filtering
1412 1415 # commonrevs in extension code.
1413 1416 #
1414 1417 # TODO(augie): this shouldn't be required, instead we should
1415 1418 # make filtering of revisions to send delegated to the store
1416 1419 # layer.
1417 1420 frev, flr = store.rev, store.linkrev
1418 1421 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1419 1422
1420 1423 # The 'source' parameter is useful for extensions
1421 1424 def generatefiles(
1422 1425 self,
1423 1426 changedfiles,
1424 1427 commonrevs,
1425 1428 source,
1426 1429 mfdicts,
1427 1430 fastpathlinkrev,
1428 1431 fnodes,
1429 1432 clrevs,
1430 1433 sidedata_helpers=None,
1431 1434 ):
1432 1435 changedfiles = [
1433 1436 f
1434 1437 for f in changedfiles
1435 1438 if self._matcher(f) and not self._oldmatcher(f)
1436 1439 ]
1437 1440
1438 1441 if not fastpathlinkrev:
1439 1442
1440 1443 def normallinknodes(unused, fname):
1441 1444 return fnodes.get(fname, {})
1442 1445
1443 1446 else:
1444 1447 cln = self._repo.changelog.node
1445 1448
1446 1449 def normallinknodes(store, fname):
1447 1450 flinkrev = store.linkrev
1448 1451 fnode = store.node
1449 1452 revs = ((r, flinkrev(r)) for r in store)
1450 1453 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1451 1454
1452 1455 clrevtolocalrev = {}
1453 1456
1454 1457 if self._isshallow:
1455 1458 # In a shallow clone, the linknodes callback needs to also include
1456 1459 # those file nodes that are in the manifests we sent but weren't
1457 1460 # introduced by those manifests.
1458 1461 commonctxs = [self._repo[c] for c in commonrevs]
1459 1462 clrev = self._repo.changelog.rev
1460 1463
1461 1464 def linknodes(flog, fname):
1462 1465 for c in commonctxs:
1463 1466 try:
1464 1467 fnode = c.filenode(fname)
1465 1468 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1466 1469 except error.ManifestLookupError:
1467 1470 pass
1468 1471 links = normallinknodes(flog, fname)
1469 1472 if len(links) != len(mfdicts):
1470 1473 for mf, lr in mfdicts:
1471 1474 fnode = mf.get(fname, None)
1472 1475 if fnode in links:
1473 1476 links[fnode] = min(links[fnode], lr, key=clrev)
1474 1477 elif fnode:
1475 1478 links[fnode] = lr
1476 1479 return links
1477 1480
1478 1481 else:
1479 1482 linknodes = normallinknodes
1480 1483
1481 1484 repo = self._repo
1482 1485 progress = repo.ui.makeprogress(
1483 1486 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1484 1487 )
1485 1488 for i, fname in enumerate(sorted(changedfiles)):
1486 1489 filerevlog = repo.file(fname)
1487 1490 if not filerevlog:
1488 1491 raise error.Abort(
1489 1492 _(b"empty or missing file data for %s") % fname
1490 1493 )
1491 1494
1492 1495 clrevtolocalrev.clear()
1493 1496
1494 1497 linkrevnodes = linknodes(filerevlog, fname)
1495 1498 # Lookup for filenodes, we collected the linkrev nodes above in the
1496 1499 # fastpath case and with lookupmf in the slowpath case.
1497 1500 def lookupfilelog(x):
1498 1501 return linkrevnodes[x]
1499 1502
1500 1503 frev, flr = filerevlog.rev, filerevlog.linkrev
1501 1504 # Skip sending any filenode we know the client already
1502 1505 # has. This avoids over-sending files relatively
1503 1506 # inexpensively, so it's not a problem if we under-filter
1504 1507 # here.
1505 1508 filenodes = [
1506 1509 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1507 1510 ]
1508 1511
1509 1512 if not filenodes:
1510 1513 continue
1511 1514
1512 1515 progress.update(i + 1, item=fname)
1513 1516
1514 1517 deltas = deltagroup(
1515 1518 self._repo,
1516 1519 filerevlog,
1517 1520 filenodes,
1518 1521 False,
1519 1522 lookupfilelog,
1520 1523 self._forcedeltaparentprev,
1521 1524 ellipses=self._ellipses,
1522 1525 clrevtolocalrev=clrevtolocalrev,
1523 1526 fullclnodes=self._fullclnodes,
1524 1527 precomputedellipsis=self._precomputedellipsis,
1525 1528 sidedata_helpers=sidedata_helpers,
1526 1529 )
1527 1530
1528 1531 yield fname, deltas
1529 1532
1530 1533 progress.complete()
1531 1534
1532 1535
1533 1536 def _makecg1packer(
1534 1537 repo,
1535 1538 oldmatcher,
1536 1539 matcher,
1537 1540 bundlecaps,
1538 1541 ellipses=False,
1539 1542 shallow=False,
1540 1543 ellipsisroots=None,
1541 1544 fullnodes=None,
1542 1545 remote_sidedata=None,
1543 1546 ):
1544 1547 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1545 1548 d.node, d.p1node, d.p2node, d.linknode
1546 1549 )
1547 1550
1548 1551 return cgpacker(
1549 1552 repo,
1550 1553 oldmatcher,
1551 1554 matcher,
1552 1555 b'01',
1553 1556 builddeltaheader=builddeltaheader,
1554 1557 manifestsend=b'',
1555 1558 forcedeltaparentprev=True,
1556 1559 bundlecaps=bundlecaps,
1557 1560 ellipses=ellipses,
1558 1561 shallow=shallow,
1559 1562 ellipsisroots=ellipsisroots,
1560 1563 fullnodes=fullnodes,
1561 1564 )
1562 1565
1563 1566
1564 1567 def _makecg2packer(
1565 1568 repo,
1566 1569 oldmatcher,
1567 1570 matcher,
1568 1571 bundlecaps,
1569 1572 ellipses=False,
1570 1573 shallow=False,
1571 1574 ellipsisroots=None,
1572 1575 fullnodes=None,
1573 1576 remote_sidedata=None,
1574 1577 ):
1575 1578 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1576 1579 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1577 1580 )
1578 1581
1579 1582 return cgpacker(
1580 1583 repo,
1581 1584 oldmatcher,
1582 1585 matcher,
1583 1586 b'02',
1584 1587 builddeltaheader=builddeltaheader,
1585 1588 manifestsend=b'',
1586 1589 bundlecaps=bundlecaps,
1587 1590 ellipses=ellipses,
1588 1591 shallow=shallow,
1589 1592 ellipsisroots=ellipsisroots,
1590 1593 fullnodes=fullnodes,
1591 1594 )
1592 1595
1593 1596
1594 1597 def _makecg3packer(
1595 1598 repo,
1596 1599 oldmatcher,
1597 1600 matcher,
1598 1601 bundlecaps,
1599 1602 ellipses=False,
1600 1603 shallow=False,
1601 1604 ellipsisroots=None,
1602 1605 fullnodes=None,
1603 1606 remote_sidedata=None,
1604 1607 ):
1605 1608 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1606 1609 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1607 1610 )
1608 1611
1609 1612 return cgpacker(
1610 1613 repo,
1611 1614 oldmatcher,
1612 1615 matcher,
1613 1616 b'03',
1614 1617 builddeltaheader=builddeltaheader,
1615 1618 manifestsend=closechunk(),
1616 1619 bundlecaps=bundlecaps,
1617 1620 ellipses=ellipses,
1618 1621 shallow=shallow,
1619 1622 ellipsisroots=ellipsisroots,
1620 1623 fullnodes=fullnodes,
1621 1624 )
1622 1625
1623 1626
1624 1627 def _makecg4packer(
1625 1628 repo,
1626 1629 oldmatcher,
1627 1630 matcher,
1628 1631 bundlecaps,
1629 1632 ellipses=False,
1630 1633 shallow=False,
1631 1634 ellipsisroots=None,
1632 1635 fullnodes=None,
1633 1636 remote_sidedata=None,
1634 1637 ):
1635 1638 # Same header func as cg3. Sidedata is in a separate chunk from the delta to
1636 1639 # differentiate "raw delta" and sidedata.
1637 1640 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1638 1641 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1639 1642 )
1640 1643
1641 1644 return cgpacker(
1642 1645 repo,
1643 1646 oldmatcher,
1644 1647 matcher,
1645 1648 b'04',
1646 1649 builddeltaheader=builddeltaheader,
1647 1650 manifestsend=closechunk(),
1648 1651 bundlecaps=bundlecaps,
1649 1652 ellipses=ellipses,
1650 1653 shallow=shallow,
1651 1654 ellipsisroots=ellipsisroots,
1652 1655 fullnodes=fullnodes,
1653 1656 remote_sidedata=remote_sidedata,
1654 1657 )
1655 1658
1656 1659
1657 1660 _packermap = {
1658 1661 b'01': (_makecg1packer, cg1unpacker),
1659 1662 # cg2 adds support for exchanging generaldelta
1660 1663 b'02': (_makecg2packer, cg2unpacker),
1661 1664 # cg3 adds support for exchanging revlog flags and treemanifests
1662 1665 b'03': (_makecg3packer, cg3unpacker),
1663 1666 # cg4 adds support for exchanging sidedata
1664 1667 b'04': (_makecg4packer, cg4unpacker),
1665 1668 }
1666 1669
1667 1670
1668 1671 def allsupportedversions(repo):
1669 1672 versions = set(_packermap.keys())
1670 1673 needv03 = False
1671 1674 if (
1672 1675 repo.ui.configbool(b'experimental', b'changegroup3')
1673 1676 or repo.ui.configbool(b'experimental', b'treemanifest')
1674 1677 or scmutil.istreemanifest(repo)
1675 1678 ):
1676 1679 # we keep version 03 because we need to exchange treemanifest data
1677 1680 #
1678 1681 # we also keep versions 01 and 02, because it is possible for a repo to
1679 1682 # contain both normal and tree manifests at the same time. so using an
1680 1683 # older version to pull data is viable
1681 1684 #
1682 1685 # (or even to push subset of history)
1683 1686 needv03 = True
1684 1687 has_revlogv2 = requirements.REVLOGV2_REQUIREMENT in repo.requirements
1685 1688 if not has_revlogv2:
1686 1689 versions.discard(b'04')
1687 1690 if not needv03:
1688 1691 versions.discard(b'03')
1689 1692 return versions
1690 1693
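A hedged, self-contained illustration of the filtering above (stand-in names,
not Mercurial API): start from every known changegroup version and discard the
ones the repository cannot support, mirroring allsupportedversions().

def pick_versions(has_revlogv2, needs_v03):
    versions = {b'01', b'02', b'03', b'04'}
    if not has_revlogv2:
        versions.discard(b'04')  # cg4 (sidedata) needs revlogv2
    if not needs_v03:
        versions.discard(b'03')
    return versions

# e.g. a tree-manifest repo without revlogv2 keeps versions 01, 02 and 03
assert pick_versions(has_revlogv2=False, needs_v03=True) == {b'01', b'02', b'03'}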
1691 1694
1692 1695 # Changegroup versions that can be applied to the repo
1693 1696 def supportedincomingversions(repo):
1694 1697 return allsupportedversions(repo)
1695 1698
1696 1699
1697 1700 # Changegroup versions that can be created from the repo
1698 1701 def supportedoutgoingversions(repo):
1699 1702 versions = allsupportedversions(repo)
1700 1703 if scmutil.istreemanifest(repo):
1701 1704 # Versions 01 and 02 support only flat manifests and it's just too
1702 1705 # expensive to convert between the flat manifest and tree manifest on
1703 1706 # the fly. Since tree manifests are hashed differently, all of history
1704 1707 # would have to be converted. Instead, we simply don't even pretend to
1705 1708 # support versions 01 and 02.
1706 1709 versions.discard(b'01')
1707 1710 versions.discard(b'02')
1708 1711 if requirements.NARROW_REQUIREMENT in repo.requirements:
1709 1712 # Versions 01 and 02 don't support revlog flags, and we need to
1710 1713 # support that for stripping and unbundling to work.
1711 1714 versions.discard(b'01')
1712 1715 versions.discard(b'02')
1713 1716 if LFS_REQUIREMENT in repo.requirements:
1714 1717 # Versions 01 and 02 don't support revlog flags, and we need to
1715 1718 # mark LFS entries with REVIDX_EXTSTORED.
1716 1719 versions.discard(b'01')
1717 1720 versions.discard(b'02')
1718 1721
1719 1722 return versions
1720 1723
1721 1724
1722 1725 def localversion(repo):
1723 1726 # Finds the best version to use for bundles that are meant to be used
1724 1727 # locally, such as those from strip and shelve, and temporary bundles.
1725 1728 return max(supportedoutgoingversions(repo))
1726 1729
1727 1730
1728 1731 def safeversion(repo):
1729 1732 # Finds the smallest version that it's safe to assume clients of the repo
1730 1733 # will support. For example, all hg versions that support generaldelta also
1731 1734 # support changegroup 02.
1732 1735 versions = supportedoutgoingversions(repo)
1733 1736 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1734 1737 versions.discard(b'01')
1735 1738 assert versions
1736 1739 return min(versions)
1737 1740
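Both helpers above rely on version identifiers sorting as byte strings. A
hedged, standalone illustration:

versions = {b'01', b'02', b'03'}
assert max(versions) == b'03'  # localversion(): richest format for local bundles
assert min(versions) == b'01'  # safeversion(): lowest common denominator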
1738 1741
1739 1742 def getbundler(
1740 1743 version,
1741 1744 repo,
1742 1745 bundlecaps=None,
1743 1746 oldmatcher=None,
1744 1747 matcher=None,
1745 1748 ellipses=False,
1746 1749 shallow=False,
1747 1750 ellipsisroots=None,
1748 1751 fullnodes=None,
1749 1752 remote_sidedata=None,
1750 1753 ):
1751 1754 assert version in supportedoutgoingversions(repo)
1752 1755
1753 1756 if matcher is None:
1754 1757 matcher = matchmod.always()
1755 1758 if oldmatcher is None:
1756 1759 oldmatcher = matchmod.never()
1757 1760
1758 1761 if version == b'01' and not matcher.always():
1759 1762 raise error.ProgrammingError(
1760 1763 b'version 01 changegroups do not support sparse file matchers'
1761 1764 )
1762 1765
1763 1766 if ellipses and version in (b'01', b'02'):
1764 1767 raise error.Abort(
1765 1768 _(
1766 1769 b'ellipsis nodes require at least cg3 on client and server, '
1767 1770 b'but negotiated version %s'
1768 1771 )
1769 1772 % version
1770 1773 )
1771 1774
1772 1775 # Requested files could include files not in the local store. So
1773 1776 # filter those out.
1774 1777 matcher = repo.narrowmatch(matcher)
1775 1778
1776 1779 fn = _packermap[version][0]
1777 1780 return fn(
1778 1781 repo,
1779 1782 oldmatcher,
1780 1783 matcher,
1781 1784 bundlecaps,
1782 1785 ellipses=ellipses,
1783 1786 shallow=shallow,
1784 1787 ellipsisroots=ellipsisroots,
1785 1788 fullnodes=fullnodes,
1786 1789 remote_sidedata=remote_sidedata,
1787 1790 )
1788 1791
1789 1792
1790 1793 def getunbundler(version, fh, alg, extras=None):
1791 1794 return _packermap[version][1](fh, alg, extras=extras)
1792 1795
1793 1796
1794 1797 def _changegroupinfo(repo, nodes, source):
1795 1798 if repo.ui.verbose or source == b'bundle':
1796 1799 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1797 1800 if repo.ui.debugflag:
1798 1801 repo.ui.debug(b"list of changesets:\n")
1799 1802 for node in nodes:
1800 1803 repo.ui.debug(b"%s\n" % hex(node))
1801 1804
1802 1805
1803 1806 def makechangegroup(
1804 1807 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1805 1808 ):
1806 1809 cgstream = makestream(
1807 1810 repo,
1808 1811 outgoing,
1809 1812 version,
1810 1813 source,
1811 1814 fastpath=fastpath,
1812 1815 bundlecaps=bundlecaps,
1813 1816 )
1814 1817 return getunbundler(
1815 1818 version,
1816 1819 util.chunkbuffer(cgstream),
1817 1820 None,
1818 1821 {b'clcount': len(outgoing.missing)},
1819 1822 )
1820 1823
1821 1824
1822 1825 def makestream(
1823 1826 repo,
1824 1827 outgoing,
1825 1828 version,
1826 1829 source,
1827 1830 fastpath=False,
1828 1831 bundlecaps=None,
1829 1832 matcher=None,
1830 1833 remote_sidedata=None,
1831 1834 ):
1832 1835 bundler = getbundler(
1833 1836 version,
1834 1837 repo,
1835 1838 bundlecaps=bundlecaps,
1836 1839 matcher=matcher,
1837 1840 remote_sidedata=remote_sidedata,
1838 1841 )
1839 1842
1840 1843 repo = repo.unfiltered()
1841 1844 commonrevs = outgoing.common
1842 1845 csets = outgoing.missing
1843 1846 heads = outgoing.ancestorsof
1844 1847 # We go through the fast path if we get told to, or if all (unfiltered)
1845 1848 # heads have been requested (since we then know all linkrevs will
1846 1849 # be pulled by the client).
1847 1850 heads.sort()
1848 1851 fastpathlinkrev = fastpath or (
1849 1852 repo.filtername is None and heads == sorted(repo.heads())
1850 1853 )
1851 1854
1852 1855 repo.hook(b'preoutgoing', throw=True, source=source)
1853 1856 _changegroupinfo(repo, csets, source)
1854 1857 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1855 1858
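A hedged sketch of the typical producer path (``repo`` and ``nodes`` are
assumed to exist; this mirrors locallegacypeer.changegroup() later in this
change rather than introducing any new API):

from mercurial import changegroup, discovery

def make_push_bundle(repo, nodes):
    # compute what the other side is missing, then build a cg1 stream from it
    outgoing = discovery.outgoing(
        repo, missingroots=nodes, ancestorsof=repo.heads()
    )
    return changegroup.makechangegroup(repo, outgoing, b'01', b'push')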
1856 1859
1857 1860 def _addchangegroupfiles(
1858 1861 repo,
1859 1862 source,
1860 1863 revmap,
1861 1864 trp,
1862 1865 expectedfiles,
1863 1866 needfiles,
1864 1867 addrevisioncb=None,
1865 1868 ):
1866 1869 revisions = 0
1867 1870 files = 0
1868 1871 progress = repo.ui.makeprogress(
1869 1872 _(b'files'), unit=_(b'files'), total=expectedfiles
1870 1873 )
1871 1874 for chunkdata in iter(source.filelogheader, {}):
1872 1875 files += 1
1873 1876 f = chunkdata[b"filename"]
1874 1877 repo.ui.debug(b"adding %s revisions\n" % f)
1875 1878 progress.increment()
1876 1879 fl = repo.file(f)
1877 1880 o = len(fl)
1878 1881 try:
1879 1882 deltas = source.deltaiter()
1880 1883 added = fl.addgroup(
1881 1884 deltas,
1882 1885 revmap,
1883 1886 trp,
1884 1887 addrevisioncb=addrevisioncb,
1885 1888 )
1886 1889 if not added:
1887 1890 raise error.Abort(_(b"received file revlog group is empty"))
1888 1891 except error.CensoredBaseError as e:
1889 1892 raise error.Abort(_(b"received delta base is censored: %s") % e)
1890 1893 revisions += len(fl) - o
1891 1894 if f in needfiles:
1892 1895 needs = needfiles[f]
1893 1896 for new in pycompat.xrange(o, len(fl)):
1894 1897 n = fl.node(new)
1895 1898 if n in needs:
1896 1899 needs.remove(n)
1897 1900 else:
1898 1901 raise error.Abort(_(b"received spurious file revlog entry"))
1899 1902 if not needs:
1900 1903 del needfiles[f]
1901 1904 progress.complete()
1902 1905
1903 1906 for f, needs in pycompat.iteritems(needfiles):
1904 1907 fl = repo.file(f)
1905 1908 for n in needs:
1906 1909 try:
1907 1910 fl.rev(n)
1908 1911 except error.LookupError:
1909 1912 raise error.Abort(
1910 1913 _(b'missing file data for %s:%s - run hg verify')
1911 1914 % (f, hex(n))
1912 1915 )
1913 1916
1914 1917 return revisions, files
1915 1918
1916 1919
1917 1920 def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
1918 1921 # Computers for computing sidedata on-the-fly
1919 1922 sd_computers = collections.defaultdict(list)
1920 1923 # Computers for categories to remove from sidedata
1921 1924 sd_removers = collections.defaultdict(list)
1922 1925
1923 1926 to_generate = remote_sd_categories - repo._wanted_sidedata
1924 1927 to_remove = repo._wanted_sidedata - remote_sd_categories
1925 1928 if pull:
1926 1929 to_generate, to_remove = to_remove, to_generate
1927 1930
1928 1931 for revlog_kind, computers in repo._sidedata_computers.items():
1929 1932 for category, computer in computers.items():
1930 1933 if category in to_generate:
1931 1934 sd_computers[revlog_kind].append(computer)
1932 1935 if category in to_remove:
1933 1936 sd_removers[revlog_kind].append(computer)
1934 1937
1935 1938 sidedata_helpers = (repo, sd_computers, sd_removers)
1936 1939 return sidedata_helpers
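A minimal, self-contained sketch (stand-in names, not Mercurial API) of the
per-kind dispatch built above: computers are bucketed by revlog kind, and a
consumer later picks the bucket matching the first element of a store's
``target`` tuple, which is what replaces the old ``revlog_kind`` byte string.

import collections

KIND_CHANGELOG, KIND_MANIFESTLOG, KIND_FILELOG = range(3)

def bucket_computers(computers, wanted_categories):
    """Group (kind, category, fn) triples into per-kind lists."""
    buckets = collections.defaultdict(list)
    for kind, category, fn in computers:
        if category in wanted_categories:
            buckets[kind].append(fn)
    return buckets

# a store whose target is (KIND_FILELOG, b'data/foo.i') would then use
# buckets[KIND_FILELOG] to find the computers that apply to it.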
@@ -1,626 +1,625 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 )
15 15 from .thirdparty import attr
16 16
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 metadata,
21 21 pycompat,
22 22 revlog,
23 23 )
24 24 from .utils import (
25 25 dateutil,
26 26 stringutil,
27 27 )
28 28 from .revlogutils import (
29 29 constants as revlog_constants,
30 30 flagutil,
31 31 )
32 32
33 33 _defaultextra = {b'branch': b'default'}
34 34
35 35
36 36 def _string_escape(text):
37 37 """
38 38 >>> from .pycompat import bytechr as chr
39 39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 41 >>> s
42 42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 43 >>> res = _string_escape(s)
44 44 >>> s == _string_unescape(res)
45 45 True
46 46 """
47 47 # subset of the string_escape codec
48 48 text = (
49 49 text.replace(b'\\', b'\\\\')
50 50 .replace(b'\n', b'\\n')
51 51 .replace(b'\r', b'\\r')
52 52 )
53 53 return text.replace(b'\0', b'\\0')
54 54
55 55
56 56 def _string_unescape(text):
57 57 if b'\\0' in text:
58 58 # fix up \0 without getting into trouble with \\0
59 59 text = text.replace(b'\\\\', b'\\\\\n')
60 60 text = text.replace(b'\\0', b'\0')
61 61 text = text.replace(b'\n', b'')
62 62 return stringutil.unescapestr(text)
63 63
64 64
65 65 def decodeextra(text):
66 66 """
67 67 >>> from .pycompat import bytechr as chr
68 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 69 ... ).items())
70 70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 72 ... b'baz': chr(92) + chr(0) + b'2'})
73 73 ... ).items())
74 74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 75 """
76 76 extra = _defaultextra.copy()
77 77 for l in text.split(b'\0'):
78 78 if l:
79 79 k, v = _string_unescape(l).split(b':', 1)
80 80 extra[k] = v
81 81 return extra
82 82
83 83
84 84 def encodeextra(d):
85 85 # keys must be sorted to produce a deterministic changelog entry
86 86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
87 87 return b"\0".join(items)
88 88
89 89
90 90 def stripdesc(desc):
91 91 """strip trailing whitespace and leading and trailing empty lines"""
92 92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
93 93
94 94
95 95 class appender(object):
96 96 """the changelog index must be updated last on disk, so we use this class
97 97 to delay writes to it"""
98 98
99 99 def __init__(self, vfs, name, mode, buf):
100 100 self.data = buf
101 101 fp = vfs(name, mode)
102 102 self.fp = fp
103 103 self.offset = fp.tell()
104 104 self.size = vfs.fstat(fp).st_size
105 105 self._end = self.size
106 106
107 107 def end(self):
108 108 return self._end
109 109
110 110 def tell(self):
111 111 return self.offset
112 112
113 113 def flush(self):
114 114 pass
115 115
116 116 @property
117 117 def closed(self):
118 118 return self.fp.closed
119 119
120 120 def close(self):
121 121 self.fp.close()
122 122
123 123 def seek(self, offset, whence=0):
124 124 '''virtual file offset spans real file and data'''
125 125 if whence == 0:
126 126 self.offset = offset
127 127 elif whence == 1:
128 128 self.offset += offset
129 129 elif whence == 2:
130 130 self.offset = self.end() + offset
131 131 if self.offset < self.size:
132 132 self.fp.seek(self.offset)
133 133
134 134 def read(self, count=-1):
135 135 '''only trick here is reads that span real file and data'''
136 136 ret = b""
137 137 if self.offset < self.size:
138 138 s = self.fp.read(count)
139 139 ret = s
140 140 self.offset += len(s)
141 141 if count > 0:
142 142 count -= len(s)
143 143 if count != 0:
144 144 doff = self.offset - self.size
145 145 self.data.insert(0, b"".join(self.data))
146 146 del self.data[1:]
147 147 s = self.data[0][doff : doff + count]
148 148 self.offset += len(s)
149 149 ret += s
150 150 return ret
151 151
152 152 def write(self, s):
153 153 self.data.append(bytes(s))
154 154 self.offset += len(s)
155 155 self._end += len(s)
156 156
157 157 def __enter__(self):
158 158 self.fp.__enter__()
159 159 return self
160 160
161 161 def __exit__(self, *args):
162 162 return self.fp.__exit__(*args)
163 163
164 164
165 165 class _divertopener(object):
166 166 def __init__(self, opener, target):
167 167 self._opener = opener
168 168 self._target = target
169 169
170 170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
171 171 if name != self._target:
172 172 return self._opener(name, mode, **kwargs)
173 173 return self._opener(name + b".a", mode, **kwargs)
174 174
175 175 def __getattr__(self, attr):
176 176 return getattr(self._opener, attr)
177 177
178 178
179 179 def _delayopener(opener, target, buf):
180 180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181 181
182 182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
183 183 if name != target:
184 184 return opener(name, mode, **kwargs)
185 185 assert not kwargs
186 186 return appender(opener, name, mode, buf)
187 187
188 188 return _delay
189 189
190 190
191 191 @attr.s
192 192 class _changelogrevision(object):
193 193 # Extensions might modify _defaultextra, so let the constructor below pass
194 194 # it in
195 195 extra = attr.ib()
196 196 manifest = attr.ib()
197 197 user = attr.ib(default=b'')
198 198 date = attr.ib(default=(0, 0))
199 199 files = attr.ib(default=attr.Factory(list))
200 200 filesadded = attr.ib(default=None)
201 201 filesremoved = attr.ib(default=None)
202 202 p1copies = attr.ib(default=None)
203 203 p2copies = attr.ib(default=None)
204 204 description = attr.ib(default=b'')
205 205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
206 206
207 207
208 208 class changelogrevision(object):
209 209 """Holds results of a parsed changelog revision.
210 210
211 211 Changelog revisions consist of multiple pieces of data, including
212 212 the manifest node, user, and date. This object exposes a view into
213 213 the parsed object.
214 214 """
215 215
216 216 __slots__ = (
217 217 '_offsets',
218 218 '_text',
219 219 '_sidedata',
220 220 '_cpsd',
221 221 '_changes',
222 222 )
223 223
224 224 def __new__(cls, cl, text, sidedata, cpsd):
225 225 if not text:
226 226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
227 227
228 228 self = super(changelogrevision, cls).__new__(cls)
229 229 # We could return here and implement the following as an __init__.
230 230 # But doing it here is equivalent and saves an extra function call.
231 231
232 232 # format used:
233 233 # nodeid\n : manifest node in ascii
234 234 # user\n : user, no \n or \r allowed
235 235 # time tz extra\n : date (time is int or float, timezone is int)
236 236 # : extra is metadata, encoded and separated by '\0'
237 237 # : older versions ignore it
238 238 # files\n\n : files modified by the cset, no \n or \r allowed
239 239 # (.*) : comment (free text, ideally utf-8)
240 240 #
241 241 # changelog v0 doesn't use extra
242 242
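        # A hedged example of such a text (all values made up):
        #
        #   0123456789abcdef0123456789abcdef01234567\n
        #   Alice <alice@example.org>\n
        #   1633024800 0 branch:stable\n
        #   a/file.txt\n
        #   b/other.txt\n
        #   \n
        #   fix the thing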
243 243 nl1 = text.index(b'\n')
244 244 nl2 = text.index(b'\n', nl1 + 1)
245 245 nl3 = text.index(b'\n', nl2 + 1)
246 246
247 247 # The list of files may be empty. Which means nl3 is the first of the
248 248 # double newline that precedes the description.
249 249 if text[nl3 + 1 : nl3 + 2] == b'\n':
250 250 doublenl = nl3
251 251 else:
252 252 doublenl = text.index(b'\n\n', nl3 + 1)
253 253
254 254 self._offsets = (nl1, nl2, nl3, doublenl)
255 255 self._text = text
256 256 self._sidedata = sidedata
257 257 self._cpsd = cpsd
258 258 self._changes = None
259 259
260 260 return self
261 261
262 262 @property
263 263 def manifest(self):
264 264 return bin(self._text[0 : self._offsets[0]])
265 265
266 266 @property
267 267 def user(self):
268 268 off = self._offsets
269 269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
270 270
271 271 @property
272 272 def _rawdate(self):
273 273 off = self._offsets
274 274 dateextra = self._text[off[1] + 1 : off[2]]
275 275 return dateextra.split(b' ', 2)[0:2]
276 276
277 277 @property
278 278 def _rawextra(self):
279 279 off = self._offsets
280 280 dateextra = self._text[off[1] + 1 : off[2]]
281 281 fields = dateextra.split(b' ', 2)
282 282 if len(fields) != 3:
283 283 return None
284 284
285 285 return fields[2]
286 286
287 287 @property
288 288 def date(self):
289 289 raw = self._rawdate
290 290 time = float(raw[0])
291 291 # Various tools did silly things with the timezone.
292 292 try:
293 293 timezone = int(raw[1])
294 294 except ValueError:
295 295 timezone = 0
296 296
297 297 return time, timezone
298 298
299 299 @property
300 300 def extra(self):
301 301 raw = self._rawextra
302 302 if raw is None:
303 303 return _defaultextra
304 304
305 305 return decodeextra(raw)
306 306
307 307 @property
308 308 def changes(self):
309 309 if self._changes is not None:
310 310 return self._changes
311 311 if self._cpsd:
312 312 changes = metadata.decode_files_sidedata(self._sidedata)
313 313 else:
314 314 changes = metadata.ChangingFiles(
315 315 touched=self.files or (),
316 316 added=self.filesadded or (),
317 317 removed=self.filesremoved or (),
318 318 p1_copies=self.p1copies or {},
319 319 p2_copies=self.p2copies or {},
320 320 )
321 321 self._changes = changes
322 322 return changes
323 323
324 324 @property
325 325 def files(self):
326 326 if self._cpsd:
327 327 return sorted(self.changes.touched)
328 328 off = self._offsets
329 329 if off[2] == off[3]:
330 330 return []
331 331
332 332 return self._text[off[2] + 1 : off[3]].split(b'\n')
333 333
334 334 @property
335 335 def filesadded(self):
336 336 if self._cpsd:
337 337 return self.changes.added
338 338 else:
339 339 rawindices = self.extra.get(b'filesadded')
340 340 if rawindices is None:
341 341 return None
342 342 return metadata.decodefileindices(self.files, rawindices)
343 343
344 344 @property
345 345 def filesremoved(self):
346 346 if self._cpsd:
347 347 return self.changes.removed
348 348 else:
349 349 rawindices = self.extra.get(b'filesremoved')
350 350 if rawindices is None:
351 351 return None
352 352 return metadata.decodefileindices(self.files, rawindices)
353 353
354 354 @property
355 355 def p1copies(self):
356 356 if self._cpsd:
357 357 return self.changes.copied_from_p1
358 358 else:
359 359 rawcopies = self.extra.get(b'p1copies')
360 360 if rawcopies is None:
361 361 return None
362 362 return metadata.decodecopies(self.files, rawcopies)
363 363
364 364 @property
365 365 def p2copies(self):
366 366 if self._cpsd:
367 367 return self.changes.copied_from_p2
368 368 else:
369 369 rawcopies = self.extra.get(b'p2copies')
370 370 if rawcopies is None:
371 371 return None
372 372 return metadata.decodecopies(self.files, rawcopies)
373 373
374 374 @property
375 375 def description(self):
376 376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377 377
378 378 @property
379 379 def branchinfo(self):
380 380 extra = self.extra
381 381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
382 382
383 383
384 384 class changelog(revlog.revlog):
385 385 def __init__(self, opener, trypending=False, concurrencychecker=None):
386 386 """Load a changelog revlog using an opener.
387 387
388 388 If ``trypending`` is true, we attempt to load the index from a
389 389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
390 390 The ``00changelog.i.a`` file contains index (and possibly inline
391 391 revision) data for a transaction that hasn't been finalized yet.
392 392 It exists in a separate file to facilitate readers (such as
393 393 hook processes) accessing data before a transaction is finalized.
394 394
395 395 ``concurrencychecker`` will be passed to the revlog init function, see
396 396 the documentation there.
397 397 """
398 398 if trypending and opener.exists(b'00changelog.i.a'):
399 399 indexfile = b'00changelog.i.a'
400 400 else:
401 401 indexfile = b'00changelog.i'
402 402
403 403 datafile = b'00changelog.d'
404 404 revlog.revlog.__init__(
405 405 self,
406 406 opener,
407 407 target=(revlog_constants.KIND_CHANGELOG, None),
408 408 indexfile=indexfile,
409 409 datafile=datafile,
410 410 checkambig=True,
411 411 mmaplargeindex=True,
412 412 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
413 413 concurrencychecker=concurrencychecker,
414 414 )
415 415
416 416 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
417 417 # changelogs don't benefit from generaldelta.
418 418
419 419 self.version &= ~revlog.FLAG_GENERALDELTA
420 420 self._generaldelta = False
421 421
422 422 # Delta chains for changelogs tend to be very small because entries
423 423 # tend to be small and don't delta well with each other. So disable delta
424 424 # chains.
425 425 self._storedeltachains = False
426 426
427 427 self._realopener = opener
428 428 self._delayed = False
429 429 self._delaybuf = None
430 430 self._divert = False
431 431 self._filteredrevs = frozenset()
432 432 self._filteredrevs_hashcache = {}
433 433 self._copiesstorage = opener.options.get(b'copies-storage')
434 self.revlog_kind = b'changelog'
435 434
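# Hedged note (assumed helper, not part of this change): with the
# ``revlog_kind`` byte string removed above, callers can branch on the
# target tuple passed to revlog.revlog.__init__ instead, assuming the
# revlog keeps that tuple on a ``target`` attribute:
#
#     def _is_changelog(rl):
#         return rl.target[0] == revlog_constants.KIND_CHANGELOG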
436 435 @property
437 436 def filteredrevs(self):
438 437 return self._filteredrevs
439 438
440 439 @filteredrevs.setter
441 440 def filteredrevs(self, val):
442 441 # Ensure all updates go through this function
443 442 assert isinstance(val, frozenset)
444 443 self._filteredrevs = val
445 444 self._filteredrevs_hashcache = {}
446 445
447 446 def delayupdate(self, tr):
448 447 """delay visibility of index updates to other readers"""
449 448
450 449 if not self._delayed:
451 450 if len(self) == 0:
452 451 self._divert = True
453 452 if self._realopener.exists(self.indexfile + b'.a'):
454 453 self._realopener.unlink(self.indexfile + b'.a')
455 454 self.opener = _divertopener(self._realopener, self.indexfile)
456 455 else:
457 456 self._delaybuf = []
458 457 self.opener = _delayopener(
459 458 self._realopener, self.indexfile, self._delaybuf
460 459 )
461 460 self._delayed = True
462 461 tr.addpending(b'cl-%i' % id(self), self._writepending)
463 462 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
464 463
465 464 def _finalize(self, tr):
466 465 """finalize index updates"""
467 466 self._delayed = False
468 467 self.opener = self._realopener
469 468 # move redirected index data back into place
470 469 if self._divert:
471 470 assert not self._delaybuf
472 471 tmpname = self.indexfile + b".a"
473 472 nfile = self.opener.open(tmpname)
474 473 nfile.close()
475 474 self.opener.rename(tmpname, self.indexfile, checkambig=True)
476 475 elif self._delaybuf:
477 476 fp = self.opener(self.indexfile, b'a', checkambig=True)
478 477 fp.write(b"".join(self._delaybuf))
479 478 fp.close()
480 479 self._delaybuf = None
481 480 self._divert = False
482 481 # split when we're done
483 482 self._enforceinlinesize(tr)
484 483
485 484 def _writepending(self, tr):
486 485 """create a file containing the unfinalized state for
487 486 pretxnchangegroup"""
488 487 if self._delaybuf:
489 488 # make a temporary copy of the index
490 489 fp1 = self._realopener(self.indexfile)
491 490 pendingfilename = self.indexfile + b".a"
492 491 # register as a temp file to ensure cleanup on failure
493 492 tr.registertmp(pendingfilename)
494 493 # write existing data
495 494 fp2 = self._realopener(pendingfilename, b"w")
496 495 fp2.write(fp1.read())
497 496 # add pending data
498 497 fp2.write(b"".join(self._delaybuf))
499 498 fp2.close()
500 499 # switch modes so finalize can simply rename
501 500 self._delaybuf = None
502 501 self._divert = True
503 502 self.opener = _divertopener(self._realopener, self.indexfile)
504 503
505 504 if self._divert:
506 505 return True
507 506
508 507 return False
509 508
510 509 def _enforceinlinesize(self, tr, fp=None):
511 510 if not self._delayed:
512 511 revlog.revlog._enforceinlinesize(self, tr, fp)
513 512
514 513 def read(self, nodeorrev):
515 514 """Obtain data from a parsed changelog revision.
516 515
517 516 Returns a 6-tuple of:
518 517
519 518 - manifest node in binary
520 519 - author/user as a localstr
521 520 - date as a 2-tuple of (time, timezone)
522 521 - list of files
523 522 - commit message as a localstr
524 523 - dict of extra metadata
525 524
526 525 Unless you need to access all fields, consider calling
527 526 ``changelogrevision`` instead, as it is faster for partial object
528 527 access.
529 528 """
530 529 d, s = self._revisiondata(nodeorrev)
531 530 c = changelogrevision(
532 531 self, d, s, self._copiesstorage == b'changeset-sidedata'
533 532 )
534 533 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
535 534
536 535 def changelogrevision(self, nodeorrev):
537 536 """Obtain a ``changelogrevision`` for a node or revision."""
538 537 text, sidedata = self._revisiondata(nodeorrev)
539 538 return changelogrevision(
540 539 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
541 540 )
542 541
543 542 def readfiles(self, nodeorrev):
544 543 """
545 544 short version of read that only returns the files modified by the cset
546 545 """
547 546 text = self.revision(nodeorrev)
548 547 if not text:
549 548 return []
550 549 last = text.index(b"\n\n")
551 550 l = text[:last].split(b'\n')
552 551 return l[3:]
553 552
554 553 def add(
555 554 self,
556 555 manifest,
557 556 files,
558 557 desc,
559 558 transaction,
560 559 p1,
561 560 p2,
562 561 user,
563 562 date=None,
564 563 extra=None,
565 564 ):
566 565 # Convert to UTF-8 encoded bytestrings as the very first
567 566 # thing: calling any method on a localstr object will turn it
568 567 # into a str object and the cached UTF-8 string is thus lost.
569 568 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
570 569
571 570 user = user.strip()
572 571 # An empty username or a username with a "\n" will make the
573 572 # revision text contain two "\n\n" sequences -> corrupt
574 573 # repository since read cannot unpack the revision.
575 574 if not user:
576 575 raise error.StorageError(_(b"empty username"))
577 576 if b"\n" in user:
578 577 raise error.StorageError(
579 578 _(b"username %r contains a newline") % pycompat.bytestr(user)
580 579 )
581 580
582 581 desc = stripdesc(desc)
583 582
584 583 if date:
585 584 parseddate = b"%d %d" % dateutil.parsedate(date)
586 585 else:
587 586 parseddate = b"%d %d" % dateutil.makedate()
588 587 if extra:
589 588 branch = extra.get(b"branch")
590 589 if branch in (b"default", b""):
591 590 del extra[b"branch"]
592 591 elif branch in (b".", b"null", b"tip"):
593 592 raise error.StorageError(
594 593 _(b'the name \'%s\' is reserved') % branch
595 594 )
596 595 sortedfiles = sorted(files.touched)
597 596 flags = 0
598 597 sidedata = None
599 598 if self._copiesstorage == b'changeset-sidedata':
600 599 if files.has_copies_info:
601 600 flags |= flagutil.REVIDX_HASCOPIESINFO
602 601 sidedata = metadata.encode_files_sidedata(files)
603 602
604 603 if extra:
605 604 extra = encodeextra(extra)
606 605 parseddate = b"%s %s" % (parseddate, extra)
607 606 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
608 607 text = b"\n".join(l)
609 608 rev = self.addrevision(
610 609 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
611 610 )
612 611 return self.node(rev)
613 612
614 613 def branchinfo(self, rev):
615 614 """return the branch name and open/close state of a revision
616 615
617 616 This function exists because creating a changectx object
618 617 just to access this is costly."""
619 618 return self.changelogrevision(rev).branchinfo
620 619
621 620 def _nodeduplicatecallback(self, transaction, rev):
622 621 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
623 622 #
624 623 # We track them in a list to preserve their order from the source bundle
625 624 duplicates = transaction.changes.setdefault(b'revduplicates', [])
626 625 duplicates.append(rev)
@@ -1,299 +1,298 b''
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import nullrev
12 12 from . import (
13 13 error,
14 14 revlog,
15 15 )
16 16 from .interfaces import (
17 17 repository,
18 18 util as interfaceutil,
19 19 )
20 20 from .utils import storageutil
21 21 from .revlogutils import (
22 22 constants as revlog_constants,
23 23 )
24 24
25 25
26 26 @interfaceutil.implementer(repository.ifilestorage)
27 27 class filelog(object):
28 28 def __init__(self, opener, path):
29 29 self._revlog = revlog.revlog(
30 30 opener,
31 31 # XXX should use the unencoded path
32 32 target=(revlog_constants.KIND_FILELOG, path),
33 33 indexfile=b'/'.join((b'data', path + b'.i')),
34 34 censorable=True,
35 35 )
36 36 # Full name of the user visible file, relative to the repository root.
37 37 # Used by LFS.
38 38 self._revlog.filename = path
39 self._revlog.revlog_kind = b'filelog'
40 39 self.nullid = self._revlog.nullid
41 40
42 41 def __len__(self):
43 42 return len(self._revlog)
44 43
45 44 def __iter__(self):
46 45 return self._revlog.__iter__()
47 46
48 47 def hasnode(self, node):
49 48 if node in (self.nullid, nullrev):
50 49 return False
51 50
52 51 try:
53 52 self._revlog.rev(node)
54 53 return True
55 54 except (TypeError, ValueError, IndexError, error.LookupError):
56 55 return False
57 56
58 57 def revs(self, start=0, stop=None):
59 58 return self._revlog.revs(start=start, stop=stop)
60 59
61 60 def parents(self, node):
62 61 return self._revlog.parents(node)
63 62
64 63 def parentrevs(self, rev):
65 64 return self._revlog.parentrevs(rev)
66 65
67 66 def rev(self, node):
68 67 return self._revlog.rev(node)
69 68
70 69 def node(self, rev):
71 70 return self._revlog.node(rev)
72 71
73 72 def lookup(self, node):
74 73 return storageutil.fileidlookup(
75 74 self._revlog, node, self._revlog.indexfile
76 75 )
77 76
78 77 def linkrev(self, rev):
79 78 return self._revlog.linkrev(rev)
80 79
81 80 def commonancestorsheads(self, node1, node2):
82 81 return self._revlog.commonancestorsheads(node1, node2)
83 82
84 83 # Used by dagop.blockdescendants().
85 84 def descendants(self, revs):
86 85 return self._revlog.descendants(revs)
87 86
88 87 def heads(self, start=None, stop=None):
89 88 return self._revlog.heads(start, stop)
90 89
91 90 # Used by hgweb, children extension.
92 91 def children(self, node):
93 92 return self._revlog.children(node)
94 93
95 94 def iscensored(self, rev):
96 95 return self._revlog.iscensored(rev)
97 96
98 97 def revision(self, node, _df=None, raw=False):
99 98 return self._revlog.revision(node, _df=_df, raw=raw)
100 99
101 100 def rawdata(self, node, _df=None):
102 101 return self._revlog.rawdata(node, _df=_df)
103 102
104 103 def emitrevisions(
105 104 self,
106 105 nodes,
107 106 nodesorder=None,
108 107 revisiondata=False,
109 108 assumehaveparentrevisions=False,
110 109 deltamode=repository.CG_DELTAMODE_STD,
111 110 sidedata_helpers=None,
112 111 ):
113 112 return self._revlog.emitrevisions(
114 113 nodes,
115 114 nodesorder=nodesorder,
116 115 revisiondata=revisiondata,
117 116 assumehaveparentrevisions=assumehaveparentrevisions,
118 117 deltamode=deltamode,
119 118 sidedata_helpers=sidedata_helpers,
120 119 )
121 120
122 121 def addrevision(
123 122 self,
124 123 revisiondata,
125 124 transaction,
126 125 linkrev,
127 126 p1,
128 127 p2,
129 128 node=None,
130 129 flags=revlog.REVIDX_DEFAULT_FLAGS,
131 130 cachedelta=None,
132 131 ):
133 132 return self._revlog.addrevision(
134 133 revisiondata,
135 134 transaction,
136 135 linkrev,
137 136 p1,
138 137 p2,
139 138 node=node,
140 139 flags=flags,
141 140 cachedelta=cachedelta,
142 141 )
143 142
144 143 def addgroup(
145 144 self,
146 145 deltas,
147 146 linkmapper,
148 147 transaction,
149 148 addrevisioncb=None,
150 149 duplicaterevisioncb=None,
151 150 maybemissingparents=False,
152 151 ):
153 152 if maybemissingparents:
154 153 raise error.Abort(
155 154 _(
156 155 b'revlog storage does not support missing '
157 156 b'parents write mode'
158 157 )
159 158 )
160 159
161 160 return self._revlog.addgroup(
162 161 deltas,
163 162 linkmapper,
164 163 transaction,
165 164 addrevisioncb=addrevisioncb,
166 165 duplicaterevisioncb=duplicaterevisioncb,
167 166 )
168 167
169 168 def getstrippoint(self, minlink):
170 169 return self._revlog.getstrippoint(minlink)
171 170
172 171 def strip(self, minlink, transaction):
173 172 return self._revlog.strip(minlink, transaction)
174 173
175 174 def censorrevision(self, tr, node, tombstone=b''):
176 175 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
177 176
178 177 def files(self):
179 178 return self._revlog.files()
180 179
181 180 def read(self, node):
182 181 return storageutil.filtermetadata(self.revision(node))
183 182
184 183 def add(self, text, meta, transaction, link, p1=None, p2=None):
185 184 if meta or text.startswith(b'\1\n'):
186 185 text = storageutil.packmeta(meta, text)
187 186 rev = self.addrevision(text, transaction, link, p1, p2)
188 187 return self.node(rev)
189 188
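A hedged, standalone illustration of the metadata envelope handled by add()
above (plain Python, not the storageutil API): copy/rename metadata is stored
as "key: value" lines wrapped in \1\n markers at the start of the revision
text, which is why literal texts that start with \1\n must be wrapped as well.

def packmeta_sketch(meta, text):
    # mirror of the packing idea, under the stated assumption about the format
    header = b"".join(b"%s: %s\n" % (k, meta[k]) for k in sorted(meta))
    return b"\x01\n" + header + b"\x01\n" + text

sample = packmeta_sketch({b'copy': b'a.txt', b'copyrev': b'0' * 40}, b'payload')
assert sample.startswith(b'\x01\n')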
190 189 def renamed(self, node):
191 190 return storageutil.filerevisioncopied(self, node)
192 191
193 192 def size(self, rev):
194 193 """return the size of a given revision"""
195 194
196 195 # for revisions with renames, we have to go the slow way
197 196 node = self.node(rev)
198 197 if self.renamed(node):
199 198 return len(self.read(node))
200 199 if self.iscensored(rev):
201 200 return 0
202 201
203 202 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
204 203 return self._revlog.size(rev)
205 204
206 205 def cmp(self, node, text):
207 206 """compare text with a given file revision
208 207
209 208 returns True if text is different than what is stored.
210 209 """
211 210 return not storageutil.filedataequivalent(self, node, text)
212 211
213 212 def verifyintegrity(self, state):
214 213 return self._revlog.verifyintegrity(state)
215 214
216 215 def storageinfo(
217 216 self,
218 217 exclusivefiles=False,
219 218 sharedfiles=False,
220 219 revisionscount=False,
221 220 trackedsize=False,
222 221 storedsize=False,
223 222 ):
224 223 return self._revlog.storageinfo(
225 224 exclusivefiles=exclusivefiles,
226 225 sharedfiles=sharedfiles,
227 226 revisionscount=revisionscount,
228 227 trackedsize=trackedsize,
229 228 storedsize=storedsize,
230 229 )
231 230
232 231 # TODO these aren't part of the interface and aren't internal methods.
233 232 # Callers should be fixed to not use them.
234 233
235 234 # Used by bundlefilelog, unionfilelog.
236 235 @property
237 236 def indexfile(self):
238 237 return self._revlog.indexfile
239 238
240 239 @indexfile.setter
241 240 def indexfile(self, value):
242 241 self._revlog.indexfile = value
243 242
244 243 # Used by repo upgrade.
245 244 def clone(self, tr, destrevlog, **kwargs):
246 245 if not isinstance(destrevlog, filelog):
247 246 raise error.ProgrammingError(b'expected filelog to clone()')
248 247
249 248 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
250 249
251 250
252 251 class narrowfilelog(filelog):
253 252 """Filelog variation to be used with narrow stores."""
254 253
255 254 def __init__(self, opener, path, narrowmatch):
256 255 super(narrowfilelog, self).__init__(opener, path)
257 256 self._narrowmatch = narrowmatch
258 257
259 258 def renamed(self, node):
260 259 res = super(narrowfilelog, self).renamed(node)
261 260
262 261 # Renames that come from outside the narrowspec are problematic
263 262 # because we may lack the base text for the rename. This can result
264 263 # in code attempting to walk the ancestry or compute a diff
265 264 # encountering a missing revision. We address this by silently
266 265 # removing rename metadata if the source file is outside the
267 266 # narrow spec.
268 267 #
269 268 # A better solution would be to see if the base revision is available,
270 269 # rather than assuming it isn't.
271 270 #
272 271 # An even better solution would be to teach all consumers of rename
273 272 # metadata that the base revision may not be available.
274 273 #
275 274 # TODO consider better ways of doing this.
276 275 if res and not self._narrowmatch(res[0]):
277 276 return None
278 277
279 278 return res
280 279
281 280 def size(self, rev):
282 281 # Because we have a custom renamed() that may lie, we need to call
283 282 # the base renamed() to report accurate results.
284 283 node = self.node(rev)
285 284 if super(narrowfilelog, self).renamed(node):
286 285 return len(self.read(node))
287 286 else:
288 287 return super(narrowfilelog, self).size(rev)
289 288
290 289 def cmp(self, node, text):
291 290 # We don't call `super` because narrow parents can be buggy in case of an
292 291 # ambiguous dirstate. Always take the slow path until there is a better
293 292 # fix, see issue6150.
294 293
295 294 # Censored files compare against the empty file.
296 295 if self.iscensored(self.rev(node)):
297 296 return text != b''
298 297
299 298 return self.read(node) != text
@@ -1,3760 +1,3760 b''
1 1 # localrepo.py - read/write repository class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import functools
12 12 import os
13 13 import random
14 14 import sys
15 15 import time
16 16 import weakref
17 17
18 18 from .i18n import _
19 19 from .node import (
20 20 bin,
21 21 hex,
22 22 nullrev,
23 23 sha1nodeconstants,
24 24 short,
25 25 )
26 26 from .pycompat import (
27 27 delattr,
28 28 getattr,
29 29 )
30 30 from . import (
31 31 bookmarks,
32 32 branchmap,
33 33 bundle2,
34 34 bundlecaches,
35 35 changegroup,
36 36 color,
37 37 commit,
38 38 context,
39 39 dirstate,
40 40 dirstateguard,
41 41 discovery,
42 42 encoding,
43 43 error,
44 44 exchange,
45 45 extensions,
46 46 filelog,
47 47 hook,
48 48 lock as lockmod,
49 49 match as matchmod,
50 50 mergestate as mergestatemod,
51 51 mergeutil,
52 52 metadata as metadatamod,
53 53 namespaces,
54 54 narrowspec,
55 55 obsolete,
56 56 pathutil,
57 57 phases,
58 58 pushkey,
59 59 pycompat,
60 60 rcutil,
61 61 repoview,
62 62 requirements as requirementsmod,
63 63 revlog,
64 64 revset,
65 65 revsetlang,
66 66 scmutil,
67 67 sparse,
68 68 store as storemod,
69 69 subrepoutil,
70 70 tags as tagsmod,
71 71 transaction,
72 72 txnutil,
73 73 util,
74 74 vfs as vfsmod,
75 75 wireprototypes,
76 76 )
77 77
78 78 from .interfaces import (
79 79 repository,
80 80 util as interfaceutil,
81 81 )
82 82
83 83 from .utils import (
84 84 hashutil,
85 85 procutil,
86 86 stringutil,
87 87 urlutil,
88 88 )
89 89
90 90 from .revlogutils import (
91 91 concurrency_checker as revlogchecker,
92 92 constants as revlogconst,
93 93 )
94 94
95 95 release = lockmod.release
96 96 urlerr = util.urlerr
97 97 urlreq = util.urlreq
98 98
99 99 # set of (path, vfs-location) tuples. vfs-location is:
100 100 # - 'plain for vfs relative paths
101 101 # - '' for svfs relative paths
102 102 _cachedfiles = set()
103 103
104 104
105 105 class _basefilecache(scmutil.filecache):
106 106 """All filecache usage on repo are done for logic that should be unfiltered"""
107 107
108 108 def __get__(self, repo, type=None):
109 109 if repo is None:
110 110 return self
111 111 # proxy to unfiltered __dict__ since filtered repo has no entry
112 112 unfi = repo.unfiltered()
113 113 try:
114 114 return unfi.__dict__[self.sname]
115 115 except KeyError:
116 116 pass
117 117 return super(_basefilecache, self).__get__(unfi, type)
118 118
119 119 def set(self, repo, value):
120 120 return super(_basefilecache, self).set(repo.unfiltered(), value)
121 121
122 122
123 123 class repofilecache(_basefilecache):
124 124 """filecache for files in .hg but outside of .hg/store"""
125 125
126 126 def __init__(self, *paths):
127 127 super(repofilecache, self).__init__(*paths)
128 128 for path in paths:
129 129 _cachedfiles.add((path, b'plain'))
130 130
131 131 def join(self, obj, fname):
132 132 return obj.vfs.join(fname)
133 133
134 134
135 135 class storecache(_basefilecache):
136 136 """filecache for files in the store"""
137 137
138 138 def __init__(self, *paths):
139 139 super(storecache, self).__init__(*paths)
140 140 for path in paths:
141 141 _cachedfiles.add((path, b''))
142 142
143 143 def join(self, obj, fname):
144 144 return obj.sjoin(fname)
145 145
146 146
147 147 class mixedrepostorecache(_basefilecache):
148 148 """filecache for a mix files in .hg/store and outside"""
149 149
150 150 def __init__(self, *pathsandlocations):
151 151 # scmutil.filecache only uses the path for passing back into our
152 152 # join(), so we can safely pass a list of paths and locations
153 153 super(mixedrepostorecache, self).__init__(*pathsandlocations)
154 154 _cachedfiles.update(pathsandlocations)
155 155
156 156 def join(self, obj, fnameandlocation):
157 157 fname, location = fnameandlocation
158 158 if location == b'plain':
159 159 return obj.vfs.join(fname)
160 160 else:
161 161 if location != b'':
162 162 raise error.ProgrammingError(
163 163 b'unexpected location: %s' % location
164 164 )
165 165 return obj.sjoin(fname)
166 166
167 167
168 168 def isfilecached(repo, name):
169 169 """check if a repo has already cached "name" filecache-ed property
170 170
171 171 This returns (cachedobj-or-None, iscached) tuple.
172 172 """
173 173 cacheentry = repo.unfiltered()._filecache.get(name, None)
174 174 if not cacheentry:
175 175 return None, False
176 176 return cacheentry.obj, True
177 177
178 178
179 179 class unfilteredpropertycache(util.propertycache):
180 180 """propertycache that apply to unfiltered repo only"""
181 181
182 182 def __get__(self, repo, type=None):
183 183 unfi = repo.unfiltered()
184 184 if unfi is repo:
185 185 return super(unfilteredpropertycache, self).__get__(unfi)
186 186 return getattr(unfi, self.name)
187 187
188 188
189 189 class filteredpropertycache(util.propertycache):
190 190 """propertycache that must take filtering in account"""
191 191
192 192 def cachevalue(self, obj, value):
193 193 object.__setattr__(obj, self.name, value)
194 194
195 195
196 196 def hasunfilteredcache(repo, name):
197 197 """check if a repo has an unfilteredpropertycache value for <name>"""
198 198 return name in vars(repo.unfiltered())
199 199
200 200
201 201 def unfilteredmethod(orig):
202 202 """decorate method that always need to be run on unfiltered version"""
203 203
204 204 @functools.wraps(orig)
205 205 def wrapper(repo, *args, **kwargs):
206 206 return orig(repo.unfiltered(), *args, **kwargs)
207 207
208 208 return wrapper
209 209
210 210
211 211 moderncaps = {
212 212 b'lookup',
213 213 b'branchmap',
214 214 b'pushkey',
215 215 b'known',
216 216 b'getbundle',
217 217 b'unbundle',
218 218 }
219 219 legacycaps = moderncaps.union({b'changegroupsubset'})
220 220
221 221
222 222 @interfaceutil.implementer(repository.ipeercommandexecutor)
223 223 class localcommandexecutor(object):
224 224 def __init__(self, peer):
225 225 self._peer = peer
226 226 self._sent = False
227 227 self._closed = False
228 228
229 229 def __enter__(self):
230 230 return self
231 231
232 232 def __exit__(self, exctype, excvalue, exctb):
233 233 self.close()
234 234
235 235 def callcommand(self, command, args):
236 236 if self._sent:
237 237 raise error.ProgrammingError(
238 238 b'callcommand() cannot be used after sendcommands()'
239 239 )
240 240
241 241 if self._closed:
242 242 raise error.ProgrammingError(
243 243 b'callcommand() cannot be used after close()'
244 244 )
245 245
246 246 # We don't need to support anything fancy. Just call the named
247 247 # method on the peer and return a resolved future.
248 248 fn = getattr(self._peer, pycompat.sysstr(command))
249 249
250 250 f = pycompat.futures.Future()
251 251
252 252 try:
253 253 result = fn(**pycompat.strkwargs(args))
254 254 except Exception:
255 255 pycompat.future_set_exception_info(f, sys.exc_info()[1:])
256 256 else:
257 257 f.set_result(result)
258 258
259 259 return f
260 260
261 261 def sendcommands(self):
262 262 self._sent = True
263 263
264 264 def close(self):
265 265 self._closed = True
266 266
267 267
268 268 @interfaceutil.implementer(repository.ipeercommands)
269 269 class localpeer(repository.peer):
270 270 '''peer for a local repo; reflects only the most recent API'''
271 271
272 272 def __init__(self, repo, caps=None):
273 273 super(localpeer, self).__init__()
274 274
275 275 if caps is None:
276 276 caps = moderncaps.copy()
277 277 self._repo = repo.filtered(b'served')
278 278 self.ui = repo.ui
279 279
280 280 if repo._wanted_sidedata:
281 281 formatted = bundle2.format_remote_wanted_sidedata(repo)
282 282 caps.add(b'exp-wanted-sidedata=' + formatted)
283 283
284 284 self._caps = repo._restrictcapabilities(caps)
285 285
286 286 # Begin of _basepeer interface.
287 287
288 288 def url(self):
289 289 return self._repo.url()
290 290
291 291 def local(self):
292 292 return self._repo
293 293
294 294 def peer(self):
295 295 return self
296 296
297 297 def canpush(self):
298 298 return True
299 299
300 300 def close(self):
301 301 self._repo.close()
302 302
303 303 # End of _basepeer interface.
304 304
305 305 # Begin of _basewirecommands interface.
306 306
307 307 def branchmap(self):
308 308 return self._repo.branchmap()
309 309
310 310 def capabilities(self):
311 311 return self._caps
312 312
313 313 def clonebundles(self):
314 314 return self._repo.tryread(bundlecaches.CB_MANIFEST_FILE)
315 315
316 316 def debugwireargs(self, one, two, three=None, four=None, five=None):
317 317 """Used to test argument passing over the wire"""
318 318 return b"%s %s %s %s %s" % (
319 319 one,
320 320 two,
321 321 pycompat.bytestr(three),
322 322 pycompat.bytestr(four),
323 323 pycompat.bytestr(five),
324 324 )
325 325
326 326 def getbundle(
327 327 self,
328 328 source,
329 329 heads=None,
330 330 common=None,
331 331 bundlecaps=None,
332 332 remote_sidedata=None,
333 333 **kwargs
334 334 ):
335 335 chunks = exchange.getbundlechunks(
336 336 self._repo,
337 337 source,
338 338 heads=heads,
339 339 common=common,
340 340 bundlecaps=bundlecaps,
341 341 remote_sidedata=remote_sidedata,
342 342 **kwargs
343 343 )[1]
344 344 cb = util.chunkbuffer(chunks)
345 345
346 346 if exchange.bundle2requested(bundlecaps):
347 347 # When requesting a bundle2, getbundle returns a stream to make the
348 348 # wire level function happier. We need to build a proper object
349 349 # from it in local peer.
350 350 return bundle2.getunbundler(self.ui, cb)
351 351 else:
352 352 return changegroup.getunbundler(b'01', cb, None)
353 353
354 354 def heads(self):
355 355 return self._repo.heads()
356 356
357 357 def known(self, nodes):
358 358 return self._repo.known(nodes)
359 359
360 360 def listkeys(self, namespace):
361 361 return self._repo.listkeys(namespace)
362 362
363 363 def lookup(self, key):
364 364 return self._repo.lookup(key)
365 365
366 366 def pushkey(self, namespace, key, old, new):
367 367 return self._repo.pushkey(namespace, key, old, new)
368 368
369 369 def stream_out(self):
370 370 raise error.Abort(_(b'cannot perform stream clone against local peer'))
371 371
372 372 def unbundle(self, bundle, heads, url):
373 373 """apply a bundle on a repo
374 374
375 375 This function handles the repo locking itself."""
376 376 try:
377 377 try:
378 378 bundle = exchange.readbundle(self.ui, bundle, None)
379 379 ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
380 380 if util.safehasattr(ret, b'getchunks'):
381 381 # This is a bundle20 object, turn it into an unbundler.
382 382 # This little dance should be dropped eventually when the
383 383 # API is finally improved.
384 384 stream = util.chunkbuffer(ret.getchunks())
385 385 ret = bundle2.getunbundler(self.ui, stream)
386 386 return ret
387 387 except Exception as exc:
388 388 # If the exception contains output salvaged from a bundle2
389 389 # reply, we need to make sure it is printed before continuing
390 390 # to fail. So we build a bundle2 with such output and consume
391 391 # it directly.
392 392 #
393 393 # This is not very elegant but allows a "simple" solution for
394 394 # issue4594
395 395 output = getattr(exc, '_bundle2salvagedoutput', ())
396 396 if output:
397 397 bundler = bundle2.bundle20(self._repo.ui)
398 398 for out in output:
399 399 bundler.addpart(out)
400 400 stream = util.chunkbuffer(bundler.getchunks())
401 401 b = bundle2.getunbundler(self.ui, stream)
402 402 bundle2.processbundle(self._repo, b)
403 403 raise
404 404 except error.PushRaced as exc:
405 405 raise error.ResponseError(
406 406 _(b'push failed:'), stringutil.forcebytestr(exc)
407 407 )
408 408
409 409 # End of _basewirecommands interface.
410 410
411 411 # Begin of peer interface.
412 412
413 413 def commandexecutor(self):
414 414 return localcommandexecutor(self)
415 415
416 416 # End of peer interface.
417 417
418 418
419 419 @interfaceutil.implementer(repository.ipeerlegacycommands)
420 420 class locallegacypeer(localpeer):
421 421 """peer extension which implements legacy methods too; used for tests with
422 422 restricted capabilities"""
423 423
424 424 def __init__(self, repo):
425 425 super(locallegacypeer, self).__init__(repo, caps=legacycaps)
426 426
427 427 # Begin of baselegacywirecommands interface.
428 428
429 429 def between(self, pairs):
430 430 return self._repo.between(pairs)
431 431
432 432 def branches(self, nodes):
433 433 return self._repo.branches(nodes)
434 434
435 435 def changegroup(self, nodes, source):
436 436 outgoing = discovery.outgoing(
437 437 self._repo, missingroots=nodes, ancestorsof=self._repo.heads()
438 438 )
439 439 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
440 440
441 441 def changegroupsubset(self, bases, heads, source):
442 442 outgoing = discovery.outgoing(
443 443 self._repo, missingroots=bases, ancestorsof=heads
444 444 )
445 445 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
446 446
447 447 # End of baselegacywirecommands interface.
448 448
449 449
450 450 # Functions receiving (ui, features) that extensions can register to impact
451 451 # the ability to load repositories with custom requirements. Only
452 452 # functions defined in loaded extensions are called.
453 453 #
454 454 # The function receives a set of requirement strings that the repository
455 455 # is capable of opening. Functions will typically add elements to the
456 456 # set to reflect that the extension knows how to handle those requirements.
457 457 featuresetupfuncs = set()
458 458
459 459
460 460 def _getsharedvfs(hgvfs, requirements):
461 461 """returns the vfs object pointing to root of shared source
462 462 repo for a shared repository
463 463
464 464 hgvfs is vfs pointing at .hg/ of current repo (shared one)
465 465 requirements is a set of requirements of current repo (shared one)
466 466 """
467 467 # The ``shared`` or ``relshared`` requirements indicate the
468 468 # store lives in the path contained in the ``.hg/sharedpath`` file.
469 469 # This is an absolute path for ``shared`` and relative to
470 470 # ``.hg/`` for ``relshared``.
471 471 sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
472 472 if requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements:
473 473 sharedpath = util.normpath(hgvfs.join(sharedpath))
474 474
475 475 sharedvfs = vfsmod.vfs(sharedpath, realpath=True)
476 476
477 477 if not sharedvfs.exists():
478 478 raise error.RepoError(
479 479 _(b'.hg/sharedpath points to nonexistent directory %s')
480 480 % sharedvfs.base
481 481 )
482 482 return sharedvfs
483 483
484 484
485 485 def _readrequires(vfs, allowmissing):
486 486 """reads the require file present at root of this vfs
487 487 and return a set of requirements
488 488
489 489 If allowmissing is True, we suppress ENOENT if raised"""
490 490 # requires file contains a newline-delimited list of
491 491 # features/capabilities the opener (us) must have in order to use
492 492 # the repository. This file was introduced in Mercurial 0.9.2,
493 493 # which means very old repositories may not have one. We assume
494 494 # a missing file translates to no requirements.
495 495 try:
496 496 requirements = set(vfs.read(b'requires').splitlines())
497 497 except IOError as e:
498 498 if not (allowmissing and e.errno == errno.ENOENT):
499 499 raise
500 500 requirements = set()
501 501 return requirements
502 502
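A hedged example of what such a ``requires`` file may hold (entries vary with
the repository format; these names are common but not exhaustive), together
with the same splitlines() parsing used above:

sample = b"dotencode\nfncache\ngeneraldelta\nrevlogv1\nsparserevlog\nstore\n"
assert b'store' in set(sample.splitlines())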
503 503
504 504 def makelocalrepository(baseui, path, intents=None):
505 505 """Create a local repository object.
506 506
507 507 Given arguments needed to construct a local repository, this function
508 508 performs various early repository loading functionality (such as
509 509 reading the ``.hg/requires`` and ``.hg/hgrc`` files), validates that
510 510 the repository can be opened, derives a type suitable for representing
511 511 that repository, and returns an instance of it.
512 512
513 513 The returned object conforms to the ``repository.completelocalrepository``
514 514 interface.
515 515
516 516 The repository type is derived by calling a series of factory functions
517 517 for each aspect/interface of the final repository. These are defined by
518 518 ``REPO_INTERFACES``.
519 519
520 520 Each factory function is called to produce a type implementing a specific
521 521 interface. The cumulative list of returned types will be combined into a
522 522 new type and that type will be instantiated to represent the local
523 523 repository.
524 524
525 525 The factory functions each receive various state that may be consulted
526 526 as part of deriving a type.
527 527
528 528 Extensions should wrap these factory functions to customize repository type
529 529 creation. Note that an extension's wrapped function may be called even if
530 530 that extension is not loaded for the repo being constructed. Extensions
531 531 should check if their ``__name__`` appears in the
532 532 ``extensionmodulenames`` set passed to the factory function and no-op if
533 533 not.
534 534 """
535 535 ui = baseui.copy()
536 536 # Prevent copying repo configuration.
537 537 ui.copy = baseui.copy
538 538
539 539 # Working directory VFS rooted at repository root.
540 540 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
541 541
542 542 # Main VFS for .hg/ directory.
543 543 hgpath = wdirvfs.join(b'.hg')
544 544 hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)
545 545 # Whether this repository is a shared one or not
546 546 shared = False
547 547 # If this repository is shared, this is the vfs pointing to the shared repo
548 548 sharedvfs = None
549 549
550 550 # The .hg/ path should exist and should be a directory. All other
551 551 # cases are errors.
552 552 if not hgvfs.isdir():
553 553 try:
554 554 hgvfs.stat()
555 555 except OSError as e:
556 556 if e.errno != errno.ENOENT:
557 557 raise
558 558 except ValueError as e:
559 559 # Can be raised on Python 3.8 when path is invalid.
560 560 raise error.Abort(
561 561 _(b'invalid path %s: %s') % (path, stringutil.forcebytestr(e))
562 562 )
563 563
564 564 raise error.RepoError(_(b'repository %s not found') % path)
565 565
566 566 requirements = _readrequires(hgvfs, True)
567 567 shared = (
568 568 requirementsmod.SHARED_REQUIREMENT in requirements
569 569 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
570 570 )
571 571 storevfs = None
572 572 if shared:
573 573 # This is a shared repo
574 574 sharedvfs = _getsharedvfs(hgvfs, requirements)
575 575 storevfs = vfsmod.vfs(sharedvfs.join(b'store'))
576 576 else:
577 577 storevfs = vfsmod.vfs(hgvfs.join(b'store'))
578 578
579 579 # if .hg/requires contains the sharesafe requirement, it means
580 580 # there exists a `.hg/store/requires` too and we should read it
581 581 # NOTE: presence of SHARESAFE_REQUIREMENT implies that the store requirement
582 582 # is present. We never write SHARESAFE_REQUIREMENT for a repo if store
583 583 # is not present; refer to checkrequirementscompat() for that
584 584 #
585 585 # However, if SHARESAFE_REQUIREMENT is not present, it means that the
586 586 # repository was shared the old way. We check the share source .hg/requires
587 587 # for SHARESAFE_REQUIREMENT to detect whether the current repository needs
588 588 # to be reshared
589 589 hint = _(b"see `hg help config.format.use-share-safe` for more information")
590 590 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
591 591
592 592 if (
593 593 shared
594 594 and requirementsmod.SHARESAFE_REQUIREMENT
595 595 not in _readrequires(sharedvfs, True)
596 596 ):
597 597 mismatch_warn = ui.configbool(
598 598 b'share', b'safe-mismatch.source-not-safe.warn'
599 599 )
600 600 mismatch_config = ui.config(
601 601 b'share', b'safe-mismatch.source-not-safe'
602 602 )
603 603 if mismatch_config in (
604 604 b'downgrade-allow',
605 605 b'allow',
606 606 b'downgrade-abort',
607 607 ):
608 608 # prevent cyclic import localrepo -> upgrade -> localrepo
609 609 from . import upgrade
610 610
611 611 upgrade.downgrade_share_to_non_safe(
612 612 ui,
613 613 hgvfs,
614 614 sharedvfs,
615 615 requirements,
616 616 mismatch_config,
617 617 mismatch_warn,
618 618 )
619 619 elif mismatch_config == b'abort':
620 620 raise error.Abort(
621 621 _(b"share source does not support share-safe requirement"),
622 622 hint=hint,
623 623 )
624 624 else:
625 625 raise error.Abort(
626 626 _(
627 627 b"share-safe mismatch with source.\nUnrecognized"
628 628 b" value '%s' of `share.safe-mismatch.source-not-safe`"
629 629 b" set."
630 630 )
631 631 % mismatch_config,
632 632 hint=hint,
633 633 )
634 634 else:
635 635 requirements |= _readrequires(storevfs, False)
636 636 elif shared:
637 637 sourcerequires = _readrequires(sharedvfs, False)
638 638 if requirementsmod.SHARESAFE_REQUIREMENT in sourcerequires:
639 639 mismatch_config = ui.config(b'share', b'safe-mismatch.source-safe')
640 640 mismatch_warn = ui.configbool(
641 641 b'share', b'safe-mismatch.source-safe.warn'
642 642 )
643 643 if mismatch_config in (
644 644 b'upgrade-allow',
645 645 b'allow',
646 646 b'upgrade-abort',
647 647 ):
648 648 # prevent cyclic import localrepo -> upgrade -> localrepo
649 649 from . import upgrade
650 650
651 651 upgrade.upgrade_share_to_safe(
652 652 ui,
653 653 hgvfs,
654 654 storevfs,
655 655 requirements,
656 656 mismatch_config,
657 657 mismatch_warn,
658 658 )
659 659 elif mismatch_config == b'abort':
660 660 raise error.Abort(
661 661 _(
662 662 b'version mismatch: source uses share-safe'
663 663 b' functionality while the current share does not'
664 664 ),
665 665 hint=hint,
666 666 )
667 667 else:
668 668 raise error.Abort(
669 669 _(
670 670 b"share-safe mismatch with source.\nUnrecognized"
671 671 b" value '%s' of `share.safe-mismatch.source-safe` set."
672 672 )
673 673 % mismatch_config,
674 674 hint=hint,
675 675 )
676 676
677 677 # The .hg/hgrc file may load extensions or contain config options
678 678 # that influence repository construction. Attempt to load it and
679 679 # process any new extensions that it may have pulled in.
680 680 if loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs):
681 681 afterhgrcload(ui, wdirvfs, hgvfs, requirements)
682 682 extensions.loadall(ui)
683 683 extensions.populateui(ui)
684 684
685 685 # Set of module names of extensions loaded for this repository.
686 686 extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}
687 687
688 688 supportedrequirements = gathersupportedrequirements(ui)
689 689
690 690 # We first validate the requirements are known.
691 691 ensurerequirementsrecognized(requirements, supportedrequirements)
692 692
693 693 # Then we validate that the known set is reasonable to use together.
694 694 ensurerequirementscompatible(ui, requirements)
695 695
696 696 # TODO there are unhandled edge cases related to opening repositories with
697 697 # shared storage. If storage is shared, we should also test for requirements
698 698 # compatibility in the pointed-to repo. This entails loading the .hg/hgrc in
699 699 # that repo, as that repo may load extensions needed to open it. This is a
700 700 # bit complicated because we don't want the other hgrc to overwrite settings
701 701 # in this hgrc.
702 702 #
703 703 # This bug is somewhat mitigated by the fact that we copy the .hg/requires
704 704 # file when sharing repos. But if a requirement is added after the share is
705 705 # performed, thereby introducing a new requirement for the opener, we may
706 706 # not see that and could encounter a run-time error interacting with
707 707 # that shared store since it has an unknown-to-us requirement.
708 708
709 709 # At this point, we know we should be capable of opening the repository.
710 710 # Now get on with doing that.
711 711
712 712 features = set()
713 713
714 714 # The "store" part of the repository holds versioned data. How it is
715 715 # accessed is determined by various requirements. If `shared` or
716 716 # `relshared` requirements are present, this indicates the current repository
717 717 # is a share and the store exists in the path mentioned in `.hg/sharedpath`
718 718 if shared:
719 719 storebasepath = sharedvfs.base
720 720 cachepath = sharedvfs.join(b'cache')
721 721 features.add(repository.REPO_FEATURE_SHARED_STORAGE)
722 722 else:
723 723 storebasepath = hgvfs.base
724 724 cachepath = hgvfs.join(b'cache')
725 725 wcachepath = hgvfs.join(b'wcache')
726 726
727 727 # The store has changed over time and the exact layout is dictated by
728 728 # requirements. The store interface abstracts differences across all
729 729 # of them.
730 730 store = makestore(
731 731 requirements,
732 732 storebasepath,
733 733 lambda base: vfsmod.vfs(base, cacheaudited=True),
734 734 )
735 735 hgvfs.createmode = store.createmode
736 736
737 737 storevfs = store.vfs
738 738 storevfs.options = resolvestorevfsoptions(ui, requirements, features)
739 739
740 740 # The cache vfs is used to manage cache files.
741 741 cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
742 742 cachevfs.createmode = store.createmode
743 743 # The cache vfs is used to manage cache files related to the working copy
744 744 wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
745 745 wcachevfs.createmode = store.createmode
746 746
747 747 # Now resolve the type for the repository object. We do this by repeatedly
748 748 # calling a factory function to produce types for specific aspects of the
749 749 # repo's operation. The aggregate returned types are used as base classes
750 750 # for a dynamically-derived type, which will represent our new repository.
751 751
752 752 bases = []
753 753 extrastate = {}
754 754
755 755 for iface, fn in REPO_INTERFACES:
756 756 # We pass all potentially useful state to give extensions tons of
757 757 # flexibility.
758 758 typ = fn()(
759 759 ui=ui,
760 760 intents=intents,
761 761 requirements=requirements,
762 762 features=features,
763 763 wdirvfs=wdirvfs,
764 764 hgvfs=hgvfs,
765 765 store=store,
766 766 storevfs=storevfs,
767 767 storeoptions=storevfs.options,
768 768 cachevfs=cachevfs,
769 769 wcachevfs=wcachevfs,
770 770 extensionmodulenames=extensionmodulenames,
771 771 extrastate=extrastate,
772 772 baseclasses=bases,
773 773 )
774 774
775 775 if not isinstance(typ, type):
776 776 raise error.ProgrammingError(
777 777 b'unable to construct type for %s' % iface
778 778 )
779 779
780 780 bases.append(typ)
781 781
782 782 # type() allows you to use characters in type names that wouldn't be
783 783 # recognized as Python symbols in source code. We abuse that to add
784 784 # rich information about our constructed repo.
785 785 name = pycompat.sysstr(
786 786 b'derivedrepo:%s<%s>' % (wdirvfs.base, b','.join(sorted(requirements)))
787 787 )
788 788
789 789 cls = type(name, tuple(bases), {})
790 790
791 791 return cls(
792 792 baseui=baseui,
793 793 ui=ui,
794 794 origroot=path,
795 795 wdirvfs=wdirvfs,
796 796 hgvfs=hgvfs,
797 797 requirements=requirements,
798 798 supportedrequirements=supportedrequirements,
799 799 sharedpath=storebasepath,
800 800 store=store,
801 801 cachevfs=cachevfs,
802 802 wcachevfs=wcachevfs,
803 803 features=features,
804 804 intents=intents,
805 805 )
806 806
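# A toy illustration of the dynamic type composition performed by
# makelocalrepository() above: each factory contributes one aspect class and
# ``type()`` combines them into a single derived repository class. Every name
# below is hypothetical and nothing here is used by the real code.
def _example_compose_repo_type():
    class _MainAspect(object):
        def describe(self):
            return 'main'

    class _FileStorageAspect(object):
        def file(self, path):
            return 'file storage for %s' % path

    bases = (_MainAspect, _FileStorageAspect)
    # ``type()`` accepts characters that would not be legal in a Python
    # identifier, which is what lets the code above embed the repo path and
    # requirements into the generated type name.
    cls = type('derivedrepo:example<store,fncache>', bases, {})
    return cls()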
807 807
808 808 def loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs=None):
809 809 """Load hgrc files/content into a ui instance.
810 810
811 811 This is called during repository opening to load any additional
812 812 config files or settings relevant to the current repository.
813 813
814 814 Returns a bool indicating whether any additional configs were loaded.
815 815
816 816 Extensions should monkeypatch this function to modify how per-repo
817 817 configs are loaded. For example, an extension may wish to pull in
818 818 configs from alternate files or sources.
819 819
820 820 sharedvfs is the vfs object pointing to the source repo if the current one is a
821 821 shared one
822 822 """
823 823 if not rcutil.use_repo_hgrc():
824 824 return False
825 825
826 826 ret = False
827 827 # first load config from the shared source if we have to
828 828 if requirementsmod.SHARESAFE_REQUIREMENT in requirements and sharedvfs:
829 829 try:
830 830 ui.readconfig(sharedvfs.join(b'hgrc'), root=sharedvfs.base)
831 831 ret = True
832 832 except IOError:
833 833 pass
834 834
835 835 try:
836 836 ui.readconfig(hgvfs.join(b'hgrc'), root=wdirvfs.base)
837 837 ret = True
838 838 except IOError:
839 839 pass
840 840
841 841 try:
842 842 ui.readconfig(hgvfs.join(b'hgrc-not-shared'), root=wdirvfs.base)
843 843 ret = True
844 844 except IOError:
845 845 pass
846 846
847 847 return ret
848 848
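# A simplified sketch of the layered config reads above: candidate hgrc paths
# are tried in a fixed precedence order (share source first, then the repo's
# own hgrc, then the not-shared overrides) and a missing file is silently
# skipped. Plain ``open`` stands in for the ui/vfs machinery and the helper
# name is made up for the example.
def _example_read_config_layers(paths):
    layers = []
    for path in paths:
        try:
            with open(path, 'rb') as fp:
                layers.append((path, fp.read()))
        except IOError:
            # a missing layer is not an error; later layers still apply
            continue
    return layers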
849 849
850 850 def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
851 851 """Perform additional actions after .hg/hgrc is loaded.
852 852
853 853 This function is called during repository loading immediately after
854 854 the .hg/hgrc file is loaded and before per-repo extensions are loaded.
855 855
856 856 The function can be used to validate configs, automatically add
857 857 options (including extensions) based on requirements, etc.
858 858 """
859 859
860 860 # Map of requirements to list of extensions to load automatically when
861 861 # requirement is present.
862 862 autoextensions = {
863 863 b'git': [b'git'],
864 864 b'largefiles': [b'largefiles'],
865 865 b'lfs': [b'lfs'],
866 866 }
867 867
868 868 for requirement, names in sorted(autoextensions.items()):
869 869 if requirement not in requirements:
870 870 continue
871 871
872 872 for name in names:
873 873 if not ui.hasconfig(b'extensions', name):
874 874 ui.setconfig(b'extensions', name, b'', source=b'autoload')
875 875
876 876
877 877 def gathersupportedrequirements(ui):
878 878 """Determine the complete set of recognized requirements."""
879 879 # Start with all requirements supported by this file.
880 880 supported = set(localrepository._basesupported)
881 881
882 882 # Execute ``featuresetupfuncs`` entries if they belong to an extension
883 883 # relevant to this ui instance.
884 884 modules = {m.__name__ for n, m in extensions.extensions(ui)}
885 885
886 886 for fn in featuresetupfuncs:
887 887 if fn.__module__ in modules:
888 888 fn(ui, supported)
889 889
890 890 # Add derived requirements from registered compression engines.
891 891 for name in util.compengines:
892 892 engine = util.compengines[name]
893 893 if engine.available() and engine.revlogheader():
894 894 supported.add(b'exp-compression-%s' % name)
895 895 if engine.name() == b'zstd':
896 896 supported.add(b'revlog-compression-zstd')
897 897
898 898 return supported
899 899
900 900
901 901 def ensurerequirementsrecognized(requirements, supported):
902 902 """Validate that a set of local requirements is recognized.
903 903
904 904 Receives a set of requirements. Raises an ``error.RepoError`` if there
905 905 exists any requirement in that set that currently loaded code doesn't
906 906 recognize.
907 907
908 908 Returns a set of supported requirements.
909 909 """
910 910 missing = set()
911 911
912 912 for requirement in requirements:
913 913 if requirement in supported:
914 914 continue
915 915
916 916 if not requirement or not requirement[0:1].isalnum():
917 917 raise error.RequirementError(_(b'.hg/requires file is corrupt'))
918 918
919 919 missing.add(requirement)
920 920
921 921 if missing:
922 922 raise error.RequirementError(
923 923 _(b'repository requires features unknown to this Mercurial: %s')
924 924 % b' '.join(sorted(missing)),
925 925 hint=_(
926 926 b'see https://mercurial-scm.org/wiki/MissingRequirement '
927 927 b'for more information'
928 928 ),
929 929 )
930 930
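# Illustrative usage of ensurerequirementsrecognized() above (the requirement
# values are invented for the example): recognized requirements pass silently,
# while an unknown one raises a RequirementError naming everything that is
# missing. This helper is only a demonstration and is never called.
def _example_check_requirements():
    supported = {b'revlogv1', b'store', b'fncache'}
    ensurerequirementsrecognized({b'revlogv1', b'store'}, supported)  # ok
    try:
        ensurerequirementsrecognized({b'exp-frobnicate'}, supported)
    except error.RequirementError as unknown:
        return unknown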
931 931
932 932 def ensurerequirementscompatible(ui, requirements):
933 933 """Validates that a set of recognized requirements is mutually compatible.
934 934
935 935 Some requirements may not be compatible with others or require
936 936 config options that aren't enabled. This function is called during
937 937 repository opening to ensure that the set of requirements needed
938 938 to open a repository is sane and compatible with config options.
939 939
940 940 Extensions can monkeypatch this function to perform additional
941 941 checking.
942 942
943 943 ``error.RepoError`` should be raised on failure.
944 944 """
945 945 if (
946 946 requirementsmod.SPARSE_REQUIREMENT in requirements
947 947 and not sparse.enabled
948 948 ):
949 949 raise error.RepoError(
950 950 _(
951 951 b'repository is using sparse feature but '
952 952 b'sparse is not enabled; enable the '
953 953 b'"sparse" extensions to access'
954 954 )
955 955 )
956 956
957 957
958 958 def makestore(requirements, path, vfstype):
959 959 """Construct a storage object for a repository."""
960 960 if requirementsmod.STORE_REQUIREMENT in requirements:
961 961 if requirementsmod.FNCACHE_REQUIREMENT in requirements:
962 962 dotencode = requirementsmod.DOTENCODE_REQUIREMENT in requirements
963 963 return storemod.fncachestore(path, vfstype, dotencode)
964 964
965 965 return storemod.encodedstore(path, vfstype)
966 966
967 967 return storemod.basicstore(path, vfstype)
968 968
969 969
970 970 def resolvestorevfsoptions(ui, requirements, features):
971 971 """Resolve the options to pass to the store vfs opener.
972 972
973 973 The returned dict is used to influence behavior of the storage layer.
974 974 """
975 975 options = {}
976 976
977 977 if requirementsmod.TREEMANIFEST_REQUIREMENT in requirements:
978 978 options[b'treemanifest'] = True
979 979
980 980 # experimental config: format.manifestcachesize
981 981 manifestcachesize = ui.configint(b'format', b'manifestcachesize')
982 982 if manifestcachesize is not None:
983 983 options[b'manifestcachesize'] = manifestcachesize
984 984
985 985 # In the absence of another requirement superseding a revlog-related
986 986 # requirement, we have to assume the repo is using revlog version 0.
987 987 # This revlog format is super old and we don't bother trying to parse
988 988 # opener options for it because those options wouldn't do anything
989 989 # meaningful on such old repos.
990 990 if (
991 991 requirementsmod.REVLOGV1_REQUIREMENT in requirements
992 992 or requirementsmod.REVLOGV2_REQUIREMENT in requirements
993 993 ):
994 994 options.update(resolverevlogstorevfsoptions(ui, requirements, features))
995 995 else: # explicitly mark repo as using revlogv0
996 996 options[b'revlogv0'] = True
997 997
998 998 if requirementsmod.COPIESSDC_REQUIREMENT in requirements:
999 999 options[b'copies-storage'] = b'changeset-sidedata'
1000 1000 else:
1001 1001 writecopiesto = ui.config(b'experimental', b'copies.write-to')
1002 1002 copiesextramode = (b'changeset-only', b'compatibility')
1003 1003 if writecopiesto in copiesextramode:
1004 1004 options[b'copies-storage'] = b'extra'
1005 1005
1006 1006 return options
1007 1007
1008 1008
1009 1009 def resolverevlogstorevfsoptions(ui, requirements, features):
1010 1010 """Resolve opener options specific to revlogs."""
1011 1011
1012 1012 options = {}
1013 1013 options[b'flagprocessors'] = {}
1014 1014
1015 1015 if requirementsmod.REVLOGV1_REQUIREMENT in requirements:
1016 1016 options[b'revlogv1'] = True
1017 1017 if requirementsmod.REVLOGV2_REQUIREMENT in requirements:
1018 1018 options[b'revlogv2'] = True
1019 1019
1020 1020 if requirementsmod.GENERALDELTA_REQUIREMENT in requirements:
1021 1021 options[b'generaldelta'] = True
1022 1022
1023 1023 # experimental config: format.chunkcachesize
1024 1024 chunkcachesize = ui.configint(b'format', b'chunkcachesize')
1025 1025 if chunkcachesize is not None:
1026 1026 options[b'chunkcachesize'] = chunkcachesize
1027 1027
1028 1028 deltabothparents = ui.configbool(
1029 1029 b'storage', b'revlog.optimize-delta-parent-choice'
1030 1030 )
1031 1031 options[b'deltabothparents'] = deltabothparents
1032 1032
1033 1033 lazydelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
1034 1034 lazydeltabase = False
1035 1035 if lazydelta:
1036 1036 lazydeltabase = ui.configbool(
1037 1037 b'storage', b'revlog.reuse-external-delta-parent'
1038 1038 )
1039 1039 if lazydeltabase is None:
1040 1040 lazydeltabase = not scmutil.gddeltaconfig(ui)
1041 1041 options[b'lazydelta'] = lazydelta
1042 1042 options[b'lazydeltabase'] = lazydeltabase
1043 1043
1044 1044 chainspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
1045 1045 if 0 <= chainspan:
1046 1046 options[b'maxdeltachainspan'] = chainspan
1047 1047
1048 1048 mmapindexthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
1049 1049 if mmapindexthreshold is not None:
1050 1050 options[b'mmapindexthreshold'] = mmapindexthreshold
1051 1051
1052 1052 withsparseread = ui.configbool(b'experimental', b'sparse-read')
1053 1053 srdensitythres = float(
1054 1054 ui.config(b'experimental', b'sparse-read.density-threshold')
1055 1055 )
1056 1056 srmingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
1057 1057 options[b'with-sparse-read'] = withsparseread
1058 1058 options[b'sparse-read-density-threshold'] = srdensitythres
1059 1059 options[b'sparse-read-min-gap-size'] = srmingapsize
1060 1060
1061 1061 sparserevlog = requirementsmod.SPARSEREVLOG_REQUIREMENT in requirements
1062 1062 options[b'sparse-revlog'] = sparserevlog
1063 1063 if sparserevlog:
1064 1064 options[b'generaldelta'] = True
1065 1065
1066 1066 sidedata = requirementsmod.SIDEDATA_REQUIREMENT in requirements
1067 1067 options[b'side-data'] = sidedata
1068 1068
1069 1069 maxchainlen = None
1070 1070 if sparserevlog:
1071 1071 maxchainlen = revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH
1072 1072 # experimental config: format.maxchainlen
1073 1073 maxchainlen = ui.configint(b'format', b'maxchainlen', maxchainlen)
1074 1074 if maxchainlen is not None:
1075 1075 options[b'maxchainlen'] = maxchainlen
1076 1076
1077 1077 for r in requirements:
1078 1078 # we allow multiple compression engine requirements to co-exist because
1079 1079 # strictly speaking, revlog seems to support mixed compression styles.
1080 1080 #
1081 1081 # The compression used for new entries will be "the last one"
1082 1082 prefix = r.startswith
1083 1083 if prefix(b'revlog-compression-') or prefix(b'exp-compression-'):
1084 1084 options[b'compengine'] = r.split(b'-', 2)[2]
1085 1085
1086 1086 options[b'zlib.level'] = ui.configint(b'storage', b'revlog.zlib.level')
1087 1087 if options[b'zlib.level'] is not None:
1088 1088 if not (0 <= options[b'zlib.level'] <= 9):
1089 1089 msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
1090 1090 raise error.Abort(msg % options[b'zlib.level'])
1091 1091 options[b'zstd.level'] = ui.configint(b'storage', b'revlog.zstd.level')
1092 1092 if options[b'zstd.level'] is not None:
1093 1093 if not (0 <= options[b'zstd.level'] <= 22):
1094 1094 msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
1095 1095 raise error.Abort(msg % options[b'zstd.level'])
1096 1096
1097 1097 if requirementsmod.NARROW_REQUIREMENT in requirements:
1098 1098 options[b'enableellipsis'] = True
1099 1099
1100 1100 if ui.configbool(b'experimental', b'rust.index'):
1101 1101 options[b'rust.index'] = True
1102 1102 if requirementsmod.NODEMAP_REQUIREMENT in requirements:
1103 1103 slow_path = ui.config(
1104 1104 b'storage', b'revlog.persistent-nodemap.slow-path'
1105 1105 )
1106 1106 if slow_path not in (b'allow', b'warn', b'abort'):
1107 1107 default = ui.config_default(
1108 1108 b'storage', b'revlog.persistent-nodemap.slow-path'
1109 1109 )
1110 1110 msg = _(
1111 1111 b'unknown value for config '
1112 1112 b'"storage.revlog.persistent-nodemap.slow-path": "%s"\n'
1113 1113 )
1114 1114 ui.warn(msg % slow_path)
1115 1115 if not ui.quiet:
1116 1116 ui.warn(_(b'falling back to default value: %s\n') % default)
1117 1117 slow_path = default
1118 1118
1119 1119 msg = _(
1120 1120 b"accessing `persistent-nodemap` repository without associated "
1121 1121 b"fast implementation."
1122 1122 )
1123 1123 hint = _(
1124 1124 b"check `hg help config.format.use-persistent-nodemap` "
1125 1125 b"for details"
1126 1126 )
1127 1127 if not revlog.HAS_FAST_PERSISTENT_NODEMAP:
1128 1128 if slow_path == b'warn':
1129 1129 msg = b"warning: " + msg + b'\n'
1130 1130 ui.warn(msg)
1131 1131 if not ui.quiet:
1132 1132 hint = b'(' + hint + b')\n'
1133 1133 ui.warn(hint)
1134 1134 if slow_path == b'abort':
1135 1135 raise error.Abort(msg, hint=hint)
1136 1136 options[b'persistent-nodemap'] = True
1137 1137 if ui.configbool(b'storage', b'revlog.persistent-nodemap.mmap'):
1138 1138 options[b'persistent-nodemap.mmap'] = True
1139 1139 if ui.configbool(b'devel', b'persistent-nodemap'):
1140 1140 options[b'devel-force-nodemap'] = True
1141 1141
1142 1142 return options
1143 1143
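# A small worked example of the compression-requirement parsing above: the
# engine name is everything after the second dash, so
# b'revlog-compression-zstd'.split(b'-', 2)[2] == b'zstd'. The helper below
# only illustrates that split and is not used by the real code.
def _example_compengine_from_requirements(requirements):
    engine = None
    for r in requirements:
        if r.startswith(b'revlog-compression-') or r.startswith(
            b'exp-compression-'
        ):
            engine = r.split(b'-', 2)[2]
    return engine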
1144 1144
1145 1145 def makemain(**kwargs):
1146 1146 """Produce a type conforming to ``ilocalrepositorymain``."""
1147 1147 return localrepository
1148 1148
1149 1149
1150 1150 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1151 1151 class revlogfilestorage(object):
1152 1152 """File storage when using revlogs."""
1153 1153
1154 1154 def file(self, path):
1155 1155 if path.startswith(b'/'):
1156 1156 path = path[1:]
1157 1157
1158 1158 return filelog.filelog(self.svfs, path)
1159 1159
1160 1160
1161 1161 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1162 1162 class revlognarrowfilestorage(object):
1163 1163 """File storage when using revlogs and narrow files."""
1164 1164
1165 1165 def file(self, path):
1166 1166 if path.startswith(b'/'):
1167 1167 path = path[1:]
1168 1168
1169 1169 return filelog.narrowfilelog(self.svfs, path, self._storenarrowmatch)
1170 1170
1171 1171
1172 1172 def makefilestorage(requirements, features, **kwargs):
1173 1173 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1174 1174 features.add(repository.REPO_FEATURE_REVLOG_FILE_STORAGE)
1175 1175 features.add(repository.REPO_FEATURE_STREAM_CLONE)
1176 1176
1177 1177 if requirementsmod.NARROW_REQUIREMENT in requirements:
1178 1178 return revlognarrowfilestorage
1179 1179 else:
1180 1180 return revlogfilestorage
1181 1181
1182 1182
1183 1183 # List of repository interfaces and factory functions for them. Each
1184 1184 # will be called in order during ``makelocalrepository()`` to iteratively
1185 1185 # derive the final type for a local repository instance. We capture the
1186 1186 # function as a lambda so we don't hold a reference and the module-level
1187 1187 # functions can be wrapped.
1188 1188 REPO_INTERFACES = [
1189 1189 (repository.ilocalrepositorymain, lambda: makemain),
1190 1190 (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
1191 1191 ]
1192 1192
1193 1193
1194 1194 @interfaceutil.implementer(repository.ilocalrepositorymain)
1195 1195 class localrepository(object):
1196 1196 """Main class for representing local repositories.
1197 1197
1198 1198 All local repositories are instances of this class.
1199 1199
1200 1200 Constructed on its own, instances of this class are not usable as
1201 1201 repository objects. To obtain a usable repository object, call
1202 1202 ``hg.repository()``, ``localrepo.instance()``, or
1203 1203 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
1204 1204 ``instance()`` adds support for creating new repositories.
1205 1205 ``hg.repository()`` adds more extension integration, including calling
1206 1206 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
1207 1207 used.
1208 1208 """
1209 1209
1210 1210 # obsolete experimental requirements:
1211 1211 # - manifestv2: An experimental new manifest format that allowed
1212 1212 # for stem compression of long paths. Experiment ended up not
1213 1213 # being successful (repository sizes went up due to worse delta
1214 1214 # chains), and the code was deleted in 4.6.
1215 1215 supportedformats = {
1216 1216 requirementsmod.REVLOGV1_REQUIREMENT,
1217 1217 requirementsmod.GENERALDELTA_REQUIREMENT,
1218 1218 requirementsmod.TREEMANIFEST_REQUIREMENT,
1219 1219 requirementsmod.COPIESSDC_REQUIREMENT,
1220 1220 requirementsmod.REVLOGV2_REQUIREMENT,
1221 1221 requirementsmod.SIDEDATA_REQUIREMENT,
1222 1222 requirementsmod.SPARSEREVLOG_REQUIREMENT,
1223 1223 requirementsmod.NODEMAP_REQUIREMENT,
1224 1224 bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT,
1225 1225 requirementsmod.SHARESAFE_REQUIREMENT,
1226 1226 }
1227 1227 _basesupported = supportedformats | {
1228 1228 requirementsmod.STORE_REQUIREMENT,
1229 1229 requirementsmod.FNCACHE_REQUIREMENT,
1230 1230 requirementsmod.SHARED_REQUIREMENT,
1231 1231 requirementsmod.RELATIVE_SHARED_REQUIREMENT,
1232 1232 requirementsmod.DOTENCODE_REQUIREMENT,
1233 1233 requirementsmod.SPARSE_REQUIREMENT,
1234 1234 requirementsmod.INTERNAL_PHASE_REQUIREMENT,
1235 1235 }
1236 1236
1237 1237 # list of prefixes for files which can be written without 'wlock'
1238 1238 # Extensions should extend this list when needed
1239 1239 _wlockfreeprefix = {
1240 1240 # We might consider requiring 'wlock' for the next
1241 1241 # two, but pretty much all the existing code assumes
1242 1242 # wlock is not needed so we keep them excluded for
1243 1243 # now.
1244 1244 b'hgrc',
1245 1245 b'requires',
1246 1246 # XXX cache is a complicated business; someone
1247 1247 # should investigate this in depth at some point
1248 1248 b'cache/',
1249 1249 # XXX shouldn't dirstate be covered by the wlock?
1250 1250 b'dirstate',
1251 1251 # XXX bisect was still a bit too messy at the time
1252 1252 # this changeset was introduced. Someone should fix
1253 1253 # the remaining bit and drop this line
1254 1254 b'bisect.state',
1255 1255 }
1256 1256
1257 1257 def __init__(
1258 1258 self,
1259 1259 baseui,
1260 1260 ui,
1261 1261 origroot,
1262 1262 wdirvfs,
1263 1263 hgvfs,
1264 1264 requirements,
1265 1265 supportedrequirements,
1266 1266 sharedpath,
1267 1267 store,
1268 1268 cachevfs,
1269 1269 wcachevfs,
1270 1270 features,
1271 1271 intents=None,
1272 1272 ):
1273 1273 """Create a new local repository instance.
1274 1274
1275 1275 Most callers should use ``hg.repository()``, ``localrepo.instance()``,
1276 1276 or ``localrepo.makelocalrepository()`` for obtaining a new repository
1277 1277 object.
1278 1278
1279 1279 Arguments:
1280 1280
1281 1281 baseui
1282 1282 ``ui.ui`` instance that ``ui`` argument was based off of.
1283 1283
1284 1284 ui
1285 1285 ``ui.ui`` instance for use by the repository.
1286 1286
1287 1287 origroot
1288 1288 ``bytes`` path to working directory root of this repository.
1289 1289
1290 1290 wdirvfs
1291 1291 ``vfs.vfs`` rooted at the working directory.
1292 1292
1293 1293 hgvfs
1294 1294 ``vfs.vfs`` rooted at .hg/
1295 1295
1296 1296 requirements
1297 1297 ``set`` of bytestrings representing repository opening requirements.
1298 1298
1299 1299 supportedrequirements
1300 1300 ``set`` of bytestrings representing repository requirements that we
1301 1301 know how to open. May be a superset of ``requirements``.
1302 1302
1303 1303 sharedpath
1304 1304 ``bytes`` Defining path to storage base directory. Points to a
1305 1305 ``.hg/`` directory somewhere.
1306 1306
1307 1307 store
1308 1308 ``store.basicstore`` (or derived) instance providing access to
1309 1309 versioned storage.
1310 1310
1311 1311 cachevfs
1312 1312 ``vfs.vfs`` used for cache files.
1313 1313
1314 1314 wcachevfs
1315 1315 ``vfs.vfs`` used for cache files related to the working copy.
1316 1316
1317 1317 features
1318 1318 ``set`` of bytestrings defining features/capabilities of this
1319 1319 instance.
1320 1320
1321 1321 intents
1322 1322 ``set`` of system strings indicating what this repo will be used
1323 1323 for.
1324 1324 """
1325 1325 self.baseui = baseui
1326 1326 self.ui = ui
1327 1327 self.origroot = origroot
1328 1328 # vfs rooted at working directory.
1329 1329 self.wvfs = wdirvfs
1330 1330 self.root = wdirvfs.base
1331 1331 # vfs rooted at .hg/. Used to access most non-store paths.
1332 1332 self.vfs = hgvfs
1333 1333 self.path = hgvfs.base
1334 1334 self.requirements = requirements
1335 1335 self.nodeconstants = sha1nodeconstants
1336 1336 self.nullid = self.nodeconstants.nullid
1337 1337 self.supported = supportedrequirements
1338 1338 self.sharedpath = sharedpath
1339 1339 self.store = store
1340 1340 self.cachevfs = cachevfs
1341 1341 self.wcachevfs = wcachevfs
1342 1342 self.features = features
1343 1343
1344 1344 self.filtername = None
1345 1345
1346 1346 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1347 1347 b'devel', b'check-locks'
1348 1348 ):
1349 1349 self.vfs.audit = self._getvfsward(self.vfs.audit)
1350 1350 # A list of callbacks to shape the phase if no data were found.
1351 1351 # Callbacks are in the form: func(repo, roots) --> processed root.
1352 1352 # This list is to be filled by extensions during repo setup
1353 1353 self._phasedefaults = []
1354 1354
1355 1355 color.setup(self.ui)
1356 1356
1357 1357 self.spath = self.store.path
1358 1358 self.svfs = self.store.vfs
1359 1359 self.sjoin = self.store.join
1360 1360 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1361 1361 b'devel', b'check-locks'
1362 1362 ):
1363 1363 if util.safehasattr(self.svfs, b'vfs'): # this is filtervfs
1364 1364 self.svfs.vfs.audit = self._getsvfsward(self.svfs.vfs.audit)
1365 1365 else: # standard vfs
1366 1366 self.svfs.audit = self._getsvfsward(self.svfs.audit)
1367 1367
1368 1368 self._dirstatevalidatewarned = False
1369 1369
1370 1370 self._branchcaches = branchmap.BranchMapCache()
1371 1371 self._revbranchcache = None
1372 1372 self._filterpats = {}
1373 1373 self._datafilters = {}
1374 1374 self._transref = self._lockref = self._wlockref = None
1375 1375
1376 1376 # A cache for various files under .hg/ that tracks file changes,
1377 1377 # (used by the filecache decorator)
1378 1378 #
1379 1379 # Maps a property name to its util.filecacheentry
1380 1380 self._filecache = {}
1381 1381
1382 1382 # hold sets of revision to be filtered
1383 1383 # should be cleared when something might have changed the filter value:
1384 1384 # - new changesets,
1385 1385 # - phase change,
1386 1386 # - new obsolescence marker,
1387 1387 # - working directory parent change,
1388 1388 # - bookmark changes
1389 1389 self.filteredrevcache = {}
1390 1390
1391 1391 # post-dirstate-status hooks
1392 1392 self._postdsstatus = []
1393 1393
1394 1394 # generic mapping between names and nodes
1395 1395 self.names = namespaces.namespaces()
1396 1396
1397 1397 # Key to signature value.
1398 1398 self._sparsesignaturecache = {}
1399 1399 # Signature to cached matcher instance.
1400 1400 self._sparsematchercache = {}
1401 1401
1402 1402 self._extrafilterid = repoview.extrafilter(ui)
1403 1403
1404 1404 self.filecopiesmode = None
1405 1405 if requirementsmod.COPIESSDC_REQUIREMENT in self.requirements:
1406 1406 self.filecopiesmode = b'changeset-sidedata'
1407 1407
1408 1408 self._wanted_sidedata = set()
1409 1409 self._sidedata_computers = {}
1410 1410 metadatamod.set_sidedata_spec_for_repo(self)
1411 1411
1412 1412 def _getvfsward(self, origfunc):
1413 1413 """build a ward for self.vfs"""
1414 1414 rref = weakref.ref(self)
1415 1415
1416 1416 def checkvfs(path, mode=None):
1417 1417 ret = origfunc(path, mode=mode)
1418 1418 repo = rref()
1419 1419 if (
1420 1420 repo is None
1421 1421 or not util.safehasattr(repo, b'_wlockref')
1422 1422 or not util.safehasattr(repo, b'_lockref')
1423 1423 ):
1424 1424 return
1425 1425 if mode in (None, b'r', b'rb'):
1426 1426 return
1427 1427 if path.startswith(repo.path):
1428 1428 # truncate name relative to the repository (.hg)
1429 1429 path = path[len(repo.path) + 1 :]
1430 1430 if path.startswith(b'cache/'):
1431 1431 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1432 1432 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1433 1433 # path prefixes covered by 'lock'
1434 1434 vfs_path_prefixes = (
1435 1435 b'journal.',
1436 1436 b'undo.',
1437 1437 b'strip-backup/',
1438 1438 b'cache/',
1439 1439 )
1440 1440 if any(path.startswith(prefix) for prefix in vfs_path_prefixes):
1441 1441 if repo._currentlock(repo._lockref) is None:
1442 1442 repo.ui.develwarn(
1443 1443 b'write with no lock: "%s"' % path,
1444 1444 stacklevel=3,
1445 1445 config=b'check-locks',
1446 1446 )
1447 1447 elif repo._currentlock(repo._wlockref) is None:
1448 1448 # rest of vfs files are covered by 'wlock'
1449 1449 #
1450 1450 # exclude special files
1451 1451 for prefix in self._wlockfreeprefix:
1452 1452 if path.startswith(prefix):
1453 1453 return
1454 1454 repo.ui.develwarn(
1455 1455 b'write with no wlock: "%s"' % path,
1456 1456 stacklevel=3,
1457 1457 config=b'check-locks',
1458 1458 )
1459 1459 return ret
1460 1460
1461 1461 return checkvfs
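# (illustrative note, not in the original source) The ward above keeps only a
# ``weakref.ref`` to the repository so that wrapping ``vfs.audit`` does not
# keep the repository alive through a reference cycle: once the repository
# has been garbage collected, ``rref()`` returns None and the check silently
# becomes a no-op.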
1462 1462
1463 1463 def _getsvfsward(self, origfunc):
1464 1464 """build a ward for self.svfs"""
1465 1465 rref = weakref.ref(self)
1466 1466
1467 1467 def checksvfs(path, mode=None):
1468 1468 ret = origfunc(path, mode=mode)
1469 1469 repo = rref()
1470 1470 if repo is None or not util.safehasattr(repo, b'_lockref'):
1471 1471 return
1472 1472 if mode in (None, b'r', b'rb'):
1473 1473 return
1474 1474 if path.startswith(repo.sharedpath):
1475 1475 # truncate name relative to the repository (.hg)
1476 1476 path = path[len(repo.sharedpath) + 1 :]
1477 1477 if repo._currentlock(repo._lockref) is None:
1478 1478 repo.ui.develwarn(
1479 1479 b'write with no lock: "%s"' % path, stacklevel=4
1480 1480 )
1481 1481 return ret
1482 1482
1483 1483 return checksvfs
1484 1484
1485 1485 def close(self):
1486 1486 self._writecaches()
1487 1487
1488 1488 def _writecaches(self):
1489 1489 if self._revbranchcache:
1490 1490 self._revbranchcache.write()
1491 1491
1492 1492 def _restrictcapabilities(self, caps):
1493 1493 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1494 1494 caps = set(caps)
1495 1495 capsblob = bundle2.encodecaps(
1496 1496 bundle2.getrepocaps(self, role=b'client')
1497 1497 )
1498 1498 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1499 1499 if self.ui.configbool(b'experimental', b'narrow'):
1500 1500 caps.add(wireprototypes.NARROWCAP)
1501 1501 return caps
1502 1502
1503 1503 # Don't cache auditor/nofsauditor, or you'll end up with reference cycle:
1504 1504 # self -> auditor -> self._checknested -> self
1505 1505
1506 1506 @property
1507 1507 def auditor(self):
1508 1508 # This is only used by context.workingctx.match in order to
1509 1509 # detect files in subrepos.
1510 1510 return pathutil.pathauditor(self.root, callback=self._checknested)
1511 1511
1512 1512 @property
1513 1513 def nofsauditor(self):
1514 1514 # This is only used by context.basectx.match in order to detect
1515 1515 # files in subrepos.
1516 1516 return pathutil.pathauditor(
1517 1517 self.root, callback=self._checknested, realfs=False, cached=True
1518 1518 )
1519 1519
1520 1520 def _checknested(self, path):
1521 1521 """Determine if path is a legal nested repository."""
1522 1522 if not path.startswith(self.root):
1523 1523 return False
1524 1524 subpath = path[len(self.root) + 1 :]
1525 1525 normsubpath = util.pconvert(subpath)
1526 1526
1527 1527 # XXX: Checking against the current working copy is wrong in
1528 1528 # the sense that it can reject things like
1529 1529 #
1530 1530 # $ hg cat -r 10 sub/x.txt
1531 1531 #
1532 1532 # if sub/ is no longer a subrepository in the working copy
1533 1533 # parent revision.
1534 1534 #
1535 1535 # However, it can of course also allow things that would have
1536 1536 # been rejected before, such as the above cat command if sub/
1537 1537 # is a subrepository now, but was a normal directory before.
1538 1538 # The old path auditor would have rejected by mistake since it
1539 1539 # panics when it sees sub/.hg/.
1540 1540 #
1541 1541 # All in all, checking against the working copy seems sensible
1542 1542 # since we want to prevent access to nested repositories on
1543 1543 # the filesystem *now*.
1544 1544 ctx = self[None]
1545 1545 parts = util.splitpath(subpath)
1546 1546 while parts:
1547 1547 prefix = b'/'.join(parts)
1548 1548 if prefix in ctx.substate:
1549 1549 if prefix == normsubpath:
1550 1550 return True
1551 1551 else:
1552 1552 sub = ctx.sub(prefix)
1553 1553 return sub.checknested(subpath[len(prefix) + 1 :])
1554 1554 else:
1555 1555 parts.pop()
1556 1556 return False
1557 1557
1558 1558 def peer(self):
1559 1559 return localpeer(self) # not cached to avoid reference cycle
1560 1560
1561 1561 def unfiltered(self):
1562 1562 """Return unfiltered version of the repository
1563 1563
1564 1564 Intended to be overwritten by filtered repo."""
1565 1565 return self
1566 1566
1567 1567 def filtered(self, name, visibilityexceptions=None):
1568 1568 """Return a filtered version of a repository
1569 1569
1570 1570 The `name` parameter is the identifier of the requested view. This
1571 1571 will return a repoview object set "exactly" to the specified view.
1572 1572
1573 1573 This function does not apply recursive filtering to a repository. For
1574 1574 example calling `repo.filtered("served")` will return a repoview using
1575 1575 the "served" view, regardless of the initial view used by `repo`.
1576 1576
1577 1577 In other words, there is always only one level of `repoview` "filtering".
1578 1578 """
1579 1579 if self._extrafilterid is not None and b'%' not in name:
1580 1580 name = name + b'%' + self._extrafilterid
1581 1581
1582 1582 cls = repoview.newtype(self.unfiltered().__class__)
1583 1583 return cls(self, name, visibilityexceptions)
1584 1584
1585 1585 @mixedrepostorecache(
1586 1586 (b'bookmarks', b'plain'),
1587 1587 (b'bookmarks.current', b'plain'),
1588 1588 (b'bookmarks', b''),
1589 1589 (b'00changelog.i', b''),
1590 1590 )
1591 1591 def _bookmarks(self):
1592 1592 # Since the multiple files involved in the transaction cannot be
1593 1593 # written atomically (with current repository format), there is a race
1594 1594 # condition here.
1595 1595 #
1596 1596 # 1) changelog content A is read
1597 1597 # 2) outside transaction update changelog to content B
1598 1598 # 3) outside transaction update bookmark file referring to content B
1599 1599 # 4) bookmarks file content is read and filtered against changelog-A
1600 1600 #
1601 1601 # When this happens, bookmarks against nodes missing from A are dropped.
1602 1602 #
1603 1603 # Having this happen during a read is not great, but it becomes worse
1604 1604 # when it happens during a write, because the bookmarks pointing to the
1605 1605 # "unknown" nodes will be dropped for good. However, writes happen within
1606 1606 # locks. This locking makes it possible to have a race-free consistent read.
1607 1607 # For this purpose, data read from disk before locking is
1608 1608 # "invalidated" right after the locks are taken. These invalidations are
1609 1609 # "light": the `filecache` mechanism keeps the data in memory and will
1610 1610 # reuse it if the underlying files did not change. Not parsing the
1611 1611 # same data multiple times helps performance.
1612 1612 #
1613 1613 # Unfortunately, in the case described above, the files tracked by the
1614 1614 # bookmarks file cache might not have changed, but the in-memory
1615 1615 # content is still "wrong" because we used an older changelog content
1616 1616 # to process the on-disk data. So after locking, the changelog would be
1617 1617 # refreshed but `_bookmarks` would be preserved.
1618 1618 # Adding `00changelog.i` to the list of tracked files is not
1619 1619 # enough, because at the time we build the content for `_bookmarks` in
1620 1620 # (4), the changelog file has already diverged from the content used
1621 1621 # for loading `changelog` in (1)
1622 1622 #
1623 1623 # To prevent the issue, we force the changelog to be explicitly
1624 1624 # reloaded while computing `_bookmarks`. The data race can still happen
1625 1625 # without the lock (with a narrower window), but it would no longer go
1626 1626 # undetected during the lock time refresh.
1627 1627 #
1628 1628 # The new schedule is as follows:
1629 1629 #
1630 1630 # 1) filecache logic detect that `_bookmarks` needs to be computed
1631 1631 # 2) cachestat for `bookmarks` and `changelog` are captured (for book)
1632 1632 # 3) We force `changelog` filecache to be tested
1633 1633 # 4) cachestat for `changelog` are captured (for changelog)
1634 1634 # 5) `_bookmarks` is computed and cached
1635 1635 #
1636 1636 # The step in (3) ensures we have a changelog at least as recent as the
1637 1637 # cache stat computed in (1). As a result, at locking time:
1638 1638 # * if the changelog did not change since (1) -> we can reuse the data
1639 1639 # * otherwise -> the bookmarks get refreshed.
1640 1640 self._refreshchangelog()
1641 1641 return bookmarks.bmstore(self)
1642 1642
1643 1643 def _refreshchangelog(self):
1644 1644 """make sure the in memory changelog match the on-disk one"""
1645 1645 if 'changelog' in vars(self) and self.currenttransaction() is None:
1646 1646 del self.changelog
1647 1647
1648 1648 @property
1649 1649 def _activebookmark(self):
1650 1650 return self._bookmarks.active
1651 1651
1652 1652 # _phasesets depend on changelog. what we need is to call
1653 1653 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1654 1654 # can't be easily expressed in filecache mechanism.
1655 1655 @storecache(b'phaseroots', b'00changelog.i')
1656 1656 def _phasecache(self):
1657 1657 return phases.phasecache(self, self._phasedefaults)
1658 1658
1659 1659 @storecache(b'obsstore')
1660 1660 def obsstore(self):
1661 1661 return obsolete.makestore(self.ui, self)
1662 1662
1663 1663 @storecache(b'00changelog.i')
1664 1664 def changelog(self):
1665 1665 # load dirstate before changelog to avoid race see issue6303
1666 1666 self.dirstate.prefetch_parents()
1667 1667 return self.store.changelog(
1668 1668 txnutil.mayhavepending(self.root),
1669 1669 concurrencychecker=revlogchecker.get_checker(self.ui, b'changelog'),
1670 1670 )
1671 1671
1672 1672 @storecache(b'00manifest.i')
1673 1673 def manifestlog(self):
1674 1674 return self.store.manifestlog(self, self._storenarrowmatch)
1675 1675
1676 1676 @repofilecache(b'dirstate')
1677 1677 def dirstate(self):
1678 1678 return self._makedirstate()
1679 1679
1680 1680 def _makedirstate(self):
1681 1681 """Extension point for wrapping the dirstate per-repo."""
1682 1682 sparsematchfn = lambda: sparse.matcher(self)
1683 1683
1684 1684 return dirstate.dirstate(
1685 1685 self.vfs,
1686 1686 self.ui,
1687 1687 self.root,
1688 1688 self._dirstatevalidate,
1689 1689 sparsematchfn,
1690 1690 self.nodeconstants,
1691 1691 )
1692 1692
1693 1693 def _dirstatevalidate(self, node):
1694 1694 try:
1695 1695 self.changelog.rev(node)
1696 1696 return node
1697 1697 except error.LookupError:
1698 1698 if not self._dirstatevalidatewarned:
1699 1699 self._dirstatevalidatewarned = True
1700 1700 self.ui.warn(
1701 1701 _(b"warning: ignoring unknown working parent %s!\n")
1702 1702 % short(node)
1703 1703 )
1704 1704 return self.nullid
1705 1705
1706 1706 @storecache(narrowspec.FILENAME)
1707 1707 def narrowpats(self):
1708 1708 """matcher patterns for this repository's narrowspec
1709 1709
1710 1710 A tuple of (includes, excludes).
1711 1711 """
1712 1712 return narrowspec.load(self)
1713 1713
1714 1714 @storecache(narrowspec.FILENAME)
1715 1715 def _storenarrowmatch(self):
1716 1716 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1717 1717 return matchmod.always()
1718 1718 include, exclude = self.narrowpats
1719 1719 return narrowspec.match(self.root, include=include, exclude=exclude)
1720 1720
1721 1721 @storecache(narrowspec.FILENAME)
1722 1722 def _narrowmatch(self):
1723 1723 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1724 1724 return matchmod.always()
1725 1725 narrowspec.checkworkingcopynarrowspec(self)
1726 1726 include, exclude = self.narrowpats
1727 1727 return narrowspec.match(self.root, include=include, exclude=exclude)
1728 1728
1729 1729 def narrowmatch(self, match=None, includeexact=False):
1730 1730 """matcher corresponding the the repo's narrowspec
1731 1731
1732 1732 If `match` is given, then that will be intersected with the narrow
1733 1733 matcher.
1734 1734
1735 1735 If `includeexact` is True, then any exact matches from `match` will
1736 1736 be included even if they're outside the narrowspec.
1737 1737 """
1738 1738 if match:
1739 1739 if includeexact and not self._narrowmatch.always():
1740 1740 # do not exclude explicitly-specified paths so that they can
1741 1741 # be warned later on
1742 1742 em = matchmod.exact(match.files())
1743 1743 nm = matchmod.unionmatcher([self._narrowmatch, em])
1744 1744 return matchmod.intersectmatchers(match, nm)
1745 1745 return matchmod.intersectmatchers(match, self._narrowmatch)
1746 1746 return self._narrowmatch
1747 1747
1748 1748 def setnarrowpats(self, newincludes, newexcludes):
1749 1749 narrowspec.save(self, newincludes, newexcludes)
1750 1750 self.invalidate(clearfilecache=True)
1751 1751
1752 1752 @unfilteredpropertycache
1753 1753 def _quick_access_changeid_null(self):
1754 1754 return {
1755 1755 b'null': (nullrev, self.nodeconstants.nullid),
1756 1756 nullrev: (nullrev, self.nodeconstants.nullid),
1757 1757 self.nullid: (nullrev, self.nullid),
1758 1758 }
1759 1759
1760 1760 @unfilteredpropertycache
1761 1761 def _quick_access_changeid_wc(self):
1762 1762 # also fast path access to the working copy parents
1763 1763 # however, only do it for filters that ensure the wc is visible.
1764 1764 quick = self._quick_access_changeid_null.copy()
1765 1765 cl = self.unfiltered().changelog
1766 1766 for node in self.dirstate.parents():
1767 1767 if node == self.nullid:
1768 1768 continue
1769 1769 rev = cl.index.get_rev(node)
1770 1770 if rev is None:
1771 1771 # unknown working copy parent case:
1772 1772 #
1773 1773 # skip the fast path and let higher code deal with it
1774 1774 continue
1775 1775 pair = (rev, node)
1776 1776 quick[rev] = pair
1777 1777 quick[node] = pair
1778 1778 # also add the parents of the parents
1779 1779 for r in cl.parentrevs(rev):
1780 1780 if r == nullrev:
1781 1781 continue
1782 1782 n = cl.node(r)
1783 1783 pair = (r, n)
1784 1784 quick[r] = pair
1785 1785 quick[n] = pair
1786 1786 p1node = self.dirstate.p1()
1787 1787 if p1node != self.nullid:
1788 1788 quick[b'.'] = quick[p1node]
1789 1789 return quick
1790 1790
1791 1791 @unfilteredmethod
1792 1792 def _quick_access_changeid_invalidate(self):
1793 1793 if '_quick_access_changeid_wc' in vars(self):
1794 1794 del self.__dict__['_quick_access_changeid_wc']
1795 1795
1796 1796 @property
1797 1797 def _quick_access_changeid(self):
1798 1798 """an helper dictionnary for __getitem__ calls
1799 1799
1800 1800 This contains a list of symbols we can recognize right away without
1801 1801 further processing.
1802 1802 """
1803 1803 if self.filtername in repoview.filter_has_wc:
1804 1804 return self._quick_access_changeid_wc
1805 1805 return self._quick_access_changeid_null
1806 1806
1807 1807 def __getitem__(self, changeid):
1808 1808 # dealing with special cases
1809 1809 if changeid is None:
1810 1810 return context.workingctx(self)
1811 1811 if isinstance(changeid, context.basectx):
1812 1812 return changeid
1813 1813
1814 1814 # dealing with multiple revisions
1815 1815 if isinstance(changeid, slice):
1816 1816 # wdirrev isn't contiguous so the slice shouldn't include it
1817 1817 return [
1818 1818 self[i]
1819 1819 for i in pycompat.xrange(*changeid.indices(len(self)))
1820 1820 if i not in self.changelog.filteredrevs
1821 1821 ]
1822 1822
1823 1823 # dealing with some special values
1824 1824 quick_access = self._quick_access_changeid.get(changeid)
1825 1825 if quick_access is not None:
1826 1826 rev, node = quick_access
1827 1827 return context.changectx(self, rev, node, maybe_filtered=False)
1828 1828 if changeid == b'tip':
1829 1829 node = self.changelog.tip()
1830 1830 rev = self.changelog.rev(node)
1831 1831 return context.changectx(self, rev, node)
1832 1832
1833 1833 # dealing with arbitrary values
1834 1834 try:
1835 1835 if isinstance(changeid, int):
1836 1836 node = self.changelog.node(changeid)
1837 1837 rev = changeid
1838 1838 elif changeid == b'.':
1839 1839 # this is a hack to delay/avoid loading obsmarkers
1840 1840 # when we know that '.' won't be hidden
1841 1841 node = self.dirstate.p1()
1842 1842 rev = self.unfiltered().changelog.rev(node)
1843 1843 elif len(changeid) == self.nodeconstants.nodelen:
1844 1844 try:
1845 1845 node = changeid
1846 1846 rev = self.changelog.rev(changeid)
1847 1847 except error.FilteredLookupError:
1848 1848 changeid = hex(changeid) # for the error message
1849 1849 raise
1850 1850 except LookupError:
1851 1851 # check if it might have come from damaged dirstate
1852 1852 #
1853 1853 # XXX we could avoid the unfiltered if we had a recognizable
1854 1854 # exception for filtered changeset access
1855 1855 if (
1856 1856 self.local()
1857 1857 and changeid in self.unfiltered().dirstate.parents()
1858 1858 ):
1859 1859 msg = _(b"working directory has unknown parent '%s'!")
1860 1860 raise error.Abort(msg % short(changeid))
1861 1861 changeid = hex(changeid) # for the error message
1862 1862 raise
1863 1863
1864 1864 elif len(changeid) == 2 * self.nodeconstants.nodelen:
1865 1865 node = bin(changeid)
1866 1866 rev = self.changelog.rev(node)
1867 1867 else:
1868 1868 raise error.ProgrammingError(
1869 1869 b"unsupported changeid '%s' of type %s"
1870 1870 % (changeid, pycompat.bytestr(type(changeid)))
1871 1871 )
1872 1872
1873 1873 return context.changectx(self, rev, node)
1874 1874
1875 1875 except (error.FilteredIndexError, error.FilteredLookupError):
1876 1876 raise error.FilteredRepoLookupError(
1877 1877 _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
1878 1878 )
1879 1879 except (IndexError, LookupError):
1880 1880 raise error.RepoLookupError(
1881 1881 _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
1882 1882 )
1883 1883 except error.WdirUnsupported:
1884 1884 return context.workingctx(self)
1885 1885
1886 1886 def __contains__(self, changeid):
1887 1887 """True if the given changeid exists"""
1888 1888 try:
1889 1889 self[changeid]
1890 1890 return True
1891 1891 except error.RepoLookupError:
1892 1892 return False
1893 1893
1894 1894 def __nonzero__(self):
1895 1895 return True
1896 1896
1897 1897 __bool__ = __nonzero__
1898 1898
1899 1899 def __len__(self):
1900 1900 # no need to pay the cost of repoview.changelog
1901 1901 unfi = self.unfiltered()
1902 1902 return len(unfi.changelog)
1903 1903
1904 1904 def __iter__(self):
1905 1905 return iter(self.changelog)
1906 1906
1907 1907 def revs(self, expr, *args):
1908 1908 """Find revisions matching a revset.
1909 1909
1910 1910 The revset is specified as a string ``expr`` that may contain
1911 1911 %-formatting to escape certain types. See ``revsetlang.formatspec``.
1912 1912
1913 1913 Revset aliases from the configuration are not expanded. To expand
1914 1914 user aliases, consider calling ``scmutil.revrange()`` or
1915 1915 ``repo.anyrevs([expr], user=True)``.
1916 1916
1917 1917 Returns a smartset.abstractsmartset, which is a list-like interface
1918 1918 that contains integer revisions.
1919 1919 """
1920 1920 tree = revsetlang.spectree(expr, *args)
1921 1921 return revset.makematcher(tree)(self)
1922 1922
1923 1923 def set(self, expr, *args):
1924 1924 """Find revisions matching a revset and emit changectx instances.
1925 1925
1926 1926 This is a convenience wrapper around ``revs()`` that iterates the
1927 1927 result and is a generator of changectx instances.
1928 1928
1929 1929 Revset aliases from the configuration are not expanded. To expand
1930 1930 user aliases, consider calling ``scmutil.revrange()``.
1931 1931 """
1932 1932 for r in self.revs(expr, *args):
1933 1933 yield self[r]
1934 1934
1935 1935 def anyrevs(self, specs, user=False, localalias=None):
1936 1936 """Find revisions matching one of the given revsets.
1937 1937
1938 1938 Revset aliases from the configuration are not expanded by default. To
1939 1939 expand user aliases, specify ``user=True``. To provide some local
1940 1940 definitions overriding user aliases, set ``localalias`` to
1941 1941 ``{name: definitionstring}``.
1942 1942 """
1943 1943 if specs == [b'null']:
1944 1944 return revset.baseset([nullrev])
1945 1945 if specs == [b'.']:
1946 1946 quick_data = self._quick_access_changeid.get(b'.')
1947 1947 if quick_data is not None:
1948 1948 return revset.baseset([quick_data[0]])
1949 1949 if user:
1950 1950 m = revset.matchany(
1951 1951 self.ui,
1952 1952 specs,
1953 1953 lookup=revset.lookupfn(self),
1954 1954 localalias=localalias,
1955 1955 )
1956 1956 else:
1957 1957 m = revset.matchany(None, specs, localalias=localalias)
1958 1958 return m(self)
1959 1959
1960 1960 def url(self):
1961 1961 return b'file:' + self.root
1962 1962
1963 1963 def hook(self, name, throw=False, **args):
1964 1964 """Call a hook, passing this repo instance.
1965 1965
1966 1966 This is a convenience method to aid invoking hooks. Extensions likely
1967 1967 won't call this unless they have registered a custom hook or are
1968 1968 replacing code that is expected to call a hook.
1969 1969 """
1970 1970 return hook.hook(self.ui, self, name, throw, **args)
1971 1971
1972 1972 @filteredpropertycache
1973 1973 def _tagscache(self):
1974 1974 """Returns a tagscache object that contains various tags related
1975 1975 caches."""
1976 1976
1977 1977 # This simplifies its cache management by having one decorated
1978 1978 # function (this one) and the rest simply fetch things from it.
1979 1979 class tagscache(object):
1980 1980 def __init__(self):
1981 1981 # These two define the set of tags for this repository. tags
1982 1982 # maps tag name to node; tagtypes maps tag name to 'global' or
1983 1983 # 'local'. (Global tags are defined by .hgtags across all
1984 1984 # heads, and local tags are defined in .hg/localtags.)
1985 1985 # They constitute the in-memory cache of tags.
1986 1986 self.tags = self.tagtypes = None
1987 1987
1988 1988 self.nodetagscache = self.tagslist = None
1989 1989
1990 1990 cache = tagscache()
1991 1991 cache.tags, cache.tagtypes = self._findtags()
1992 1992
1993 1993 return cache
1994 1994
1995 1995 def tags(self):
1996 1996 '''return a mapping of tag to node'''
1997 1997 t = {}
1998 1998 if self.changelog.filteredrevs:
1999 1999 tags, tt = self._findtags()
2000 2000 else:
2001 2001 tags = self._tagscache.tags
2002 2002 rev = self.changelog.rev
2003 2003 for k, v in pycompat.iteritems(tags):
2004 2004 try:
2005 2005 # ignore tags to unknown nodes
2006 2006 rev(v)
2007 2007 t[k] = v
2008 2008 except (error.LookupError, ValueError):
2009 2009 pass
2010 2010 return t
2011 2011
2012 2012 def _findtags(self):
2013 2013 """Do the hard work of finding tags. Return a pair of dicts
2014 2014 (tags, tagtypes) where tags maps tag name to node, and tagtypes
2015 2015 maps tag name to a string like \'global\' or \'local\'.
2016 2016 Subclasses or extensions are free to add their own tags, but
2017 2017 should be aware that the returned dicts will be retained for the
2018 2018 duration of the localrepo object."""
2019 2019
2020 2020 # XXX what tagtype should subclasses/extensions use? Currently
2021 2021 # mq and bookmarks add tags, but do not set the tagtype at all.
2022 2022 # Should each extension invent its own tag type? Should there
2023 2023 # be one tagtype for all such "virtual" tags? Or is the status
2024 2024 # quo fine?
2025 2025
2026 2026 # map tag name to (node, hist)
2027 2027 alltags = tagsmod.findglobaltags(self.ui, self)
2028 2028 # map tag name to tag type
2029 2029 tagtypes = {tag: b'global' for tag in alltags}
2030 2030
2031 2031 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
2032 2032
2033 2033 # Build the return dicts. Have to re-encode tag names because
2034 2034 # the tags module always uses UTF-8 (in order not to lose info
2035 2035 # writing to the cache), but the rest of Mercurial wants them in
2036 2036 # local encoding.
2037 2037 tags = {}
2038 2038 for (name, (node, hist)) in pycompat.iteritems(alltags):
2039 2039 if node != self.nullid:
2040 2040 tags[encoding.tolocal(name)] = node
2041 2041 tags[b'tip'] = self.changelog.tip()
2042 2042 tagtypes = {
2043 2043 encoding.tolocal(name): value
2044 2044 for (name, value) in pycompat.iteritems(tagtypes)
2045 2045 }
2046 2046 return (tags, tagtypes)
2047 2047
2048 2048 def tagtype(self, tagname):
2049 2049 """
2050 2050 return the type of the given tag. result can be:
2051 2051
2052 2052 'local' : a local tag
2053 2053 'global' : a global tag
2054 2054 None : tag does not exist
2055 2055 """
2056 2056
2057 2057 return self._tagscache.tagtypes.get(tagname)
2058 2058
2059 2059 def tagslist(self):
2060 2060 '''return a list of tags ordered by revision'''
2061 2061 if not self._tagscache.tagslist:
2062 2062 l = []
2063 2063 for t, n in pycompat.iteritems(self.tags()):
2064 2064 l.append((self.changelog.rev(n), t, n))
2065 2065 self._tagscache.tagslist = [(t, n) for r, t, n in sorted(l)]
2066 2066
2067 2067 return self._tagscache.tagslist
2068 2068
2069 2069 def nodetags(self, node):
2070 2070 '''return the tags associated with a node'''
2071 2071 if not self._tagscache.nodetagscache:
2072 2072 nodetagscache = {}
2073 2073 for t, n in pycompat.iteritems(self._tagscache.tags):
2074 2074 nodetagscache.setdefault(n, []).append(t)
2075 2075 for tags in pycompat.itervalues(nodetagscache):
2076 2076 tags.sort()
2077 2077 self._tagscache.nodetagscache = nodetagscache
2078 2078 return self._tagscache.nodetagscache.get(node, [])
2079 2079
2080 2080 def nodebookmarks(self, node):
2081 2081 """return the list of bookmarks pointing to the specified node"""
2082 2082 return self._bookmarks.names(node)
2083 2083
2084 2084 def branchmap(self):
2085 2085 """returns a dictionary {branch: [branchheads]} with branchheads
2086 2086 ordered by increasing revision number"""
2087 2087 return self._branchcaches[self]
2088 2088
2089 2089 @unfilteredmethod
2090 2090 def revbranchcache(self):
2091 2091 if not self._revbranchcache:
2092 2092 self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
2093 2093 return self._revbranchcache
2094 2094
2095 2095 def register_changeset(self, rev, changelogrevision):
2096 2096 self.revbranchcache().setdata(rev, changelogrevision)
2097 2097
2098 2098 def branchtip(self, branch, ignoremissing=False):
2099 2099 """return the tip node for a given branch
2100 2100
2101 2101 If ignoremissing is True, then this method will not raise an error.
2102 2102 This is helpful for callers that only expect None for a missing branch
2103 2103 (e.g. namespace).
2104 2104
2105 2105 """
2106 2106 try:
2107 2107 return self.branchmap().branchtip(branch)
2108 2108 except KeyError:
2109 2109 if not ignoremissing:
2110 2110 raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
2111 2111 else:
2112 2112 pass
2113 2113
2114 2114 def lookup(self, key):
2115 2115 node = scmutil.revsymbol(self, key).node()
2116 2116 if node is None:
2117 2117 raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
2118 2118 return node
2119 2119
2120 2120 def lookupbranch(self, key):
2121 2121 if self.branchmap().hasbranch(key):
2122 2122 return key
2123 2123
2124 2124 return scmutil.revsymbol(self, key).branch()
2125 2125
2126 2126 def known(self, nodes):
2127 2127 cl = self.changelog
2128 2128 get_rev = cl.index.get_rev
2129 2129 filtered = cl.filteredrevs
2130 2130 result = []
2131 2131 for n in nodes:
2132 2132 r = get_rev(n)
2133 2133 resp = not (r is None or r in filtered)
2134 2134 result.append(resp)
2135 2135 return result
2136 2136
2137 2137 def local(self):
2138 2138 return self
2139 2139
2140 2140 def publishing(self):
2141 2141 # it's safe (and desirable) to trust the publish flag unconditionally
2142 2142 # so that we don't finalize changes shared between users via ssh or nfs
2143 2143 return self.ui.configbool(b'phases', b'publish', untrusted=True)
2144 2144
2145 2145 def cancopy(self):
2146 2146 # so statichttprepo's override of local() works
2147 2147 if not self.local():
2148 2148 return False
2149 2149 if not self.publishing():
2150 2150 return True
2151 2151 # if publishing we can't copy if there is filtered content
2152 2152 return not self.filtered(b'visible').changelog.filteredrevs
2153 2153
2154 2154 def shared(self):
2155 2155 '''the type of shared repository (None if not shared)'''
2156 2156 if self.sharedpath != self.path:
2157 2157 return b'store'
2158 2158 return None
2159 2159
2160 2160 def wjoin(self, f, *insidef):
2161 2161 return self.vfs.reljoin(self.root, f, *insidef)
2162 2162
2163 2163 def setparents(self, p1, p2=None):
2164 2164 if p2 is None:
2165 2165 p2 = self.nullid
2166 2166 self[None].setparents(p1, p2)
2167 2167 self._quick_access_changeid_invalidate()
2168 2168
2169 2169 def filectx(self, path, changeid=None, fileid=None, changectx=None):
2170 2170 """changeid must be a changeset revision, if specified.
2171 2171 fileid can be a file revision or node."""
2172 2172 return context.filectx(
2173 2173 self, path, changeid, fileid, changectx=changectx
2174 2174 )
2175 2175
2176 2176 def getcwd(self):
2177 2177 return self.dirstate.getcwd()
2178 2178
2179 2179 def pathto(self, f, cwd=None):
2180 2180 return self.dirstate.pathto(f, cwd)
2181 2181
2182 2182 def _loadfilter(self, filter):
2183 2183 if filter not in self._filterpats:
2184 2184 l = []
2185 2185 for pat, cmd in self.ui.configitems(filter):
2186 2186 if cmd == b'!':
2187 2187 continue
2188 2188 mf = matchmod.match(self.root, b'', [pat])
2189 2189 fn = None
2190 2190 params = cmd
2191 2191 for name, filterfn in pycompat.iteritems(self._datafilters):
2192 2192 if cmd.startswith(name):
2193 2193 fn = filterfn
2194 2194 params = cmd[len(name) :].lstrip()
2195 2195 break
2196 2196 if not fn:
2197 2197 fn = lambda s, c, **kwargs: procutil.filter(s, c)
2198 2198 fn.__name__ = 'commandfilter'
2199 2199 # Wrap old filters not supporting keyword arguments
2200 2200 if not pycompat.getargspec(fn)[2]:
2201 2201 oldfn = fn
2202 2202 fn = lambda s, c, oldfn=oldfn, **kwargs: oldfn(s, c)
2203 2203 fn.__name__ = 'compat-' + oldfn.__name__
2204 2204 l.append((mf, fn, params))
2205 2205 self._filterpats[filter] = l
2206 2206 return self._filterpats[filter]
2207 2207
2208 2208 def _filter(self, filterpats, filename, data):
2209 2209 for mf, fn, cmd in filterpats:
2210 2210 if mf(filename):
2211 2211 self.ui.debug(
2212 2212 b"filtering %s through %s\n"
2213 2213 % (filename, cmd or pycompat.sysbytes(fn.__name__))
2214 2214 )
2215 2215 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
2216 2216 break
2217 2217
2218 2218 return data
2219 2219
2220 2220 @unfilteredpropertycache
2221 2221 def _encodefilterpats(self):
2222 2222 return self._loadfilter(b'encode')
2223 2223
2224 2224 @unfilteredpropertycache
2225 2225 def _decodefilterpats(self):
2226 2226 return self._loadfilter(b'decode')
2227 2227
2228 2228 def adddatafilter(self, name, filter):
2229 2229 self._datafilters[name] = filter
2230 2230
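# Illustrative sketch (editor's note, not part of this changeset): a data
# filter registered here can then be referenced from an ``[encode]`` or
# ``[decode]`` pattern in the configuration; the filter name and behaviour
# are hypothetical.
#
#     def crlf_to_lf(data, cmd, ui=None, repo=None, filename=None, **kwargs):
#         return data.replace(b'\r\n', b'\n')
#
#     repo.adddatafilter(b'tolf:', crlf_to_lf)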
2231 2231 def wread(self, filename):
2232 2232 if self.wvfs.islink(filename):
2233 2233 data = self.wvfs.readlink(filename)
2234 2234 else:
2235 2235 data = self.wvfs.read(filename)
2236 2236 return self._filter(self._encodefilterpats, filename, data)
2237 2237
2238 2238 def wwrite(self, filename, data, flags, backgroundclose=False, **kwargs):
2239 2239 """write ``data`` into ``filename`` in the working directory
2240 2240
2241 2241 This returns the length of the written (maybe decoded) data.
2242 2242 """
2243 2243 data = self._filter(self._decodefilterpats, filename, data)
2244 2244 if b'l' in flags:
2245 2245 self.wvfs.symlink(data, filename)
2246 2246 else:
2247 2247 self.wvfs.write(
2248 2248 filename, data, backgroundclose=backgroundclose, **kwargs
2249 2249 )
2250 2250 if b'x' in flags:
2251 2251 self.wvfs.setflags(filename, False, True)
2252 2252 else:
2253 2253 self.wvfs.setflags(filename, False, False)
2254 2254 return len(data)
2255 2255
2256 2256 def wwritedata(self, filename, data):
2257 2257 return self._filter(self._decodefilterpats, filename, data)
2258 2258
2259 2259 def currenttransaction(self):
2260 2260 """return the current transaction or None if none exists"""
2261 2261 if self._transref:
2262 2262 tr = self._transref()
2263 2263 else:
2264 2264 tr = None
2265 2265
2266 2266 if tr and tr.running():
2267 2267 return tr
2268 2268 return None
2269 2269
2270 2270 def transaction(self, desc, report=None):
2271 2271 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
2272 2272 b'devel', b'check-locks'
2273 2273 ):
2274 2274 if self._currentlock(self._lockref) is None:
2275 2275 raise error.ProgrammingError(b'transaction requires locking')
2276 2276 tr = self.currenttransaction()
2277 2277 if tr is not None:
2278 2278 return tr.nest(name=desc)
2279 2279
2280 2280 # abort here if the journal already exists
2281 2281 if self.svfs.exists(b"journal"):
2282 2282 raise error.RepoError(
2283 2283 _(b"abandoned transaction found"),
2284 2284 hint=_(b"run 'hg recover' to clean up transaction"),
2285 2285 )
2286 2286
2287 2287 idbase = b"%.40f#%f" % (random.random(), time.time())
2288 2288 ha = hex(hashutil.sha1(idbase).digest())
2289 2289 txnid = b'TXN:' + ha
2290 2290 self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)
2291 2291
2292 2292 self._writejournal(desc)
2293 2293 renames = [(vfs, x, undoname(x)) for vfs, x in self._journalfiles()]
2294 2294 if report:
2295 2295 rp = report
2296 2296 else:
2297 2297 rp = self.ui.warn
2298 2298 vfsmap = {b'plain': self.vfs, b'store': self.svfs} # root of .hg/
2299 2299 # we must avoid cyclic reference between repo and transaction.
2300 2300 reporef = weakref.ref(self)
2301 2301 # Code to track tag movement
2302 2302 #
2303 2303 # Since tags are all handled as file content, it is actually quite hard
2304 2304 # to track these movements from a code perspective. So we fall back to
2305 2305 # tracking at the repository level. One could envision tracking changes
2306 2306 # to the '.hgtags' file through changegroup application, but that fails
2307 2307 # to cope with cases where a transaction exposes new heads without a
2308 2308 # changegroup being involved (eg: phase movement).
2309 2309 #
2310 2310 # For now, we gate the feature behind a flag since this likely comes
2311 2311 # with performance impacts. The current code runs more often than needed
2312 2312 # and does not use caches as much as it could. The current focus is on
2313 2313 # the behavior of the feature, so we disable it by default. The flag
2314 2314 # will be removed when we are happy with the performance impact.
2315 2315 #
2316 2316 # Once this feature is no longer experimental, move the following
2317 2317 # documentation to the appropriate help section:
2318 2318 #
2319 2319 # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
2320 2320 # tags (new or changed or deleted tags). In addition the details of
2321 2321 # these changes are made available in a file at:
2322 2322 # ``REPOROOT/.hg/changes/tags.changes``.
2323 2323 # Make sure you check for HG_TAG_MOVED before reading that file as it
2324 2324 # might exist from a previous transaction even if no tags were touched
2325 2325 # in this one. Changes are recorded in a line-based format::
2326 2326 #
2327 2327 # <action> <hex-node> <tag-name>\n
2328 2328 #
2329 2329 # Actions are defined as follows:
2330 2330 # "-R": tag is removed,
2331 2331 # "+A": tag is added,
2332 2332 # "-M": tag is moved (old value),
2333 2333 # "+M": tag is moved (new value),
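#
# For example (editor's note, illustrative only), a transaction that moves
# tag ``v1.2`` and adds ``v1.3`` would produce lines of the form:
#
#     -M <old-hex-node> v1.2
#     +M <new-hex-node> v1.2
#     +A <hex-node> v1.3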
2334 2334 tracktags = lambda x: None
2335 2335 # experimental config: experimental.hook-track-tags
2336 2336 shouldtracktags = self.ui.configbool(
2337 2337 b'experimental', b'hook-track-tags'
2338 2338 )
2339 2339 if desc != b'strip' and shouldtracktags:
2340 2340 oldheads = self.changelog.headrevs()
2341 2341
2342 2342 def tracktags(tr2):
2343 2343 repo = reporef()
2344 2344 assert repo is not None # help pytype
2345 2345 oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
2346 2346 newheads = repo.changelog.headrevs()
2347 2347 newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
2348 2348 # notes: we compare lists here.
2349 2349 # As we do it only once, building a set would not be cheaper
2350 2350 changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
2351 2351 if changes:
2352 2352 tr2.hookargs[b'tag_moved'] = b'1'
2353 2353 with repo.vfs(
2354 2354 b'changes/tags.changes', b'w', atomictemp=True
2355 2355 ) as changesfile:
2356 2356 # note: we do not register the file with the transaction
2357 2357 # because we need it to still exist when the transaction
2358 2358 # is closed (for txnclose hooks)
2359 2359 tagsmod.writediff(changesfile, changes)
2360 2360
2361 2361 def validate(tr2):
2362 2362 """will run pre-closing hooks"""
2363 2363 # XXX the transaction API is a bit lacking here so we take a hacky
2364 2364 # path for now
2365 2365 #
2366 2366 # We cannot add this as a "pending" hook since the 'tr.hookargs'
2367 2367 # dict is copied before these run. In addition we need the data
2368 2368 # available to in-memory hooks too.
2369 2369 #
2370 2370 # Moreover, we also need to make sure this runs before txnclose
2371 2371 # hooks and there is no "pending" mechanism that would execute
2372 2372 # logic only if hooks are about to run.
2373 2373 #
2374 2374 # Fixing this limitation of the transaction is also needed to track
2375 2375 # other families of changes (bookmarks, phases, obsolescence).
2376 2376 #
2377 2377 # This will have to be fixed before we remove the experimental
2378 2378 # gating.
2379 2379 tracktags(tr2)
2380 2380 repo = reporef()
2381 2381 assert repo is not None # help pytype
2382 2382
2383 2383 singleheadopt = (b'experimental', b'single-head-per-branch')
2384 2384 singlehead = repo.ui.configbool(*singleheadopt)
2385 2385 if singlehead:
2386 2386 singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
2387 2387 accountclosed = singleheadsub.get(
2388 2388 b"account-closed-heads", False
2389 2389 )
2390 2390 if singleheadsub.get(b"public-changes-only", False):
2391 2391 filtername = b"immutable"
2392 2392 else:
2393 2393 filtername = b"visible"
2394 2394 scmutil.enforcesinglehead(
2395 2395 repo, tr2, desc, accountclosed, filtername
2396 2396 )
2397 2397 if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
2398 2398 for name, (old, new) in sorted(
2399 2399 tr.changes[b'bookmarks'].items()
2400 2400 ):
2401 2401 args = tr.hookargs.copy()
2402 2402 args.update(bookmarks.preparehookargs(name, old, new))
2403 2403 repo.hook(
2404 2404 b'pretxnclose-bookmark',
2405 2405 throw=True,
2406 2406 **pycompat.strkwargs(args)
2407 2407 )
2408 2408 if hook.hashook(repo.ui, b'pretxnclose-phase'):
2409 2409 cl = repo.unfiltered().changelog
2410 2410 for revs, (old, new) in tr.changes[b'phases']:
2411 2411 for rev in revs:
2412 2412 args = tr.hookargs.copy()
2413 2413 node = hex(cl.node(rev))
2414 2414 args.update(phases.preparehookargs(node, old, new))
2415 2415 repo.hook(
2416 2416 b'pretxnclose-phase',
2417 2417 throw=True,
2418 2418 **pycompat.strkwargs(args)
2419 2419 )
2420 2420
2421 2421 repo.hook(
2422 2422 b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
2423 2423 )
2424 2424
2425 2425 def releasefn(tr, success):
2426 2426 repo = reporef()
2427 2427 if repo is None:
2428 2428 # If the repo has been GC'd (and this release function is being
2429 2429 # called from transaction.__del__), there's not much we can do,
2430 2430 # so just leave the unfinished transaction there and let the
2431 2431 # user run `hg recover`.
2432 2432 return
2433 2433 if success:
2434 2434 # this should be explicitly invoked here, because
2435 2435 # in-memory changes aren't written out when closing the
2436 2436 # transaction, if tr.addfilegenerator (via
2437 2437 # dirstate.write or so) isn't invoked while the
2438 2438 # transaction is running
2439 2439 repo.dirstate.write(None)
2440 2440 else:
2441 2441 # discard all changes (including ones already written
2442 2442 # out) in this transaction
2443 2443 narrowspec.restorebackup(self, b'journal.narrowspec')
2444 2444 narrowspec.restorewcbackup(self, b'journal.narrowspec.dirstate')
2445 2445 repo.dirstate.restorebackup(None, b'journal.dirstate')
2446 2446
2447 2447 repo.invalidate(clearfilecache=True)
2448 2448
2449 2449 tr = transaction.transaction(
2450 2450 rp,
2451 2451 self.svfs,
2452 2452 vfsmap,
2453 2453 b"journal",
2454 2454 b"undo",
2455 2455 aftertrans(renames),
2456 2456 self.store.createmode,
2457 2457 validator=validate,
2458 2458 releasefn=releasefn,
2459 2459 checkambigfiles=_cachedfiles,
2460 2460 name=desc,
2461 2461 )
2462 2462 tr.changes[b'origrepolen'] = len(self)
2463 2463 tr.changes[b'obsmarkers'] = set()
2464 2464 tr.changes[b'phases'] = []
2465 2465 tr.changes[b'bookmarks'] = {}
2466 2466
2467 2467 tr.hookargs[b'txnid'] = txnid
2468 2468 tr.hookargs[b'txnname'] = desc
2469 2469 tr.hookargs[b'changes'] = tr.changes
2470 2470 # note: writing the fncache only during finalize means that the file is
2471 2471 # outdated when running hooks. As fncache is used for streaming clone,
2472 2472 # this is not expected to break anything that happens during the hooks.
2473 2473 tr.addfinalize(b'flush-fncache', self.store.write)
2474 2474
2475 2475 def txnclosehook(tr2):
2476 2476 """To be run if transaction is successful, will schedule a hook run"""
2477 2477 # Don't reference tr2 in hook() so we don't hold a reference.
2478 2478 # This reduces memory consumption when there are multiple
2479 2479 # transactions per lock. This can likely go away if issue5045
2480 2480 # fixes the function accumulation.
2481 2481 hookargs = tr2.hookargs
2482 2482
2483 2483 def hookfunc(unused_success):
2484 2484 repo = reporef()
2485 2485 assert repo is not None # help pytype
2486 2486
2487 2487 if hook.hashook(repo.ui, b'txnclose-bookmark'):
2488 2488 bmchanges = sorted(tr.changes[b'bookmarks'].items())
2489 2489 for name, (old, new) in bmchanges:
2490 2490 args = tr.hookargs.copy()
2491 2491 args.update(bookmarks.preparehookargs(name, old, new))
2492 2492 repo.hook(
2493 2493 b'txnclose-bookmark',
2494 2494 throw=False,
2495 2495 **pycompat.strkwargs(args)
2496 2496 )
2497 2497
2498 2498 if hook.hashook(repo.ui, b'txnclose-phase'):
2499 2499 cl = repo.unfiltered().changelog
2500 2500 phasemv = sorted(
2501 2501 tr.changes[b'phases'], key=lambda r: r[0][0]
2502 2502 )
2503 2503 for revs, (old, new) in phasemv:
2504 2504 for rev in revs:
2505 2505 args = tr.hookargs.copy()
2506 2506 node = hex(cl.node(rev))
2507 2507 args.update(phases.preparehookargs(node, old, new))
2508 2508 repo.hook(
2509 2509 b'txnclose-phase',
2510 2510 throw=False,
2511 2511 **pycompat.strkwargs(args)
2512 2512 )
2513 2513
2514 2514 repo.hook(
2515 2515 b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
2516 2516 )
2517 2517
2518 2518 repo = reporef()
2519 2519 assert repo is not None # help pytype
2520 2520 repo._afterlock(hookfunc)
2521 2521
2522 2522 tr.addfinalize(b'txnclose-hook', txnclosehook)
2523 2523 # Include a leading "-" to make it happen before the transaction summary
2524 2524 # reports registered via scmutil.registersummarycallback() whose names
2525 2525 # are 00-txnreport etc. That way, the caches will be warm when the
2526 2526 # callbacks run.
2527 2527 tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))
2528 2528
2529 2529 def txnaborthook(tr2):
2530 2530 """To be run if transaction is aborted"""
2531 2531 repo = reporef()
2532 2532 assert repo is not None # help pytype
2533 2533 repo.hook(
2534 2534 b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
2535 2535 )
2536 2536
2537 2537 tr.addabort(b'txnabort-hook', txnaborthook)
2538 2538 # avoid eager cache invalidation. in-memory data should be identical
2539 2539 # to stored data if transaction has no error.
2540 2540 tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
2541 2541 self._transref = weakref.ref(tr)
2542 2542 scmutil.registersummarycallback(self, tr, desc)
2543 2543 return tr
2544 2544
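# Illustrative sketch (editor's note, not part of this changeset): typical
# usage by code that already holds the store lock, mirroring the pattern
# used by ``commit()`` below; the transaction description is hypothetical.
#
#     with repo.lock():
#         with repo.transaction(b'my-operation') as tr:
#             ...  # write to the store; changes are rolled back on error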
2545 2545 def _journalfiles(self):
2546 2546 return (
2547 2547 (self.svfs, b'journal'),
2548 2548 (self.svfs, b'journal.narrowspec'),
2549 2549 (self.vfs, b'journal.narrowspec.dirstate'),
2550 2550 (self.vfs, b'journal.dirstate'),
2551 2551 (self.vfs, b'journal.branch'),
2552 2552 (self.vfs, b'journal.desc'),
2553 2553 (bookmarks.bookmarksvfs(self), b'journal.bookmarks'),
2554 2554 (self.svfs, b'journal.phaseroots'),
2555 2555 )
2556 2556
2557 2557 def undofiles(self):
2558 2558 return [(vfs, undoname(x)) for vfs, x in self._journalfiles()]
2559 2559
2560 2560 @unfilteredmethod
2561 2561 def _writejournal(self, desc):
2562 2562 self.dirstate.savebackup(None, b'journal.dirstate')
2563 2563 narrowspec.savewcbackup(self, b'journal.narrowspec.dirstate')
2564 2564 narrowspec.savebackup(self, b'journal.narrowspec')
2565 2565 self.vfs.write(
2566 2566 b"journal.branch", encoding.fromlocal(self.dirstate.branch())
2567 2567 )
2568 2568 self.vfs.write(b"journal.desc", b"%d\n%s\n" % (len(self), desc))
2569 2569 bookmarksvfs = bookmarks.bookmarksvfs(self)
2570 2570 bookmarksvfs.write(
2571 2571 b"journal.bookmarks", bookmarksvfs.tryread(b"bookmarks")
2572 2572 )
2573 2573 self.svfs.write(b"journal.phaseroots", self.svfs.tryread(b"phaseroots"))
2574 2574
2575 2575 def recover(self):
2576 2576 with self.lock():
2577 2577 if self.svfs.exists(b"journal"):
2578 2578 self.ui.status(_(b"rolling back interrupted transaction\n"))
2579 2579 vfsmap = {
2580 2580 b'': self.svfs,
2581 2581 b'plain': self.vfs,
2582 2582 }
2583 2583 transaction.rollback(
2584 2584 self.svfs,
2585 2585 vfsmap,
2586 2586 b"journal",
2587 2587 self.ui.warn,
2588 2588 checkambigfiles=_cachedfiles,
2589 2589 )
2590 2590 self.invalidate()
2591 2591 return True
2592 2592 else:
2593 2593 self.ui.warn(_(b"no interrupted transaction available\n"))
2594 2594 return False
2595 2595
2596 2596 def rollback(self, dryrun=False, force=False):
2597 2597 wlock = lock = dsguard = None
2598 2598 try:
2599 2599 wlock = self.wlock()
2600 2600 lock = self.lock()
2601 2601 if self.svfs.exists(b"undo"):
2602 2602 dsguard = dirstateguard.dirstateguard(self, b'rollback')
2603 2603
2604 2604 return self._rollback(dryrun, force, dsguard)
2605 2605 else:
2606 2606 self.ui.warn(_(b"no rollback information available\n"))
2607 2607 return 1
2608 2608 finally:
2609 2609 release(dsguard, lock, wlock)
2610 2610
2611 2611 @unfilteredmethod # Until we get smarter cache management
2612 2612 def _rollback(self, dryrun, force, dsguard):
2613 2613 ui = self.ui
2614 2614 try:
2615 2615 args = self.vfs.read(b'undo.desc').splitlines()
2616 2616 (oldlen, desc, detail) = (int(args[0]), args[1], None)
2617 2617 if len(args) >= 3:
2618 2618 detail = args[2]
2619 2619 oldtip = oldlen - 1
2620 2620
2621 2621 if detail and ui.verbose:
2622 2622 msg = _(
2623 2623 b'repository tip rolled back to revision %d'
2624 2624 b' (undo %s: %s)\n'
2625 2625 ) % (oldtip, desc, detail)
2626 2626 else:
2627 2627 msg = _(
2628 2628 b'repository tip rolled back to revision %d (undo %s)\n'
2629 2629 ) % (oldtip, desc)
2630 2630 except IOError:
2631 2631 msg = _(b'rolling back unknown transaction\n')
2632 2632 desc = None
2633 2633
2634 2634 if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
2635 2635 raise error.Abort(
2636 2636 _(
2637 2637 b'rollback of last commit while not checked out '
2638 2638 b'may lose data'
2639 2639 ),
2640 2640 hint=_(b'use -f to force'),
2641 2641 )
2642 2642
2643 2643 ui.status(msg)
2644 2644 if dryrun:
2645 2645 return 0
2646 2646
2647 2647 parents = self.dirstate.parents()
2648 2648 self.destroying()
2649 2649 vfsmap = {b'plain': self.vfs, b'': self.svfs}
2650 2650 transaction.rollback(
2651 2651 self.svfs, vfsmap, b'undo', ui.warn, checkambigfiles=_cachedfiles
2652 2652 )
2653 2653 bookmarksvfs = bookmarks.bookmarksvfs(self)
2654 2654 if bookmarksvfs.exists(b'undo.bookmarks'):
2655 2655 bookmarksvfs.rename(
2656 2656 b'undo.bookmarks', b'bookmarks', checkambig=True
2657 2657 )
2658 2658 if self.svfs.exists(b'undo.phaseroots'):
2659 2659 self.svfs.rename(b'undo.phaseroots', b'phaseroots', checkambig=True)
2660 2660 self.invalidate()
2661 2661
2662 2662 has_node = self.changelog.index.has_node
2663 2663 parentgone = any(not has_node(p) for p in parents)
2664 2664 if parentgone:
2665 2665 # prevent dirstateguard from overwriting already restored one
2666 2666 dsguard.close()
2667 2667
2668 2668 narrowspec.restorebackup(self, b'undo.narrowspec')
2669 2669 narrowspec.restorewcbackup(self, b'undo.narrowspec.dirstate')
2670 2670 self.dirstate.restorebackup(None, b'undo.dirstate')
2671 2671 try:
2672 2672 branch = self.vfs.read(b'undo.branch')
2673 2673 self.dirstate.setbranch(encoding.tolocal(branch))
2674 2674 except IOError:
2675 2675 ui.warn(
2676 2676 _(
2677 2677 b'named branch could not be reset: '
2678 2678 b'current branch is still \'%s\'\n'
2679 2679 )
2680 2680 % self.dirstate.branch()
2681 2681 )
2682 2682
2683 2683 parents = tuple([p.rev() for p in self[None].parents()])
2684 2684 if len(parents) > 1:
2685 2685 ui.status(
2686 2686 _(
2687 2687 b'working directory now based on '
2688 2688 b'revisions %d and %d\n'
2689 2689 )
2690 2690 % parents
2691 2691 )
2692 2692 else:
2693 2693 ui.status(
2694 2694 _(b'working directory now based on revision %d\n') % parents
2695 2695 )
2696 2696 mergestatemod.mergestate.clean(self)
2697 2697
2698 2698 # TODO: if we know which new heads may result from this rollback, pass
2699 2699 # them to destroy(), which will prevent the branchhead cache from being
2700 2700 # invalidated.
2701 2701 self.destroyed()
2702 2702 return 0
2703 2703
2704 2704 def _buildcacheupdater(self, newtransaction):
2705 2705 """called during a transaction to build the callback that updates caches
2706 2706
2707 2707 Lives on the repository to help extensions that might want to augment
2708 2708 this logic. For this purpose, the created transaction is passed to the
2709 2709 method.
2710 2710 """
2711 2711 # we must avoid cyclic reference between repo and transaction.
2712 2712 reporef = weakref.ref(self)
2713 2713
2714 2714 def updater(tr):
2715 2715 repo = reporef()
2716 2716 assert repo is not None # help pytype
2717 2717 repo.updatecaches(tr)
2718 2718
2719 2719 return updater
2720 2720
2721 2721 @unfilteredmethod
2722 2722 def updatecaches(self, tr=None, full=False):
2723 2723 """warm appropriate caches
2724 2724
2725 2725 If this function is called after a transaction has closed, the transaction
2726 2726 will be available in the 'tr' argument. This can be used to selectively
2727 2727 update caches relevant to the changes in that transaction.
2728 2728
2729 2729 If 'full' is set, make sure all caches the function knows about have
2730 2730 up-to-date data, even the ones usually loaded more lazily.
2731 2731 """
2732 2732 if tr is not None and tr.hookargs.get(b'source') == b'strip':
2733 2733 # During strip, many caches are invalid but
2734 2734 # later call to `destroyed` will refresh them.
2735 2735 return
2736 2736
2737 2737 if tr is None or tr.changes[b'origrepolen'] < len(self):
2738 2738 # accessing the 'served' branchmap should refresh all the others,
2739 2739 self.ui.debug(b'updating the branch cache\n')
2740 2740 self.filtered(b'served').branchmap()
2741 2741 self.filtered(b'served.hidden').branchmap()
2742 2742
2743 2743 if full:
2744 2744 unfi = self.unfiltered()
2745 2745
2746 2746 self.changelog.update_caches(transaction=tr)
2747 2747 self.manifestlog.update_caches(transaction=tr)
2748 2748
2749 2749 rbc = unfi.revbranchcache()
2750 2750 for r in unfi.changelog:
2751 2751 rbc.branchinfo(r)
2752 2752 rbc.write()
2753 2753
2754 2754 # ensure the working copy parents are in the manifestfulltextcache
2755 2755 for ctx in self[b'.'].parents():
2756 2756 ctx.manifest() # accessing the manifest is enough
2757 2757
2758 2758 # accessing fnode cache warms the cache
2759 2759 tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
2760 2760 # accessing tags warm the cache
2761 2761 self.tags()
2762 2762 self.filtered(b'served').tags()
2763 2763
2764 2764 # The `full` arg is documented as updating even the lazily-loaded
2765 2765 # caches immediately, so we're forcing a write to cause these caches
2766 2766 # to be warmed up even if they haven't explicitly been requested
2767 2767 # yet (if they've never been used by hg, they won't ever have been
2768 2768 # written, even if they're a subset of another kind of cache that
2769 2769 # *has* been used).
2770 2770 for filt in repoview.filtertable.keys():
2771 2771 filtered = self.filtered(filt)
2772 2772 filtered.branchmap().write(filtered)
2773 2773
2774 2774 def invalidatecaches(self):
2775 2775
2776 2776 if '_tagscache' in vars(self):
2777 2777 # can't use delattr on proxy
2778 2778 del self.__dict__['_tagscache']
2779 2779
2780 2780 self._branchcaches.clear()
2781 2781 self.invalidatevolatilesets()
2782 2782 self._sparsesignaturecache.clear()
2783 2783
2784 2784 def invalidatevolatilesets(self):
2785 2785 self.filteredrevcache.clear()
2786 2786 obsolete.clearobscaches(self)
2787 2787 self._quick_access_changeid_invalidate()
2788 2788
2789 2789 def invalidatedirstate(self):
2790 2790 """Invalidates the dirstate, causing the next call to dirstate
2791 2791 to check if it was modified since the last time it was read,
2792 2792 rereading it if it has.
2793 2793
2794 2794 This is different from dirstate.invalidate() in that it doesn't always
2795 2795 reread the dirstate. Use dirstate.invalidate() if you want to
2796 2796 explicitly read the dirstate again (i.e. restoring it to a previous
2797 2797 known good state)."""
2798 2798 if hasunfilteredcache(self, 'dirstate'):
2799 2799 for k in self.dirstate._filecache:
2800 2800 try:
2801 2801 delattr(self.dirstate, k)
2802 2802 except AttributeError:
2803 2803 pass
2804 2804 delattr(self.unfiltered(), 'dirstate')
2805 2805
2806 2806 def invalidate(self, clearfilecache=False):
2807 2807 """Invalidates both store and non-store parts other than dirstate
2808 2808
2809 2809 If a transaction is running, invalidation of store is omitted,
2810 2810 because discarding in-memory changes might cause inconsistency
2811 2811 (e.g. incomplete fncache causes unintentional failure, but
2812 2812 redundant one doesn't).
2813 2813 """
2814 2814 unfiltered = self.unfiltered() # all file caches are stored unfiltered
2815 2815 for k in list(self._filecache.keys()):
2816 2816 # dirstate is invalidated separately in invalidatedirstate()
2817 2817 if k == b'dirstate':
2818 2818 continue
2819 2819 if (
2820 2820 k == b'changelog'
2821 2821 and self.currenttransaction()
2822 2822 and self.changelog._delayed
2823 2823 ):
2824 2824 # The changelog object may store unwritten revisions. We don't
2825 2825 # want to lose them.
2826 2826 # TODO: Solve the problem instead of working around it.
2827 2827 continue
2828 2828
2829 2829 if clearfilecache:
2830 2830 del self._filecache[k]
2831 2831 try:
2832 2832 delattr(unfiltered, k)
2833 2833 except AttributeError:
2834 2834 pass
2835 2835 self.invalidatecaches()
2836 2836 if not self.currenttransaction():
2837 2837 # TODO: Changing contents of store outside transaction
2838 2838 # causes inconsistency. We should make in-memory store
2839 2839 # changes detectable, and abort if changed.
2840 2840 self.store.invalidatecaches()
2841 2841
2842 2842 def invalidateall(self):
2843 2843 """Fully invalidates both store and non-store parts, causing the
2844 2844 subsequent operation to reread any outside changes."""
2845 2845 # extension should hook this to invalidate its caches
2846 2846 self.invalidate()
2847 2847 self.invalidatedirstate()
2848 2848
2849 2849 @unfilteredmethod
2850 2850 def _refreshfilecachestats(self, tr):
2851 2851 """Reload stats of cached files so that they are flagged as valid"""
2852 2852 for k, ce in self._filecache.items():
2853 2853 k = pycompat.sysstr(k)
2854 2854 if k == 'dirstate' or k not in self.__dict__:
2855 2855 continue
2856 2856 ce.refresh()
2857 2857
2858 2858 def _lock(
2859 2859 self,
2860 2860 vfs,
2861 2861 lockname,
2862 2862 wait,
2863 2863 releasefn,
2864 2864 acquirefn,
2865 2865 desc,
2866 2866 ):
2867 2867 timeout = 0
2868 2868 warntimeout = 0
2869 2869 if wait:
2870 2870 timeout = self.ui.configint(b"ui", b"timeout")
2871 2871 warntimeout = self.ui.configint(b"ui", b"timeout.warn")
2872 2872 # internal config: ui.signal-safe-lock
2873 2873 signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')
2874 2874
2875 2875 l = lockmod.trylock(
2876 2876 self.ui,
2877 2877 vfs,
2878 2878 lockname,
2879 2879 timeout,
2880 2880 warntimeout,
2881 2881 releasefn=releasefn,
2882 2882 acquirefn=acquirefn,
2883 2883 desc=desc,
2884 2884 signalsafe=signalsafe,
2885 2885 )
2886 2886 return l
2887 2887
2888 2888 def _afterlock(self, callback):
2889 2889 """add a callback to be run when the repository is fully unlocked
2890 2890
2891 2891 The callback will be executed when the outermost lock is released
2892 2892 (with wlock being higher level than 'lock')."""
2893 2893 for ref in (self._wlockref, self._lockref):
2894 2894 l = ref and ref()
2895 2895 if l and l.held:
2896 2896 l.postrelease.append(callback)
2897 2897 break
2898 2898 else: # no lock has been found.
2899 2899 callback(True)
2900 2900
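# Illustrative sketch (editor's note, not part of this changeset): scheduling
# work to run once the locks are released, as ``pushkey()`` and ``commit()``
# below do; the callback body is hypothetical.
#
#     def notify(success):
#         repo.ui.note(b'all locks released\n')
#
#     repo._afterlock(notify)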
2901 2901 def lock(self, wait=True):
2902 2902 """Lock the repository store (.hg/store) and return a weak reference
2903 2903 to the lock. Use this before modifying the store (e.g. committing or
2904 2904 stripping). If you are opening a transaction, get a lock as well.
2905 2905
2906 2906 If both 'lock' and 'wlock' must be acquired, ensure you always acquire
2907 2907 'wlock' first to avoid a dead-lock hazard."""
2908 2908 l = self._currentlock(self._lockref)
2909 2909 if l is not None:
2910 2910 l.lock()
2911 2911 return l
2912 2912
2913 2913 l = self._lock(
2914 2914 vfs=self.svfs,
2915 2915 lockname=b"lock",
2916 2916 wait=wait,
2917 2917 releasefn=None,
2918 2918 acquirefn=self.invalidate,
2919 2919 desc=_(b'repository %s') % self.origroot,
2920 2920 )
2921 2921 self._lockref = weakref.ref(l)
2922 2922 return l
2923 2923
2924 2924 def wlock(self, wait=True):
2925 2925 """Lock the non-store parts of the repository (everything under
2926 2926 .hg except .hg/store) and return a weak reference to the lock.
2927 2927
2928 2928 Use this before modifying files in .hg.
2929 2929
2930 2930 If both 'lock' and 'wlock' must be acquired, ensure you always acquire
2931 2931 'wlock' first to avoid a dead-lock hazard."""
2932 2932 l = self._wlockref() if self._wlockref else None
2933 2933 if l is not None and l.held:
2934 2934 l.lock()
2935 2935 return l
2936 2936
2937 2937 # We do not need to check for non-waiting lock acquisition. Such
2938 2938 # acquisition would not cause a dead-lock as it would just fail.
2939 2939 if wait and (
2940 2940 self.ui.configbool(b'devel', b'all-warnings')
2941 2941 or self.ui.configbool(b'devel', b'check-locks')
2942 2942 ):
2943 2943 if self._currentlock(self._lockref) is not None:
2944 2944 self.ui.develwarn(b'"wlock" acquired after "lock"')
2945 2945
2946 2946 def unlock():
2947 2947 if self.dirstate.pendingparentchange():
2948 2948 self.dirstate.invalidate()
2949 2949 else:
2950 2950 self.dirstate.write(None)
2951 2951
2952 2952 self._filecache[b'dirstate'].refresh()
2953 2953
2954 2954 l = self._lock(
2955 2955 self.vfs,
2956 2956 b"wlock",
2957 2957 wait,
2958 2958 unlock,
2959 2959 self.invalidatedirstate,
2960 2960 _(b'working directory of %s') % self.origroot,
2961 2961 )
2962 2962 self._wlockref = weakref.ref(l)
2963 2963 return l
2964 2964
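# Illustrative sketch (editor's note, not part of this changeset): acquiring
# both locks in the documented order, 'wlock' before 'lock', exactly as
# ``commit()`` below does.
#
#     with repo.wlock(), repo.lock():
#         ...  # modify the working copy and the store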
2965 2965 def _currentlock(self, lockref):
2966 2966 """Returns the lock if it's held, or None if it's not."""
2967 2967 if lockref is None:
2968 2968 return None
2969 2969 l = lockref()
2970 2970 if l is None or not l.held:
2971 2971 return None
2972 2972 return l
2973 2973
2974 2974 def currentwlock(self):
2975 2975 """Returns the wlock if it's held, or None if it's not."""
2976 2976 return self._currentlock(self._wlockref)
2977 2977
2978 2978 def checkcommitpatterns(self, wctx, match, status, fail):
2979 2979 """check for commit arguments that aren't committable"""
2980 2980 if match.isexact() or match.prefix():
2981 2981 matched = set(status.modified + status.added + status.removed)
2982 2982
2983 2983 for f in match.files():
2984 2984 f = self.dirstate.normalize(f)
2985 2985 if f == b'.' or f in matched or f in wctx.substate:
2986 2986 continue
2987 2987 if f in status.deleted:
2988 2988 fail(f, _(b'file not found!'))
2989 2989 # Is it a directory that exists or used to exist?
2990 2990 if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
2991 2991 d = f + b'/'
2992 2992 for mf in matched:
2993 2993 if mf.startswith(d):
2994 2994 break
2995 2995 else:
2996 2996 fail(f, _(b"no match under directory!"))
2997 2997 elif f not in self.dirstate:
2998 2998 fail(f, _(b"file not tracked!"))
2999 2999
3000 3000 @unfilteredmethod
3001 3001 def commit(
3002 3002 self,
3003 3003 text=b"",
3004 3004 user=None,
3005 3005 date=None,
3006 3006 match=None,
3007 3007 force=False,
3008 3008 editor=None,
3009 3009 extra=None,
3010 3010 ):
3011 3011 """Add a new revision to the current repository.
3012 3012
3013 3013 Revision information is gathered from the working directory;
3014 3014 match can be used to filter the committed files. If editor is
3015 3015 supplied, it is called to get a commit message.
3016 3016 """
3017 3017 if extra is None:
3018 3018 extra = {}
3019 3019
3020 3020 def fail(f, msg):
3021 3021 raise error.InputError(b'%s: %s' % (f, msg))
3022 3022
3023 3023 if not match:
3024 3024 match = matchmod.always()
3025 3025
3026 3026 if not force:
3027 3027 match.bad = fail
3028 3028
3029 3029 # lock() for recent changelog (see issue4368)
3030 3030 with self.wlock(), self.lock():
3031 3031 wctx = self[None]
3032 3032 merge = len(wctx.parents()) > 1
3033 3033
3034 3034 if not force and merge and not match.always():
3035 3035 raise error.Abort(
3036 3036 _(
3037 3037 b'cannot partially commit a merge '
3038 3038 b'(do not specify files or patterns)'
3039 3039 )
3040 3040 )
3041 3041
3042 3042 status = self.status(match=match, clean=force)
3043 3043 if force:
3044 3044 status.modified.extend(
3045 3045 status.clean
3046 3046 ) # mq may commit clean files
3047 3047
3048 3048 # check subrepos
3049 3049 subs, commitsubs, newstate = subrepoutil.precommit(
3050 3050 self.ui, wctx, status, match, force=force
3051 3051 )
3052 3052
3053 3053 # make sure all explicit patterns are matched
3054 3054 if not force:
3055 3055 self.checkcommitpatterns(wctx, match, status, fail)
3056 3056
3057 3057 cctx = context.workingcommitctx(
3058 3058 self, status, text, user, date, extra
3059 3059 )
3060 3060
3061 3061 ms = mergestatemod.mergestate.read(self)
3062 3062 mergeutil.checkunresolved(ms)
3063 3063
3064 3064 # internal config: ui.allowemptycommit
3065 3065 if cctx.isempty() and not self.ui.configbool(
3066 3066 b'ui', b'allowemptycommit'
3067 3067 ):
3068 3068 self.ui.debug(b'nothing to commit, clearing merge state\n')
3069 3069 ms.reset()
3070 3070 return None
3071 3071
3072 3072 if merge and cctx.deleted():
3073 3073 raise error.Abort(_(b"cannot commit merge with missing files"))
3074 3074
3075 3075 if editor:
3076 3076 cctx._text = editor(self, cctx, subs)
3077 3077 edited = text != cctx._text
3078 3078
3079 3079 # Save commit message in case this transaction gets rolled back
3080 3080 # (e.g. by a pretxncommit hook). Leave the content alone on
3081 3081 # the assumption that the user will use the same editor again.
3082 3082 msgfn = self.savecommitmessage(cctx._text)
3083 3083
3084 3084 # commit subs and write new state
3085 3085 if subs:
3086 3086 uipathfn = scmutil.getuipathfn(self)
3087 3087 for s in sorted(commitsubs):
3088 3088 sub = wctx.sub(s)
3089 3089 self.ui.status(
3090 3090 _(b'committing subrepository %s\n')
3091 3091 % uipathfn(subrepoutil.subrelpath(sub))
3092 3092 )
3093 3093 sr = sub.commit(cctx._text, user, date)
3094 3094 newstate[s] = (newstate[s][0], sr)
3095 3095 subrepoutil.writestate(self, newstate)
3096 3096
3097 3097 p1, p2 = self.dirstate.parents()
3098 3098 hookp1, hookp2 = hex(p1), (p2 != self.nullid and hex(p2) or b'')
3099 3099 try:
3100 3100 self.hook(
3101 3101 b"precommit", throw=True, parent1=hookp1, parent2=hookp2
3102 3102 )
3103 3103 with self.transaction(b'commit'):
3104 3104 ret = self.commitctx(cctx, True)
3105 3105 # update bookmarks, dirstate and mergestate
3106 3106 bookmarks.update(self, [p1, p2], ret)
3107 3107 cctx.markcommitted(ret)
3108 3108 ms.reset()
3109 3109 except: # re-raises
3110 3110 if edited:
3111 3111 self.ui.write(
3112 3112 _(b'note: commit message saved in %s\n') % msgfn
3113 3113 )
3114 3114 self.ui.write(
3115 3115 _(
3116 3116 b"note: use 'hg commit --logfile "
3117 3117 b".hg/last-message.txt --edit' to reuse it\n"
3118 3118 )
3119 3119 )
3120 3120 raise
3121 3121
3122 3122 def commithook(unused_success):
3123 3123 # hack for commands that use a temporary commit (eg: histedit)
3124 3124 # temporary commit got stripped before hook release
3125 3125 if self.changelog.hasnode(ret):
3126 3126 self.hook(
3127 3127 b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
3128 3128 )
3129 3129
3130 3130 self._afterlock(commithook)
3131 3131 return ret
3132 3132
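# Illustrative sketch (editor's note, not part of this changeset): a minimal
# programmatic commit restricted to one file; the pattern and message are
# hypothetical.
#
#     m = matchmod.match(repo.root, b'', [b'path:foo.txt'])
#     node = repo.commit(text=b'update foo', match=m)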
3133 3133 @unfilteredmethod
3134 3134 def commitctx(self, ctx, error=False, origctx=None):
3135 3135 return commit.commitctx(self, ctx, error=error, origctx=origctx)
3136 3136
3137 3137 @unfilteredmethod
3138 3138 def destroying(self):
3139 3139 """Inform the repository that nodes are about to be destroyed.
3140 3140 Intended for use by strip and rollback, so there's a common
3141 3141 place for anything that has to be done before destroying history.
3142 3142
3143 3143 This is mostly useful for saving state that is in memory and waiting
3144 3144 to be flushed when the current lock is released. Because a call to
3145 3145 destroyed is imminent, the repo will be invalidated causing those
3146 3146 changes to stay in memory (waiting for the next unlock), or vanish
3147 3147 completely.
3148 3148 """
3149 3149 # When using the same lock to commit and strip, the phasecache is left
3150 3150 # dirty after committing. Then when we strip, the repo is invalidated,
3151 3151 # causing those changes to disappear.
3152 3152 if '_phasecache' in vars(self):
3153 3153 self._phasecache.write()
3154 3154
3155 3155 @unfilteredmethod
3156 3156 def destroyed(self):
3157 3157 """Inform the repository that nodes have been destroyed.
3158 3158 Intended for use by strip and rollback, so there's a common
3159 3159 place for anything that has to be done after destroying history.
3160 3160 """
3161 3161 # When one tries to:
3162 3162 # 1) destroy nodes thus calling this method (e.g. strip)
3163 3163 # 2) use phasecache somewhere (e.g. commit)
3164 3164 #
3165 3165 # then 2) will fail because the phasecache contains nodes that were
3166 3166 # removed. We can either remove phasecache from the filecache,
3167 3167 # causing it to reload next time it is accessed, or simply filter
3168 3168 # the removed nodes now and write the updated cache.
3169 3169 self._phasecache.filterunknown(self)
3170 3170 self._phasecache.write()
3171 3171
3172 3172 # refresh all repository caches
3173 3173 self.updatecaches()
3174 3174
3175 3175 # Ensure the persistent tag cache is updated. Doing it now
3176 3176 # means that the tag cache only has to worry about destroyed
3177 3177 # heads immediately after a strip/rollback. That in turn
3178 3178 # guarantees that "cachetip == currenttip" (comparing both rev
3179 3179 # and node) always means no nodes have been added or destroyed.
3180 3180
3181 3181 # XXX this is suboptimal when qrefresh'ing: we strip the current
3182 3182 # head, refresh the tag cache, then immediately add a new head.
3183 3183 # But I think doing it this way is necessary for the "instant
3184 3184 # tag cache retrieval" case to work.
3185 3185 self.invalidate()
3186 3186
3187 3187 def status(
3188 3188 self,
3189 3189 node1=b'.',
3190 3190 node2=None,
3191 3191 match=None,
3192 3192 ignored=False,
3193 3193 clean=False,
3194 3194 unknown=False,
3195 3195 listsubrepos=False,
3196 3196 ):
3197 3197 '''a convenience method that calls node1.status(node2)'''
3198 3198 return self[node1].status(
3199 3199 node2, match, ignored, clean, unknown, listsubrepos
3200 3200 )
3201 3201
3202 3202 def addpostdsstatus(self, ps):
3203 3203 """Add a callback to run within the wlock, at the point at which status
3204 3204 fixups happen.
3205 3205
3206 3206 On status completion, callback(wctx, status) will be called with the
3207 3207 wlock held, unless the dirstate has changed from underneath or the wlock
3208 3208 couldn't be grabbed.
3209 3209
3210 3210 Callbacks should not capture and use a cached copy of the dirstate --
3211 3211 it might change in the meanwhile. Instead, they should access the
3212 3212 dirstate via wctx.repo().dirstate.
3213 3213
3214 3214 This list is emptied out after each status run -- extensions should
3215 3215 make sure they add to this list each time dirstate.status is called.
3216 3216 Extensions should also make sure they don't call this for statuses
3217 3217 that don't involve the dirstate.
3218 3218 """
3219 3219
3220 3220 # The list is located here for uniqueness reasons -- it is actually
3221 3221 # managed by the workingctx, but that isn't unique per-repo.
3222 3222 self._postdsstatus.append(ps)
3223 3223
3224 3224 def postdsstatus(self):
3225 3225 """Used by workingctx to get the list of post-dirstate-status hooks."""
3226 3226 return self._postdsstatus
3227 3227
3228 3228 def clearpostdsstatus(self):
3229 3229 """Used by workingctx to clear post-dirstate-status hooks."""
3230 3230 del self._postdsstatus[:]
3231 3231
3232 3232 def heads(self, start=None):
3233 3233 if start is None:
3234 3234 cl = self.changelog
3235 3235 headrevs = reversed(cl.headrevs())
3236 3236 return [cl.node(rev) for rev in headrevs]
3237 3237
3238 3238 heads = self.changelog.heads(start)
3239 3239 # sort the output in rev descending order
3240 3240 return sorted(heads, key=self.changelog.rev, reverse=True)
3241 3241
3242 3242 def branchheads(self, branch=None, start=None, closed=False):
3243 3243 """return a (possibly filtered) list of heads for the given branch
3244 3244
3245 3245 Heads are returned in topological order, from newest to oldest.
3246 3246 If branch is None, use the dirstate branch.
3247 3247 If start is not None, return only heads reachable from start.
3248 3248 If closed is True, return heads that are marked as closed as well.
3249 3249 """
3250 3250 if branch is None:
3251 3251 branch = self[None].branch()
3252 3252 branches = self.branchmap()
3253 3253 if not branches.hasbranch(branch):
3254 3254 return []
3255 3255 # the cache returns heads ordered lowest to highest
3256 3256 bheads = list(reversed(branches.branchheads(branch, closed=closed)))
3257 3257 if start is not None:
3258 3258 # filter out the heads that cannot be reached from startrev
3259 3259 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
3260 3260 bheads = [h for h in bheads if h in fbheads]
3261 3261 return bheads
3262 3262
3263 3263 def branches(self, nodes):
3264 3264 if not nodes:
3265 3265 nodes = [self.changelog.tip()]
3266 3266 b = []
3267 3267 for n in nodes:
3268 3268 t = n
3269 3269 while True:
3270 3270 p = self.changelog.parents(n)
3271 3271 if p[1] != self.nullid or p[0] == self.nullid:
3272 3272 b.append((t, n, p[0], p[1]))
3273 3273 break
3274 3274 n = p[0]
3275 3275 return b
3276 3276
3277 3277 def between(self, pairs):
3278 3278 r = []
3279 3279
3280 3280 for top, bottom in pairs:
3281 3281 n, l, i = top, [], 0
3282 3282 f = 1
3283 3283
3284 3284 while n != bottom and n != self.nullid:
3285 3285 p = self.changelog.parents(n)[0]
3286 3286 if i == f:
3287 3287 l.append(n)
3288 3288 f = f * 2
3289 3289 n = p
3290 3290 i += 1
3291 3291
3292 3292 r.append(l)
3293 3293
3294 3294 return r
3295 3295
3296 3296 def checkpush(self, pushop):
3297 3297 """Extensions can override this function if additional checks have
3298 3298 to be performed before pushing, or call it if they override push
3299 3299 command.
3300 3300 """
3301 3301
3302 3302 @unfilteredpropertycache
3303 3303 def prepushoutgoinghooks(self):
3304 3304 """Return a util.hooks object whose hooks are called with a pushop
3305 3305 (exposing repo, remote, and outgoing) before pushing changesets.
3306 3306 """
3307 3307 return util.hooks()
3308 3308
3309 3309 def pushkey(self, namespace, key, old, new):
3310 3310 try:
3311 3311 tr = self.currenttransaction()
3312 3312 hookargs = {}
3313 3313 if tr is not None:
3314 3314 hookargs.update(tr.hookargs)
3315 3315 hookargs = pycompat.strkwargs(hookargs)
3316 3316 hookargs['namespace'] = namespace
3317 3317 hookargs['key'] = key
3318 3318 hookargs['old'] = old
3319 3319 hookargs['new'] = new
3320 3320 self.hook(b'prepushkey', throw=True, **hookargs)
3321 3321 except error.HookAbort as exc:
3322 3322 self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
3323 3323 if exc.hint:
3324 3324 self.ui.write_err(_(b"(%s)\n") % exc.hint)
3325 3325 return False
3326 3326 self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
3327 3327 ret = pushkey.push(self, namespace, key, old, new)
3328 3328
3329 3329 def runhook(unused_success):
3330 3330 self.hook(
3331 3331 b'pushkey',
3332 3332 namespace=namespace,
3333 3333 key=key,
3334 3334 old=old,
3335 3335 new=new,
3336 3336 ret=ret,
3337 3337 )
3338 3338
3339 3339 self._afterlock(runhook)
3340 3340 return ret
3341 3341
3342 3342 def listkeys(self, namespace):
3343 3343 self.hook(b'prelistkeys', throw=True, namespace=namespace)
3344 3344 self.ui.debug(b'listing keys for "%s"\n' % namespace)
3345 3345 values = pushkey.list(self, namespace)
3346 3346 self.hook(b'listkeys', namespace=namespace, values=values)
3347 3347 return values
3348 3348
3349 3349 def debugwireargs(self, one, two, three=None, four=None, five=None):
3350 3350 '''used to test argument passing over the wire'''
3351 3351 return b"%s %s %s %s %s" % (
3352 3352 one,
3353 3353 two,
3354 3354 pycompat.bytestr(three),
3355 3355 pycompat.bytestr(four),
3356 3356 pycompat.bytestr(five),
3357 3357 )
3358 3358
3359 3359 def savecommitmessage(self, text):
3360 3360 fp = self.vfs(b'last-message.txt', b'wb')
3361 3361 try:
3362 3362 fp.write(text)
3363 3363 finally:
3364 3364 fp.close()
3365 3365 return self.pathto(fp.name[len(self.root) + 1 :])
3366 3366
3367 3367 def register_wanted_sidedata(self, category):
3368 3368 self._wanted_sidedata.add(pycompat.bytestr(category))
3369 3369
3370 3370 def register_sidedata_computer(self, kind, category, keys, computer):
3371 if kind not in (b"changelog", b"manifest", b"filelog"):
3372 msg = _(b"unexpected revlog kind '%s'.")
3371 if kind not in revlogconst.ALL_KINDS:
3372 msg = _(b"unexpected revlog kind %r.")
3373 3373 raise error.ProgrammingError(msg % kind)
3374 3374 category = pycompat.bytestr(category)
3375 3375 if category in self._sidedata_computers.get(kind, []):
3376 3376 msg = _(
3377 3377 b"cannot register a sidedata computer twice for category '%s'."
3378 3378 )
3379 3379 raise error.ProgrammingError(msg % category)
3380 3380 self._sidedata_computers.setdefault(kind, {})
3381 3381 self._sidedata_computers[kind][category] = (keys, computer)
3382 3382
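# Illustrative sketch (editor's note, not part of this changeset): with this
# series a caller passes a revlog kind constant instead of a bytestring; the
# category, key and computer below are hypothetical.
#
#     repo.register_wanted_sidedata(b'exp-my-category')
#     repo.register_sidedata_computer(
#         revlogconst.KIND_CHANGELOG,  # was b"changelog" before this series
#         b'exp-my-category',
#         (MY_SIDEDATA_KEY,),          # hypothetical sidedata key constant
#         compute_my_sidedata,         # hypothetical computer callable
#     )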
3383 3383
3384 3384 # used to avoid circular references so destructors work
3385 3385 def aftertrans(files):
3386 3386 renamefiles = [tuple(t) for t in files]
3387 3387
3388 3388 def a():
3389 3389 for vfs, src, dest in renamefiles:
3390 3390 # if src and dest refer to the same file, vfs.rename is a no-op,
3391 3391 # leaving both src and dest on disk. Delete dest to make sure
3392 3392 # the rename couldn't be such a no-op.
3393 3393 vfs.tryunlink(dest)
3394 3394 try:
3395 3395 vfs.rename(src, dest)
3396 3396 except OSError: # journal file does not yet exist
3397 3397 pass
3398 3398
3399 3399 return a
3400 3400
3401 3401
3402 3402 def undoname(fn):
3403 3403 base, name = os.path.split(fn)
3404 3404 assert name.startswith(b'journal')
3405 3405 return os.path.join(base, name.replace(b'journal', b'undo', 1))
3406 3406
3407 3407
3408 3408 def instance(ui, path, create, intents=None, createopts=None):
3409 3409 localpath = urlutil.urllocalpath(path)
3410 3410 if create:
3411 3411 createrepository(ui, localpath, createopts=createopts)
3412 3412
3413 3413 return makelocalrepository(ui, localpath, intents=intents)
3414 3414
3415 3415
3416 3416 def islocal(path):
3417 3417 return True
3418 3418
3419 3419
3420 3420 def defaultcreateopts(ui, createopts=None):
3421 3421 """Populate the default creation options for a repository.
3422 3422
3423 3423 A dictionary of explicitly requested creation options can be passed
3424 3424 in. Missing keys will be populated.
3425 3425 """
3426 3426 createopts = dict(createopts or {})
3427 3427
3428 3428 if b'backend' not in createopts:
3429 3429 # experimental config: storage.new-repo-backend
3430 3430 createopts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
3431 3431
3432 3432 return createopts
3433 3433
3434 3434
3435 3435 def newreporequirements(ui, createopts):
3436 3436 """Determine the set of requirements for a new local repository.
3437 3437
3438 3438 Extensions can wrap this function to specify custom requirements for
3439 3439 new repositories.
3440 3440 """
3441 3441 # If the repo is being created from a shared repository, we copy
3442 3442 # its requirements.
3443 3443 if b'sharedrepo' in createopts:
3444 3444 requirements = set(createopts[b'sharedrepo'].requirements)
3445 3445 if createopts.get(b'sharedrelative'):
3446 3446 requirements.add(requirementsmod.RELATIVE_SHARED_REQUIREMENT)
3447 3447 else:
3448 3448 requirements.add(requirementsmod.SHARED_REQUIREMENT)
3449 3449
3450 3450 return requirements
3451 3451
3452 3452 if b'backend' not in createopts:
3453 3453 raise error.ProgrammingError(
3454 3454 b'backend key not present in createopts; '
3455 3455 b'was defaultcreateopts() called?'
3456 3456 )
3457 3457
3458 3458 if createopts[b'backend'] != b'revlogv1':
3459 3459 raise error.Abort(
3460 3460 _(
3461 3461 b'unable to determine repository requirements for '
3462 3462 b'storage backend: %s'
3463 3463 )
3464 3464 % createopts[b'backend']
3465 3465 )
3466 3466
3467 3467 requirements = {requirementsmod.REVLOGV1_REQUIREMENT}
3468 3468 if ui.configbool(b'format', b'usestore'):
3469 3469 requirements.add(requirementsmod.STORE_REQUIREMENT)
3470 3470 if ui.configbool(b'format', b'usefncache'):
3471 3471 requirements.add(requirementsmod.FNCACHE_REQUIREMENT)
3472 3472 if ui.configbool(b'format', b'dotencode'):
3473 3473 requirements.add(requirementsmod.DOTENCODE_REQUIREMENT)
3474 3474
3475 3475 compengines = ui.configlist(b'format', b'revlog-compression')
3476 3476 for compengine in compengines:
3477 3477 if compengine in util.compengines:
3478 3478 engine = util.compengines[compengine]
3479 3479 if engine.available() and engine.revlogheader():
3480 3480 break
3481 3481 else:
3482 3482 raise error.Abort(
3483 3483 _(
3484 3484 b'compression engines %s defined by '
3485 3485 b'format.revlog-compression not available'
3486 3486 )
3487 3487 % b', '.join(b'"%s"' % e for e in compengines),
3488 3488 hint=_(
3489 3489 b'run "hg debuginstall" to list available '
3490 3490 b'compression engines'
3491 3491 ),
3492 3492 )
3493 3493
3494 3494 # zlib is the historical default and doesn't need an explicit requirement.
3495 3495 if compengine == b'zstd':
3496 3496 requirements.add(b'revlog-compression-zstd')
3497 3497 elif compengine != b'zlib':
3498 3498 requirements.add(b'exp-compression-%s' % compengine)
3499 3499
3500 3500 if scmutil.gdinitconfig(ui):
3501 3501 requirements.add(requirementsmod.GENERALDELTA_REQUIREMENT)
3502 3502 if ui.configbool(b'format', b'sparse-revlog'):
3503 3503 requirements.add(requirementsmod.SPARSEREVLOG_REQUIREMENT)
3504 3504
3505 3505 # experimental config: format.exp-use-side-data
3506 3506 if ui.configbool(b'format', b'exp-use-side-data'):
3507 3507 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3508 3508 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3509 3509 requirements.add(requirementsmod.SIDEDATA_REQUIREMENT)
3510 3510 # experimental config: format.exp-use-copies-side-data-changeset
3511 3511 if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
3512 3512 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3513 3513 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3514 3514 requirements.add(requirementsmod.SIDEDATA_REQUIREMENT)
3515 3515 requirements.add(requirementsmod.COPIESSDC_REQUIREMENT)
3516 3516 if ui.configbool(b'experimental', b'treemanifest'):
3517 3517 requirements.add(requirementsmod.TREEMANIFEST_REQUIREMENT)
3518 3518
3519 3519 revlogv2 = ui.config(b'experimental', b'revlogv2')
3520 3520 if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
3521 3521 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3522 3522 # generaldelta is implied by revlogv2.
3523 3523 requirements.discard(requirementsmod.GENERALDELTA_REQUIREMENT)
3524 3524 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3525 3525 # experimental config: format.internal-phase
3526 3526 if ui.configbool(b'format', b'internal-phase'):
3527 3527 requirements.add(requirementsmod.INTERNAL_PHASE_REQUIREMENT)
3528 3528
3529 3529 if createopts.get(b'narrowfiles'):
3530 3530 requirements.add(requirementsmod.NARROW_REQUIREMENT)
3531 3531
3532 3532 if createopts.get(b'lfs'):
3533 3533 requirements.add(b'lfs')
3534 3534
3535 3535 if ui.configbool(b'format', b'bookmarks-in-store'):
3536 3536 requirements.add(bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT)
3537 3537
3538 3538 if ui.configbool(b'format', b'use-persistent-nodemap'):
3539 3539 requirements.add(requirementsmod.NODEMAP_REQUIREMENT)
3540 3540
3541 3541 # if share-safe is enabled, let's create the new repository with the new
3542 3542 # requirement
3543 3543 if ui.configbool(b'format', b'use-share-safe'):
3544 3544 requirements.add(requirementsmod.SHARESAFE_REQUIREMENT)
3545 3545
3546 3546 return requirements
3547 3547
3548 3548
3549 3549 def checkrequirementscompat(ui, requirements):
3550 3550 """Checks compatibility of repository requirements enabled and disabled.
3551 3551
3552 3552 Returns a set of requirements which need to be dropped because dependent
3553 3553 requirements are not enabled. Also warns users about it."""
3554 3554
3555 3555 dropped = set()
3556 3556
3557 3557 if requirementsmod.STORE_REQUIREMENT not in requirements:
3558 3558 if bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT in requirements:
3559 3559 ui.warn(
3560 3560 _(
3561 3561 b'ignoring enabled \'format.bookmarks-in-store\' config '
3562 3562 b'because it is incompatible with disabled '
3563 3563 b'\'format.usestore\' config\n'
3564 3564 )
3565 3565 )
3566 3566 dropped.add(bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT)
3567 3567
3568 3568 if (
3569 3569 requirementsmod.SHARED_REQUIREMENT in requirements
3570 3570 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
3571 3571 ):
3572 3572 raise error.Abort(
3573 3573 _(
3574 3574 b"cannot create shared repository as source was created"
3575 3575 b" with 'format.usestore' config disabled"
3576 3576 )
3577 3577 )
3578 3578
3579 3579 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
3580 3580 ui.warn(
3581 3581 _(
3582 3582 b"ignoring enabled 'format.use-share-safe' config because "
3583 3583 b"it is incompatible with disabled 'format.usestore'"
3584 3584 b" config\n"
3585 3585 )
3586 3586 )
3587 3587 dropped.add(requirementsmod.SHARESAFE_REQUIREMENT)
3588 3588
3589 3589 return dropped
3590 3590
3591 3591
3592 3592 def filterknowncreateopts(ui, createopts):
3593 3593 """Filters a dict of repo creation options against options that are known.
3594 3594
3595 3595 Receives a dict of repo creation options and returns a dict of those
3596 3596 options that we don't know how to handle.
3597 3597
3598 3598 This function is called as part of repository creation. If the
3599 3599 returned dict contains any items, repository creation will not
3600 3600 be allowed, as it means there was a request to create a repository
3601 3601 with options not recognized by loaded code.
3602 3602
3603 3603 Extensions can wrap this function to filter out creation options
3604 3604 they know how to handle.
3605 3605 """
3606 3606 known = {
3607 3607 b'backend',
3608 3608 b'lfs',
3609 3609 b'narrowfiles',
3610 3610 b'sharedrepo',
3611 3611 b'sharedrelative',
3612 3612 b'shareditems',
3613 3613 b'shallowfilestore',
3614 3614 }
3615 3615
3616 3616 return {k: v for k, v in createopts.items() if k not in known}
3617 3617
3618 3618
3619 3619 def createrepository(ui, path, createopts=None):
3620 3620 """Create a new repository in a vfs.
3621 3621
3622 3622 ``path`` path to the new repo's working directory.
3623 3623 ``createopts`` options for the new repository.
3624 3624
3625 3625 The following keys for ``createopts`` are recognized:
3626 3626
3627 3627 backend
3628 3628 The storage backend to use.
3629 3629 lfs
3630 3630 Repository will be created with ``lfs`` requirement. The lfs extension
3631 3631 will automatically be loaded when the repository is accessed.
3632 3632 narrowfiles
3633 3633 Set up repository to support narrow file storage.
3634 3634 sharedrepo
3635 3635 Repository object from which storage should be shared.
3636 3636 sharedrelative
3637 3637 Boolean indicating if the path to the shared repo should be
3638 3638 stored as relative. By default, the pointer to the "parent" repo
3639 3639 is stored as an absolute path.
3640 3640 shareditems
3641 3641 Set of items to share to the new repository (in addition to storage).
3642 3642 shallowfilestore
3643 3643 Indicates that storage for files should be shallow (not all ancestor
3644 3644 revisions are known).
3645 3645 """
3646 3646 createopts = defaultcreateopts(ui, createopts=createopts)
3647 3647
3648 3648 unknownopts = filterknowncreateopts(ui, createopts)
3649 3649
3650 3650 if not isinstance(unknownopts, dict):
3651 3651 raise error.ProgrammingError(
3652 3652 b'filterknowncreateopts() did not return a dict'
3653 3653 )
3654 3654
3655 3655 if unknownopts:
3656 3656 raise error.Abort(
3657 3657 _(
3658 3658 b'unable to create repository because of unknown '
3659 3659 b'creation option: %s'
3660 3660 )
3661 3661 % b', '.join(sorted(unknownopts)),
3662 3662 hint=_(b'is a required extension not loaded?'),
3663 3663 )
3664 3664
3665 3665 requirements = newreporequirements(ui, createopts=createopts)
3666 3666 requirements -= checkrequirementscompat(ui, requirements)
3667 3667
3668 3668 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
3669 3669
3670 3670 hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
3671 3671 if hgvfs.exists():
3672 3672 raise error.RepoError(_(b'repository %s already exists') % path)
3673 3673
3674 3674 if b'sharedrepo' in createopts:
3675 3675 sharedpath = createopts[b'sharedrepo'].sharedpath
3676 3676
3677 3677 if createopts.get(b'sharedrelative'):
3678 3678 try:
3679 3679 sharedpath = os.path.relpath(sharedpath, hgvfs.base)
3680 3680 sharedpath = util.pconvert(sharedpath)
3681 3681 except (IOError, ValueError) as e:
3682 3682 # ValueError is raised on Windows if the drive letters differ
3683 3683 # on each path.
3684 3684 raise error.Abort(
3685 3685 _(b'cannot calculate relative path'),
3686 3686 hint=stringutil.forcebytestr(e),
3687 3687 )
3688 3688
3689 3689 if not wdirvfs.exists():
3690 3690 wdirvfs.makedirs()
3691 3691
3692 3692 hgvfs.makedir(notindexed=True)
3693 3693 if b'sharedrepo' not in createopts:
3694 3694 hgvfs.mkdir(b'cache')
3695 3695 hgvfs.mkdir(b'wcache')
3696 3696
3697 3697 has_store = requirementsmod.STORE_REQUIREMENT in requirements
3698 3698 if has_store and b'sharedrepo' not in createopts:
3699 3699 hgvfs.mkdir(b'store')
3700 3700
3701 3701 # We create an invalid changelog outside the store so very old
3702 3702 # Mercurial versions (which didn't know about the requirements
3703 3703 # file) encounter an error on reading the changelog. This
3704 3704 # effectively locks out old clients and prevents them from
3705 3705 # mucking with a repo in an unknown format.
3706 3706 #
3707 3707 # The revlog header has version 65535, which won't be recognized by
3708 3708 # such old clients.
3709 3709 hgvfs.append(
3710 3710 b'00changelog.i',
3711 3711 b'\0\0\xFF\xFF dummy changelog to prevent using the old repo '
3712 3712 b'layout',
3713 3713 )
3714 3714
3715 3715 # Filter the requirements into working copy and store ones
3716 3716 wcreq, storereq = scmutil.filterrequirements(requirements)
3717 3717 # write working copy ones
3718 3718 scmutil.writerequires(hgvfs, wcreq)
3719 3719 # If there are store requirements and the current repository
3720 3720 # is not a shared one, write the store requirements.
3721 3721 # For a new shared repository, we don't need to write the store
3722 3722 # requirements as they are already present in the store's requires file.
3723 3723 if storereq and b'sharedrepo' not in createopts:
3724 3724 storevfs = vfsmod.vfs(hgvfs.join(b'store'), cacheaudited=True)
3725 3725 scmutil.writerequires(storevfs, storereq)
3726 3726
3727 3727 # Write out file telling readers where to find the shared store.
3728 3728 if b'sharedrepo' in createopts:
3729 3729 hgvfs.write(b'sharedpath', sharedpath)
3730 3730
3731 3731 if createopts.get(b'shareditems'):
3732 3732 shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
3733 3733 hgvfs.write(b'shared', shared)
3734 3734
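Given the createopts keys documented above, here is a hedged sketch of how a caller might create a repository and then obtain a usable repo object. The path is made up, and this mirrors what instance() above does rather than being the only supported entry point:

from mercurial import ui as uimod

ui = uimod.ui.load()
# 'backend' is filled in by defaultcreateopts(); 'lfs' requests the lfs requirement
createrepository(ui, b'/tmp/example-repo', createopts={b'lfs': True})
repo = makelocalrepository(ui, b'/tmp/example-repo')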
3735 3735
3736 3736 def poisonrepository(repo):
3737 3737 """Poison a repository instance so it can no longer be used."""
3738 3738 # Perform any cleanup on the instance.
3739 3739 repo.close()
3740 3740
3741 3741 # Our strategy is to replace the type of the object with one that
3742 3742 # has all attribute lookups result in error.
3743 3743 #
3744 3744 # But we have to allow the close() method because some constructors
3745 3745 # of repos call close() on repo references.
3746 3746 class poisonedrepository(object):
3747 3747 def __getattribute__(self, item):
3748 3748 if item == 'close':
3749 3749 return object.__getattribute__(self, item)
3750 3750
3751 3751 raise error.ProgrammingError(
3752 3752 b'repo instances should not be used after unshare'
3753 3753 )
3754 3754
3755 3755 def close(self):
3756 3756 pass
3757 3757
3758 3758 # We may have a repoview, which intercepts __setattr__. So be sure
3759 3759 # we operate at the lowest level possible.
3760 3760 object.__setattr__(repo, '__class__', poisonedrepository)
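The poisoning above works by swapping the instance's __class__ for a class whose __getattribute__ refuses every lookup except close(); object.__setattr__ is used so a repoview's own __setattr__ cannot intercept the swap. A minimal standalone sketch of the same technique, using made-up names (_Victim, poison) that are not part of Mercurial's API:

class _Victim(object):
    """Stand-in for a repository object (illustrative only)."""
    def close(self):
        pass

def poison(obj):
    class _Poisoned(object):
        def __getattribute__(self, item):
            # allow close() so teardown code can still run
            if item == 'close':
                return object.__getattribute__(self, item)
            raise RuntimeError('object used after poisoning')

        def close(self):
            pass

    # bypass any __setattr__ override defined on obj's class
    object.__setattr__(obj, '__class__', _Poisoned)

v = _Victim()
poison(v)
v.close()      # still permitted
# v.anything   # would raise RuntimeError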
@@ -1,2386 +1,2385 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullrev,
20 20 )
21 21 from .pycompat import getattr
22 22 from . import (
23 23 encoding,
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 pathutil,
28 28 policy,
29 29 pycompat,
30 30 revlog,
31 31 util,
32 32 )
33 33 from .interfaces import (
34 34 repository,
35 35 util as interfaceutil,
36 36 )
37 37 from .revlogutils import (
38 38 constants as revlog_constants,
39 39 )
40 40
41 41 parsers = policy.importmod('parsers')
42 42 propertycache = util.propertycache
43 43
44 44 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
45 45 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
46 46
47 47
48 48 def _parse(nodelen, data):
49 49 # This method does a little bit of excessive-looking
50 50 # precondition checking. This is so that the behavior of this
51 51 # class exactly matches its C counterpart to try and help
52 52 # prevent surprise breakage for anyone that develops against
53 53 # the pure version.
54 54 if data and data[-1:] != b'\n':
55 55 raise ValueError(b'Manifest did not end in a newline.')
56 56 prev = None
57 57 for l in data.splitlines():
58 58 if prev is not None and prev > l:
59 59 raise ValueError(b'Manifest lines not in sorted order.')
60 60 prev = l
61 61 f, n = l.split(b'\0')
62 62 nl = len(n)
63 63 flags = n[-1:]
64 64 if flags in _manifestflags:
65 65 n = n[:-1]
66 66 nl -= 1
67 67 else:
68 68 flags = b''
69 69 if nl != 2 * nodelen:
70 70 raise ValueError(b'Invalid manifest line')
71 71
72 72 yield f, bin(n), flags
73 73
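For reference, the flat manifest text that _parse consumes is one line per file: the path, a NUL byte, the hex node, an optional one-character flag, and a newline, with lines sorted by path. A hedged example of driving the generator above with a hand-built text (the node value is an arbitrary placeholder):

node = b'1' * 40  # 40 hex digits standing in for a 20-byte node
lines = [
    b'a.txt\x00' + node + b'\n',        # regular file, no flag
    b'bin/tool\x00' + node + b'x\n',    # trailing 'x' flag: executable
]
for path, n, flags in _parse(20, b''.join(lines)):
    print(path, len(n), flags)          # n is the 20-byte binary node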
74 74
75 75 def _text(it):
76 76 files = []
77 77 lines = []
78 78 for f, n, fl in it:
79 79 files.append(f)
80 80 # if this is changed to support newlines in filenames,
81 81 # be sure to check the templates/ dir again (especially *-raw.tmpl)
82 82 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
83 83
84 84 _checkforbidden(files)
85 85 return b''.join(lines)
86 86
87 87
88 88 class lazymanifestiter(object):
89 89 def __init__(self, lm):
90 90 self.pos = 0
91 91 self.lm = lm
92 92
93 93 def __iter__(self):
94 94 return self
95 95
96 96 def next(self):
97 97 try:
98 98 data, pos = self.lm._get(self.pos)
99 99 except IndexError:
100 100 raise StopIteration
101 101 if pos == -1:
102 102 self.pos += 1
103 103 return data[0]
104 104 self.pos += 1
105 105 zeropos = data.find(b'\x00', pos)
106 106 return data[pos:zeropos]
107 107
108 108 __next__ = next
109 109
110 110
111 111 class lazymanifestiterentries(object):
112 112 def __init__(self, lm):
113 113 self.lm = lm
114 114 self.pos = 0
115 115
116 116 def __iter__(self):
117 117 return self
118 118
119 119 def next(self):
120 120 try:
121 121 data, pos = self.lm._get(self.pos)
122 122 except IndexError:
123 123 raise StopIteration
124 124 if pos == -1:
125 125 self.pos += 1
126 126 return data
127 127 zeropos = data.find(b'\x00', pos)
128 128 nlpos = data.find(b'\n', pos)
129 129 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
130 130 raise error.StorageError(b'Invalid manifest line')
131 131 flags = data[nlpos - 1 : nlpos]
132 132 if flags in _manifestflags:
133 133 hlen = nlpos - zeropos - 2
134 134 else:
135 135 hlen = nlpos - zeropos - 1
136 136 flags = b''
137 137 if hlen != 2 * self.lm._nodelen:
138 138 raise error.StorageError(b'Invalid manifest line')
139 139 hashval = unhexlify(
140 140 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
141 141 )
142 142 self.pos += 1
143 143 return (data[pos:zeropos], hashval, flags)
144 144
145 145 __next__ = next
146 146
147 147
148 148 def unhexlify(data, extra, pos, length):
149 149 s = bin(data[pos : pos + length])
150 150 if extra:
151 151 s += chr(extra & 0xFF)
152 152 return s
153 153
154 154
155 155 def _cmp(a, b):
156 156 return (a > b) - (a < b)
157 157
158 158
159 159 _manifestflags = {b'', b'l', b't', b'x'}
160 160
161 161
162 162 class _lazymanifest(object):
163 163 """A pure python manifest backed by a byte string. It is supplimented with
164 164 internal lists as it is modified, until it is compacted back to a pure byte
165 165 string.
166 166
167 167 ``data`` is the initial manifest data.
168 168
169 169 ``positions`` is a list of offsets, one per manifest entry. Positive
170 170 values are offsets into ``data``, negative values are offsets into the
171 171 ``extradata`` list. When an entry is removed, its entry is dropped from
172 172 ``positions``. The values are encoded such that when walking the list and
173 173 indexing into ``data`` or ``extradata`` as appropriate, the entries are
174 174 sorted by filename.
175 175
176 176 ``extradata`` is a list of (key, hash, flags) for entries that were added or
177 177 modified since the manifest was created or compacted.
178 178 """
179 179
180 180 def __init__(
181 181 self,
182 182 nodelen,
183 183 data,
184 184 positions=None,
185 185 extrainfo=None,
186 186 extradata=None,
187 187 hasremovals=False,
188 188 ):
189 189 self._nodelen = nodelen
190 190 if positions is None:
191 191 self.positions = self.findlines(data)
192 192 self.extrainfo = [0] * len(self.positions)
193 193 self.data = data
194 194 self.extradata = []
195 195 self.hasremovals = False
196 196 else:
197 197 self.positions = positions[:]
198 198 self.extrainfo = extrainfo[:]
199 199 self.extradata = extradata[:]
200 200 self.data = data
201 201 self.hasremovals = hasremovals
202 202
203 203 def findlines(self, data):
204 204 if not data:
205 205 return []
206 206 pos = data.find(b"\n")
207 207 if pos == -1 or data[-1:] != b'\n':
208 208 raise ValueError(b"Manifest did not end in a newline.")
209 209 positions = [0]
210 210 prev = data[: data.find(b'\x00')]
211 211 while pos < len(data) - 1 and pos != -1:
212 212 positions.append(pos + 1)
213 213 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
214 214 if nexts < prev:
215 215 raise ValueError(b"Manifest lines not in sorted order.")
216 216 prev = nexts
217 217 pos = data.find(b"\n", pos + 1)
218 218 return positions
219 219
220 220 def _get(self, index):
221 221 # get the position encoded in pos:
222 222 # positive number is an index in 'data'
223 223 # negative number is in extrapieces
224 224 pos = self.positions[index]
225 225 if pos >= 0:
226 226 return self.data, pos
227 227 return self.extradata[-pos - 1], -1
228 228
229 229 def _getkey(self, pos):
230 230 if pos >= 0:
231 231 return self.data[pos : self.data.find(b'\x00', pos + 1)]
232 232 return self.extradata[-pos - 1][0]
233 233
234 234 def bsearch(self, key):
235 235 first = 0
236 236 last = len(self.positions) - 1
237 237
238 238 while first <= last:
239 239 midpoint = (first + last) // 2
240 240 nextpos = self.positions[midpoint]
241 241 candidate = self._getkey(nextpos)
242 242 r = _cmp(key, candidate)
243 243 if r == 0:
244 244 return midpoint
245 245 else:
246 246 if r < 0:
247 247 last = midpoint - 1
248 248 else:
249 249 first = midpoint + 1
250 250 return -1
251 251
252 252 def bsearch2(self, key):
253 253 # same as the above, but will always return the position
254 254 # done for performance reasons
255 255 first = 0
256 256 last = len(self.positions) - 1
257 257
258 258 while first <= last:
259 259 midpoint = (first + last) // 2
260 260 nextpos = self.positions[midpoint]
261 261 candidate = self._getkey(nextpos)
262 262 r = _cmp(key, candidate)
263 263 if r == 0:
264 264 return (midpoint, True)
265 265 else:
266 266 if r < 0:
267 267 last = midpoint - 1
268 268 else:
269 269 first = midpoint + 1
270 270 return (first, False)
271 271
272 272 def __contains__(self, key):
273 273 return self.bsearch(key) != -1
274 274
275 275 def __getitem__(self, key):
276 276 if not isinstance(key, bytes):
277 277 raise TypeError(b"getitem: manifest keys must be a byte string.")
278 278 needle = self.bsearch(key)
279 279 if needle == -1:
280 280 raise KeyError
281 281 data, pos = self._get(needle)
282 282 if pos == -1:
283 283 return (data[1], data[2])
284 284 zeropos = data.find(b'\x00', pos)
285 285 nlpos = data.find(b'\n', zeropos)
286 286 assert 0 <= needle <= len(self.positions)
287 287 assert len(self.extrainfo) == len(self.positions)
288 288 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
289 289 raise error.StorageError(b'Invalid manifest line')
290 290 hlen = nlpos - zeropos - 1
291 291 flags = data[nlpos - 1 : nlpos]
292 292 if flags in _manifestflags:
293 293 hlen -= 1
294 294 else:
295 295 flags = b''
296 296 if hlen != 2 * self._nodelen:
297 297 raise error.StorageError(b'Invalid manifest line')
298 298 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
299 299 return (hashval, flags)
300 300
301 301 def __delitem__(self, key):
302 302 needle, found = self.bsearch2(key)
303 303 if not found:
304 304 raise KeyError
305 305 cur = self.positions[needle]
306 306 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
307 307 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
308 308 if cur >= 0:
309 309 # This does NOT unsort the list as far as the search functions are
310 310 # concerned, as they only examine lines mapped by self.positions.
311 311 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
312 312 self.hasremovals = True
313 313
314 314 def __setitem__(self, key, value):
315 315 if not isinstance(key, bytes):
316 316 raise TypeError(b"setitem: manifest keys must be a byte string.")
317 317 if not isinstance(value, tuple) or len(value) != 2:
318 318 raise TypeError(
319 319 b"Manifest values must be a tuple of (node, flags)."
320 320 )
321 321 hashval = value[0]
322 322 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
323 323 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
324 324 flags = value[1]
325 325 if not isinstance(flags, bytes) or len(flags) > 1:
326 326 raise TypeError(b"flags must be a 0 or 1 byte string, got %r", flags)
327 327 needle, found = self.bsearch2(key)
328 328 if found:
329 329 # put the item
330 330 pos = self.positions[needle]
331 331 if pos < 0:
332 332 self.extradata[-pos - 1] = (key, hashval, value[1])
333 333 else:
334 334 # just don't bother
335 335 self.extradata.append((key, hashval, value[1]))
336 336 self.positions[needle] = -len(self.extradata)
337 337 else:
338 338 # not found, put it in with extra positions
339 339 self.extradata.append((key, hashval, value[1]))
340 340 self.positions = (
341 341 self.positions[:needle]
342 342 + [-len(self.extradata)]
343 343 + self.positions[needle:]
344 344 )
345 345 self.extrainfo = (
346 346 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
347 347 )
348 348
349 349 def copy(self):
350 350 # XXX call _compact like in C?
351 351 return _lazymanifest(
352 352 self._nodelen,
353 353 self.data,
354 354 self.positions,
355 355 self.extrainfo,
356 356 self.extradata,
357 357 self.hasremovals,
358 358 )
359 359
360 360 def _compact(self):
361 361 # hopefully not called TOO often
362 362 if len(self.extradata) == 0 and not self.hasremovals:
363 363 return
364 364 l = []
365 365 i = 0
366 366 offset = 0
367 367 self.extrainfo = [0] * len(self.positions)
368 368 while i < len(self.positions):
369 369 if self.positions[i] >= 0:
370 370 cur = self.positions[i]
371 371 last_cut = cur
372 372
373 373 # Collect all contiguous entries in the buffer at the current
374 374 # offset, breaking out only for added/modified items held in
375 375 # extradata, or a deleted line prior to the next position.
376 376 while True:
377 377 self.positions[i] = offset
378 378 i += 1
379 379 if i == len(self.positions) or self.positions[i] < 0:
380 380 break
381 381
382 382 # A removed file has no positions[] entry, but does have an
383 383 # overwritten first byte. Break out and find the end of the
384 384 # current good entry/entries if there is a removed file
385 385 # before the next position.
386 386 if (
387 387 self.hasremovals
388 388 and self.data.find(b'\n\x00', cur, self.positions[i])
389 389 != -1
390 390 ):
391 391 break
392 392
393 393 offset += self.positions[i] - cur
394 394 cur = self.positions[i]
395 395 end_cut = self.data.find(b'\n', cur)
396 396 if end_cut != -1:
397 397 end_cut += 1
398 398 offset += end_cut - cur
399 399 l.append(self.data[last_cut:end_cut])
400 400 else:
401 401 while i < len(self.positions) and self.positions[i] < 0:
402 402 cur = self.positions[i]
403 403 t = self.extradata[-cur - 1]
404 404 l.append(self._pack(t))
405 405 self.positions[i] = offset
406 406 # Hashes are either 20 bytes (old sha1s) or 32
407 407 # bytes (new non-sha1).
408 408 hlen = 20
409 409 if len(t[1]) > 25:
410 410 hlen = 32
411 411 if len(t[1]) > hlen:
412 412 self.extrainfo[i] = ord(t[1][hlen + 1])
413 413 offset += len(l[-1])
414 414 i += 1
415 415 self.data = b''.join(l)
416 416 self.hasremovals = False
417 417 self.extradata = []
418 418
419 419 def _pack(self, d):
420 420 n = d[1]
421 421 assert len(n) in (20, 32)
422 422 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
423 423
424 424 def text(self):
425 425 self._compact()
426 426 return self.data
427 427
428 428 def diff(self, m2, clean=False):
429 429 '''Finds changes between the current manifest and m2.'''
430 430 # XXX think whether efficiency matters here
431 431 diff = {}
432 432
433 433 for fn, e1, flags in self.iterentries():
434 434 if fn not in m2:
435 435 diff[fn] = (e1, flags), (None, b'')
436 436 else:
437 437 e2 = m2[fn]
438 438 if (e1, flags) != e2:
439 439 diff[fn] = (e1, flags), e2
440 440 elif clean:
441 441 diff[fn] = None
442 442
443 443 for fn, e2, flags in m2.iterentries():
444 444 if fn not in self:
445 445 diff[fn] = (None, b''), (e2, flags)
446 446
447 447 return diff
448 448
449 449 def iterentries(self):
450 450 return lazymanifestiterentries(self)
451 451
452 452 def iterkeys(self):
453 453 return lazymanifestiter(self)
454 454
455 455 def __iter__(self):
456 456 return lazymanifestiter(self)
457 457
458 458 def __len__(self):
459 459 return len(self.positions)
460 460
461 461 def filtercopy(self, filterfn):
462 462 # XXX should be optimized
463 463 c = _lazymanifest(self._nodelen, b'')
464 464 for f, n, fl in self.iterentries():
465 465 if filterfn(f):
466 466 c[f] = n, fl
467 467 return c
468 468
469 469
470 470 try:
471 471 _lazymanifest = parsers.lazymanifest
472 472 except AttributeError:
473 473 pass
474 474
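Whether the pure-Python class above or the C-accelerated parsers.lazymanifest is picked, the calling convention used by manifestdict below is the same. A rough, hedged usage sketch (paths and node bytes are invented):

lm = _lazymanifest(20, b'')               # start from an empty manifest
lm[b'src/main.py'] = (b'\x11' * 20, b'')  # (20-byte node, no flag)
lm[b'bin/run'] = (b'\x22' * 20, b'x')     # executable flag
node, flag = lm[b'bin/run']
assert flag == b'x'
text = lm.text()                          # compacts back to the sorted text format
assert text.startswith(b'bin/run\x00')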
475 475
476 476 @interfaceutil.implementer(repository.imanifestdict)
477 477 class manifestdict(object):
478 478 def __init__(self, nodelen, data=b''):
479 479 self._nodelen = nodelen
480 480 self._lm = _lazymanifest(nodelen, data)
481 481
482 482 def __getitem__(self, key):
483 483 return self._lm[key][0]
484 484
485 485 def find(self, key):
486 486 return self._lm[key]
487 487
488 488 def __len__(self):
489 489 return len(self._lm)
490 490
491 491 def __nonzero__(self):
492 492 # nonzero is covered by the __len__ function, but implementing it here
493 493 # makes it easier for extensions to override.
494 494 return len(self._lm) != 0
495 495
496 496 __bool__ = __nonzero__
497 497
498 498 def __setitem__(self, key, node):
499 499 self._lm[key] = node, self.flags(key)
500 500
501 501 def __contains__(self, key):
502 502 if key is None:
503 503 return False
504 504 return key in self._lm
505 505
506 506 def __delitem__(self, key):
507 507 del self._lm[key]
508 508
509 509 def __iter__(self):
510 510 return self._lm.__iter__()
511 511
512 512 def iterkeys(self):
513 513 return self._lm.iterkeys()
514 514
515 515 def keys(self):
516 516 return list(self.iterkeys())
517 517
518 518 def filesnotin(self, m2, match=None):
519 519 '''Set of files in this manifest that are not in the other'''
520 520 if match is not None:
521 521 match = matchmod.badmatch(match, lambda path, msg: None)
522 522 sm2 = set(m2.walk(match))
523 523 return {f for f in self.walk(match) if f not in sm2}
524 524 return {f for f in self if f not in m2}
525 525
526 526 @propertycache
527 527 def _dirs(self):
528 528 return pathutil.dirs(self)
529 529
530 530 def dirs(self):
531 531 return self._dirs
532 532
533 533 def hasdir(self, dir):
534 534 return dir in self._dirs
535 535
536 536 def _filesfastpath(self, match):
537 537 """Checks whether we can correctly and quickly iterate over matcher
538 538 files instead of over manifest files."""
539 539 files = match.files()
540 540 return len(files) < 100 and (
541 541 match.isexact()
542 542 or (match.prefix() and all(fn in self for fn in files))
543 543 )
544 544
545 545 def walk(self, match):
546 546 """Generates matching file names.
547 547
548 548 Equivalent to manifest.matches(match).iterkeys(), but without creating
549 549 an entirely new manifest.
550 550
551 551 It also reports nonexistent files by marking them bad with match.bad().
552 552 """
553 553 if match.always():
554 554 for f in iter(self):
555 555 yield f
556 556 return
557 557
558 558 fset = set(match.files())
559 559
560 560 # avoid the entire walk if we're only looking for specific files
561 561 if self._filesfastpath(match):
562 562 for fn in sorted(fset):
563 563 if fn in self:
564 564 yield fn
565 565 return
566 566
567 567 for fn in self:
568 568 if fn in fset:
569 569 # specified pattern is the exact name
570 570 fset.remove(fn)
571 571 if match(fn):
572 572 yield fn
573 573
574 574 # for dirstate.walk, files=[''] means "walk the whole tree".
575 575 # follow that here, too
576 576 fset.discard(b'')
577 577
578 578 for fn in sorted(fset):
579 579 if not self.hasdir(fn):
580 580 match.bad(fn, None)
581 581
582 582 def _matches(self, match):
583 583 '''generate a new manifest filtered by the match argument'''
584 584 if match.always():
585 585 return self.copy()
586 586
587 587 if self._filesfastpath(match):
588 588 m = manifestdict(self._nodelen)
589 589 lm = self._lm
590 590 for fn in match.files():
591 591 if fn in lm:
592 592 m._lm[fn] = lm[fn]
593 593 return m
594 594
595 595 m = manifestdict(self._nodelen)
596 596 m._lm = self._lm.filtercopy(match)
597 597 return m
598 598
599 599 def diff(self, m2, match=None, clean=False):
600 600 """Finds changes between the current manifest and m2.
601 601
602 602 Args:
603 603 m2: the manifest to which this manifest should be compared.
604 604 clean: if true, include files unchanged between these manifests
605 605 with a None value in the returned dictionary.
606 606
607 607 The result is returned as a dict with filename as key and
608 608 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
609 609 nodeid in the current/other manifest and fl1/fl2 is the flag
610 610 in the current/other manifest. Where the file does not exist,
611 611 the nodeid will be None and the flags will be the empty
612 612 string.
613 613 """
614 614 if match:
615 615 m1 = self._matches(match)
616 616 m2 = m2._matches(match)
617 617 return m1.diff(m2, clean=clean)
618 618 return self._lm.diff(m2._lm, clean)
619 619
620 620 def setflag(self, key, flag):
621 621 if flag not in _manifestflags:
622 622 raise TypeError(b"Invalid manifest flag set.")
623 623 self._lm[key] = self[key], flag
624 624
625 625 def get(self, key, default=None):
626 626 try:
627 627 return self._lm[key][0]
628 628 except KeyError:
629 629 return default
630 630
631 631 def flags(self, key):
632 632 try:
633 633 return self._lm[key][1]
634 634 except KeyError:
635 635 return b''
636 636
637 637 def copy(self):
638 638 c = manifestdict(self._nodelen)
639 639 c._lm = self._lm.copy()
640 640 return c
641 641
642 642 def items(self):
643 643 return (x[:2] for x in self._lm.iterentries())
644 644
645 645 def iteritems(self):
646 646 return (x[:2] for x in self._lm.iterentries())
647 647
648 648 def iterentries(self):
649 649 return self._lm.iterentries()
650 650
651 651 def text(self):
652 652 # most likely uses native version
653 653 return self._lm.text()
654 654
655 655 def fastdelta(self, base, changes):
656 656 """Given a base manifest text as a bytearray and a list of changes
657 657 relative to that text, compute a delta that can be used by revlog.
658 658 """
659 659 delta = []
660 660 dstart = None
661 661 dend = None
662 662 dline = [b""]
663 663 start = 0
664 664 # zero copy representation of base as a buffer
665 665 addbuf = util.buffer(base)
666 666
667 667 changes = list(changes)
668 668 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
669 669 # start with a readonly loop that finds the offset of
670 670 # each line and creates the deltas
671 671 for f, todelete in changes:
672 672 # bs will either be the index of the item or the insert point
673 673 start, end = _msearch(addbuf, f, start)
674 674 if not todelete:
675 675 h, fl = self._lm[f]
676 676 l = b"%s\0%s%s\n" % (f, hex(h), fl)
677 677 else:
678 678 if start == end:
679 679 # item we want to delete was not found, error out
680 680 raise AssertionError(
681 681 _(b"failed to remove %s from manifest") % f
682 682 )
683 683 l = b""
684 684 if dstart is not None and dstart <= start and dend >= start:
685 685 if dend < end:
686 686 dend = end
687 687 if l:
688 688 dline.append(l)
689 689 else:
690 690 if dstart is not None:
691 691 delta.append([dstart, dend, b"".join(dline)])
692 692 dstart = start
693 693 dend = end
694 694 dline = [l]
695 695
696 696 if dstart is not None:
697 697 delta.append([dstart, dend, b"".join(dline)])
698 698 # apply the delta to the base, and get a delta for addrevision
699 699 deltatext, arraytext = _addlistdelta(base, delta)
700 700 else:
701 701 # For large changes, it's much cheaper to just build the text and
702 702 # diff it.
703 703 arraytext = bytearray(self.text())
704 704 deltatext = mdiff.textdiff(
705 705 util.buffer(base), util.buffer(arraytext)
706 706 )
707 707
708 708 return arraytext, deltatext
709 709
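To make the diff() contract documented above concrete, here is a hedged sketch of the mapping it returns for two small manifestdict instances (all node values invented):

m1 = manifestdict(20)
m2 = manifestdict(20)
m1[b'keep.txt'] = b'\xaa' * 20
m2[b'keep.txt'] = b'\xaa' * 20        # unchanged between the two manifests
m1[b'old.txt'] = b'\xbb' * 20         # only in m1
m2[b'new.txt'] = b'\xcc' * 20         # only in m2

d = m1.diff(m2)
# {b'old.txt': ((b'\xbb' * 20, b''), (None, b'')),
#  b'new.txt': ((None, b''), (b'\xcc' * 20, b''))}
assert b'keep.txt' not in d           # clean files are omitted unless clean=True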
710 710
711 711 def _msearch(m, s, lo=0, hi=None):
712 712 """return a tuple (start, end) that says where to find s within m.
713 713
714 714 If the string is found, m[start:end] is the line containing
715 715 that string. If start == end, the string was not found and
716 716 they indicate the proper sorted insertion point.
717 717
718 718 m should be a buffer, a memoryview or a byte string.
719 719 s is a byte string"""
720 720
721 721 def advance(i, c):
722 722 while i < lenm and m[i : i + 1] != c:
723 723 i += 1
724 724 return i
725 725
726 726 if not s:
727 727 return (lo, lo)
728 728 lenm = len(m)
729 729 if not hi:
730 730 hi = lenm
731 731 while lo < hi:
732 732 mid = (lo + hi) // 2
733 733 start = mid
734 734 while start > 0 and m[start - 1 : start] != b'\n':
735 735 start -= 1
736 736 end = advance(start, b'\0')
737 737 if bytes(m[start:end]) < s:
738 738 # we know that after the null there are 40 bytes of sha1
739 739 # this translates to the bisect lo = mid + 1
740 740 lo = advance(end + 40, b'\n') + 1
741 741 else:
742 742 # this translates to the bisect hi = mid
743 743 hi = start
744 744 end = advance(lo, b'\0')
745 745 found = m[lo:end]
746 746 if s == found:
747 747 # we know that after the null there are 40 bytes of sha1
748 748 end = advance(end + 40, b'\n')
749 749 return (lo, end + 1)
750 750 else:
751 751 return (lo, lo)
752 752
753 753
754 754 def _checkforbidden(l):
755 755 """Check filenames for illegal characters."""
756 756 for f in l:
757 757 if b'\n' in f or b'\r' in f:
758 758 raise error.StorageError(
759 759 _(b"'\\n' and '\\r' disallowed in filenames: %r")
760 760 % pycompat.bytestr(f)
761 761 )
762 762
763 763
764 764 # apply the changes collected during the bisect loop to our addlist
765 765 # return a delta suitable for addrevision
766 766 def _addlistdelta(addlist, x):
767 767 # for large addlist arrays, building a new array is cheaper
768 768 # than repeatedly modifying the existing one
769 769 currentposition = 0
770 770 newaddlist = bytearray()
771 771
772 772 for start, end, content in x:
773 773 newaddlist += addlist[currentposition:start]
774 774 if content:
775 775 newaddlist += bytearray(content)
776 776
777 777 currentposition = end
778 778
779 779 newaddlist += addlist[currentposition:]
780 780
781 781 deltatext = b"".join(
782 782 struct.pack(b">lll", start, end, len(content)) + content
783 783 for start, end, content in x
784 784 )
785 785 return deltatext, newaddlist
786 786
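The deltatext assembled above is a sequence of binary hunks, each a big-endian (start, end, length) header followed by the replacement bytes. A self-contained, hedged sketch of packing and applying a single such hunk by hand (a toy applier, not Mercurial's mpatch):

import struct

base = b'a.txt\x00' + b'1' * 40 + b'\n'

# replace bytes [0:5) of the base (b'a.txt') with b'b.txt'
start, end, content = 0, 5, b'b.txt'
hunk = struct.pack(b'>lll', start, end, len(content)) + content

# apply one hunk; real code walks a whole list of hunks
s, e, ln = struct.unpack(b'>lll', hunk[:12])
patched = base[:s] + hunk[12 : 12 + ln] + base[e:]
assert patched == b'b.txt\x00' + b'1' * 40 + b'\n'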
787 787
788 788 def _splittopdir(f):
789 789 if b'/' in f:
790 790 dir, subpath = f.split(b'/', 1)
791 791 return dir + b'/', subpath
792 792 else:
793 793 return b'', f
794 794
795 795
796 796 _noop = lambda s: None
797 797
798 798
799 799 @interfaceutil.implementer(repository.imanifestdict)
800 800 class treemanifest(object):
801 801 def __init__(self, nodeconstants, dir=b'', text=b''):
802 802 self._dir = dir
803 803 self.nodeconstants = nodeconstants
804 804 self._node = self.nodeconstants.nullid
805 805 self._nodelen = self.nodeconstants.nodelen
806 806 self._loadfunc = _noop
807 807 self._copyfunc = _noop
808 808 self._dirty = False
809 809 self._dirs = {}
810 810 self._lazydirs = {}
811 811 # Using _lazymanifest here is a little slower than plain old dicts
812 812 self._files = {}
813 813 self._flags = {}
814 814 if text:
815 815
816 816 def readsubtree(subdir, subm):
817 817 raise AssertionError(
818 818 b'treemanifest constructor only accepts flat manifests'
819 819 )
820 820
821 821 self.parse(text, readsubtree)
822 822 self._dirty = True # Mark flat manifest dirty after parsing
823 823
824 824 def _subpath(self, path):
825 825 return self._dir + path
826 826
827 827 def _loadalllazy(self):
828 828 selfdirs = self._dirs
829 829 subpath = self._subpath
830 830 for d, (node, readsubtree, docopy) in pycompat.iteritems(
831 831 self._lazydirs
832 832 ):
833 833 if docopy:
834 834 selfdirs[d] = readsubtree(subpath(d), node).copy()
835 835 else:
836 836 selfdirs[d] = readsubtree(subpath(d), node)
837 837 self._lazydirs = {}
838 838
839 839 def _loadlazy(self, d):
840 840 v = self._lazydirs.get(d)
841 841 if v:
842 842 node, readsubtree, docopy = v
843 843 if docopy:
844 844 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
845 845 else:
846 846 self._dirs[d] = readsubtree(self._subpath(d), node)
847 847 del self._lazydirs[d]
848 848
849 849 def _loadchildrensetlazy(self, visit):
850 850 if not visit:
851 851 return None
852 852 if visit == b'all' or visit == b'this':
853 853 self._loadalllazy()
854 854 return None
855 855
856 856 loadlazy = self._loadlazy
857 857 for k in visit:
858 858 loadlazy(k + b'/')
859 859 return visit
860 860
861 861 def _loaddifflazy(self, t1, t2):
862 862 """load items in t1 and t2 if they're needed for diffing.
863 863
864 864 The criteria currently are:
865 865 - if it's not present in _lazydirs in either t1 or t2, load it in the
866 866 other (it may already be loaded or it may not exist, doesn't matter)
867 867 - if it's present in _lazydirs in both, compare the nodeid; if it
868 868 differs, load it in both
869 869 """
870 870 toloadlazy = []
871 871 for d, v1 in pycompat.iteritems(t1._lazydirs):
872 872 v2 = t2._lazydirs.get(d)
873 873 if not v2 or v2[0] != v1[0]:
874 874 toloadlazy.append(d)
875 875 for d, v1 in pycompat.iteritems(t2._lazydirs):
876 876 if d not in t1._lazydirs:
877 877 toloadlazy.append(d)
878 878
879 879 for d in toloadlazy:
880 880 t1._loadlazy(d)
881 881 t2._loadlazy(d)
882 882
883 883 def __len__(self):
884 884 self._load()
885 885 size = len(self._files)
886 886 self._loadalllazy()
887 887 for m in self._dirs.values():
888 888 size += m.__len__()
889 889 return size
890 890
891 891 def __nonzero__(self):
892 892 # Faster than "__len__() != 0" since it avoids loading sub-manifests
893 893 return not self._isempty()
894 894
895 895 __bool__ = __nonzero__
896 896
897 897 def _isempty(self):
898 898 self._load() # for consistency; already loaded by all callers
899 899 # See if we can skip loading everything.
900 900 if self._files or (
901 901 self._dirs and any(not m._isempty() for m in self._dirs.values())
902 902 ):
903 903 return False
904 904 self._loadalllazy()
905 905 return not self._dirs or all(m._isempty() for m in self._dirs.values())
906 906
907 907 @encoding.strmethod
908 908 def __repr__(self):
909 909 return (
910 910 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
911 911 % (
912 912 self._dir,
913 913 hex(self._node),
914 914 bool(self._loadfunc is _noop),
915 915 self._dirty,
916 916 id(self),
917 917 )
918 918 )
919 919
920 920 def dir(self):
921 921 """The directory that this tree manifest represents, including a
922 922 trailing '/'. Empty string for the repo root directory."""
923 923 return self._dir
924 924
925 925 def node(self):
926 926 """This node of this instance. nullid for unsaved instances. Should
927 927 be updated when the instance is read or written from a revlog.
928 928 """
929 929 assert not self._dirty
930 930 return self._node
931 931
932 932 def setnode(self, node):
933 933 self._node = node
934 934 self._dirty = False
935 935
936 936 def iterentries(self):
937 937 self._load()
938 938 self._loadalllazy()
939 939 for p, n in sorted(
940 940 itertools.chain(self._dirs.items(), self._files.items())
941 941 ):
942 942 if p in self._files:
943 943 yield self._subpath(p), n, self._flags.get(p, b'')
944 944 else:
945 945 for x in n.iterentries():
946 946 yield x
947 947
948 948 def items(self):
949 949 self._load()
950 950 self._loadalllazy()
951 951 for p, n in sorted(
952 952 itertools.chain(self._dirs.items(), self._files.items())
953 953 ):
954 954 if p in self._files:
955 955 yield self._subpath(p), n
956 956 else:
957 957 for f, sn in pycompat.iteritems(n):
958 958 yield f, sn
959 959
960 960 iteritems = items
961 961
962 962 def iterkeys(self):
963 963 self._load()
964 964 self._loadalllazy()
965 965 for p in sorted(itertools.chain(self._dirs, self._files)):
966 966 if p in self._files:
967 967 yield self._subpath(p)
968 968 else:
969 969 for f in self._dirs[p]:
970 970 yield f
971 971
972 972 def keys(self):
973 973 return list(self.iterkeys())
974 974
975 975 def __iter__(self):
976 976 return self.iterkeys()
977 977
978 978 def __contains__(self, f):
979 979 if f is None:
980 980 return False
981 981 self._load()
982 982 dir, subpath = _splittopdir(f)
983 983 if dir:
984 984 self._loadlazy(dir)
985 985
986 986 if dir not in self._dirs:
987 987 return False
988 988
989 989 return self._dirs[dir].__contains__(subpath)
990 990 else:
991 991 return f in self._files
992 992
993 993 def get(self, f, default=None):
994 994 self._load()
995 995 dir, subpath = _splittopdir(f)
996 996 if dir:
997 997 self._loadlazy(dir)
998 998
999 999 if dir not in self._dirs:
1000 1000 return default
1001 1001 return self._dirs[dir].get(subpath, default)
1002 1002 else:
1003 1003 return self._files.get(f, default)
1004 1004
1005 1005 def __getitem__(self, f):
1006 1006 self._load()
1007 1007 dir, subpath = _splittopdir(f)
1008 1008 if dir:
1009 1009 self._loadlazy(dir)
1010 1010
1011 1011 return self._dirs[dir].__getitem__(subpath)
1012 1012 else:
1013 1013 return self._files[f]
1014 1014
1015 1015 def flags(self, f):
1016 1016 self._load()
1017 1017 dir, subpath = _splittopdir(f)
1018 1018 if dir:
1019 1019 self._loadlazy(dir)
1020 1020
1021 1021 if dir not in self._dirs:
1022 1022 return b''
1023 1023 return self._dirs[dir].flags(subpath)
1024 1024 else:
1025 1025 if f in self._lazydirs or f in self._dirs:
1026 1026 return b''
1027 1027 return self._flags.get(f, b'')
1028 1028
1029 1029 def find(self, f):
1030 1030 self._load()
1031 1031 dir, subpath = _splittopdir(f)
1032 1032 if dir:
1033 1033 self._loadlazy(dir)
1034 1034
1035 1035 return self._dirs[dir].find(subpath)
1036 1036 else:
1037 1037 return self._files[f], self._flags.get(f, b'')
1038 1038
1039 1039 def __delitem__(self, f):
1040 1040 self._load()
1041 1041 dir, subpath = _splittopdir(f)
1042 1042 if dir:
1043 1043 self._loadlazy(dir)
1044 1044
1045 1045 self._dirs[dir].__delitem__(subpath)
1046 1046 # If the directory is now empty, remove it
1047 1047 if self._dirs[dir]._isempty():
1048 1048 del self._dirs[dir]
1049 1049 else:
1050 1050 del self._files[f]
1051 1051 if f in self._flags:
1052 1052 del self._flags[f]
1053 1053 self._dirty = True
1054 1054
1055 1055 def __setitem__(self, f, n):
1056 1056 assert n is not None
1057 1057 self._load()
1058 1058 dir, subpath = _splittopdir(f)
1059 1059 if dir:
1060 1060 self._loadlazy(dir)
1061 1061 if dir not in self._dirs:
1062 1062 self._dirs[dir] = treemanifest(
1063 1063 self.nodeconstants, self._subpath(dir)
1064 1064 )
1065 1065 self._dirs[dir].__setitem__(subpath, n)
1066 1066 else:
1067 1067 # manifest nodes are either 20 bytes or 32 bytes,
1068 1068 # depending on the hash in use. Assert this as historically
1069 1069 # sometimes extra bytes were added.
1070 1070 assert len(n) in (20, 32)
1071 1071 self._files[f] = n
1072 1072 self._dirty = True
1073 1073
1074 1074 def _load(self):
1075 1075 if self._loadfunc is not _noop:
1076 1076 lf, self._loadfunc = self._loadfunc, _noop
1077 1077 lf(self)
1078 1078 elif self._copyfunc is not _noop:
1079 1079 cf, self._copyfunc = self._copyfunc, _noop
1080 1080 cf(self)
1081 1081
1082 1082 def setflag(self, f, flags):
1083 1083 """Set the flags (symlink, executable) for path f."""
1084 1084 if flags not in _manifestflags:
1085 1085 raise TypeError(b"Invalid manifest flag set.")
1086 1086 self._load()
1087 1087 dir, subpath = _splittopdir(f)
1088 1088 if dir:
1089 1089 self._loadlazy(dir)
1090 1090 if dir not in self._dirs:
1091 1091 self._dirs[dir] = treemanifest(
1092 1092 self.nodeconstants, self._subpath(dir)
1093 1093 )
1094 1094 self._dirs[dir].setflag(subpath, flags)
1095 1095 else:
1096 1096 self._flags[f] = flags
1097 1097 self._dirty = True
1098 1098
1099 1099 def copy(self):
1100 1100 copy = treemanifest(self.nodeconstants, self._dir)
1101 1101 copy._node = self._node
1102 1102 copy._dirty = self._dirty
1103 1103 if self._copyfunc is _noop:
1104 1104
1105 1105 def _copyfunc(s):
1106 1106 self._load()
1107 1107 s._lazydirs = {
1108 1108 d: (n, r, True)
1109 1109 for d, (n, r, c) in pycompat.iteritems(self._lazydirs)
1110 1110 }
1111 1111 sdirs = s._dirs
1112 1112 for d, v in pycompat.iteritems(self._dirs):
1113 1113 sdirs[d] = v.copy()
1114 1114 s._files = dict.copy(self._files)
1115 1115 s._flags = dict.copy(self._flags)
1116 1116
1117 1117 if self._loadfunc is _noop:
1118 1118 _copyfunc(copy)
1119 1119 else:
1120 1120 copy._copyfunc = _copyfunc
1121 1121 else:
1122 1122 copy._copyfunc = self._copyfunc
1123 1123 return copy
1124 1124
1125 1125 def filesnotin(self, m2, match=None):
1126 1126 '''Set of files in this manifest that are not in the other'''
1127 1127 if match and not match.always():
1128 1128 m1 = self._matches(match)
1129 1129 m2 = m2._matches(match)
1130 1130 return m1.filesnotin(m2)
1131 1131
1132 1132 files = set()
1133 1133
1134 1134 def _filesnotin(t1, t2):
1135 1135 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1136 1136 return
1137 1137 t1._load()
1138 1138 t2._load()
1139 1139 self._loaddifflazy(t1, t2)
1140 1140 for d, m1 in pycompat.iteritems(t1._dirs):
1141 1141 if d in t2._dirs:
1142 1142 m2 = t2._dirs[d]
1143 1143 _filesnotin(m1, m2)
1144 1144 else:
1145 1145 files.update(m1.iterkeys())
1146 1146
1147 1147 for fn in t1._files:
1148 1148 if fn not in t2._files:
1149 1149 files.add(t1._subpath(fn))
1150 1150
1151 1151 _filesnotin(self, m2)
1152 1152 return files
1153 1153
1154 1154 @propertycache
1155 1155 def _alldirs(self):
1156 1156 return pathutil.dirs(self)
1157 1157
1158 1158 def dirs(self):
1159 1159 return self._alldirs
1160 1160
1161 1161 def hasdir(self, dir):
1162 1162 self._load()
1163 1163 topdir, subdir = _splittopdir(dir)
1164 1164 if topdir:
1165 1165 self._loadlazy(topdir)
1166 1166 if topdir in self._dirs:
1167 1167 return self._dirs[topdir].hasdir(subdir)
1168 1168 return False
1169 1169 dirslash = dir + b'/'
1170 1170 return dirslash in self._dirs or dirslash in self._lazydirs
1171 1171
1172 1172 def walk(self, match):
1173 1173 """Generates matching file names.
1174 1174
1175 1175 It also reports nonexistent files by marking them bad with match.bad().
1176 1176 """
1177 1177 if match.always():
1178 1178 for f in iter(self):
1179 1179 yield f
1180 1180 return
1181 1181
1182 1182 fset = set(match.files())
1183 1183
1184 1184 for fn in self._walk(match):
1185 1185 if fn in fset:
1186 1186 # specified pattern is the exact name
1187 1187 fset.remove(fn)
1188 1188 yield fn
1189 1189
1190 1190 # for dirstate.walk, files=[''] means "walk the whole tree".
1191 1191 # follow that here, too
1192 1192 fset.discard(b'')
1193 1193
1194 1194 for fn in sorted(fset):
1195 1195 if not self.hasdir(fn):
1196 1196 match.bad(fn, None)
1197 1197
1198 1198 def _walk(self, match):
1199 1199 '''Recursively generates matching file names for walk().'''
1200 1200 visit = match.visitchildrenset(self._dir[:-1])
1201 1201 if not visit:
1202 1202 return
1203 1203
1204 1204 # yield this dir's files and walk its submanifests
1205 1205 self._load()
1206 1206 visit = self._loadchildrensetlazy(visit)
1207 1207 for p in sorted(list(self._dirs) + list(self._files)):
1208 1208 if p in self._files:
1209 1209 fullp = self._subpath(p)
1210 1210 if match(fullp):
1211 1211 yield fullp
1212 1212 else:
1213 1213 if not visit or p[:-1] in visit:
1214 1214 for f in self._dirs[p]._walk(match):
1215 1215 yield f
1216 1216
1217 1217 def _matches(self, match):
1218 1218 """recursively generate a new manifest filtered by the match argument."""
1219 1219 if match.always():
1220 1220 return self.copy()
1221 1221 return self._matches_inner(match)
1222 1222
1223 1223 def _matches_inner(self, match):
1224 1224 if match.always():
1225 1225 return self.copy()
1226 1226
1227 1227 visit = match.visitchildrenset(self._dir[:-1])
1228 1228 if visit == b'all':
1229 1229 return self.copy()
1230 1230 ret = treemanifest(self.nodeconstants, self._dir)
1231 1231 if not visit:
1232 1232 return ret
1233 1233
1234 1234 self._load()
1235 1235 for fn in self._files:
1236 1236 # While visitchildrenset *usually* lists only subdirs, this is
1237 1237 # actually up to the matcher and may have some files in the set().
1238 1238 # If visit == 'this', we should obviously look at the files in this
1239 1239 # directory; if visit is a set, and fn is in it, we should inspect
1240 1240 # fn (but no need to inspect things not in the set).
1241 1241 if visit != b'this' and fn not in visit:
1242 1242 continue
1243 1243 fullp = self._subpath(fn)
1244 1244 # visitchildrenset isn't perfect, we still need to call the regular
1245 1245 # matcher code to further filter results.
1246 1246 if not match(fullp):
1247 1247 continue
1248 1248 ret._files[fn] = self._files[fn]
1249 1249 if fn in self._flags:
1250 1250 ret._flags[fn] = self._flags[fn]
1251 1251
1252 1252 visit = self._loadchildrensetlazy(visit)
1253 1253 for dir, subm in pycompat.iteritems(self._dirs):
1254 1254 if visit and dir[:-1] not in visit:
1255 1255 continue
1256 1256 m = subm._matches_inner(match)
1257 1257 if not m._isempty():
1258 1258 ret._dirs[dir] = m
1259 1259
1260 1260 if not ret._isempty():
1261 1261 ret._dirty = True
1262 1262 return ret
1263 1263
1264 1264 def fastdelta(self, base, changes):
1265 1265 raise FastdeltaUnavailable()
1266 1266
1267 1267 def diff(self, m2, match=None, clean=False):
1268 1268 """Finds changes between the current manifest and m2.
1269 1269
1270 1270 Args:
1271 1271 m2: the manifest to which this manifest should be compared.
1272 1272 clean: if true, include files unchanged between these manifests
1273 1273 with a None value in the returned dictionary.
1274 1274
1275 1275 The result is returned as a dict with filename as key and
1276 1276 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1277 1277 nodeid in the current/other manifest and fl1/fl2 is the flag
1278 1278 in the current/other manifest. Where the file does not exist,
1279 1279 the nodeid will be None and the flags will be the empty
1280 1280 string.
1281 1281 """
1282 1282 if match and not match.always():
1283 1283 m1 = self._matches(match)
1284 1284 m2 = m2._matches(match)
1285 1285 return m1.diff(m2, clean=clean)
1286 1286 result = {}
1287 1287 emptytree = treemanifest(self.nodeconstants)
1288 1288
1289 1289 def _iterativediff(t1, t2, stack):
1290 1290 """compares two tree manifests and append new tree-manifests which
1291 1291 needs to be compared to stack"""
1292 1292 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1293 1293 return
1294 1294 t1._load()
1295 1295 t2._load()
1296 1296 self._loaddifflazy(t1, t2)
1297 1297
1298 1298 for d, m1 in pycompat.iteritems(t1._dirs):
1299 1299 m2 = t2._dirs.get(d, emptytree)
1300 1300 stack.append((m1, m2))
1301 1301
1302 1302 for d, m2 in pycompat.iteritems(t2._dirs):
1303 1303 if d not in t1._dirs:
1304 1304 stack.append((emptytree, m2))
1305 1305
1306 1306 for fn, n1 in pycompat.iteritems(t1._files):
1307 1307 fl1 = t1._flags.get(fn, b'')
1308 1308 n2 = t2._files.get(fn, None)
1309 1309 fl2 = t2._flags.get(fn, b'')
1310 1310 if n1 != n2 or fl1 != fl2:
1311 1311 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1312 1312 elif clean:
1313 1313 result[t1._subpath(fn)] = None
1314 1314
1315 1315 for fn, n2 in pycompat.iteritems(t2._files):
1316 1316 if fn not in t1._files:
1317 1317 fl2 = t2._flags.get(fn, b'')
1318 1318 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1319 1319
1320 1320 stackls = []
1321 1321 _iterativediff(self, m2, stackls)
1322 1322 while stackls:
1323 1323 t1, t2 = stackls.pop()
1324 1324 # stackls is populated in the function call
1325 1325 _iterativediff(t1, t2, stackls)
1326 1326 return result
1327 1327
1328 1328 def unmodifiedsince(self, m2):
1329 1329 return not self._dirty and not m2._dirty and self._node == m2._node
1330 1330
1331 1331 def parse(self, text, readsubtree):
1332 1332 selflazy = self._lazydirs
1333 1333 for f, n, fl in _parse(self._nodelen, text):
1334 1334 if fl == b't':
1335 1335 f = f + b'/'
1336 1336 # False below means "doesn't need to be copied" and can use the
1337 1337 # cached value from readsubtree directly.
1338 1338 selflazy[f] = (n, readsubtree, False)
1339 1339 elif b'/' in f:
1340 1340 # This is a flat manifest, so use __setitem__ and setflag rather
1341 1341 # than assigning directly to _files and _flags, so we can
1342 1342 # assign a path in a subdirectory, and to mark dirty (compared
1343 1343 # to nullid).
1344 1344 self[f] = n
1345 1345 if fl:
1346 1346 self.setflag(f, fl)
1347 1347 else:
1348 1348 # Assigning to _files and _flags avoids marking as dirty,
1349 1349 # and should be a little faster.
1350 1350 self._files[f] = n
1351 1351 if fl:
1352 1352 self._flags[f] = fl
1353 1353
1354 1354 def text(self):
1355 1355 """Get the full data of this manifest as a bytestring."""
1356 1356 self._load()
1357 1357 return _text(self.iterentries())
1358 1358
1359 1359 def dirtext(self):
1360 1360 """Get the full data of this directory as a bytestring. Make sure that
1361 1361 any submanifests have been written first, so their nodeids are correct.
1362 1362 """
1363 1363 self._load()
1364 1364 flags = self.flags
1365 1365 lazydirs = [
1366 1366 (d[:-1], v[0], b't') for d, v in pycompat.iteritems(self._lazydirs)
1367 1367 ]
1368 1368 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1369 1369 files = [(f, self._files[f], flags(f)) for f in self._files]
1370 1370 return _text(sorted(dirs + files + lazydirs))
1371 1371
1372 1372 def read(self, gettext, readsubtree):
1373 1373 def _load_for_read(s):
1374 1374 s.parse(gettext(), readsubtree)
1375 1375 s._dirty = False
1376 1376
1377 1377 self._loadfunc = _load_for_read
1378 1378
1379 1379 def writesubtrees(self, m1, m2, writesubtree, match):
1380 1380 self._load() # for consistency; should never have any effect here
1381 1381 m1._load()
1382 1382 m2._load()
1383 1383 emptytree = treemanifest(self.nodeconstants)
1384 1384
1385 1385 def getnode(m, d):
1386 1386 ld = m._lazydirs.get(d)
1387 1387 if ld:
1388 1388 return ld[0]
1389 1389 return m._dirs.get(d, emptytree)._node
1390 1390
1391 1391 # let's skip investigating things that `match` says we do not need.
1392 1392 visit = match.visitchildrenset(self._dir[:-1])
1393 1393 visit = self._loadchildrensetlazy(visit)
1394 1394 if visit == b'this' or visit == b'all':
1395 1395 visit = None
1396 1396 for d, subm in pycompat.iteritems(self._dirs):
1397 1397 if visit and d[:-1] not in visit:
1398 1398 continue
1399 1399 subp1 = getnode(m1, d)
1400 1400 subp2 = getnode(m2, d)
1401 1401 if subp1 == self.nodeconstants.nullid:
1402 1402 subp1, subp2 = subp2, subp1
1403 1403 writesubtree(subm, subp1, subp2, match)
1404 1404
1405 1405 def walksubtrees(self, matcher=None):
1406 1406 """Returns an iterator of the subtrees of this manifest, including this
1407 1407 manifest itself.
1408 1408
1409 1409 If `matcher` is provided, it only returns subtrees that match.
1410 1410 """
1411 1411 if matcher and not matcher.visitdir(self._dir[:-1]):
1412 1412 return
1413 1413 if not matcher or matcher(self._dir[:-1]):
1414 1414 yield self
1415 1415
1416 1416 self._load()
1417 1417 # OPT: use visitchildrenset to avoid loading everything.
1418 1418 self._loadalllazy()
1419 1419 for d, subm in pycompat.iteritems(self._dirs):
1420 1420 for subtree in subm.walksubtrees(matcher=matcher):
1421 1421 yield subtree
1422 1422
1423 1423
1424 1424 class manifestfulltextcache(util.lrucachedict):
1425 1425 """File-backed LRU cache for the manifest cache
1426 1426
1427 1427 File consists of entries, up to EOF:
1428 1428
1429 1429 - 20 bytes node, 4 bytes length, <length> manifest data
1430 1430
1431 1431 These are written in reverse cache order (oldest to newest).
1432 1432
1433 1433 """
1434 1434
1435 1435 _file = b'manifestfulltextcache'
1436 1436
1437 1437 def __init__(self, max):
1438 1438 super(manifestfulltextcache, self).__init__(max)
1439 1439 self._dirty = False
1440 1440 self._read = False
1441 1441 self._opener = None
1442 1442
1443 1443 def read(self):
1444 1444 if self._read or self._opener is None:
1445 1445 return
1446 1446
1447 1447 try:
1448 1448 with self._opener(self._file) as fp:
1449 1449 set = super(manifestfulltextcache, self).__setitem__
1450 1450 # ignore trailing data, this is a cache, corruption is skipped
1451 1451 while True:
1452 1452 # TODO do we need to do work here for sha1 portability?
1453 1453 node = fp.read(20)
1454 1454 if len(node) < 20:
1455 1455 break
1456 1456 try:
1457 1457 size = struct.unpack(b'>L', fp.read(4))[0]
1458 1458 except struct.error:
1459 1459 break
1460 1460 value = bytearray(fp.read(size))
1461 1461 if len(value) != size:
1462 1462 break
1463 1463 set(node, value)
1464 1464 except IOError:
1465 1465 # the file is allowed to be missing
1466 1466 pass
1467 1467
1468 1468 self._read = True
1469 1469 self._dirty = False
1470 1470
1471 1471 def write(self):
1472 1472 if not self._dirty or self._opener is None:
1473 1473 return
1474 1474 # rotate backwards to the first used node
1475 1475 try:
1476 1476 with self._opener(
1477 1477 self._file, b'w', atomictemp=True, checkambig=True
1478 1478 ) as fp:
1479 1479 node = self._head.prev
1480 1480 while True:
1481 1481 if node.key in self._cache:
1482 1482 fp.write(node.key)
1483 1483 fp.write(struct.pack(b'>L', len(node.value)))
1484 1484 fp.write(node.value)
1485 1485 if node is self._head:
1486 1486 break
1487 1487 node = node.prev
1488 1488 except IOError:
1489 1489 # We could not write the cache (e.g.: permission error),
1490 1490 # so its content may simply be missing.
1491 1491 #
1492 1492 # We could try harder and see if we could recreate a wcache
1493 1493 # directory where we could write to.
1494 1494 #
1495 1495 # XXX the error passes silently; having some way to issue an error
1496 1496 # log via `ui.log` would be nice.
1497 1497 pass
1498 1498
1499 1499 def __len__(self):
1500 1500 if not self._read:
1501 1501 self.read()
1502 1502 return super(manifestfulltextcache, self).__len__()
1503 1503
1504 1504 def __contains__(self, k):
1505 1505 if not self._read:
1506 1506 self.read()
1507 1507 return super(manifestfulltextcache, self).__contains__(k)
1508 1508
1509 1509 def __iter__(self):
1510 1510 if not self._read:
1511 1511 self.read()
1512 1512 return super(manifestfulltextcache, self).__iter__()
1513 1513
1514 1514 def __getitem__(self, k):
1515 1515 if not self._read:
1516 1516 self.read()
1517 1517 # the cache lru order can change on read
1518 1518 setdirty = self._cache.get(k) is not self._head
1519 1519 value = super(manifestfulltextcache, self).__getitem__(k)
1520 1520 if setdirty:
1521 1521 self._dirty = True
1522 1522 return value
1523 1523
1524 1524 def __setitem__(self, k, v):
1525 1525 if not self._read:
1526 1526 self.read()
1527 1527 super(manifestfulltextcache, self).__setitem__(k, v)
1528 1528 self._dirty = True
1529 1529
1530 1530 def __delitem__(self, k):
1531 1531 if not self._read:
1532 1532 self.read()
1533 1533 super(manifestfulltextcache, self).__delitem__(k)
1534 1534 self._dirty = True
1535 1535
1536 1536 def get(self, k, default=None):
1537 1537 if not self._read:
1538 1538 self.read()
1539 1539 return super(manifestfulltextcache, self).get(k, default=default)
1540 1540
1541 1541 def clear(self, clear_persisted_data=False):
1542 1542 super(manifestfulltextcache, self).clear()
1543 1543 if clear_persisted_data:
1544 1544 self._dirty = True
1545 1545 self.write()
1546 1546 self._read = False
1547 1547
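# A minimal sketch of the cache-file entry layout described in the
# manifestfulltextcache docstring above: a 20-byte nodeid, a 4-byte
# big-endian length, then the manifest data itself.  The two helpers
# below are illustrative only (they are not part of this module) and
# reuse the `struct` module already used by read() and write().
def _example_pack_cache_entry(node, data):
    # `node` is assumed to be a 20-byte binary nodeid and `data` the
    # full manifest text cached for that node
    assert len(node) == 20
    return node + struct.pack(b'>L', len(data)) + data


def _example_unpack_cache_entries(blob):
    # yield (node, data) pairs until the blob is exhausted; truncated
    # trailing data is silently ignored, mirroring the lenient read()
    offset = 0
    while offset + 24 <= len(blob):
        node = blob[offset : offset + 20]
        (size,) = struct.unpack(b'>L', blob[offset + 20 : offset + 24])
        data = blob[offset + 24 : offset + 24 + size]
        if len(data) != size:
            break
        yield node, data
        offset += 24 + size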
1548 1548
1549 1549 # an upper bound of what we expect from compression
1550 1550 # (the real-life value seems to be "3")
1551 1551 MAXCOMPRESSION = 3
1552 1552
1553 1553
1554 1554 class FastdeltaUnavailable(Exception):
1555 1555 """Exception raised when fastdelta isn't usable on a manifest."""
1556 1556
1557 1557
1558 1558 @interfaceutil.implementer(repository.imanifeststorage)
1559 1559 class manifestrevlog(object):
1560 1560 """A revlog that stores manifest texts. This is responsible for caching the
1561 1561 full-text manifest contents.
1562 1562 """
1563 1563
1564 1564 def __init__(
1565 1565 self,
1566 1566 nodeconstants,
1567 1567 opener,
1568 1568 tree=b'',
1569 1569 dirlogcache=None,
1570 1570 indexfile=None,
1571 1571 treemanifest=False,
1572 1572 ):
1573 1573 """Constructs a new manifest revlog
1574 1574
1575 1575 `indexfile` - used by extensions to have two manifests at once, like
1576 1576 when transitioning between flat manifests and treemanifests.
1577 1577
1578 1578 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1579 1579 options can also be used to make this a tree manifest revlog. The opener
1580 1580 option takes precedence, so if it is set to True, we ignore whatever
1581 1581 value is passed in to the constructor.
1582 1582 """
1583 1583 self.nodeconstants = nodeconstants
1584 1584 # During normal operations, we expect to deal with not more than four
1585 1585 # revs at a time (such as during commit --amend). When rebasing large
1586 1586 # stacks of commits, the number can go up, hence the config knob below.
1587 1587 cachesize = 4
1588 1588 optiontreemanifest = False
1589 1589 opts = getattr(opener, 'options', None)
1590 1590 if opts is not None:
1591 1591 cachesize = opts.get(b'manifestcachesize', cachesize)
1592 1592 optiontreemanifest = opts.get(b'treemanifest', False)
1593 1593
1594 1594 self._treeondisk = optiontreemanifest or treemanifest
1595 1595
1596 1596 self._fulltextcache = manifestfulltextcache(cachesize)
1597 1597
1598 1598 if tree:
1599 1599 assert self._treeondisk, b'opts is %r' % opts
1600 1600
1601 1601 if indexfile is None:
1602 1602 indexfile = b'00manifest.i'
1603 1603 if tree:
1604 1604 indexfile = b"meta/" + tree + indexfile
1605 1605
1606 1606 self.tree = tree
1607 1607
1608 1608 # The dirlogcache is kept on the root manifest log
1609 1609 if tree:
1610 1610 self._dirlogcache = dirlogcache
1611 1611 else:
1612 1612 self._dirlogcache = {b'': self}
1613 1613
1614 1614 self._revlog = revlog.revlog(
1615 1615 opener,
1616 1616 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1617 1617 indexfile=indexfile,
1618 1618 # only root indexfile is cached
1619 1619 checkambig=not bool(tree),
1620 1620 mmaplargeindex=True,
1621 1621 upperboundcomp=MAXCOMPRESSION,
1622 1622 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1623 1623 )
1624 1624
1625 1625 self.index = self._revlog.index
1626 1626 self.version = self._revlog.version
1627 1627 self._generaldelta = self._revlog._generaldelta
1628 self._revlog.revlog_kind = b'manifest'
1629 1628
1630 1629 def _setupmanifestcachehooks(self, repo):
1631 1630 """Persist the manifestfulltextcache on lock release"""
1632 1631 if not util.safehasattr(repo, b'_wlockref'):
1633 1632 return
1634 1633
1635 1634 self._fulltextcache._opener = repo.wcachevfs
1636 1635 if repo._currentlock(repo._wlockref) is None:
1637 1636 return
1638 1637
1639 1638 reporef = weakref.ref(repo)
1640 1639 manifestrevlogref = weakref.ref(self)
1641 1640
1642 1641 def persistmanifestcache(success):
1643 1642 # Repo is in an unknown state, do not persist.
1644 1643 if not success:
1645 1644 return
1646 1645
1647 1646 repo = reporef()
1648 1647 self = manifestrevlogref()
1649 1648 if repo is None or self is None:
1650 1649 return
1651 1650 if repo.manifestlog.getstorage(b'') is not self:
1652 1651 # there's a different manifest in play now, abort
1653 1652 return
1654 1653 self._fulltextcache.write()
1655 1654
1656 1655 repo._afterlock(persistmanifestcache)
1657 1656
1658 1657 @property
1659 1658 def fulltextcache(self):
1660 1659 return self._fulltextcache
1661 1660
1662 1661 def clearcaches(self, clear_persisted_data=False):
1663 1662 self._revlog.clearcaches()
1664 1663 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1665 1664 self._dirlogcache = {self.tree: self}
1666 1665
1667 1666 def dirlog(self, d):
1668 1667 if d:
1669 1668 assert self._treeondisk
1670 1669 if d not in self._dirlogcache:
1671 1670 mfrevlog = manifestrevlog(
1672 1671 self.nodeconstants,
1673 1672 self.opener,
1674 1673 d,
1675 1674 self._dirlogcache,
1676 1675 treemanifest=self._treeondisk,
1677 1676 )
1678 1677 self._dirlogcache[d] = mfrevlog
1679 1678 return self._dirlogcache[d]
1680 1679
1681 1680 def add(
1682 1681 self,
1683 1682 m,
1684 1683 transaction,
1685 1684 link,
1686 1685 p1,
1687 1686 p2,
1688 1687 added,
1689 1688 removed,
1690 1689 readtree=None,
1691 1690 match=None,
1692 1691 ):
1693 1692 """add some manifest entry into the manifest log
1694 1693
1695 1694 input:
1696 1695
1697 1696 m: the manifest dict we want to store
1698 1697 transaction: the open transaction
1699 1698 p1: manifest-node of p1
1700 1699 p2: manifest-node of p2
1701 1700 added: file added/changed compared to parent
1702 1701 removed: file removed compared to parent
1703 1702
1704 1703 tree manifest input:
1705 1704
1706 1705 readtree: a function to read a subtree
1707 1706 match: a filematcher for the subpart of the tree manifest
1708 1707 """
1709 1708 try:
1710 1709 if p1 not in self.fulltextcache:
1711 1710 raise FastdeltaUnavailable()
1712 1711 # If our first parent is in the manifest cache, we can
1713 1712 # compute a delta here using properties we know about the
1714 1713 # manifest up-front, which may save time later for the
1715 1714 # revlog layer.
1716 1715
1717 1716 _checkforbidden(added)
1718 1717 # combine the changed lists into one sorted iterator
1719 1718 work = heapq.merge(
1720 1719 [(x, False) for x in sorted(added)],
1721 1720 [(x, True) for x in sorted(removed)],
1722 1721 )
1723 1722
1724 1723 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1725 1724 cachedelta = self._revlog.rev(p1), deltatext
1726 1725 text = util.buffer(arraytext)
1727 1726 rev = self._revlog.addrevision(
1728 1727 text, transaction, link, p1, p2, cachedelta
1729 1728 )
1730 1729 n = self._revlog.node(rev)
1731 1730 except FastdeltaUnavailable:
1732 1731 # The first parent manifest isn't already loaded or the
1733 1732 # manifest implementation doesn't support fastdelta, so
1734 1733 # we'll just encode a fulltext of the manifest and pass
1735 1734 # that through to the revlog layer, and let it handle the
1736 1735 # delta process.
1737 1736 if self._treeondisk:
1738 1737 assert readtree, b"readtree must be set for treemanifest writes"
1739 1738 assert match, b"match must be specified for treemanifest writes"
1740 1739 m1 = readtree(self.tree, p1)
1741 1740 m2 = readtree(self.tree, p2)
1742 1741 n = self._addtree(
1743 1742 m, transaction, link, m1, m2, readtree, match=match
1744 1743 )
1745 1744 arraytext = None
1746 1745 else:
1747 1746 text = m.text()
1748 1747 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1749 1748 n = self._revlog.node(rev)
1750 1749 arraytext = bytearray(text)
1751 1750
1752 1751 if arraytext is not None:
1753 1752 self.fulltextcache[n] = arraytext
1754 1753
1755 1754 return n
1756 1755
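    # A small illustrative helper (not used by Mercurial itself) showing the
    # shape of the `work` iterator built in add() above: two pre-sorted
    # streams, (filename, False) for added/changed files and (filename, True)
    # for removed files, merged into a single stream sorted by filename,
    # which is the form the fastdelta() call above consumes.
    @staticmethod
    def _example_fastdelta_worklist(added, removed):
        # e.g. added={b'src/a.py'}, removed={b'docs/old.txt'} yields
        # (b'docs/old.txt', True) and then (b'src/a.py', False)
        return heapq.merge(
            [(x, False) for x in sorted(added)],
            [(x, True) for x in sorted(removed)],
        )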
1757 1756 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1758 1757 # If the manifest is unchanged compared to one parent,
1759 1758 # don't write a new revision
1760 1759 if self.tree != b'' and (
1761 1760 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1762 1761 ):
1763 1762 return m.node()
1764 1763
1765 1764 def writesubtree(subm, subp1, subp2, match):
1766 1765 sublog = self.dirlog(subm.dir())
1767 1766 sublog.add(
1768 1767 subm,
1769 1768 transaction,
1770 1769 link,
1771 1770 subp1,
1772 1771 subp2,
1773 1772 None,
1774 1773 None,
1775 1774 readtree=readtree,
1776 1775 match=match,
1777 1776 )
1778 1777
1779 1778 m.writesubtrees(m1, m2, writesubtree, match)
1780 1779 text = m.dirtext()
1781 1780 n = None
1782 1781 if self.tree != b'':
1783 1782 # Double-check whether contents are unchanged compared to one parent
1784 1783 if text == m1.dirtext():
1785 1784 n = m1.node()
1786 1785 elif text == m2.dirtext():
1787 1786 n = m2.node()
1788 1787
1789 1788 if not n:
1790 1789 rev = self._revlog.addrevision(
1791 1790 text, transaction, link, m1.node(), m2.node()
1792 1791 )
1793 1792 n = self._revlog.node(rev)
1794 1793
1795 1794 # Save nodeid so parent manifest can calculate its nodeid
1796 1795 m.setnode(n)
1797 1796 return n
1798 1797
1799 1798 def __len__(self):
1800 1799 return len(self._revlog)
1801 1800
1802 1801 def __iter__(self):
1803 1802 return self._revlog.__iter__()
1804 1803
1805 1804 def rev(self, node):
1806 1805 return self._revlog.rev(node)
1807 1806
1808 1807 def node(self, rev):
1809 1808 return self._revlog.node(rev)
1810 1809
1811 1810 def lookup(self, value):
1812 1811 return self._revlog.lookup(value)
1813 1812
1814 1813 def parentrevs(self, rev):
1815 1814 return self._revlog.parentrevs(rev)
1816 1815
1817 1816 def parents(self, node):
1818 1817 return self._revlog.parents(node)
1819 1818
1820 1819 def linkrev(self, rev):
1821 1820 return self._revlog.linkrev(rev)
1822 1821
1823 1822 def checksize(self):
1824 1823 return self._revlog.checksize()
1825 1824
1826 1825 def revision(self, node, _df=None, raw=False):
1827 1826 return self._revlog.revision(node, _df=_df, raw=raw)
1828 1827
1829 1828 def rawdata(self, node, _df=None):
1830 1829 return self._revlog.rawdata(node, _df=_df)
1831 1830
1832 1831 def revdiff(self, rev1, rev2):
1833 1832 return self._revlog.revdiff(rev1, rev2)
1834 1833
1835 1834 def cmp(self, node, text):
1836 1835 return self._revlog.cmp(node, text)
1837 1836
1838 1837 def deltaparent(self, rev):
1839 1838 return self._revlog.deltaparent(rev)
1840 1839
1841 1840 def emitrevisions(
1842 1841 self,
1843 1842 nodes,
1844 1843 nodesorder=None,
1845 1844 revisiondata=False,
1846 1845 assumehaveparentrevisions=False,
1847 1846 deltamode=repository.CG_DELTAMODE_STD,
1848 1847 sidedata_helpers=None,
1849 1848 ):
1850 1849 return self._revlog.emitrevisions(
1851 1850 nodes,
1852 1851 nodesorder=nodesorder,
1853 1852 revisiondata=revisiondata,
1854 1853 assumehaveparentrevisions=assumehaveparentrevisions,
1855 1854 deltamode=deltamode,
1856 1855 sidedata_helpers=sidedata_helpers,
1857 1856 )
1858 1857
1859 1858 def addgroup(
1860 1859 self,
1861 1860 deltas,
1862 1861 linkmapper,
1863 1862 transaction,
1864 1863 alwayscache=False,
1865 1864 addrevisioncb=None,
1866 1865 duplicaterevisioncb=None,
1867 1866 ):
1868 1867 return self._revlog.addgroup(
1869 1868 deltas,
1870 1869 linkmapper,
1871 1870 transaction,
1872 1871 alwayscache=alwayscache,
1873 1872 addrevisioncb=addrevisioncb,
1874 1873 duplicaterevisioncb=duplicaterevisioncb,
1875 1874 )
1876 1875
1877 1876 def rawsize(self, rev):
1878 1877 return self._revlog.rawsize(rev)
1879 1878
1880 1879 def getstrippoint(self, minlink):
1881 1880 return self._revlog.getstrippoint(minlink)
1882 1881
1883 1882 def strip(self, minlink, transaction):
1884 1883 return self._revlog.strip(minlink, transaction)
1885 1884
1886 1885 def files(self):
1887 1886 return self._revlog.files()
1888 1887
1889 1888 def clone(self, tr, destrevlog, **kwargs):
1890 1889 if not isinstance(destrevlog, manifestrevlog):
1891 1890 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1892 1891
1893 1892 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1894 1893
1895 1894 def storageinfo(
1896 1895 self,
1897 1896 exclusivefiles=False,
1898 1897 sharedfiles=False,
1899 1898 revisionscount=False,
1900 1899 trackedsize=False,
1901 1900 storedsize=False,
1902 1901 ):
1903 1902 return self._revlog.storageinfo(
1904 1903 exclusivefiles=exclusivefiles,
1905 1904 sharedfiles=sharedfiles,
1906 1905 revisionscount=revisionscount,
1907 1906 trackedsize=trackedsize,
1908 1907 storedsize=storedsize,
1909 1908 )
1910 1909
1911 1910 @property
1912 1911 def indexfile(self):
1913 1912 return self._revlog.indexfile
1914 1913
1915 1914 @indexfile.setter
1916 1915 def indexfile(self, value):
1917 1916 self._revlog.indexfile = value
1918 1917
1919 1918 @property
1920 1919 def opener(self):
1921 1920 return self._revlog.opener
1922 1921
1923 1922 @opener.setter
1924 1923 def opener(self, value):
1925 1924 self._revlog.opener = value
1926 1925
1927 1926
1928 1927 @interfaceutil.implementer(repository.imanifestlog)
1929 1928 class manifestlog(object):
1930 1929 """A collection class representing the collection of manifest snapshots
1931 1930 referenced by commits in the repository.
1932 1931
1933 1932 In this situation, 'manifest' refers to the abstract concept of a snapshot
1934 1933 of the list of files in the given commit. Consumers of the output of this
1935 1934 class do not care about the implementation details of the actual manifests
1936 1935 they receive (i.e. tree or flat or lazily loaded, etc)."""
1937 1936
1938 1937 def __init__(self, opener, repo, rootstore, narrowmatch):
1939 1938 self.nodeconstants = repo.nodeconstants
1940 1939 usetreemanifest = False
1941 1940 cachesize = 4
1942 1941
1943 1942 opts = getattr(opener, 'options', None)
1944 1943 if opts is not None:
1945 1944 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1946 1945 cachesize = opts.get(b'manifestcachesize', cachesize)
1947 1946
1948 1947 self._treemanifests = usetreemanifest
1949 1948
1950 1949 self._rootstore = rootstore
1951 1950 self._rootstore._setupmanifestcachehooks(repo)
1952 1951 self._narrowmatch = narrowmatch
1953 1952
1954 1953 # A cache of the manifestctx or treemanifestctx for each directory
1955 1954 self._dirmancache = {}
1956 1955 self._dirmancache[b''] = util.lrucachedict(cachesize)
1957 1956
1958 1957 self._cachesize = cachesize
1959 1958
1960 1959 def __getitem__(self, node):
1961 1960 """Retrieves the manifest instance for the given node. Throws a
1962 1961 LookupError if not found.
1963 1962 """
1964 1963 return self.get(b'', node)
1965 1964
1966 1965 def get(self, tree, node, verify=True):
1967 1966 """Retrieves the manifest instance for the given node. Throws a
1968 1967 LookupError if not found.
1969 1968
1970 1969 `verify` - if True an exception will be thrown if the node is not in
1971 1970 the revlog
1972 1971 """
1973 1972 if node in self._dirmancache.get(tree, ()):
1974 1973 return self._dirmancache[tree][node]
1975 1974
1976 1975 if not self._narrowmatch.always():
1977 1976 if not self._narrowmatch.visitdir(tree[:-1]):
1978 1977 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1979 1978 if tree:
1980 1979 if self._rootstore._treeondisk:
1981 1980 if verify:
1982 1981 # Side-effect is LookupError is raised if node doesn't
1983 1982 # exist.
1984 1983 self.getstorage(tree).rev(node)
1985 1984
1986 1985 m = treemanifestctx(self, tree, node)
1987 1986 else:
1988 1987 raise error.Abort(
1989 1988 _(
1990 1989 b"cannot ask for manifest directory '%s' in a flat "
1991 1990 b"manifest"
1992 1991 )
1993 1992 % tree
1994 1993 )
1995 1994 else:
1996 1995 if verify:
1997 1996 # Side-effect is LookupError is raised if node doesn't exist.
1998 1997 self._rootstore.rev(node)
1999 1998
2000 1999 if self._treemanifests:
2001 2000 m = treemanifestctx(self, b'', node)
2002 2001 else:
2003 2002 m = manifestctx(self, node)
2004 2003
2005 2004 if node != self.nodeconstants.nullid:
2006 2005 mancache = self._dirmancache.get(tree)
2007 2006 if not mancache:
2008 2007 mancache = util.lrucachedict(self._cachesize)
2009 2008 self._dirmancache[tree] = mancache
2010 2009 mancache[node] = m
2011 2010 return m
2012 2011
2013 2012 def getstorage(self, tree):
2014 2013 return self._rootstore.dirlog(tree)
2015 2014
2016 2015 def clearcaches(self, clear_persisted_data=False):
2017 2016 self._dirmancache.clear()
2018 2017 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2019 2018
2020 2019 def rev(self, node):
2021 2020 return self._rootstore.rev(node)
2022 2021
2023 2022 def update_caches(self, transaction):
2024 2023 return self._rootstore._revlog.update_caches(transaction=transaction)
2025 2024
2026 2025
2027 2026 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2028 2027 class memmanifestctx(object):
2029 2028 def __init__(self, manifestlog):
2030 2029 self._manifestlog = manifestlog
2031 2030 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2032 2031
2033 2032 def _storage(self):
2034 2033 return self._manifestlog.getstorage(b'')
2035 2034
2036 2035 def copy(self):
2037 2036 memmf = memmanifestctx(self._manifestlog)
2038 2037 memmf._manifestdict = self.read().copy()
2039 2038 return memmf
2040 2039
2041 2040 def read(self):
2042 2041 return self._manifestdict
2043 2042
2044 2043 def write(self, transaction, link, p1, p2, added, removed, match=None):
2045 2044 return self._storage().add(
2046 2045 self._manifestdict,
2047 2046 transaction,
2048 2047 link,
2049 2048 p1,
2050 2049 p2,
2051 2050 added,
2052 2051 removed,
2053 2052 match=match,
2054 2053 )
2055 2054
2056 2055
2057 2056 @interfaceutil.implementer(repository.imanifestrevisionstored)
2058 2057 class manifestctx(object):
2059 2058 """A class representing a single revision of a manifest, including its
2060 2059 contents, its parent revs, and its linkrev.
2061 2060 """
2062 2061
2063 2062 def __init__(self, manifestlog, node):
2064 2063 self._manifestlog = manifestlog
2065 2064 self._data = None
2066 2065
2067 2066 self._node = node
2068 2067
2069 2068 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2070 2069 # but let's add it later when something needs it and we can load it
2071 2070 # lazily.
2072 2071 # self.p1, self.p2 = store.parents(node)
2073 2072 # rev = store.rev(node)
2074 2073 # self.linkrev = store.linkrev(rev)
2075 2074
2076 2075 def _storage(self):
2077 2076 return self._manifestlog.getstorage(b'')
2078 2077
2079 2078 def node(self):
2080 2079 return self._node
2081 2080
2082 2081 def copy(self):
2083 2082 memmf = memmanifestctx(self._manifestlog)
2084 2083 memmf._manifestdict = self.read().copy()
2085 2084 return memmf
2086 2085
2087 2086 @propertycache
2088 2087 def parents(self):
2089 2088 return self._storage().parents(self._node)
2090 2089
2091 2090 def read(self):
2092 2091 if self._data is None:
2093 2092 nc = self._manifestlog.nodeconstants
2094 2093 if self._node == nc.nullid:
2095 2094 self._data = manifestdict(nc.nodelen)
2096 2095 else:
2097 2096 store = self._storage()
2098 2097 if self._node in store.fulltextcache:
2099 2098 text = pycompat.bytestr(store.fulltextcache[self._node])
2100 2099 else:
2101 2100 text = store.revision(self._node)
2102 2101 arraytext = bytearray(text)
2103 2102 store.fulltextcache[self._node] = arraytext
2104 2103 self._data = manifestdict(nc.nodelen, text)
2105 2104 return self._data
2106 2105
2107 2106 def readfast(self, shallow=False):
2108 2107 """Calls either readdelta or read, based on which would be less work.
2109 2108 readdelta is called if the delta is against the p1, and therefore can be
2110 2109 read quickly.
2111 2110
2112 2111 If `shallow` is True, nothing changes since this is a flat manifest.
2113 2112 """
2114 2113 store = self._storage()
2115 2114 r = store.rev(self._node)
2116 2115 deltaparent = store.deltaparent(r)
2117 2116 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2118 2117 return self.readdelta()
2119 2118 return self.read()
2120 2119
2121 2120 def readdelta(self, shallow=False):
2122 2121 """Returns a manifest containing just the entries that are present
2123 2122 in this manifest, but not in its p1 manifest. This is efficient to read
2124 2123 if the revlog delta is already p1.
2125 2124
2126 2125 Changing the value of `shallow` has no effect on flat manifests.
2127 2126 """
2128 2127 store = self._storage()
2129 2128 r = store.rev(self._node)
2130 2129 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2131 2130 return manifestdict(store.nodeconstants.nodelen, d)
2132 2131
2133 2132 def find(self, key):
2134 2133 return self.read().find(key)
2135 2134
2136 2135
2137 2136 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2138 2137 class memtreemanifestctx(object):
2139 2138 def __init__(self, manifestlog, dir=b''):
2140 2139 self._manifestlog = manifestlog
2141 2140 self._dir = dir
2142 2141 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2143 2142
2144 2143 def _storage(self):
2145 2144 return self._manifestlog.getstorage(b'')
2146 2145
2147 2146 def copy(self):
2148 2147 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2149 2148 memmf._treemanifest = self._treemanifest.copy()
2150 2149 return memmf
2151 2150
2152 2151 def read(self):
2153 2152 return self._treemanifest
2154 2153
2155 2154 def write(self, transaction, link, p1, p2, added, removed, match=None):
2156 2155 def readtree(dir, node):
2157 2156 return self._manifestlog.get(dir, node).read()
2158 2157
2159 2158 return self._storage().add(
2160 2159 self._treemanifest,
2161 2160 transaction,
2162 2161 link,
2163 2162 p1,
2164 2163 p2,
2165 2164 added,
2166 2165 removed,
2167 2166 readtree=readtree,
2168 2167 match=match,
2169 2168 )
2170 2169
2171 2170
2172 2171 @interfaceutil.implementer(repository.imanifestrevisionstored)
2173 2172 class treemanifestctx(object):
2174 2173 def __init__(self, manifestlog, dir, node):
2175 2174 self._manifestlog = manifestlog
2176 2175 self._dir = dir
2177 2176 self._data = None
2178 2177
2179 2178 self._node = node
2180 2179
2181 2180 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2182 2181 # we can instantiate treemanifestctx objects for directories we don't
2183 2182 # have on disk.
2184 2183 # self.p1, self.p2 = store.parents(node)
2185 2184 # rev = store.rev(node)
2186 2185 # self.linkrev = store.linkrev(rev)
2187 2186
2188 2187 def _storage(self):
2189 2188 narrowmatch = self._manifestlog._narrowmatch
2190 2189 if not narrowmatch.always():
2191 2190 if not narrowmatch.visitdir(self._dir[:-1]):
2192 2191 return excludedmanifestrevlog(
2193 2192 self._manifestlog.nodeconstants, self._dir
2194 2193 )
2195 2194 return self._manifestlog.getstorage(self._dir)
2196 2195
2197 2196 def read(self):
2198 2197 if self._data is None:
2199 2198 store = self._storage()
2200 2199 if self._node == self._manifestlog.nodeconstants.nullid:
2201 2200 self._data = treemanifest(self._manifestlog.nodeconstants)
2202 2201 # TODO accessing non-public API
2203 2202 elif store._treeondisk:
2204 2203 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2205 2204
2206 2205 def gettext():
2207 2206 return store.revision(self._node)
2208 2207
2209 2208 def readsubtree(dir, subm):
2210 2209 # Set verify to False since we need to be able to create
2211 2210 # subtrees for trees that don't exist on disk.
2212 2211 return self._manifestlog.get(dir, subm, verify=False).read()
2213 2212
2214 2213 m.read(gettext, readsubtree)
2215 2214 m.setnode(self._node)
2216 2215 self._data = m
2217 2216 else:
2218 2217 if self._node in store.fulltextcache:
2219 2218 text = pycompat.bytestr(store.fulltextcache[self._node])
2220 2219 else:
2221 2220 text = store.revision(self._node)
2222 2221 arraytext = bytearray(text)
2223 2222 store.fulltextcache[self._node] = arraytext
2224 2223 self._data = treemanifest(
2225 2224 self._manifestlog.nodeconstants, dir=self._dir, text=text
2226 2225 )
2227 2226
2228 2227 return self._data
2229 2228
2230 2229 def node(self):
2231 2230 return self._node
2232 2231
2233 2232 def copy(self):
2234 2233 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2235 2234 memmf._treemanifest = self.read().copy()
2236 2235 return memmf
2237 2236
2238 2237 @propertycache
2239 2238 def parents(self):
2240 2239 return self._storage().parents(self._node)
2241 2240
2242 2241 def readdelta(self, shallow=False):
2243 2242 """Returns a manifest containing just the entries that are present
2244 2243 in this manifest, but not in its p1 manifest. This is efficient to read
2245 2244 if the revlog delta is already p1.
2246 2245
2247 2246 If `shallow` is True, this will read the delta for this directory,
2248 2247 without recursively reading subdirectory manifests. Instead, any
2249 2248 subdirectory entry will be reported as it appears in the manifest, i.e.
2250 2249 the subdirectory will be reported among files and distinguished only by
2251 2250 its 't' flag.
2252 2251 """
2253 2252 store = self._storage()
2254 2253 if shallow:
2255 2254 r = store.rev(self._node)
2256 2255 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2257 2256 return manifestdict(store.nodeconstants.nodelen, d)
2258 2257 else:
2259 2258 # Need to perform a slow delta
2260 2259 r0 = store.deltaparent(store.rev(self._node))
2261 2260 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2262 2261 m1 = self.read()
2263 2262 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2264 2263 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2265 2264 if n1:
2266 2265 md[f] = n1
2267 2266 if fl1:
2268 2267 md.setflag(f, fl1)
2269 2268 return md
2270 2269
2271 2270 def readfast(self, shallow=False):
2272 2271 """Calls either readdelta or read, based on which would be less work.
2273 2272 readdelta is called if the delta is against the p1, and therefore can be
2274 2273 read quickly.
2275 2274
2276 2275 If `shallow` is True, it only returns the entries from this manifest,
2277 2276 and not any submanifests.
2278 2277 """
2279 2278 store = self._storage()
2280 2279 r = store.rev(self._node)
2281 2280 deltaparent = store.deltaparent(r)
2282 2281 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2283 2282 return self.readdelta(shallow=shallow)
2284 2283
2285 2284 if shallow:
2286 2285 return manifestdict(
2287 2286 store.nodeconstants.nodelen, store.revision(self._node)
2288 2287 )
2289 2288 else:
2290 2289 return self.read()
2291 2290
2292 2291 def find(self, key):
2293 2292 return self.read().find(key)
2294 2293
2295 2294
2296 2295 class excludeddir(treemanifest):
2297 2296 """Stand-in for a directory that is excluded from the repository.
2298 2297
2299 2298 With narrowing active on a repository that uses treemanifests,
2300 2299 some of the directory revlogs will be excluded from the resulting
2301 2300 clone. This is a huge storage win for clients, but means we need
2302 2301 some sort of pseudo-manifest to surface to internals so we can
2303 2302 detect a merge conflict outside the narrowspec. That's what this
2304 2303 class is: it stands in for a directory whose node is known, but
2305 2304 whose contents are unknown.
2306 2305 """
2307 2306
2308 2307 def __init__(self, nodeconstants, dir, node):
2309 2308 super(excludeddir, self).__init__(nodeconstants, dir)
2310 2309 self._node = node
2311 2310 # Add an empty file, which will be included by iterators and such,
2312 2311 # appearing as the directory itself (i.e. something like "dir/")
2313 2312 self._files[b''] = node
2314 2313 self._flags[b''] = b't'
2315 2314
2316 2315 # Manifests outside the narrowspec should never be modified, so avoid
2317 2316 # copying. This makes a noticeable difference when there are very many
2318 2317 # directories outside the narrowspec. Also, it makes sense for the copy to
2319 2318 # be of the same type as the original, which would not happen with the
2320 2319 # super type's copy().
2321 2320 def copy(self):
2322 2321 return self
2323 2322
2324 2323
2325 2324 class excludeddirmanifestctx(treemanifestctx):
2326 2325 """context wrapper for excludeddir - see that docstring for rationale"""
2327 2326
2328 2327 def __init__(self, nodeconstants, dir, node):
2329 2328 self.nodeconstants = nodeconstants
2330 2329 self._dir = dir
2331 2330 self._node = node
2332 2331
2333 2332 def read(self):
2334 2333 return excludeddir(self.nodeconstants, self._dir, self._node)
2335 2334
2336 2335 def readfast(self, shallow=False):
2337 2336 # special version of readfast since we don't have underlying storage
2338 2337 return self.read()
2339 2338
2340 2339 def write(self, *args):
2341 2340 raise error.ProgrammingError(
2342 2341 b'attempt to write manifest from excluded dir %s' % self._dir
2343 2342 )
2344 2343
2345 2344
2346 2345 class excludedmanifestrevlog(manifestrevlog):
2347 2346 """Stand-in for excluded treemanifest revlogs.
2348 2347
2349 2348 When narrowing is active on a treemanifest repository, we'll have
2350 2349 references to directories we can't see due to the revlog being
2351 2350 skipped. This class exists to conform to the manifestrevlog
2352 2351 interface for those directories and proactively prevent writes to
2353 2352 outside the narrowspec.
2354 2353 """
2355 2354
2356 2355 def __init__(self, nodeconstants, dir):
2357 2356 self.nodeconstants = nodeconstants
2358 2357 self._dir = dir
2359 2358
2360 2359 def __len__(self):
2361 2360 raise error.ProgrammingError(
2362 2361 b'attempt to get length of excluded dir %s' % self._dir
2363 2362 )
2364 2363
2365 2364 def rev(self, node):
2366 2365 raise error.ProgrammingError(
2367 2366 b'attempt to get rev from excluded dir %s' % self._dir
2368 2367 )
2369 2368
2370 2369 def linkrev(self, node):
2371 2370 raise error.ProgrammingError(
2372 2371 b'attempt to get linkrev from excluded dir %s' % self._dir
2373 2372 )
2374 2373
2375 2374 def node(self, rev):
2376 2375 raise error.ProgrammingError(
2377 2376 b'attempt to get node from excluded dir %s' % self._dir
2378 2377 )
2379 2378
2380 2379 def add(self, *args, **kwargs):
2381 2380 # We should never write entries in dirlogs outside the narrow clone.
2382 2381 # However, the method still gets called from writesubtree() in
2383 2382 # _addtree(), so we need to handle it. We should possibly make that
2384 2383 # avoid calling add() with a clean manifest (_dirty is always False
2385 2384 # in excludeddir instances).
2386 2385 pass
@@ -1,963 +1,964 b''
1 1 # coding: utf-8
2 2 # metadata.py -- code related to various metadata computation and access.
3 3 #
4 4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9 from __future__ import absolute_import, print_function
10 10
11 11 import multiprocessing
12 12 import struct
13 13
14 14 from .node import nullrev
15 15 from . import (
16 16 error,
17 17 pycompat,
18 18 requirements as requirementsmod,
19 19 util,
20 20 )
21 21
22 22 from .revlogutils import (
23 constants as revlogconst,
23 24 flagutil as sidedataflag,
24 25 sidedata as sidedatamod,
25 26 )
26 27
27 28
28 29 class ChangingFiles(object):
29 30 """A class recording the changes made to files by a changeset
30 31
31 32 Actions performed on files are gathered into 5 sets:
32 33
33 34 - added: files actively added in the changeset.
34 35 - merged: files whose history got merged
35 36 - removed: files removed in the revision
36 37 - salvaged: files that might have been deleted by a merge but were not
37 38 - touched: files affected by the merge
38 39
39 40 and copies information is held by 2 mappings
40 41
41 42 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
42 43 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
43 44
44 45 See their inline help for details.
45 46 """
46 47
47 48 def __init__(
48 49 self,
49 50 touched=None,
50 51 added=None,
51 52 removed=None,
52 53 merged=None,
53 54 salvaged=None,
54 55 p1_copies=None,
55 56 p2_copies=None,
56 57 ):
57 58 self._added = set(() if added is None else added)
58 59 self._merged = set(() if merged is None else merged)
59 60 self._removed = set(() if removed is None else removed)
60 61 self._touched = set(() if touched is None else touched)
61 62 self._salvaged = set(() if salvaged is None else salvaged)
62 63 self._touched.update(self._added)
63 64 self._touched.update(self._merged)
64 65 self._touched.update(self._removed)
65 66 self._p1_copies = dict(() if p1_copies is None else p1_copies)
66 67 self._p2_copies = dict(() if p2_copies is None else p2_copies)
67 68
68 69 def __eq__(self, other):
69 70 return (
70 71 self.added == other.added
71 72 and self.merged == other.merged
72 73 and self.removed == other.removed
73 74 and self.salvaged == other.salvaged
74 75 and self.touched == other.touched
75 76 and self.copied_from_p1 == other.copied_from_p1
76 77 and self.copied_from_p2 == other.copied_from_p2
77 78 )
78 79
79 80 @property
80 81 def has_copies_info(self):
81 82 return bool(
82 83 self.removed
83 84 or self.merged
84 85 or self.salvaged
85 86 or self.copied_from_p1
86 87 or self.copied_from_p2
87 88 )
88 89
89 90 @util.propertycache
90 91 def added(self):
91 92 """files actively added in the changeset
92 93
93 94 Any file present in that revision that was absent in all the changeset's
94 95 parents.
95 96
96 97 In case of merge, this means a file absent in one of the parents but
97 98 existing in the other will *not* be contained in this set. (They were
98 99 added by an ancestor)
99 100 """
100 101 return frozenset(self._added)
101 102
102 103 def mark_added(self, filename):
103 104 if 'added' in vars(self):
104 105 del self.added
105 106 self._added.add(filename)
106 107 self.mark_touched(filename)
107 108
108 109 def update_added(self, filenames):
109 110 for f in filenames:
110 111 self.mark_added(f)
111 112
112 113 @util.propertycache
113 114 def merged(self):
114 115 """files actively merged during a merge
115 116
116 117 Any modified files which had modifications on both sides that needed merging.
117 118
118 119 In this case a new filenode was created and it has two parents.
119 120 """
120 121 return frozenset(self._merged)
121 122
122 123 def mark_merged(self, filename):
123 124 if 'merged' in vars(self):
124 125 del self.merged
125 126 self._merged.add(filename)
126 127 self.mark_touched(filename)
127 128
128 129 def update_merged(self, filenames):
129 130 for f in filenames:
130 131 self.mark_merged(f)
131 132
132 133 @util.propertycache
133 134 def removed(self):
134 135 """files actively removed by the changeset
135 136
136 137 In case of merge this will only contain the set of files removing "new"
137 138 content. For any file absent in the current changeset:
138 139
139 140 a) If the file exists in both parents, it is clearly "actively" removed
140 141 by this changeset.
141 142
142 143 b) If a file exists in only one parent and in none of the common
143 144 ancestors, then the file was newly added in one of the merged branches
144 145 and then got "actively" removed.
145 146
146 147 c) If a file exists in only one parent and at least one of the common
147 148 ancestors using the same filenode, then the file was unchanged on one
148 149 side and deleted on the other side. The merge "passively" propagated
149 150 that deletion, but didn't "actively" remove the file. In this case the
150 151 file is *not* included in the `removed` set.
151 152
152 153 d) If a file exists in only one parent and at least one of the common
153 154 ancestors using a different filenode, then the file was changed on one
154 155 side and removed on the other side. The merge process "actively"
155 156 decided to drop the new change and delete the file. Unlike in the
156 157 previous case, (c), the file is included in the `removed` set.
157 158
158 159 Summary table for merge:
159 160
160 161 case | exists in parents | exists in gca || removed
161 162 (a) | both | * || yes
162 163 (b) | one | none || yes
163 164 (c) | one | same filenode || no
164 165 (d) | one | new filenode || yes
165 166 """
166 167 return frozenset(self._removed)
167 168
168 169 def mark_removed(self, filename):
169 170 if 'removed' in vars(self):
170 171 del self.removed
171 172 self._removed.add(filename)
172 173 self.mark_touched(filename)
173 174
174 175 def update_removed(self, filenames):
175 176 for f in filenames:
176 177 self.mark_removed(f)
177 178
178 179 @util.propertycache
179 180 def salvaged(self):
180 181 """files that might have been deleted by a merge, but still exist.
181 182
182 183 During a merge, the manifest merging might select some files for
183 184 removal, or for a removed/changed conflict. If at commit time the file
184 185 still exists, its removal was "reverted" and the file is "salvaged"
185 186 """
186 187 return frozenset(self._salvaged)
187 188
188 189 def mark_salvaged(self, filename):
189 190 if "salvaged" in vars(self):
190 191 del self.salvaged
191 192 self._salvaged.add(filename)
192 193 self.mark_touched(filename)
193 194
194 195 def update_salvaged(self, filenames):
195 196 for f in filenames:
196 197 self.mark_salvaged(f)
197 198
198 199 @util.propertycache
199 200 def touched(self):
200 201 """files either actively modified, added or removed"""
201 202 return frozenset(self._touched)
202 203
203 204 def mark_touched(self, filename):
204 205 if 'touched' in vars(self):
205 206 del self.touched
206 207 self._touched.add(filename)
207 208
208 209 def update_touched(self, filenames):
209 210 for f in filenames:
210 211 self.mark_touched(f)
211 212
212 213 @util.propertycache
213 214 def copied_from_p1(self):
214 215 return self._p1_copies.copy()
215 216
216 217 def mark_copied_from_p1(self, source, dest):
217 218 if 'copied_from_p1' in vars(self):
218 219 del self.copied_from_p1
219 220 self._p1_copies[dest] = source
220 221
221 222 def update_copies_from_p1(self, copies):
222 223 for dest, source in copies.items():
223 224 self.mark_copied_from_p1(source, dest)
224 225
225 226 @util.propertycache
226 227 def copied_from_p2(self):
227 228 return self._p2_copies.copy()
228 229
229 230 def mark_copied_from_p2(self, source, dest):
230 231 if 'copied_from_p2' in vars(self):
231 232 del self.copied_from_p2
232 233 self._p2_copies[dest] = source
233 234
234 235 def update_copies_from_p2(self, copies):
235 236 for dest, source in copies.items():
236 237 self.mark_copied_from_p2(source, dest)
237 238
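# A short usage sketch for ChangingFiles with made-up filenames: marking a
# file as added, merged, removed or salvaged also records it as "touched",
# while copy information is tracked separately, and the properties expose
# frozen snapshots of the accumulated data.  The helper below is
# illustrative only and is not called by Mercurial itself.
def _example_changing_files():
    files = ChangingFiles()
    files.mark_added(b'docs/new.txt')
    files.mark_removed(b'src/obsolete.py')
    files.mark_copied_from_p1(b'src/old.py', b'src/new.py')
    assert files.added == frozenset([b'docs/new.txt'])
    assert files.removed == frozenset([b'src/obsolete.py'])
    assert files.touched == frozenset([b'docs/new.txt', b'src/obsolete.py'])
    assert files.copied_from_p1 == {b'src/new.py': b'src/old.py'}
    return files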
238 239
239 240 def compute_all_files_changes(ctx):
240 241 """compute the files changed by a revision"""
241 242 p1 = ctx.p1()
242 243 p2 = ctx.p2()
243 244 if p1.rev() == nullrev and p2.rev() == nullrev:
244 245 return _process_root(ctx)
245 246 elif p1.rev() != nullrev and p2.rev() == nullrev:
246 247 return _process_linear(p1, ctx)
247 248 elif p1.rev() == nullrev and p2.rev() != nullrev:
248 249 # In the wild, one can encounter changesets where p1 is null but p2 is not
249 250 return _process_linear(p1, ctx, parent=2)
250 251 elif p1.rev() == p2.rev():
251 252 # In the wild, one can encounter such "non-merge"
252 253 return _process_linear(p1, ctx)
253 254 else:
254 255 return _process_merge(p1, p2, ctx)
255 256
256 257
257 258 def _process_root(ctx):
258 259 """compute the appropriate changed files for a changeset with no parents"""
259 260 # Simple, there was nothing before it, so everything is added.
260 261 md = ChangingFiles()
261 262 manifest = ctx.manifest()
262 263 for filename in manifest:
263 264 md.mark_added(filename)
264 265 return md
265 266
266 267
267 268 def _process_linear(parent_ctx, children_ctx, parent=1):
268 269 """compute the appropriate changed files for a changeset with a single parent"""
269 270 md = ChangingFiles()
270 271 parent_manifest = parent_ctx.manifest()
271 272 children_manifest = children_ctx.manifest()
272 273
273 274 copies_candidate = []
274 275
275 276 for filename, d in parent_manifest.diff(children_manifest).items():
276 277 if d[1][0] is None:
277 278 # no filenode for the "new" value, file is absent
278 279 md.mark_removed(filename)
279 280 else:
280 281 copies_candidate.append(filename)
281 282 if d[0][0] is None:
282 283 # no filenode for the "old" value, the file was absent
283 284 md.mark_added(filename)
284 285 else:
285 286 # filenode for both "old" and "new"
286 287 md.mark_touched(filename)
287 288
288 289 if parent == 1:
289 290 copied = md.mark_copied_from_p1
290 291 elif parent == 2:
291 292 copied = md.mark_copied_from_p2
292 293 else:
293 294 assert False, "bad parent value %d" % parent
294 295
295 296 for filename in copies_candidate:
296 297 copy_info = children_ctx[filename].renamed()
297 298 if copy_info:
298 299 source, srcnode = copy_info
299 300 copied(source, filename)
300 301
301 302 return md
302 303
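# The manifest diff entries consumed above have the shape
# {filename: ((old_node, old_flag), (new_node, new_flag))}, where a None
# node means the file is absent on that side.  A toy sketch of the
# classification performed by _process_linear() on one such entry
# (the helper is illustrative only):
def _example_classify_linear(diff_entry):
    (old_node, _old_flag), (new_node, _new_flag) = diff_entry
    if new_node is None:
        return b'removed'  # gone from the child revision
    if old_node is None:
        return b'added'  # absent from the parent, present in the child
    return b'touched'  # present on both sides, but with a new filenode

# e.g. _example_classify_linear(((None, b''), (b'\x11' * 20, b''))) == b'added'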
303 304
304 305 def _process_merge(p1_ctx, p2_ctx, ctx):
305 306 """compute the appropriate changed files for a changeset with two parents
306 307
307 308 This is a more advanced case. The information we need to record is summarised
308 309 in the following table:
309 310
310 311 β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
311 312 β”‚ diff β•² diff β”‚ ΓΈ β”‚ (Some, None) β”‚ (None, Some) β”‚ (Some, Some) β”‚
312 313 β”‚ p2 β•² p1 β”‚ β”‚ β”‚ β”‚ β”‚
313 314 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
314 315 β”‚ β”‚ β”‚πŸ„± No Changes β”‚πŸ„³ No Changes β”‚ β”‚
315 316 β”‚ ΓΈ β”‚πŸ„° No Changes β”‚ OR β”‚ OR β”‚πŸ„΅ No Changes β”‚
316 317 β”‚ β”‚ β”‚πŸ„² Deleted[1] β”‚πŸ„΄ Salvaged[2]β”‚ [3] β”‚
317 318 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
318 319 β”‚ β”‚πŸ„Ά No Changes β”‚ β”‚ β”‚ β”‚
319 320 β”‚ (Some, None) β”‚ OR β”‚πŸ„» Deleted β”‚ ΓΈ β”‚ ΓΈ β”‚
320 321 β”‚ β”‚πŸ„· Deleted[1] β”‚ β”‚ β”‚ β”‚
321 322 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
322 323 β”‚ β”‚πŸ„Έ No Changes β”‚ β”‚ β”‚ πŸ„½ Touched β”‚
323 324 β”‚ (None, Some) β”‚ OR β”‚ ΓΈ β”‚πŸ„Ό Added β”‚OR πŸ…€ Salvaged β”‚
324 325 β”‚ β”‚πŸ„Ή Salvaged[2]β”‚ β”‚ (copied?) β”‚ (copied?) β”‚
325 326 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
326 327 β”‚ β”‚ β”‚ β”‚ πŸ„Ύ Touched β”‚ πŸ„Ώ Merged β”‚
327 328 β”‚ (Some, Some) β”‚πŸ„Ί No Changes β”‚ ΓΈ β”‚OR πŸ… Salvaged β”‚OR πŸ…‚ Touched β”‚
328 329 β”‚ β”‚ [3] β”‚ β”‚ (copied?) β”‚ (copied?) β”‚
329 330 β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
330 331
331 332 Special case [1]:
332 333
333 334 The situation is:
334 335 - parent-A: file exists,
335 336 - parent-B: no file,
336 337 - working-copy: no file.
337 338
338 339 Detecting a "deletion" will depend on the presence of actual change on
339 340 the "parent-A" branch:
340 341
341 342 Subcase πŸ„± or πŸ„Ά : if the state of the file in "parent-A" is unchanged
342 343 compared to the merge ancestors, then parent-A branch left the file
343 344 untouched while parent-B deleted it. We simply apply the change from
344 345 the "parent-B" branch: the file was automatically dropped.
345 346 The result is:
346 347 - file is not recorded as touched by the merge.
347 348
348 349 Subcase πŸ„² or πŸ„· : otherwise, the changes from the parent-A branch were explicitly dropped and
349 350 the file was "deleted again". From a user perspective, the message
350 351 about "locally changed" while "remotely deleted" (or the other way
351 352 around) was issued and the user chose to delete the file.
352 353 The result:
353 354 - file is recorded as touched by the merge.
354 355
355 356
356 357 Special case [2]:
357 358
358 359 The situation is:
359 360 - parent-A: no file,
360 361 - parent-B: file,
361 362 - working-copy: file (same content as parent-B).
362 363
363 364 There are three subcases depending on the ancestors' contents:
364 365
365 366 - A) the file is missing in all ancestors,
366 367 - B) at least one ancestor has the file, with a filenode different from parent-B's,
367 368 - C) all ancestors use the same filenode as parent-B,
368 369
369 370 Subcase (A) is the simplest: nothing happened on the parent-A side while
370 371 parent-B added it.
371 372
372 373 The result:
373 374 - the file is not marked as touched by the merge.
374 375
375 376 Subcase (B) is the counterpart of "Special case [1]": the file was
376 377 modified on parent-B side, while parent-A side deleted it. However this
377 378 time, the conflict was solved by keeping the file (and its
378 379 modification). We consider the file as "salvaged".
379 380
380 381 The result:
381 382 - the file is marked as "salvaged" by the merge.
382 383
383 384 Subcase (C) is a subtle variation of the case above. In this case, the
384 385 file is unchanged on the parent-B side and actively removed on the
385 386 parent-A side. So the merge machinery correctly decides it should be
386 387 removed. However, the file was explicitly restored to its parent-B
387 388 content before the merge was committed. The file is marked
388 389 as salvaged too. From the merge result perspective, this is similar to
389 390 Subcase (B); however, from the merge resolution perspective they differ,
390 391 since in (C) there was a conflict with no obvious solution to the
391 392 merge (that got reversed).
392 393
393 394 Special case [3]:
394 395
395 396 The situation is:
396 397 - parent-A: file,
397 398 - parent-B: file (different filenode as parent-A),
398 399 - working-copy: file (same filenode as parent-B).
399 400
400 401 This case is in theory much simpler: for this to happen, the
401 402 filenode in parent-A must be purely replacing the one in parent-B (either a
402 403 descendant, or a full new file history, see changeset). So the merge
403 404 introduces no changes, and the file is not affected by the merge...
404 405
405 406 However, in the wild it is possible to find commits where the above is not
406 407 true. For example, some repositories have commits where the *new* node is an
407 408 ancestor of the node in parent-A, or where parent-A and parent-B are two
408 409 branches of the same file history, yet no merge filenode was created
409 410 (while the "merge" should have led to a "modification").
410 411
411 412 Detecting such cases (and not recording the file as modified) would be a
412 413 nice bonus. However, we do not do any of this yet.
413 414 """
414 415
415 416 repo = ctx.repo()
416 417 md = ChangingFiles()
417 418
418 419 m = ctx.manifest()
419 420 p1m = p1_ctx.manifest()
420 421 p2m = p2_ctx.manifest()
421 422 diff_p1 = p1m.diff(m)
422 423 diff_p2 = p2m.diff(m)
423 424
424 425 cahs = ctx.repo().changelog.commonancestorsheads(
425 426 p1_ctx.node(), p2_ctx.node()
426 427 )
427 428 if not cahs:
428 429 cahs = [nullrev]
429 430 mas = [ctx.repo()[r].manifest() for r in cahs]
430 431
431 432 copy_candidates = []
432 433
433 434 # Dealing with case πŸ„° happens automatically. Since there are no entries in
434 435 # d1 nor d2, we won't iterate on it ever.
435 436
436 437 # Iteration over d1 content will deal with all cases except those in the
437 438 # first column of the table.
438 439 for filename, d1 in diff_p1.items():
439 440
440 441 d2 = diff_p2.pop(filename, None)
441 442
442 443 if d2 is None:
443 444 # this deals with the first line of the table.
444 445 _process_other_unchanged(md, mas, filename, d1)
445 446 else:
446 447
447 448 if d1[0][0] is None and d2[0][0] is None:
448 449 # case πŸ„Ό β€” the file was added on both sides.
449 450 md.mark_added(filename)
450 451 copy_candidates.append(filename)
451 452 elif d1[1][0] is None and d2[1][0] is None:
452 453 # case πŸ„» β€” both deleted the file.
453 454 md.mark_removed(filename)
454 455 elif d1[1][0] is not None and d2[1][0] is not None:
455 456 if d1[0][0] is None or d2[0][0] is None:
456 457 if any(_find(ma, filename) is not None for ma in mas):
457 458 # case πŸ…€ or πŸ…
458 459 md.mark_salvaged(filename)
459 460 else:
460 461 # case πŸ„½ πŸ„Ύ : touched
461 462 md.mark_touched(filename)
462 463 else:
463 464 fctx = repo.filectx(filename, fileid=d1[1][0])
464 465 if fctx.p2().rev() == nullrev:
465 466 # case πŸ…‚
466 467 # lets assume we can trust the file history. If the
467 468 # filenode is not a merge, the file was not merged.
468 469 md.mark_touched(filename)
469 470 else:
470 471 # case πŸ„Ώ
471 472 md.mark_merged(filename)
472 473 copy_candidates.append(filename)
473 474 else:
474 475 # Impossible case, the post-merge file status cannot be None on
475 476 # one side and Something on the other side.
476 477 assert False, "unreachable"
477 478
478 479 # Iteration over remaining d2 content deal with the first column of the
479 480 # table.
480 481 for filename, d2 in diff_p2.items():
481 482 _process_other_unchanged(md, mas, filename, d2)
482 483
483 484 for filename in copy_candidates:
484 485 copy_info = ctx[filename].renamed()
485 486 if copy_info:
486 487 source, srcnode = copy_info
487 488 if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
488 489 md.mark_copied_from_p1(source, filename)
489 490 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
490 491 md.mark_copied_from_p2(source, filename)
491 492 return md
492 493
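# A toy sketch of the dispatch above: given one file's entries from diff_p1
# and diff_p2 (None when the file did not change against that parent, the
# "ΓΈ" row or column of the table), pick the broad outcome.  The finer
# distinctions (touched vs. merged vs. salvaged) additionally need the
# ancestor manifests and filelog parents, as done in _process_merge();
# the helper below is illustrative only.
def _example_merge_outcome(d1, d2):
    if d1 is None or d2 is None:
        # first row or column of the table: unchanged against one parent
        return b'unchanged-on-one-side'
    if d1[0][0] is None and d2[0][0] is None:
        return b'added'  # case πŸ„Ό: absent from both parents
    if d1[1][0] is None and d2[1][0] is None:
        return b'removed'  # case πŸ„»: removed against both parents
    return b'touched-merged-or-salvaged'  # cases πŸ„½ πŸ„Ύ πŸ„Ώ πŸ…€ πŸ… πŸ…‚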
493 494
494 495 def _find(manifest, filename):
495 496 """return the associate filenode or None"""
496 497 if filename not in manifest:
497 498 return None
498 499 return manifest.find(filename)[0]
499 500
500 501
501 502 def _process_other_unchanged(md, mas, filename, diff):
502 503 source_node = diff[0][0]
503 504 target_node = diff[1][0]
504 505
505 506 if source_node is not None and target_node is None:
506 507 if any(not _find(ma, filename) == source_node for ma in mas):
507 508 # case πŸ„² or πŸ„·
508 509 md.mark_removed(filename)
509 510 # else, we have case πŸ„± or πŸ„Ά : no change needs to be recorded
510 511 elif source_node is None and target_node is not None:
511 512 if any(_find(ma, filename) is not None for ma in mas):
512 513 # case πŸ„΄ or πŸ„Ή
513 514 md.mark_salvaged(filename)
514 515 # else, we have case πŸ„³ or πŸ„Έ : simple merge without intervention
515 516 elif source_node is not None and target_node is not None:
516 517 # case πŸ„΅ or πŸ„Ί : simple merge without intervention
517 518 #
518 519 # In the buggy case where source_node is not an ancestor of target_node,
519 520 # there should have been a new filenode created, recording this as
520 521 # "modified". We do not deal with these yet.
521 522 pass
522 523 else:
523 524 # An impossible case, the diff algorithm should not return entry if the
524 525 # file is missing on both side.
525 526 assert False, "unreachable"
526 527
527 528
528 529 def _missing_from_all_ancestors(mas, filename):
529 530 return all(_find(ma, filename) is None for ma in mas)
530 531
531 532
532 533 def computechangesetfilesadded(ctx):
533 534 """return the list of files added in a changeset"""
534 535 added = []
535 536 for f in ctx.files():
536 537 if not any(f in p for p in ctx.parents()):
537 538 added.append(f)
538 539 return added
539 540
540 541
541 542 def get_removal_filter(ctx, x=None):
542 543 """return a function to detect files "wrongly" detected as `removed`
543 544
544 545 When a file is removed relative to p1 in a merge, this
545 546 function determines whether the absence is due to a
546 547 deletion from a parent, or whether the merge commit
547 548 itself deletes the file. We decide this by doing a
548 549 simplified three way merge of the manifest entry for
549 550 the file. There are two ways we decide the merge
550 551 itself didn't delete a file:
551 552 - neither parent (nor the merge) contain the file
552 553 - exactly one parent contains the file, and that
553 554 parent has the same filelog entry as the merge
554 555 ancestor (or all of them if there two). In other
555 556 words, that parent left the file unchanged while the
556 557 other one deleted it.
557 558 One way to think about this is that deleting a file is
558 559 similar to emptying it, so the list of changed files
559 560 should be similar either way. The computation
560 561 described above is not done directly in _filecommit
561 562 when creating the list of changed files, however
562 563 it does something very similar by comparing filelog
563 564 nodes.
564 565 """
565 566
566 567 if x is not None:
567 568 p1, p2, m1, m2 = x
568 569 else:
569 570 p1 = ctx.p1()
570 571 p2 = ctx.p2()
571 572 m1 = p1.manifest()
572 573 m2 = p2.manifest()
573 574
574 575 @util.cachefunc
575 576 def mas():
576 577 p1n = p1.node()
577 578 p2n = p2.node()
578 579 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
579 580 if not cahs:
580 581 cahs = [nullrev]
581 582 return [ctx.repo()[r].manifest() for r in cahs]
582 583
583 584 def deletionfromparent(f):
584 585 if f in m1:
585 586 return f not in m2 and all(
586 587 f in ma and ma.find(f) == m1.find(f) for ma in mas()
587 588 )
588 589 elif f in m2:
589 590 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
590 591 else:
591 592 return True
592 593
593 594 return deletionfromparent
594 595
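As an editorial aside (not part of this file), here is a minimal sketch of the rule spelled out in the docstring above, using plain dicts as stand-ins for manifests; the demo_* name and the toy filenodes are hypothetical:

def demo_deletion_from_parent(f, m1, m2, ancestors):
    # m1, m2 and each ancestor map filename -> filenode (toy manifests)
    if f in m1:
        # p1 still has f, unchanged from every ancestor, and p2 dropped it:
        # the deletion came from p2's branch, not from the merge itself
        return f not in m2 and all(
            f in ma and ma[f] == m1[f] for ma in ancestors
        )
    elif f in m2:
        # symmetric case: p2 kept f unchanged while p1 dropped it
        return all(f in ma and ma[f] == m2[f] for ma in ancestors)
    # neither parent has f: nothing for the merge to delete
    return True


# p1 kept b'x' with the same filenode as the ancestor, p2 deleted it:
# the absence is inherited from p2, so b'x' is filtered out of 'removed'.
assert demo_deletion_from_parent(b'x', {b'x': b'n1'}, {}, [{b'x': b'n1'}])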
595 596
596 597 def computechangesetfilesremoved(ctx):
597 598 """return the list of files removed in a changeset"""
598 599 removed = []
599 600 for f in ctx.files():
600 601 if f not in ctx:
601 602 removed.append(f)
602 603 if removed:
603 604 rf = get_removal_filter(ctx)
604 605 removed = [r for r in removed if not rf(r)]
605 606 return removed
606 607
607 608
608 609 def computechangesetfilesmerged(ctx):
609 610 """return the list of files merged in a changeset"""
610 611 merged = []
611 612 if len(ctx.parents()) < 2:
612 613 return merged
613 614 for f in ctx.files():
614 615 if f in ctx:
615 616 fctx = ctx[f]
616 617 parents = fctx._filelog.parents(fctx._filenode)
617 618 if parents[1] != ctx.repo().nullid:
618 619 merged.append(f)
619 620 return merged
620 621
621 622
622 623 def computechangesetcopies(ctx):
623 624 """return the copies data for a changeset
624 625
625 626 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
626 627
627 628 Each dictionary is of the form: `{newname: oldname}`
628 629 """
629 630 p1copies = {}
630 631 p2copies = {}
631 632 p1 = ctx.p1()
632 633 p2 = ctx.p2()
633 634 narrowmatch = ctx._repo.narrowmatch()
634 635 for dst in ctx.files():
635 636 if not narrowmatch(dst) or dst not in ctx:
636 637 continue
637 638 copied = ctx[dst].renamed()
638 639 if not copied:
639 640 continue
640 641 src, srcnode = copied
641 642 if src in p1 and p1[src].filenode() == srcnode:
642 643 p1copies[dst] = src
643 644 elif src in p2 and p2[src].filenode() == srcnode:
644 645 p2copies[dst] = src
645 646 return p1copies, p2copies
646 647
647 648
648 649 def encodecopies(files, copies):
649 650 items = []
650 651 for i, dst in enumerate(files):
651 652 if dst in copies:
652 653 items.append(b'%d\0%s' % (i, copies[dst]))
653 654 if len(items) != len(copies):
654 655 raise error.ProgrammingError(
655 656 b'some copy targets missing from file list'
656 657 )
657 658 return b"\n".join(items)
658 659
659 660
660 661 def decodecopies(files, data):
661 662 try:
662 663 copies = {}
663 664 if not data:
664 665 return copies
665 666 for l in data.split(b'\n'):
666 667 strindex, src = l.split(b'\0')
667 668 i = int(strindex)
668 669 dst = files[i]
669 670 copies[dst] = src
670 671 return copies
671 672 except (ValueError, IndexError):
672 673 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
673 674 # used different syntax for the value.
674 675 return None
675 676
676 677
677 678 def encodefileindices(files, subset):
678 679 subset = set(subset)
679 680 indices = []
680 681 for i, f in enumerate(files):
681 682 if f in subset:
682 683 indices.append(b'%d' % i)
683 684 return b'\n'.join(indices)
684 685
685 686
686 687 def decodefileindices(files, data):
687 688 try:
688 689 subset = []
689 690 if not data:
690 691 return subset
691 692 for strindex in data.split(b'\n'):
692 693 i = int(strindex)
693 694 if i < 0 or i >= len(files):
694 695 return None
695 696 subset.append(files[i])
696 697 return subset
697 698 except (ValueError, IndexError):
698 699 # Perhaps someone had chosen the same key name (e.g. "added") and
699 700 # used different syntax for the value.
700 701 return None
701 702
702 703
703 704 # see mercurial/helptext/internals/revlogs.txt for details about the format
704 705
705 706 ACTION_MASK = int("111" "00", 2)
706 707 # note: an untouched file used as a copy source will have `000` for this mask.
707 708 ADDED_FLAG = int("001" "00", 2)
708 709 MERGED_FLAG = int("010" "00", 2)
709 710 REMOVED_FLAG = int("011" "00", 2)
710 711 SALVAGED_FLAG = int("100" "00", 2)
711 712 TOUCHED_FLAG = int("101" "00", 2)
712 713
713 714 COPIED_MASK = int("11", 2)
714 715 COPIED_FROM_P1_FLAG = int("10", 2)
715 716 COPIED_FROM_P2_FLAG = int("11", 2)
716 717
717 718 # structure is <flag><filename-end><copy-source>
718 719 INDEX_HEADER = struct.Struct(">L")
719 720 INDEX_ENTRY = struct.Struct(">bLL")
720 721
721 722
722 723 def encode_files_sidedata(files):
723 724 all_files = set(files.touched)
724 725 all_files.update(files.copied_from_p1.values())
725 726 all_files.update(files.copied_from_p2.values())
726 727 all_files = sorted(all_files)
727 728 file_idx = {f: i for (i, f) in enumerate(all_files)}
728 729 file_idx[None] = 0
729 730
730 731 chunks = [INDEX_HEADER.pack(len(all_files))]
731 732
732 733 filename_length = 0
733 734 for f in all_files:
734 735 filename_size = len(f)
735 736 filename_length += filename_size
736 737 flag = 0
737 738 if f in files.added:
738 739 flag |= ADDED_FLAG
739 740 elif f in files.merged:
740 741 flag |= MERGED_FLAG
741 742 elif f in files.removed:
742 743 flag |= REMOVED_FLAG
743 744 elif f in files.salvaged:
744 745 flag |= SALVAGED_FLAG
745 746 elif f in files.touched:
746 747 flag |= TOUCHED_FLAG
747 748
748 749 copy = None
749 750 if f in files.copied_from_p1:
750 751 flag |= COPIED_FROM_P1_FLAG
751 752 copy = files.copied_from_p1.get(f)
752 753 elif f in files.copied_from_p2:
753 754 copy = files.copied_from_p2.get(f)
754 755 flag |= COPIED_FROM_P2_FLAG
755 756 copy_idx = file_idx[copy]
756 757 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
757 758 chunks.extend(all_files)
758 759 return {sidedatamod.SD_FILES: b''.join(chunks)}
759 760
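As an editorial illustration of the <flag><filename-end><copy-source> layout documented above (not part of the patch), here is a standalone sketch using only the stdlib struct module; the demo_* names are hypothetical and mirror INDEX_HEADER / INDEX_ENTRY:

import struct

DEMO_HEADER = struct.Struct(">L")   # number of files
DEMO_ENTRY = struct.Struct(">bLL")  # flag, cumulative filename end, copy-source index

DEMO_ADDED = int("001" "00", 2)     # same bit layout as ADDED_FLAG
DEMO_COPIED_FROM_P2 = int("11", 2)  # same bit layout as COPIED_FROM_P2_FLAG


def demo_encode(entries):
    """entries: sorted list of (filename, flag, copy_idx) tuples"""
    chunks = [DEMO_HEADER.pack(len(entries))]
    end = 0
    for name, flag, copy_idx in entries:
        end += len(name)
        chunks.append(DEMO_ENTRY.pack(flag, end, copy_idx))
    chunks.extend(name for name, _, _ in entries)
    return b''.join(chunks)


# b'new.txt' was added and copied from b'old.txt' (file index 1) on p2;
# b'old.txt' only appears as a copy source, so its flag byte is 0.
raw = demo_encode([
    (b'new.txt', DEMO_ADDED | DEMO_COPIED_FROM_P2, 1),
    (b'old.txt', 0, 0),
])
assert DEMO_HEADER.unpack_from(raw, 0)[0] == 2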
760 761
761 762 def decode_files_sidedata(sidedata):
762 763 md = ChangingFiles()
763 764 raw = sidedata.get(sidedatamod.SD_FILES)
764 765
765 766 if raw is None:
766 767 return md
767 768
768 769 copies = []
769 770 all_files = []
770 771
771 772 assert len(raw) >= INDEX_HEADER.size
772 773 total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
773 774
774 775 offset = INDEX_HEADER.size
775 776 file_offset_base = offset + (INDEX_ENTRY.size * total_files)
776 777 file_offset_last = file_offset_base
777 778
778 779 assert len(raw) >= file_offset_base
779 780
780 781 for idx in range(total_files):
781 782 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
782 783 file_end += file_offset_base
783 784 filename = raw[file_offset_last:file_end]
784 785 filesize = file_end - file_offset_last
785 786 assert len(filename) == filesize
786 787 offset += INDEX_ENTRY.size
787 788 file_offset_last = file_end
788 789 all_files.append(filename)
789 790 if flag & ACTION_MASK == ADDED_FLAG:
790 791 md.mark_added(filename)
791 792 elif flag & ACTION_MASK == MERGED_FLAG:
792 793 md.mark_merged(filename)
793 794 elif flag & ACTION_MASK == REMOVED_FLAG:
794 795 md.mark_removed(filename)
795 796 elif flag & ACTION_MASK == SALVAGED_FLAG:
796 797 md.mark_salvaged(filename)
797 798 elif flag & ACTION_MASK == TOUCHED_FLAG:
798 799 md.mark_touched(filename)
799 800
800 801 copied = None
801 802 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
802 803 copied = md.mark_copied_from_p1
803 804 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
804 805 copied = md.mark_copied_from_p2
805 806
806 807 if copied is not None:
807 808 copies.append((copied, filename, copy_idx))
808 809
809 810 for copied, filename, copy_idx in copies:
810 811 copied(all_files[copy_idx], filename)
811 812
812 813 return md
813 814
814 815
815 816 def _getsidedata(srcrepo, rev):
816 817 ctx = srcrepo[rev]
817 818 files = compute_all_files_changes(ctx)
818 819 return encode_files_sidedata(files), files.has_copies_info
819 820
820 821
821 822 def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
822 823 return _getsidedata(repo, rev)[0]
823 824
824 825
825 826 def set_sidedata_spec_for_repo(repo):
826 827 if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
827 828 repo.register_wanted_sidedata(sidedatamod.SD_FILES)
828 829 repo.register_sidedata_computer(
829 b"changelog",
830 revlogconst.KIND_CHANGELOG,
830 831 sidedatamod.SD_FILES,
831 832 (sidedatamod.SD_FILES,),
832 833 copies_sidedata_computer,
833 834 )
834 835
835 836
836 837 def getsidedataadder(srcrepo, destrepo):
837 838 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
838 839 if pycompat.iswindows or not use_w:
839 840 return _get_simple_sidedata_adder(srcrepo, destrepo)
840 841 else:
841 842 return _get_worker_sidedata_adder(srcrepo, destrepo)
842 843
843 844
844 845 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
845 846 """The function used by worker precomputing sidedata
846 847
847 848 It reads an input queue containing revision numbers.
848 849 It writes to an output queue containing (rev, <sidedata-map>) pairs.
849 850
850 851 The `None` input value is used as a stop signal.
851 852
852 853 The `tokens` semaphore is used to avoid having too many unprocessed
853 854 entries. The workers need to acquire one token before fetching a task.
854 855 Tokens will be released by the consumer of the produced data.
855 856 """
856 857 tokens.acquire()
857 858 rev = revs_queue.get()
858 859 while rev is not None:
859 860 data = _getsidedata(srcrepo, rev)
860 861 sidedata_queue.put((rev, data))
861 862 tokens.acquire()
862 863 rev = revs_queue.get()
863 864 # processing of `None` is completed, release the token.
864 865 tokens.release()
865 866
866 867
867 868 BUFF_PER_WORKER = 50
868 869
869 870
870 871 def _get_worker_sidedata_adder(srcrepo, destrepo):
871 872 """The parallel version of the sidedata computation
872 873
873 874 This code spawns a pool of workers that precompute a buffer of sidedata
874 875 before we actually need it"""
875 876 # avoid circular import copies -> scmutil -> worker -> copies
876 877 from . import worker
877 878
878 879 nbworkers = worker._numworkers(srcrepo.ui)
879 880
880 881 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
881 882 revsq = multiprocessing.Queue()
882 883 sidedataq = multiprocessing.Queue()
883 884
884 885 assert srcrepo.filtername is None
886 887 # queue all tasks beforehand; revision numbers are small and it makes
886 887 # synchronisation simpler
887 888 #
888 889 # Since the computation for each node can be quite expensive, the overhead
889 890 # of using a single queue is not relevant. In practice, most computations
890 891 # are fast but some are very expensive and dominate all the other smaller
891 892 # costs.
892 893 for r in srcrepo.changelog.revs():
893 894 revsq.put(r)
894 895 # queue the "no more tasks" markers
895 896 for i in range(nbworkers):
896 897 revsq.put(None)
897 898
898 899 allworkers = []
899 900 for i in range(nbworkers):
900 901 args = (srcrepo, revsq, sidedataq, tokens)
901 902 w = multiprocessing.Process(target=_sidedata_worker, args=args)
902 903 allworkers.append(w)
903 904 w.start()
904 905
905 906 # dictionary to store results for revisions higher than the one we are
906 907 # looking for. For example, if we need the sidedatamap for 42, and 43 is
907 908 # received, we shelve 43 for later use.
908 909 staging = {}
909 910
910 911 def sidedata_companion(revlog, rev):
911 912 data = {}, False
912 913 if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
913 914 # Is the data previously shelved?
914 915 data = staging.pop(rev, None)
915 916 if data is None:
916 917 # look at the queued results until we find the one we are looking
917 918 # for (shelve the other ones)
918 919 r, data = sidedataq.get()
919 920 while r != rev:
920 921 staging[r] = data
921 922 r, data = sidedataq.get()
922 923 tokens.release()
923 924 sidedata, has_copies_info = data
924 925 new_flag = 0
925 926 if has_copies_info:
926 927 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
927 928 return False, (), sidedata, new_flag, 0
928 929
929 930 return sidedata_companion
930 931
931 932
932 933 def _get_simple_sidedata_adder(srcrepo, destrepo):
933 934 """The simple version of the sidedata computation
934 935
935 936 It just computes it in the same thread on request"""
936 937
937 938 def sidedatacompanion(revlog, rev):
938 939 sidedata, has_copies_info = {}, False
939 940 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
940 941 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
941 942 new_flag = 0
942 943 if has_copies_info:
943 944 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
944 945
945 946 return False, (), sidedata, new_flag, 0
946 947
947 948 return sidedatacompanion
948 949
949 950
950 951 def getsidedataremover(srcrepo, destrepo):
951 952 def sidedatacompanion(revlog, rev):
952 953 f = ()
953 954 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
954 955 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
955 956 f = (
956 957 sidedatamod.SD_P1COPIES,
957 958 sidedatamod.SD_P2COPIES,
958 959 sidedatamod.SD_FILESADDED,
959 960 sidedatamod.SD_FILESREMOVED,
960 961 )
961 962 return False, f, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO
962 963
963 964 return sidedatacompanion
@@ -1,3138 +1,3142 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 nodemap as nodemaputil,
79 79 revlogv0,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86
87 87 # blanket usage of all the names to prevent pyflakes complaints
88 88 # We need these names available in the module for extensions.
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_SIDEDATA
102 102 REVIDX_HASCOPIESINFO
103 103 REVIDX_EXTSTORED
104 104 REVIDX_DEFAULT_FLAGS
105 105 REVIDX_FLAGS_ORDER
106 106 REVIDX_RAWTEXT_CHANGING_FLAGS
107 107
108 108 parsers = policy.importmod('parsers')
109 109 rustancestor = policy.importrust('ancestor')
110 110 rustdagop = policy.importrust('dagop')
111 111 rustrevlog = policy.importrust('revlog')
112 112
113 113 # Aliased for performance.
114 114 _zlibdecompress = zlib.decompress
115 115
116 116 # max size of revlog with inline data
117 117 _maxinline = 131072
118 118 _chunksize = 1048576
119 119
120 120 # Flag processors for REVIDX_ELLIPSIS.
121 121 def ellipsisreadprocessor(rl, text):
122 122 return text, False
123 123
124 124
125 125 def ellipsiswriteprocessor(rl, text):
126 126 return text, False
127 127
128 128
129 129 def ellipsisrawprocessor(rl, text):
130 130 return False
131 131
132 132
133 133 ellipsisprocessor = (
134 134 ellipsisreadprocessor,
135 135 ellipsiswriteprocessor,
136 136 ellipsisrawprocessor,
137 137 )
138 138
139 139
140 140 def offset_type(offset, type):
141 141 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
142 142 raise ValueError(b'unknown revlog index flags')
143 143 return int(int(offset) << 16 | type)
144 144
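A brief editorial sketch (not from the source) of how this packing round-trips; the demo name is hypothetical and the flag value is arbitrary:

def demo_offset_type(offset, flags):
    # same packing as offset_type(): 48-bit offset in the high bits,
    # 16 flag bits in the low bits of the first index entry field
    return (int(offset) << 16) | flags


packed = demo_offset_type(4096, 0x0001)
assert packed >> 16 == 4096        # what start() recovers below
assert packed & 0xFFFF == 0x0001   # what flags() recovers below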
145 145
146 146 def _verify_revision(rl, skipflags, state, node):
147 147 """Verify the integrity of the given revlog ``node`` while providing a hook
148 148 point for extensions to influence the operation."""
149 149 if skipflags:
150 150 state[b'skipread'].add(node)
151 151 else:
152 152 # Side-effect: read content and verify hash.
153 153 rl.revision(node)
154 154
155 155
156 156 # True if a fast implementation for persistent-nodemap is available
157 157 #
158 158 # We also consider we have a "fast" implementation in "pure" python because
159 159 # people using pure don't really have performance considerations (and a
160 160 # wheelbarrow of other slowness sources)
161 161 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
162 162 parsers, 'BaseIndexObject'
163 163 )
164 164
165 165
166 166 @attr.s(slots=True, frozen=True)
167 167 class _revisioninfo(object):
168 168 """Information about a revision that allows building its fulltext
169 169 node: expected hash of the revision
170 170 p1, p2: parent revs of the revision
171 171 btext: built text cache consisting of a one-element list
172 172 cachedelta: (baserev, uncompressed_delta) or None
173 173 flags: flags associated to the revision storage
174 174
175 175 One of btext[0] or cachedelta must be set.
176 176 """
177 177
178 178 node = attr.ib()
179 179 p1 = attr.ib()
180 180 p2 = attr.ib()
181 181 btext = attr.ib()
182 182 textlen = attr.ib()
183 183 cachedelta = attr.ib()
184 184 flags = attr.ib()
185 185
186 186
187 187 @interfaceutil.implementer(repository.irevisiondelta)
188 188 @attr.s(slots=True)
189 189 class revlogrevisiondelta(object):
190 190 node = attr.ib()
191 191 p1node = attr.ib()
192 192 p2node = attr.ib()
193 193 basenode = attr.ib()
194 194 flags = attr.ib()
195 195 baserevisionsize = attr.ib()
196 196 revision = attr.ib()
197 197 delta = attr.ib()
198 198 sidedata = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 indexfile=None,
293 293 datafile=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code to understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must reliably be set by normal code, but
311 311 test, debug, or performance measurement code might not set it to an
312 312 accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile or (indexfile[:-2] + b".d")
317 317 self.nodemap_file = None
318 318 if persistentnodemap:
319 319 self.nodemap_file = nodemaputil.get_nodemap_file(
320 320 opener, self.indexfile
321 321 )
322 322
323 323 self.opener = opener
324 324 assert target[0] in ALL_KINDS
325 325 assert len(target) == 2
326 326 self.target = target
327 327 # When True, indexfile is opened with checkambig=True at writing, to
328 328 # avoid file stat ambiguity.
329 329 self._checkambig = checkambig
330 330 self._mmaplargeindex = mmaplargeindex
331 331 self._censorable = censorable
332 332 # 3-tuple of (node, rev, text) for a raw revision.
333 333 self._revisioncache = None
334 334 # Maps rev to chain base rev.
335 335 self._chainbasecache = util.lrucachedict(100)
336 336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
337 337 self._chunkcache = (0, b'')
338 338 # How much data to read and cache into the raw revlog data cache.
339 339 self._chunkcachesize = 65536
340 340 self._maxchainlen = None
341 341 self._deltabothparents = True
342 342 self.index = None
343 343 self._nodemap_docket = None
344 344 # Mapping of partial identifiers to full nodes.
345 345 self._pcache = {}
346 346 # Mapping of revision integer to full node.
347 347 self._compengine = b'zlib'
348 348 self._compengineopts = {}
349 349 self._maxdeltachainspan = -1
350 350 self._withsparseread = False
351 351 self._sparserevlog = False
352 352 self._srdensitythreshold = 0.50
353 353 self._srmingapsize = 262144
354 354
355 355 # Make copy of flag processors so each revlog instance can support
356 356 # custom flags.
357 357 self._flagprocessors = dict(flagutil.flagprocessors)
358 358
359 359 # 2-tuple of file handles being used for active writing.
360 360 self._writinghandles = None
361 361
362 362 self._loadindex()
363 363
364 364 self._concurrencychecker = concurrencychecker
365 365
366 366 def _loadindex(self):
367 367 mmapindexthreshold = None
368 368 opts = self.opener.options
369 369
370 370 if b'revlogv2' in opts:
371 371 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
372 372 elif b'revlogv1' in opts:
373 373 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
374 374 if b'generaldelta' in opts:
375 375 newversionflags |= FLAG_GENERALDELTA
376 376 elif b'revlogv0' in self.opener.options:
377 377 newversionflags = REVLOGV0
378 378 else:
379 379 newversionflags = REVLOG_DEFAULT_VERSION
380 380
381 381 if b'chunkcachesize' in opts:
382 382 self._chunkcachesize = opts[b'chunkcachesize']
383 383 if b'maxchainlen' in opts:
384 384 self._maxchainlen = opts[b'maxchainlen']
385 385 if b'deltabothparents' in opts:
386 386 self._deltabothparents = opts[b'deltabothparents']
387 387 self._lazydelta = bool(opts.get(b'lazydelta', True))
388 388 self._lazydeltabase = False
389 389 if self._lazydelta:
390 390 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
391 391 if b'compengine' in opts:
392 392 self._compengine = opts[b'compengine']
393 393 if b'zlib.level' in opts:
394 394 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
395 395 if b'zstd.level' in opts:
396 396 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
397 397 if b'maxdeltachainspan' in opts:
398 398 self._maxdeltachainspan = opts[b'maxdeltachainspan']
399 399 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
400 400 mmapindexthreshold = opts[b'mmapindexthreshold']
401 401 self.hassidedata = bool(opts.get(b'side-data', False))
402 402 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
403 403 withsparseread = bool(opts.get(b'with-sparse-read', False))
404 404 # sparse-revlog forces sparse-read
405 405 self._withsparseread = self._sparserevlog or withsparseread
406 406 if b'sparse-read-density-threshold' in opts:
407 407 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
408 408 if b'sparse-read-min-gap-size' in opts:
409 409 self._srmingapsize = opts[b'sparse-read-min-gap-size']
410 410 if opts.get(b'enableellipsis'):
411 411 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
412 412
413 413 # revlog v0 doesn't have flag processors
414 414 for flag, processor in pycompat.iteritems(
415 415 opts.get(b'flagprocessors', {})
416 416 ):
417 417 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
418 418
419 419 if self._chunkcachesize <= 0:
420 420 raise error.RevlogError(
421 421 _(b'revlog chunk cache size %r is not greater than 0')
422 422 % self._chunkcachesize
423 423 )
424 424 elif self._chunkcachesize & (self._chunkcachesize - 1):
425 425 raise error.RevlogError(
426 426 _(b'revlog chunk cache size %r is not a power of 2')
427 427 % self._chunkcachesize
428 428 )
429 429
430 430 indexdata = b''
431 431 self._initempty = True
432 432 try:
433 433 with self._indexfp() as f:
434 434 if (
435 435 mmapindexthreshold is not None
436 436 and self.opener.fstat(f).st_size >= mmapindexthreshold
437 437 ):
438 438 # TODO: should .close() to release resources without
439 439 # relying on Python GC
440 440 indexdata = util.buffer(util.mmapread(f))
441 441 else:
442 442 indexdata = f.read()
443 443 if len(indexdata) > 0:
444 444 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
445 445 self._initempty = False
446 446 else:
447 447 versionflags = newversionflags
448 448 except IOError as inst:
449 449 if inst.errno != errno.ENOENT:
450 450 raise
451 451
452 452 versionflags = newversionflags
453 453
454 454 self.version = versionflags
455 455
456 456 flags = versionflags & ~0xFFFF
457 457 fmt = versionflags & 0xFFFF
458 458
459 459 if fmt == REVLOGV0:
460 460 if flags:
461 461 raise error.RevlogError(
462 462 _(b'unknown flags (%#04x) in version %d revlog %s')
463 463 % (flags >> 16, fmt, self.indexfile)
464 464 )
465 465
466 466 self._inline = False
467 467 self._generaldelta = False
468 468
469 469 elif fmt == REVLOGV1:
470 470 if flags & ~REVLOGV1_FLAGS:
471 471 raise error.RevlogError(
472 472 _(b'unknown flags (%#04x) in version %d revlog %s')
473 473 % (flags >> 16, fmt, self.indexfile)
474 474 )
475 475
476 476 self._inline = versionflags & FLAG_INLINE_DATA
477 477 self._generaldelta = versionflags & FLAG_GENERALDELTA
478 478
479 479 elif fmt == REVLOGV2:
480 480 if flags & ~REVLOGV2_FLAGS:
481 481 raise error.RevlogError(
482 482 _(b'unknown flags (%#04x) in version %d revlog %s')
483 483 % (flags >> 16, fmt, self.indexfile)
484 484 )
485 485
486 486 # There is a bug in the transaction handling when going from an
487 487 # inline revlog to a separate index and data file. Turn it off until
488 488 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
489 489 # See issue6485
490 490 self._inline = False
491 491 # generaldelta implied by version 2 revlogs.
492 492 self._generaldelta = True
493 493
494 494 else:
495 495 raise error.RevlogError(
496 496 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
497 497 )
498 498
499 499 self.nodeconstants = sha1nodeconstants
500 500 self.nullid = self.nodeconstants.nullid
501 501
502 502 # sparse-revlog can't be on without general-delta (issue6056)
503 503 if not self._generaldelta:
504 504 self._sparserevlog = False
505 505
506 506 self._storedeltachains = True
507 507
508 508 devel_nodemap = (
509 509 self.nodemap_file
510 510 and opts.get(b'devel-force-nodemap', False)
511 511 and parse_index_v1_nodemap is not None
512 512 )
513 513
514 514 use_rust_index = False
515 515 if rustrevlog is not None:
516 516 if self.nodemap_file is not None:
517 517 use_rust_index = True
518 518 else:
519 519 use_rust_index = self.opener.options.get(b'rust.index')
520 520
521 521 self._parse_index = parse_index_v1
522 522 if self.version == REVLOGV0:
523 523 self._parse_index = revlogv0.parse_index_v0
524 524 elif fmt == REVLOGV2:
525 525 self._parse_index = parse_index_v2
526 526 elif devel_nodemap:
527 527 self._parse_index = parse_index_v1_nodemap
528 528 elif use_rust_index:
529 529 self._parse_index = parse_index_v1_mixed
530 530 try:
531 531 d = self._parse_index(indexdata, self._inline)
532 532 index, _chunkcache = d
533 533 use_nodemap = (
534 534 not self._inline
535 535 and self.nodemap_file is not None
536 536 and util.safehasattr(index, 'update_nodemap_data')
537 537 )
538 538 if use_nodemap:
539 539 nodemap_data = nodemaputil.persisted_data(self)
540 540 if nodemap_data is not None:
541 541 docket = nodemap_data[0]
542 542 if (
543 543 len(d[0]) > docket.tip_rev
544 544 and d[0][docket.tip_rev][7] == docket.tip_node
545 545 ):
546 546 # no changelog tampering
547 547 self._nodemap_docket = docket
548 548 index.update_nodemap_data(*nodemap_data)
549 549 except (ValueError, IndexError):
550 550 raise error.RevlogError(
551 551 _(b"index %s is corrupted") % self.indexfile
552 552 )
553 553 self.index, self._chunkcache = d
554 554 if not self._chunkcache:
555 555 self._chunkclear()
556 556 # revnum -> (chain-length, sum-delta-length)
557 557 self._chaininfocache = util.lrucachedict(500)
558 558 # revlog header -> revlog compressor
559 559 self._decompressors = {}
560 560
561 561 @util.propertycache
562 def revlog_kind(self):
563 return self.target[0]
564
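# Editorial sketch (not part of the change): with `target` carrying the
# (KIND, ID) pair, `revlog_kind` is simply target[0], so callers can branch
# on the kind without filename heuristics. `demo_is_changelog` below is a
# hypothetical helper, not a Mercurial API.
from mercurial.revlogutils import constants as revlog_constants

def demo_is_changelog(rl):
    # rl is any revlog instance; its kind is the first element of `target`
    return rl.revlog_kind == revlog_constants.KIND_CHANGELOG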
565 @util.propertycache
562 566 def _compressor(self):
563 567 engine = util.compengines[self._compengine]
564 568 return engine.revlogcompressor(self._compengineopts)
565 569
566 570 def _indexfp(self, mode=b'r'):
567 571 """file object for the revlog's index file"""
568 572 args = {'mode': mode}
569 573 if mode != b'r':
570 574 args['checkambig'] = self._checkambig
571 575 if mode == b'w':
572 576 args['atomictemp'] = True
573 577 return self.opener(self.indexfile, **args)
574 578
575 579 def _datafp(self, mode=b'r'):
576 580 """file object for the revlog's data file"""
577 581 return self.opener(self.datafile, mode=mode)
578 582
579 583 @contextlib.contextmanager
580 584 def _datareadfp(self, existingfp=None):
581 585 """file object suitable to read data"""
582 586 # Use explicit file handle, if given.
583 587 if existingfp is not None:
584 588 yield existingfp
585 589
586 590 # Use a file handle being actively used for writes, if available.
587 591 # There is some danger to doing this because reads will seek the
588 592 # file. However, _writeentry() performs a SEEK_END before all writes,
589 593 # so we should be safe.
590 594 elif self._writinghandles:
591 595 if self._inline:
592 596 yield self._writinghandles[0]
593 597 else:
594 598 yield self._writinghandles[1]
595 599
596 600 # Otherwise open a new file handle.
597 601 else:
598 602 if self._inline:
599 603 func = self._indexfp
600 604 else:
601 605 func = self._datafp
602 606 with func() as fp:
603 607 yield fp
604 608
605 609 def tiprev(self):
606 610 return len(self.index) - 1
607 611
608 612 def tip(self):
609 613 return self.node(self.tiprev())
610 614
611 615 def __contains__(self, rev):
612 616 return 0 <= rev < len(self)
613 617
614 618 def __len__(self):
615 619 return len(self.index)
616 620
617 621 def __iter__(self):
618 622 return iter(pycompat.xrange(len(self)))
619 623
620 624 def revs(self, start=0, stop=None):
621 625 """iterate over all rev in this revlog (from start to stop)"""
622 626 return storageutil.iterrevs(len(self), start=start, stop=stop)
623 627
624 628 @property
625 629 def nodemap(self):
626 630 msg = (
627 631 b"revlog.nodemap is deprecated, "
628 632 b"use revlog.index.[has_node|rev|get_rev]"
629 633 )
630 634 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
631 635 return self.index.nodemap
632 636
633 637 @property
634 638 def _nodecache(self):
635 639 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
636 640 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
637 641 return self.index.nodemap
638 642
639 643 def hasnode(self, node):
640 644 try:
641 645 self.rev(node)
642 646 return True
643 647 except KeyError:
644 648 return False
645 649
646 650 def candelta(self, baserev, rev):
647 651 """whether two revisions (baserev, rev) can be delta-ed or not"""
648 652 # Disable delta if either rev requires a content-changing flag
649 653 # processor (ex. LFS). This is because such flag processor can alter
650 654 # the rawtext content that the delta will be based on, and two clients
651 655 # could have a same revlog node with different flags (i.e. different
652 656 # rawtext contents) and the delta could be incompatible.
653 657 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
654 658 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
655 659 ):
656 660 return False
657 661 return True
658 662
659 663 def update_caches(self, transaction):
660 664 if self.nodemap_file is not None:
661 665 if transaction is None:
662 666 nodemaputil.update_persistent_nodemap(self)
663 667 else:
664 668 nodemaputil.setup_persistent_nodemap(transaction, self)
665 669
666 670 def clearcaches(self):
667 671 self._revisioncache = None
668 672 self._chainbasecache.clear()
669 673 self._chunkcache = (0, b'')
670 674 self._pcache = {}
671 675 self._nodemap_docket = None
672 676 self.index.clearcaches()
673 677 # The python code is the one responsible for validating the docket, we
674 678 # end up having to refresh it here.
675 679 use_nodemap = (
676 680 not self._inline
677 681 and self.nodemap_file is not None
678 682 and util.safehasattr(self.index, 'update_nodemap_data')
679 683 )
680 684 if use_nodemap:
681 685 nodemap_data = nodemaputil.persisted_data(self)
682 686 if nodemap_data is not None:
683 687 self._nodemap_docket = nodemap_data[0]
684 688 self.index.update_nodemap_data(*nodemap_data)
685 689
686 690 def rev(self, node):
687 691 try:
688 692 return self.index.rev(node)
689 693 except TypeError:
690 694 raise
691 695 except error.RevlogError:
692 696 # parsers.c radix tree lookup failed
693 697 if (
694 698 node == self.nodeconstants.wdirid
695 699 or node in self.nodeconstants.wdirfilenodeids
696 700 ):
697 701 raise error.WdirUnsupported
698 702 raise error.LookupError(node, self.indexfile, _(b'no node'))
699 703
700 704 # Accessors for index entries.
701 705
702 706 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
703 707 # are flags.
704 708 def start(self, rev):
705 709 return int(self.index[rev][0] >> 16)
706 710
707 711 def flags(self, rev):
708 712 return self.index[rev][0] & 0xFFFF
709 713
710 714 def length(self, rev):
711 715 return self.index[rev][1]
712 716
713 717 def sidedata_length(self, rev):
714 718 if self.version & 0xFFFF != REVLOGV2:
715 719 return 0
716 720 return self.index[rev][9]
717 721
718 722 def rawsize(self, rev):
719 723 """return the length of the uncompressed text for a given revision"""
720 724 l = self.index[rev][2]
721 725 if l >= 0:
722 726 return l
723 727
724 728 t = self.rawdata(rev)
725 729 return len(t)
726 730
727 731 def size(self, rev):
728 732 """length of non-raw text (processed by a "read" flag processor)"""
729 733 # fast path: if no "read" flag processor could change the content,
730 734 # size is rawsize. note: ELLIPSIS is known to not change the content.
731 735 flags = self.flags(rev)
732 736 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
733 737 return self.rawsize(rev)
734 738
735 739 return len(self.revision(rev, raw=False))
736 740
737 741 def chainbase(self, rev):
738 742 base = self._chainbasecache.get(rev)
739 743 if base is not None:
740 744 return base
741 745
742 746 index = self.index
743 747 iterrev = rev
744 748 base = index[iterrev][3]
745 749 while base != iterrev:
746 750 iterrev = base
747 751 base = index[iterrev][3]
748 752
749 753 self._chainbasecache[rev] = base
750 754 return base
751 755
752 756 def linkrev(self, rev):
753 757 return self.index[rev][4]
754 758
755 759 def parentrevs(self, rev):
756 760 try:
757 761 entry = self.index[rev]
758 762 except IndexError:
759 763 if rev == wdirrev:
760 764 raise error.WdirUnsupported
761 765 raise
762 766 if entry[5] == nullrev:
763 767 return entry[6], entry[5]
764 768 else:
765 769 return entry[5], entry[6]
766 770
767 771 # fast parentrevs(rev) where rev isn't filtered
768 772 _uncheckedparentrevs = parentrevs
769 773
770 774 def node(self, rev):
771 775 try:
772 776 return self.index[rev][7]
773 777 except IndexError:
774 778 if rev == wdirrev:
775 779 raise error.WdirUnsupported
776 780 raise
777 781
778 782 # Derived from index values.
779 783
780 784 def end(self, rev):
781 785 return self.start(rev) + self.length(rev)
782 786
783 787 def parents(self, node):
784 788 i = self.index
785 789 d = i[self.rev(node)]
786 790 # inline node() to avoid function call overhead
787 791 if d[5] == self.nullid:
788 792 return i[d[6]][7], i[d[5]][7]
789 793 else:
790 794 return i[d[5]][7], i[d[6]][7]
791 795
792 796 def chainlen(self, rev):
793 797 return self._chaininfo(rev)[0]
794 798
795 799 def _chaininfo(self, rev):
796 800 chaininfocache = self._chaininfocache
797 801 if rev in chaininfocache:
798 802 return chaininfocache[rev]
799 803 index = self.index
800 804 generaldelta = self._generaldelta
801 805 iterrev = rev
802 806 e = index[iterrev]
803 807 clen = 0
804 808 compresseddeltalen = 0
805 809 while iterrev != e[3]:
806 810 clen += 1
807 811 compresseddeltalen += e[1]
808 812 if generaldelta:
809 813 iterrev = e[3]
810 814 else:
811 815 iterrev -= 1
812 816 if iterrev in chaininfocache:
813 817 t = chaininfocache[iterrev]
814 818 clen += t[0]
815 819 compresseddeltalen += t[1]
816 820 break
817 821 e = index[iterrev]
818 822 else:
819 823 # Add text length of base since decompressing that also takes
820 824 # work. For cache hits the length is already included.
821 825 compresseddeltalen += e[1]
822 826 r = (clen, compresseddeltalen)
823 827 chaininfocache[rev] = r
824 828 return r
825 829
826 830 def _deltachain(self, rev, stoprev=None):
827 831 """Obtain the delta chain for a revision.
828 832
829 833 ``stoprev`` specifies a revision to stop at. If not specified, we
830 834 stop at the base of the chain.
831 835
832 836 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
833 837 revs in ascending order and ``stopped`` is a bool indicating whether
834 838 ``stoprev`` was hit.
835 839 """
836 840 # Try C implementation.
837 841 try:
838 842 return self.index.deltachain(rev, stoprev, self._generaldelta)
839 843 except AttributeError:
840 844 pass
841 845
842 846 chain = []
843 847
844 848 # Alias to prevent attribute lookup in tight loop.
845 849 index = self.index
846 850 generaldelta = self._generaldelta
847 851
848 852 iterrev = rev
849 853 e = index[iterrev]
850 854 while iterrev != e[3] and iterrev != stoprev:
851 855 chain.append(iterrev)
852 856 if generaldelta:
853 857 iterrev = e[3]
854 858 else:
855 859 iterrev -= 1
856 860 e = index[iterrev]
857 861
858 862 if iterrev == stoprev:
859 863 stopped = True
860 864 else:
861 865 chain.append(iterrev)
862 866 stopped = False
863 867
864 868 chain.reverse()
865 869 return chain, stopped
866 870
867 871 def ancestors(self, revs, stoprev=0, inclusive=False):
868 872 """Generate the ancestors of 'revs' in reverse revision order.
869 873 Does not generate revs lower than stoprev.
870 874
871 875 See the documentation for ancestor.lazyancestors for more details."""
872 876
873 877 # first, make sure start revisions aren't filtered
874 878 revs = list(revs)
875 879 checkrev = self.node
876 880 for r in revs:
877 881 checkrev(r)
878 882 # and we're sure ancestors aren't filtered as well
879 883
880 884 if rustancestor is not None:
881 885 lazyancestors = rustancestor.LazyAncestors
882 886 arg = self.index
883 887 else:
884 888 lazyancestors = ancestor.lazyancestors
885 889 arg = self._uncheckedparentrevs
886 890 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
887 891
888 892 def descendants(self, revs):
889 893 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
890 894
891 895 def findcommonmissing(self, common=None, heads=None):
892 896 """Return a tuple of the ancestors of common and the ancestors of heads
893 897 that are not ancestors of common. In revset terminology, we return the
894 898 tuple:
895 899
896 900 ::common, (::heads) - (::common)
897 901
898 902 The list is sorted by revision number, meaning it is
899 903 topologically sorted.
900 904
901 905 'heads' and 'common' are both lists of node IDs. If heads is
902 906 not supplied, uses all of the revlog's heads. If common is not
903 907 supplied, uses nullid."""
904 908 if common is None:
905 909 common = [self.nullid]
906 910 if heads is None:
907 911 heads = self.heads()
908 912
909 913 common = [self.rev(n) for n in common]
910 914 heads = [self.rev(n) for n in heads]
911 915
912 916 # we want the ancestors, but inclusive
913 917 class lazyset(object):
914 918 def __init__(self, lazyvalues):
915 919 self.addedvalues = set()
916 920 self.lazyvalues = lazyvalues
917 921
918 922 def __contains__(self, value):
919 923 return value in self.addedvalues or value in self.lazyvalues
920 924
921 925 def __iter__(self):
922 926 added = self.addedvalues
923 927 for r in added:
924 928 yield r
925 929 for r in self.lazyvalues:
926 930 if not r in added:
927 931 yield r
928 932
929 933 def add(self, value):
930 934 self.addedvalues.add(value)
931 935
932 936 def update(self, values):
933 937 self.addedvalues.update(values)
934 938
935 939 has = lazyset(self.ancestors(common))
936 940 has.add(nullrev)
937 941 has.update(common)
938 942
939 943 # take all ancestors from heads that aren't in has
940 944 missing = set()
941 945 visit = collections.deque(r for r in heads if r not in has)
942 946 while visit:
943 947 r = visit.popleft()
944 948 if r in missing:
945 949 continue
946 950 else:
947 951 missing.add(r)
948 952 for p in self.parentrevs(r):
949 953 if p not in has:
950 954 visit.append(p)
951 955 missing = list(missing)
952 956 missing.sort()
953 957 return has, [self.node(miss) for miss in missing]
954 958
955 959 def incrementalmissingrevs(self, common=None):
956 960 """Return an object that can be used to incrementally compute the
957 961 revision numbers of the ancestors of arbitrary sets that are not
958 962 ancestors of common. This is an ancestor.incrementalmissingancestors
959 963 object.
960 964
961 965 'common' is a list of revision numbers. If common is not supplied, uses
962 966 nullrev.
963 967 """
964 968 if common is None:
965 969 common = [nullrev]
966 970
967 971 if rustancestor is not None:
968 972 return rustancestor.MissingAncestors(self.index, common)
969 973 return ancestor.incrementalmissingancestors(self.parentrevs, common)
970 974
971 975 def findmissingrevs(self, common=None, heads=None):
972 976 """Return the revision numbers of the ancestors of heads that
973 977 are not ancestors of common.
974 978
975 979 More specifically, return a list of revision numbers corresponding to
976 980 nodes N such that every N satisfies the following constraints:
977 981
978 982 1. N is an ancestor of some node in 'heads'
979 983 2. N is not an ancestor of any node in 'common'
980 984
981 985 The list is sorted by revision number, meaning it is
982 986 topologically sorted.
983 987
984 988 'heads' and 'common' are both lists of revision numbers. If heads is
985 989 not supplied, uses all of the revlog's heads. If common is not
986 990 supplied, uses nullid."""
987 991 if common is None:
988 992 common = [nullrev]
989 993 if heads is None:
990 994 heads = self.headrevs()
991 995
992 996 inc = self.incrementalmissingrevs(common=common)
993 997 return inc.missingancestors(heads)
994 998
995 999 def findmissing(self, common=None, heads=None):
996 1000 """Return the ancestors of heads that are not ancestors of common.
997 1001
998 1002 More specifically, return a list of nodes N such that every N
999 1003 satisfies the following constraints:
1000 1004
1001 1005 1. N is an ancestor of some node in 'heads'
1002 1006 2. N is not an ancestor of any node in 'common'
1003 1007
1004 1008 The list is sorted by revision number, meaning it is
1005 1009 topologically sorted.
1006 1010
1007 1011 'heads' and 'common' are both lists of node IDs. If heads is
1008 1012 not supplied, uses all of the revlog's heads. If common is not
1009 1013 supplied, uses nullid."""
1010 1014 if common is None:
1011 1015 common = [self.nullid]
1012 1016 if heads is None:
1013 1017 heads = self.heads()
1014 1018
1015 1019 common = [self.rev(n) for n in common]
1016 1020 heads = [self.rev(n) for n in heads]
1017 1021
1018 1022 inc = self.incrementalmissingrevs(common=common)
1019 1023 return [self.node(r) for r in inc.missingancestors(heads)]
1020 1024
1021 1025 def nodesbetween(self, roots=None, heads=None):
1022 1026 """Return a topological path from 'roots' to 'heads'.
1023 1027
1024 1028 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1025 1029 topologically sorted list of all nodes N that satisfy both of
1026 1030 these constraints:
1027 1031
1028 1032 1. N is a descendant of some node in 'roots'
1029 1033 2. N is an ancestor of some node in 'heads'
1030 1034
1031 1035 Every node is considered to be both a descendant and an ancestor
1032 1036 of itself, so every reachable node in 'roots' and 'heads' will be
1033 1037 included in 'nodes'.
1034 1038
1035 1039 'outroots' is the list of reachable nodes in 'roots', i.e., the
1036 1040 subset of 'roots' that is returned in 'nodes'. Likewise,
1037 1041 'outheads' is the subset of 'heads' that is also in 'nodes'.
1038 1042
1039 1043 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1040 1044 unspecified, uses nullid as the only root. If 'heads' is
1041 1045 unspecified, uses list of all of the revlog's heads."""
1042 1046 nonodes = ([], [], [])
1043 1047 if roots is not None:
1044 1048 roots = list(roots)
1045 1049 if not roots:
1046 1050 return nonodes
1047 1051 lowestrev = min([self.rev(n) for n in roots])
1048 1052 else:
1049 1053 roots = [self.nullid] # Everybody's a descendant of nullid
1050 1054 lowestrev = nullrev
1051 1055 if (lowestrev == nullrev) and (heads is None):
1052 1056 # We want _all_ the nodes!
1053 1057 return (
1054 1058 [self.node(r) for r in self],
1055 1059 [self.nullid],
1056 1060 list(self.heads()),
1057 1061 )
1058 1062 if heads is None:
1059 1063 # All nodes are ancestors, so the latest ancestor is the last
1060 1064 # node.
1061 1065 highestrev = len(self) - 1
1062 1066 # Set ancestors to None to signal that every node is an ancestor.
1063 1067 ancestors = None
1064 1068 # Set heads to an empty dictionary for later discovery of heads
1065 1069 heads = {}
1066 1070 else:
1067 1071 heads = list(heads)
1068 1072 if not heads:
1069 1073 return nonodes
1070 1074 ancestors = set()
1071 1075 # Turn heads into a dictionary so we can remove 'fake' heads.
1072 1076 # Also, later we will be using it to filter out the heads we can't
1073 1077 # find from roots.
1074 1078 heads = dict.fromkeys(heads, False)
1075 1079 # Start at the top and keep marking parents until we're done.
1076 1080 nodestotag = set(heads)
1077 1081 # Remember where the top was so we can use it as a limit later.
1078 1082 highestrev = max([self.rev(n) for n in nodestotag])
1079 1083 while nodestotag:
1080 1084 # grab a node to tag
1081 1085 n = nodestotag.pop()
1082 1086 # Never tag nullid
1083 1087 if n == self.nullid:
1084 1088 continue
1085 1089 # A node's revision number represents its place in a
1086 1090 # topologically sorted list of nodes.
1087 1091 r = self.rev(n)
1088 1092 if r >= lowestrev:
1089 1093 if n not in ancestors:
1090 1094 # If we are possibly a descendant of one of the roots
1091 1095 # and we haven't already been marked as an ancestor
1092 1096 ancestors.add(n) # Mark as ancestor
1093 1097 # Add non-nullid parents to list of nodes to tag.
1094 1098 nodestotag.update(
1095 1099 [p for p in self.parents(n) if p != self.nullid]
1096 1100 )
1097 1101 elif n in heads: # We've seen it before, is it a fake head?
1098 1102 # So it is, real heads should not be the ancestors of
1099 1103 # any other heads.
1100 1104 heads.pop(n)
1101 1105 if not ancestors:
1102 1106 return nonodes
1103 1107 # Now that we have our set of ancestors, we want to remove any
1104 1108 # roots that are not ancestors.
1105 1109
1106 1110 # If one of the roots was nullid, everything is included anyway.
1107 1111 if lowestrev > nullrev:
1108 1112 # But, since we weren't, let's recompute the lowest rev to not
1109 1113 # include roots that aren't ancestors.
1110 1114
1111 1115 # Filter out roots that aren't ancestors of heads
1112 1116 roots = [root for root in roots if root in ancestors]
1113 1117 # Recompute the lowest revision
1114 1118 if roots:
1115 1119 lowestrev = min([self.rev(root) for root in roots])
1116 1120 else:
1117 1121 # No more roots? Return empty list
1118 1122 return nonodes
1119 1123 else:
1120 1124 # We are descending from nullid, and don't need to care about
1121 1125 # any other roots.
1122 1126 lowestrev = nullrev
1123 1127 roots = [self.nullid]
1124 1128 # Transform our roots list into a set.
1125 1129 descendants = set(roots)
1126 1130 # Also, keep the original roots so we can filter out roots that aren't
1127 1131 # 'real' roots (i.e. are descended from other roots).
1128 1132 roots = descendants.copy()
1129 1133 # Our topologically sorted list of output nodes.
1130 1134 orderedout = []
1131 1135 # Don't start at nullid since we don't want nullid in our output list,
1132 1136 # and if nullid shows up in descendants, empty parents will look like
1133 1137 # they're descendants.
1134 1138 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1135 1139 n = self.node(r)
1136 1140 isdescendant = False
1137 1141 if lowestrev == nullrev: # Everybody is a descendant of nullid
1138 1142 isdescendant = True
1139 1143 elif n in descendants:
1140 1144 # n is already a descendant
1141 1145 isdescendant = True
1142 1146 # This check only needs to be done here because all the roots
1143 1147 # will start being marked as descendants before the loop.
1144 1148 if n in roots:
1145 1149 # If n was a root, check if it's a 'real' root.
1146 1150 p = tuple(self.parents(n))
1147 1151 # If any of its parents are descendants, it's not a root.
1148 1152 if (p[0] in descendants) or (p[1] in descendants):
1149 1153 roots.remove(n)
1150 1154 else:
1151 1155 p = tuple(self.parents(n))
1152 1156 # A node is a descendant if either of its parents is a
1153 1157 # descendant. (We seeded the descendants set with the roots
1154 1158 # up there, remember?)
1155 1159 if (p[0] in descendants) or (p[1] in descendants):
1156 1160 descendants.add(n)
1157 1161 isdescendant = True
1158 1162 if isdescendant and ((ancestors is None) or (n in ancestors)):
1159 1163 # Only include nodes that are both descendants and ancestors.
1160 1164 orderedout.append(n)
1161 1165 if (ancestors is not None) and (n in heads):
1162 1166 # We're trying to figure out which heads are reachable
1163 1167 # from roots.
1164 1168 # Mark this head as having been reached
1165 1169 heads[n] = True
1166 1170 elif ancestors is None:
1167 1171 # Otherwise, we're trying to discover the heads.
1168 1172 # Assume this is a head because if it isn't, the next step
1169 1173 # will eventually remove it.
1170 1174 heads[n] = True
1171 1175 # But, obviously its parents aren't.
1172 1176 for p in self.parents(n):
1173 1177 heads.pop(p, None)
1174 1178 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1175 1179 roots = list(roots)
1176 1180 assert orderedout
1177 1181 assert roots
1178 1182 assert heads
1179 1183 return (orderedout, roots, heads)
1180 1184
1181 1185 def headrevs(self, revs=None):
1182 1186 if revs is None:
1183 1187 try:
1184 1188 return self.index.headrevs()
1185 1189 except AttributeError:
1186 1190 return self._headrevs()
1187 1191 if rustdagop is not None:
1188 1192 return rustdagop.headrevs(self.index, revs)
1189 1193 return dagop.headrevs(revs, self._uncheckedparentrevs)
1190 1194
1191 1195 def computephases(self, roots):
1192 1196 return self.index.computephasesmapsets(roots)
1193 1197
1194 1198 def _headrevs(self):
1195 1199 count = len(self)
1196 1200 if not count:
1197 1201 return [nullrev]
1198 1202 # we won't iter over filtered revs so nobody is a head at start
1199 1203 ishead = [0] * (count + 1)
1200 1204 index = self.index
1201 1205 for r in self:
1202 1206 ishead[r] = 1 # I may be a head
1203 1207 e = index[r]
1204 1208 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1205 1209 return [r for r, val in enumerate(ishead) if val]
1206 1210
1207 1211 def heads(self, start=None, stop=None):
1208 1212 """return the list of all nodes that have no children
1209 1213
1210 1214 if start is specified, only heads that are descendants of
1211 1215 start will be returned
1212 1216 if stop is specified, it will consider all the revs from stop
1213 1217 as if they had no children
1214 1218 """
1215 1219 if start is None and stop is None:
1216 1220 if not len(self):
1217 1221 return [self.nullid]
1218 1222 return [self.node(r) for r in self.headrevs()]
1219 1223
1220 1224 if start is None:
1221 1225 start = nullrev
1222 1226 else:
1223 1227 start = self.rev(start)
1224 1228
1225 1229 stoprevs = {self.rev(n) for n in stop or []}
1226 1230
1227 1231 revs = dagop.headrevssubset(
1228 1232 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1229 1233 )
1230 1234
1231 1235 return [self.node(rev) for rev in revs]
1232 1236
1233 1237 def children(self, node):
1234 1238 """find the children of a given node"""
1235 1239 c = []
1236 1240 p = self.rev(node)
1237 1241 for r in self.revs(start=p + 1):
1238 1242 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1239 1243 if prevs:
1240 1244 for pr in prevs:
1241 1245 if pr == p:
1242 1246 c.append(self.node(r))
1243 1247 elif p == nullrev:
1244 1248 c.append(self.node(r))
1245 1249 return c
1246 1250
1247 1251 def commonancestorsheads(self, a, b):
1248 1252 """calculate all the heads of the common ancestors of nodes a and b"""
1249 1253 a, b = self.rev(a), self.rev(b)
1250 1254 ancs = self._commonancestorsheads(a, b)
1251 1255 return pycompat.maplist(self.node, ancs)
1252 1256
1253 1257 def _commonancestorsheads(self, *revs):
1254 1258 """calculate all the heads of the common ancestors of revs"""
1255 1259 try:
1256 1260 ancs = self.index.commonancestorsheads(*revs)
1257 1261 except (AttributeError, OverflowError): # C implementation failed
1258 1262 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1259 1263 return ancs
1260 1264
1261 1265 def isancestor(self, a, b):
1262 1266 """return True if node a is an ancestor of node b
1263 1267
1264 1268 A revision is considered an ancestor of itself."""
1265 1269 a, b = self.rev(a), self.rev(b)
1266 1270 return self.isancestorrev(a, b)
1267 1271
1268 1272 def isancestorrev(self, a, b):
1269 1273 """return True if revision a is an ancestor of revision b
1270 1274
1271 1275 A revision is considered an ancestor of itself.
1272 1276
1273 1277 The implementation of this is trivial but the use of
1274 1278 reachableroots is not."""
1275 1279 if a == nullrev:
1276 1280 return True
1277 1281 elif a == b:
1278 1282 return True
1279 1283 elif a > b:
1280 1284 return False
1281 1285 return bool(self.reachableroots(a, [b], [a], includepath=False))
1282 1286
1283 1287 def reachableroots(self, minroot, heads, roots, includepath=False):
1284 1288 """return (heads(::(<roots> and <roots>::<heads>)))
1285 1289
1286 1290 If includepath is True, return (<roots>::<heads>)."""
1287 1291 try:
1288 1292 return self.index.reachableroots2(
1289 1293 minroot, heads, roots, includepath
1290 1294 )
1291 1295 except AttributeError:
1292 1296 return dagop._reachablerootspure(
1293 1297 self.parentrevs, minroot, roots, heads, includepath
1294 1298 )
1295 1299
1296 1300 def ancestor(self, a, b):
1297 1301 """calculate the "best" common ancestor of nodes a and b"""
1298 1302
1299 1303 a, b = self.rev(a), self.rev(b)
1300 1304 try:
1301 1305 ancs = self.index.ancestors(a, b)
1302 1306 except (AttributeError, OverflowError):
1303 1307 ancs = ancestor.ancestors(self.parentrevs, a, b)
1304 1308 if ancs:
1305 1309 # choose a consistent winner when there's a tie
1306 1310 return min(map(self.node, ancs))
1307 1311 return self.nullid
1308 1312
1309 1313 def _match(self, id):
1310 1314 if isinstance(id, int):
1311 1315 # rev
1312 1316 return self.node(id)
1313 1317 if len(id) == self.nodeconstants.nodelen:
1314 1318 # possibly a binary node
1315 1319 # odds of a binary node being all hex in ASCII are 1 in 10**25
1316 1320 try:
1317 1321 node = id
1318 1322 self.rev(node) # quick search the index
1319 1323 return node
1320 1324 except error.LookupError:
1321 1325 pass # may be partial hex id
1322 1326 try:
1323 1327 # str(rev)
1324 1328 rev = int(id)
1325 1329 if b"%d" % rev != id:
1326 1330 raise ValueError
1327 1331 if rev < 0:
1328 1332 rev = len(self) + rev
1329 1333 if rev < 0 or rev >= len(self):
1330 1334 raise ValueError
1331 1335 return self.node(rev)
1332 1336 except (ValueError, OverflowError):
1333 1337 pass
1334 1338 if len(id) == 2 * self.nodeconstants.nodelen:
1335 1339 try:
1336 1340 # a full hex nodeid?
1337 1341 node = bin(id)
1338 1342 self.rev(node)
1339 1343 return node
1340 1344 except (TypeError, error.LookupError):
1341 1345 pass
1342 1346
1343 1347 def _partialmatch(self, id):
1344 1348 # we don't care about wdirfilenodeids as they should always be full hashes
1345 1349 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1346 1350 try:
1347 1351 partial = self.index.partialmatch(id)
1348 1352 if partial and self.hasnode(partial):
1349 1353 if maybewdir:
1350 1354 # single 'ff...' match in radix tree, ambiguous with wdir
1351 1355 raise error.RevlogError
1352 1356 return partial
1353 1357 if maybewdir:
1354 1358 # no 'ff...' match in radix tree, wdir identified
1355 1359 raise error.WdirUnsupported
1356 1360 return None
1357 1361 except error.RevlogError:
1358 1362 # parsers.c radix tree lookup gave multiple matches
1359 1363 # fast path: for unfiltered changelog, radix tree is accurate
1360 1364 if not getattr(self, 'filteredrevs', None):
1361 1365 raise error.AmbiguousPrefixLookupError(
1362 1366 id, self.indexfile, _(b'ambiguous identifier')
1363 1367 )
1364 1368 # fall through to slow path that filters hidden revisions
1365 1369 except (AttributeError, ValueError):
1366 1370 # we are pure python, or key was too short to search radix tree
1367 1371 pass
1368 1372
1369 1373 if id in self._pcache:
1370 1374 return self._pcache[id]
1371 1375
1372 1376 if len(id) <= 40:
1373 1377 try:
1374 1378 # hex(node)[:...]
1375 1379 l = len(id) // 2 # grab an even number of digits
1376 1380 prefix = bin(id[: l * 2])
1377 1381 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1378 1382 nl = [
1379 1383 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1380 1384 ]
1381 1385 if self.nodeconstants.nullhex.startswith(id):
1382 1386 nl.append(self.nullid)
1383 1387 if len(nl) > 0:
1384 1388 if len(nl) == 1 and not maybewdir:
1385 1389 self._pcache[id] = nl[0]
1386 1390 return nl[0]
1387 1391 raise error.AmbiguousPrefixLookupError(
1388 1392 id, self.indexfile, _(b'ambiguous identifier')
1389 1393 )
1390 1394 if maybewdir:
1391 1395 raise error.WdirUnsupported
1392 1396 return None
1393 1397 except TypeError:
1394 1398 pass
1395 1399
1396 1400 def lookup(self, id):
1397 1401 """locate a node based on:
1398 1402 - revision number or str(revision number)
1399 1403 - nodeid or subset of hex nodeid
1400 1404 """
1401 1405 n = self._match(id)
1402 1406 if n is not None:
1403 1407 return n
1404 1408 n = self._partialmatch(id)
1405 1409 if n:
1406 1410 return n
1407 1411
1408 1412 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1409 1413
1410 1414 def shortest(self, node, minlength=1):
1411 1415 """Find the shortest unambiguous prefix that matches node."""
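# Illustration with made-up hashes: if this node's hex form begins with
# b'1f0e3d' and no other node in the index shares the prefix b'1f0',
# shortest() returns b'1f0' (subject to ``minlength`` and to the
# disambiguation against the all-'f' wdir pseudo-node handled below).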
1412 1416
1413 1417 def isvalid(prefix):
1414 1418 try:
1415 1419 matchednode = self._partialmatch(prefix)
1416 1420 except error.AmbiguousPrefixLookupError:
1417 1421 return False
1418 1422 except error.WdirUnsupported:
1419 1423 # single 'ff...' match
1420 1424 return True
1421 1425 if matchednode is None:
1422 1426 raise error.LookupError(node, self.indexfile, _(b'no node'))
1423 1427 return True
1424 1428
1425 1429 def maybewdir(prefix):
1426 1430 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1427 1431
1428 1432 hexnode = hex(node)
1429 1433
1430 1434 def disambiguate(hexnode, minlength):
1431 1435 """Disambiguate against wdirid."""
1432 1436 for length in range(minlength, len(hexnode) + 1):
1433 1437 prefix = hexnode[:length]
1434 1438 if not maybewdir(prefix):
1435 1439 return prefix
1436 1440
1437 1441 if not getattr(self, 'filteredrevs', None):
1438 1442 try:
1439 1443 length = max(self.index.shortest(node), minlength)
1440 1444 return disambiguate(hexnode, length)
1441 1445 except error.RevlogError:
1442 1446 if node != self.nodeconstants.wdirid:
1443 1447 raise error.LookupError(node, self.indexfile, _(b'no node'))
1444 1448 except AttributeError:
1445 1449 # Fall through to pure code
1446 1450 pass
1447 1451
1448 1452 if node == self.nodeconstants.wdirid:
1449 1453 for length in range(minlength, len(hexnode) + 1):
1450 1454 prefix = hexnode[:length]
1451 1455 if isvalid(prefix):
1452 1456 return prefix
1453 1457
1454 1458 for length in range(minlength, len(hexnode) + 1):
1455 1459 prefix = hexnode[:length]
1456 1460 if isvalid(prefix):
1457 1461 return disambiguate(hexnode, length)
1458 1462
1459 1463 def cmp(self, node, text):
1460 1464 """compare text with a given file revision
1461 1465
1462 1466 returns True if text is different from what is stored.
1463 1467 """
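# Note: the comparison recomputes the expected node hash from ``text`` and
# the stored parents, so the stored revision text never has to be
# reconstructed just to answer "did it change?".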
1464 1468 p1, p2 = self.parents(node)
1465 1469 return storageutil.hashrevisionsha1(text, p1, p2) != node
1466 1470
1467 1471 def _cachesegment(self, offset, data):
1468 1472 """Add a segment to the revlog cache.
1469 1473
1470 1474 Accepts an absolute offset and the data that is at that location.
1471 1475 """
1472 1476 o, d = self._chunkcache
1473 1477 # try to add to existing cache
1474 1478 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1475 1479 self._chunkcache = o, d + data
1476 1480 else:
1477 1481 self._chunkcache = offset, data
1478 1482
1479 1483 def _readsegment(self, offset, length, df=None):
1480 1484 """Load a segment of raw data from the revlog.
1481 1485
1482 1486 Accepts an absolute offset, length to read, and an optional existing
1483 1487 file handle to read from.
1484 1488
1485 1489 If an existing file handle is passed, it will be seeked and the
1486 1490 original seek position will NOT be restored.
1487 1491
1488 1492 Returns a str or buffer of raw byte data.
1489 1493
1490 1494 Raises if the requested number of bytes could not be read.
1491 1495 """
1492 1496 # Cache data both forward and backward around the requested
1493 1497 # data, in a fixed size window. This helps speed up operations
1494 1498 # involving reading the revlog backwards.
1495 1499 cachesize = self._chunkcachesize
1496 1500 realoffset = offset & ~(cachesize - 1)
1497 1501 reallength = (
1498 1502 (offset + length + cachesize) & ~(cachesize - 1)
1499 1503 ) - realoffset
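# Worked example of the alignment above (assuming a 64 KiB cache window,
# the usual default): for offset=70000 and length=1000,
#   realoffset = 70000 & ~65535 = 65536
#   reallength = ((70000 + 1000 + 65536) & ~65535) - 65536 = 65536
# so a single aligned 64 KiB block covering bytes 65536..131071 is read
# and cached, and the requested 1000 bytes are sliced out of it below.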
1500 1504 with self._datareadfp(df) as df:
1501 1505 df.seek(realoffset)
1502 1506 d = df.read(reallength)
1503 1507
1504 1508 self._cachesegment(realoffset, d)
1505 1509 if offset != realoffset or reallength != length:
1506 1510 startoffset = offset - realoffset
1507 1511 if len(d) - startoffset < length:
1508 1512 raise error.RevlogError(
1509 1513 _(
1510 1514 b'partial read of revlog %s; expected %d bytes from '
1511 1515 b'offset %d, got %d'
1512 1516 )
1513 1517 % (
1514 1518 self.indexfile if self._inline else self.datafile,
1515 1519 length,
1516 1520 realoffset,
1517 1521 len(d) - startoffset,
1518 1522 )
1519 1523 )
1520 1524
1521 1525 return util.buffer(d, startoffset, length)
1522 1526
1523 1527 if len(d) < length:
1524 1528 raise error.RevlogError(
1525 1529 _(
1526 1530 b'partial read of revlog %s; expected %d bytes from offset '
1527 1531 b'%d, got %d'
1528 1532 )
1529 1533 % (
1530 1534 self.indexfile if self._inline else self.datafile,
1531 1535 length,
1532 1536 offset,
1533 1537 len(d),
1534 1538 )
1535 1539 )
1536 1540
1537 1541 return d
1538 1542
1539 1543 def _getsegment(self, offset, length, df=None):
1540 1544 """Obtain a segment of raw data from the revlog.
1541 1545
1542 1546 Accepts an absolute offset, length of bytes to obtain, and an
1543 1547 optional file handle to the already-opened revlog. If the file
1544 1548 handle is used, its original seek position will not be preserved.
1545 1549
1546 1550 Requests for data may be returned from a cache.
1547 1551
1548 1552 Returns a str or a buffer instance of raw byte data.
1549 1553 """
1550 1554 o, d = self._chunkcache
1551 1555 l = len(d)
1552 1556
1553 1557 # is it in the cache?
1554 1558 cachestart = offset - o
1555 1559 cacheend = cachestart + length
1556 1560 if cachestart >= 0 and cacheend <= l:
1557 1561 if cachestart == 0 and cacheend == l:
1558 1562 return d # avoid a copy
1559 1563 return util.buffer(d, cachestart, cacheend - cachestart)
1560 1564
1561 1565 return self._readsegment(offset, length, df=df)
1562 1566
1563 1567 def _getsegmentforrevs(self, startrev, endrev, df=None):
1564 1568 """Obtain a segment of raw data corresponding to a range of revisions.
1565 1569
1566 1570 Accepts the start and end revisions and an optional already-open
1567 1571 file handle to be used for reading. If the file handle is read, its
1568 1572 seek position will not be preserved.
1569 1573
1570 1574 Requests for data may be satisfied by a cache.
1571 1575
1572 1576 Returns a 2-tuple of (offset, data) for the requested range of
1573 1577 revisions. Offset is the integer offset from the beginning of the
1574 1578 revlog and data is a str or buffer of the raw byte data.
1575 1579
1576 1580 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1577 1581 to determine where each revision's data begins and ends.
1578 1582 """
1579 1583 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1580 1584 # (functions are expensive).
1581 1585 index = self.index
1582 1586 istart = index[startrev]
1583 1587 start = int(istart[0] >> 16)
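# entry[0] packs the absolute data-file offset in its high bits and the
# 16 revision flag bits in its low 16 bits (see offset_type()), hence the
# ``>> 16`` to recover the byte offset.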
1584 1588 if startrev == endrev:
1585 1589 end = start + istart[1]
1586 1590 else:
1587 1591 iend = index[endrev]
1588 1592 end = int(iend[0] >> 16) + iend[1]
1589 1593
1590 1594 if self._inline:
1591 1595 start += (startrev + 1) * self.index.entry_size
1592 1596 end += (endrev + 1) * self.index.entry_size
1593 1597 length = end - start
1594 1598
1595 1599 return start, self._getsegment(start, length, df=df)
1596 1600
1597 1601 def _chunk(self, rev, df=None):
1598 1602 """Obtain a single decompressed chunk for a revision.
1599 1603
1600 1604 Accepts an integer revision and an optional already-open file handle
1601 1605 to be used for reading. If used, the seek position of the file will not
1602 1606 be preserved.
1603 1607
1604 1608 Returns a str holding uncompressed data for the requested revision.
1605 1609 """
1606 1610 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1607 1611
1608 1612 def _chunks(self, revs, df=None, targetsize=None):
1609 1613 """Obtain decompressed chunks for the specified revisions.
1610 1614
1611 1615 Accepts an iterable of numeric revisions that are assumed to be in
1612 1616 ascending order. Also accepts an optional already-open file handle
1613 1617 to be used for reading. If used, the seek position of the file will
1614 1618 not be preserved.
1615 1619
1616 1620 This function is similar to calling ``self._chunk()`` multiple times,
1617 1621 but is faster.
1618 1622
1619 1623 Returns a list with decompressed data for each requested revision.
1620 1624 """
1621 1625 if not revs:
1622 1626 return []
1623 1627 start = self.start
1624 1628 length = self.length
1625 1629 inline = self._inline
1626 1630 iosize = self.index.entry_size
1627 1631 buffer = util.buffer
1628 1632
1629 1633 l = []
1630 1634 ladd = l.append
1631 1635
1632 1636 if not self._withsparseread:
1633 1637 slicedchunks = (revs,)
1634 1638 else:
1635 1639 slicedchunks = deltautil.slicechunk(
1636 1640 self, revs, targetsize=targetsize
1637 1641 )
1638 1642
1639 1643 for revschunk in slicedchunks:
1640 1644 firstrev = revschunk[0]
1641 1645 # Skip trailing revisions with empty diff
1642 1646 for lastrev in revschunk[::-1]:
1643 1647 if length(lastrev) != 0:
1644 1648 break
1645 1649
1646 1650 try:
1647 1651 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1648 1652 except OverflowError:
1649 1653 # issue4215 - we can't cache a run of chunks greater than
1650 1654 # 2G on Windows
1651 1655 return [self._chunk(rev, df=df) for rev in revschunk]
1652 1656
1653 1657 decomp = self.decompress
1654 1658 for rev in revschunk:
1655 1659 chunkstart = start(rev)
1656 1660 if inline:
1657 1661 chunkstart += (rev + 1) * iosize
1658 1662 chunklength = length(rev)
1659 1663 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1660 1664
1661 1665 return l
1662 1666
1663 1667 def _chunkclear(self):
1664 1668 """Clear the raw chunk cache."""
1665 1669 self._chunkcache = (0, b'')
1666 1670
1667 1671 def deltaparent(self, rev):
1668 1672 """return deltaparent of the given revision"""
1669 1673 base = self.index[rev][3]
1670 1674 if base == rev:
1671 1675 return nullrev
1672 1676 elif self._generaldelta:
1673 1677 return base
1674 1678 else:
1675 1679 return rev - 1
1676 1680
1677 1681 def issnapshot(self, rev):
1678 1682 """tells whether rev is a snapshot"""
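# Roughly: a revision is a snapshot when it is stored either as a full
# text or as a delta against another snapshot rather than against one of
# its parents; the logic below (and the recursion on ``base``) checks
# exactly that.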
1679 1683 if not self._sparserevlog:
1680 1684 return self.deltaparent(rev) == nullrev
1681 1685 elif util.safehasattr(self.index, b'issnapshot'):
1682 1686 # directly assign the method to cache the testing and access
1683 1687 self.issnapshot = self.index.issnapshot
1684 1688 return self.issnapshot(rev)
1685 1689 if rev == nullrev:
1686 1690 return True
1687 1691 entry = self.index[rev]
1688 1692 base = entry[3]
1689 1693 if base == rev:
1690 1694 return True
1691 1695 if base == nullrev:
1692 1696 return True
1693 1697 p1 = entry[5]
1694 1698 p2 = entry[6]
1695 1699 if base == p1 or base == p2:
1696 1700 return False
1697 1701 return self.issnapshot(base)
1698 1702
1699 1703 def snapshotdepth(self, rev):
1700 1704 """number of snapshots in the chain before this one"""
1701 1705 if not self.issnapshot(rev):
1702 1706 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1703 1707 return len(self._deltachain(rev)[0]) - 1
1704 1708
1705 1709 def revdiff(self, rev1, rev2):
1706 1710 """return or calculate a delta between two revisions
1707 1711
1708 1712 The delta calculated is in binary form and is intended to be written to
1709 1713 revlog data directly. So this function needs raw revision data.
1710 1714 """
1711 1715 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1712 1716 return bytes(self._chunk(rev2))
1713 1717
1714 1718 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1715 1719
1716 1720 def _processflags(self, text, flags, operation, raw=False):
1717 1721 """deprecated entry point to access flag processors"""
1718 1722 msg = b'_processflag(...) use the specialized variant'
1719 1723 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1720 1724 if raw:
1721 1725 return text, flagutil.processflagsraw(self, text, flags)
1722 1726 elif operation == b'read':
1723 1727 return flagutil.processflagsread(self, text, flags)
1724 1728 else: # write operation
1725 1729 return flagutil.processflagswrite(self, text, flags)
1726 1730
1727 1731 def revision(self, nodeorrev, _df=None, raw=False):
1728 1732 """return an uncompressed revision of a given node or revision
1729 1733 number.
1730 1734
1731 1735 _df - an existing file handle to read from. (internal-only)
1732 1736 raw - an optional argument specifying if the revision data is to be
1733 1737 treated as raw data when applying flag transforms. 'raw' should be set
1734 1738 to True when generating changegroups or in debug commands.
1735 1739 """
1736 1740 if raw:
1737 1741 msg = (
1738 1742 b'revlog.revision(..., raw=True) is deprecated, '
1739 1743 b'use revlog.rawdata(...)'
1740 1744 )
1741 1745 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1742 1746 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1743 1747
1744 1748 def sidedata(self, nodeorrev, _df=None):
1745 1749 """a map of extra data related to the changeset but not part of the hash
1746 1750
1747 1751 This function currently returns a dictionary. However, a more advanced
1748 1752 mapping object will likely be used in the future for more
1749 1753 efficient/lazy code.
1750 1754 """
1751 1755 return self._revisiondata(nodeorrev, _df)[1]
1752 1756
1753 1757 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1754 1758 # deal with <nodeorrev> argument type
1755 1759 if isinstance(nodeorrev, int):
1756 1760 rev = nodeorrev
1757 1761 node = self.node(rev)
1758 1762 else:
1759 1763 node = nodeorrev
1760 1764 rev = None
1761 1765
1762 1766 # fast path the special `nullid` rev
1763 1767 if node == self.nullid:
1764 1768 return b"", {}
1765 1769
1766 1770 # ``rawtext`` is the text as stored inside the revlog. Might be the
1767 1771 # revision or might need to be processed to retrieve the revision.
1768 1772 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1769 1773
1770 1774 if self.version & 0xFFFF == REVLOGV2:
1771 1775 if rev is None:
1772 1776 rev = self.rev(node)
1773 1777 sidedata = self._sidedata(rev)
1774 1778 else:
1775 1779 sidedata = {}
1776 1780
1777 1781 if raw and validated:
1778 1782 # if we don't want to process the raw text and that raw
1779 1783 # text is cached, we can exit early.
1780 1784 return rawtext, sidedata
1781 1785 if rev is None:
1782 1786 rev = self.rev(node)
1783 1787 # the revlog's flag for this revision
1784 1788 # (usually alter its state or content)
1785 1789 flags = self.flags(rev)
1786 1790
1787 1791 if validated and flags == REVIDX_DEFAULT_FLAGS:
1788 1792 # no extra flags set, no flag processor runs, text = rawtext
1789 1793 return rawtext, sidedata
1790 1794
1791 1795 if raw:
1792 1796 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1793 1797 text = rawtext
1794 1798 else:
1795 1799 r = flagutil.processflagsread(self, rawtext, flags)
1796 1800 text, validatehash = r
1797 1801 if validatehash:
1798 1802 self.checkhash(text, node, rev=rev)
1799 1803 if not validated:
1800 1804 self._revisioncache = (node, rev, rawtext)
1801 1805
1802 1806 return text, sidedata
1803 1807
1804 1808 def _rawtext(self, node, rev, _df=None):
1805 1809 """return the possibly unvalidated rawtext for a revision
1806 1810
1807 1811 returns (rev, rawtext, validated)
1808 1812 """
1809 1813
1810 1814 # revision in the cache (could be useful to apply delta)
1811 1815 cachedrev = None
1812 1816 # An intermediate text to apply deltas to
1813 1817 basetext = None
1814 1818
1815 1819 # Check if we have the entry in cache
1816 1820 # The cache entry looks like (node, rev, rawtext)
1817 1821 if self._revisioncache:
1818 1822 if self._revisioncache[0] == node:
1819 1823 return (rev, self._revisioncache[2], True)
1820 1824 cachedrev = self._revisioncache[1]
1821 1825
1822 1826 if rev is None:
1823 1827 rev = self.rev(node)
1824 1828
1825 1829 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1826 1830 if stopped:
1827 1831 basetext = self._revisioncache[2]
1828 1832
1829 1833 # drop cache to save memory, the caller is expected to
1830 1834 # update self._revisioncache after validating the text
1831 1835 self._revisioncache = None
1832 1836
1833 1837 targetsize = None
1834 1838 rawsize = self.index[rev][2]
1835 1839 if 0 <= rawsize:
1836 1840 targetsize = 4 * rawsize
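# Heuristic sketch: when sparse reads are enabled, pass a target of about
# four times the size of the text being rebuilt so that slicing below can
# bound read amplification while still reading large contiguous segments.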
1837 1841
1838 1842 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1839 1843 if basetext is None:
1840 1844 basetext = bytes(bins[0])
1841 1845 bins = bins[1:]
1842 1846
1843 1847 rawtext = mdiff.patches(basetext, bins)
1844 1848 del basetext # let us have a chance to free memory early
1845 1849 return (rev, rawtext, False)
1846 1850
1847 1851 def _sidedata(self, rev):
1848 1852 """Return the sidedata for a given revision number."""
1849 1853 index_entry = self.index[rev]
1850 1854 sidedata_offset = index_entry[8]
1851 1855 sidedata_size = index_entry[9]
1852 1856
1853 1857 if self._inline:
1854 1858 sidedata_offset += self.index.entry_size * (1 + rev)
1855 1859 if sidedata_size == 0:
1856 1860 return {}
1857 1861
1858 1862 segment = self._getsegment(sidedata_offset, sidedata_size)
1859 1863 sidedata = sidedatautil.deserialize_sidedata(segment)
1860 1864 return sidedata
1861 1865
1862 1866 def rawdata(self, nodeorrev, _df=None):
1863 1867 """return an uncompressed raw data of a given node or revision number.
1864 1868
1865 1869 _df - an existing file handle to read from. (internal-only)
1866 1870 """
1867 1871 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1868 1872
1869 1873 def hash(self, text, p1, p2):
1870 1874 """Compute a node hash.
1871 1875
1872 1876 Available as a function so that subclasses can replace the hash
1873 1877 as needed.
1874 1878 """
1875 1879 return storageutil.hashrevisionsha1(text, p1, p2)
1876 1880
1877 1881 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1878 1882 """Check node hash integrity.
1879 1883
1880 1884 Available as a function so that subclasses can extend hash mismatch
1881 1885 behaviors as needed.
1882 1886 """
1883 1887 try:
1884 1888 if p1 is None and p2 is None:
1885 1889 p1, p2 = self.parents(node)
1886 1890 if node != self.hash(text, p1, p2):
1887 1891 # Clear the revision cache on hash failure. The revision cache
1888 1892 # only stores the raw revision and clearing the cache does have
1889 1893 # the side-effect that we won't have a cache hit when the raw
1890 1894 # revision data is accessed. But this case should be rare and
1891 1895 # it is extra work to teach the cache about the hash
1892 1896 # verification state.
1893 1897 if self._revisioncache and self._revisioncache[0] == node:
1894 1898 self._revisioncache = None
1895 1899
1896 1900 revornode = rev
1897 1901 if revornode is None:
1898 1902 revornode = templatefilters.short(hex(node))
1899 1903 raise error.RevlogError(
1900 1904 _(b"integrity check failed on %s:%s")
1901 1905 % (self.indexfile, pycompat.bytestr(revornode))
1902 1906 )
1903 1907 except error.RevlogError:
1904 1908 if self._censorable and storageutil.iscensoredtext(text):
1905 1909 raise error.CensoredNodeError(self.indexfile, node, text)
1906 1910 raise
1907 1911
1908 1912 def _enforceinlinesize(self, tr, fp=None):
1909 1913 """Check if the revlog is too big for inline and convert if so.
1910 1914
1911 1915 This should be called after revisions are added to the revlog. If the
1912 1916 revlog has grown too large to be an inline revlog, it will convert it
1913 1917 to use multiple index and data files.
1914 1918 """
1915 1919 tiprev = len(self) - 1
1916 1920 if (
1917 1921 not self._inline
1918 1922 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1919 1923 ):
1920 1924 return
1921 1925
1922 1926 troffset = tr.findoffset(self.indexfile)
1923 1927 if troffset is None:
1924 1928 raise error.RevlogError(
1925 1929 _(b"%s not found in the transaction") % self.indexfile
1926 1930 )
1927 1931 trindex = 0
1928 1932 tr.add(self.datafile, 0)
1929 1933
1930 1934 if fp:
1931 1935 fp.flush()
1932 1936 fp.close()
1933 1937 # We can't use the cached file handle after close(). So prevent
1934 1938 # its usage.
1935 1939 self._writinghandles = None
1936 1940
1937 1941 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1938 1942 for r in self:
1939 1943 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1940 1944 if troffset <= self.start(r):
1941 1945 trindex = r
1942 1946
1943 1947 with self._indexfp(b'w') as fp:
1944 1948 self.version &= ~FLAG_INLINE_DATA
1945 1949 self._inline = False
1946 1950 for i in self:
1947 1951 e = self.index.entry_binary(i)
1948 1952 if i == 0:
1949 1953 header = self.index.pack_header(self.version)
1950 1954 e = header + e
1951 1955 fp.write(e)
1952 1956
1953 1957 # the temp file replaces the real index when we exit the context
1954 1958 # manager
1955 1959
1956 1960 tr.replace(self.indexfile, trindex * self.index.entry_size)
1957 1961 nodemaputil.setup_persistent_nodemap(tr, self)
1958 1962 self._chunkclear()
1959 1963
1960 1964 def _nodeduplicatecallback(self, transaction, node):
1961 1965 """called when trying to add a node already stored."""
1962 1966
1963 1967 def addrevision(
1964 1968 self,
1965 1969 text,
1966 1970 transaction,
1967 1971 link,
1968 1972 p1,
1969 1973 p2,
1970 1974 cachedelta=None,
1971 1975 node=None,
1972 1976 flags=REVIDX_DEFAULT_FLAGS,
1973 1977 deltacomputer=None,
1974 1978 sidedata=None,
1975 1979 ):
1976 1980 """add a revision to the log
1977 1981
1978 1982 text - the revision data to add
1979 1983 transaction - the transaction object used for rollback
1980 1984 link - the linkrev data to add
1981 1985 p1, p2 - the parent nodeids of the revision
1982 1986 cachedelta - an optional precomputed delta
1983 1987 node - nodeid of revision; typically node is not specified, and it is
1984 1988 computed by default as hash(text, p1, p2), however subclasses might
1985 1989 use a different hashing method (and override checkhash() in such cases)
1986 1990 flags - the known flags to set on the revision
1987 1991 deltacomputer - an optional deltacomputer instance shared between
1988 1992 multiple calls
1989 1993 """
1990 1994 if link == nullrev:
1991 1995 raise error.RevlogError(
1992 1996 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1993 1997 )
1994 1998
1995 1999 if sidedata is None:
1996 2000 sidedata = {}
1997 2001 elif not self.hassidedata:
1998 2002 raise error.ProgrammingError(
1999 2003 _(b"trying to add sidedata to a revlog that does not support them")
2000 2004 )
2001 2005
2002 2006 if flags:
2003 2007 node = node or self.hash(text, p1, p2)
2004 2008
2005 2009 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2006 2010
2007 2011 # If the flag processor modifies the revision data, ignore any provided
2008 2012 # cachedelta.
2009 2013 if rawtext != text:
2010 2014 cachedelta = None
2011 2015
2012 2016 if len(rawtext) > _maxentrysize:
2013 2017 raise error.RevlogError(
2014 2018 _(
2015 2019 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2016 2020 )
2017 2021 % (self.indexfile, len(rawtext))
2018 2022 )
2019 2023
2020 2024 node = node or self.hash(rawtext, p1, p2)
2021 2025 rev = self.index.get_rev(node)
2022 2026 if rev is not None:
2023 2027 return rev
2024 2028
2025 2029 if validatehash:
2026 2030 self.checkhash(rawtext, node, p1=p1, p2=p2)
2027 2031
2028 2032 return self.addrawrevision(
2029 2033 rawtext,
2030 2034 transaction,
2031 2035 link,
2032 2036 p1,
2033 2037 p2,
2034 2038 node,
2035 2039 flags,
2036 2040 cachedelta=cachedelta,
2037 2041 deltacomputer=deltacomputer,
2038 2042 sidedata=sidedata,
2039 2043 )
2040 2044
2041 2045 def addrawrevision(
2042 2046 self,
2043 2047 rawtext,
2044 2048 transaction,
2045 2049 link,
2046 2050 p1,
2047 2051 p2,
2048 2052 node,
2049 2053 flags,
2050 2054 cachedelta=None,
2051 2055 deltacomputer=None,
2052 2056 sidedata=None,
2053 2057 ):
2054 2058 """add a raw revision with known flags, node and parents
2055 2059 useful when reusing a revision not stored in this revlog (e.g. received
2056 2060 over the wire, or read from an external bundle).
2057 2061 """
2058 2062 dfh = None
2059 2063 if not self._inline:
2060 2064 dfh = self._datafp(b"a+")
2061 2065 ifh = self._indexfp(b"a+")
2062 2066 try:
2063 2067 return self._addrevision(
2064 2068 node,
2065 2069 rawtext,
2066 2070 transaction,
2067 2071 link,
2068 2072 p1,
2069 2073 p2,
2070 2074 flags,
2071 2075 cachedelta,
2072 2076 ifh,
2073 2077 dfh,
2074 2078 deltacomputer=deltacomputer,
2075 2079 sidedata=sidedata,
2076 2080 )
2077 2081 finally:
2078 2082 if dfh:
2079 2083 dfh.close()
2080 2084 ifh.close()
2081 2085
2082 2086 def compress(self, data):
2083 2087 """Generate a possibly-compressed representation of data."""
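# Return convention (a sketch of the cases handled below): a (header, data)
# pair that callers concatenate when writing.
#   (b'', <engine output>) - the compression engine's own header is already
#                            embedded in the returned data
#   (b'', data)            - data starting with '\0' is stored as-is
#   (b'u', data)           - literal, uncompressed data
# decompress() routes on that first byte.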
2084 2088 if not data:
2085 2089 return b'', data
2086 2090
2087 2091 compressed = self._compressor.compress(data)
2088 2092
2089 2093 if compressed:
2090 2094 # The revlog compressor added the header in the returned data.
2091 2095 return b'', compressed
2092 2096
2093 2097 if data[0:1] == b'\0':
2094 2098 return b'', data
2095 2099 return b'u', data
2096 2100
2097 2101 def decompress(self, data):
2098 2102 """Decompress a revlog chunk.
2099 2103
2100 2104 The chunk is expected to begin with a header identifying the
2101 2105 format type so it can be routed to an appropriate decompressor.
2102 2106 """
2103 2107 if not data:
2104 2108 return data
2105 2109
2106 2110 # Revlogs are read much more frequently than they are written and many
2107 2111 # chunks only take microseconds to decompress, so performance is
2108 2112 # important here.
2109 2113 #
2110 2114 # We can make a few assumptions about revlogs:
2111 2115 #
2112 2116 # 1) the majority of chunks will be compressed (as opposed to inline
2113 2117 # raw data).
2114 2118 # 2) decompressing *any* data will likely be at least 10x slower than
2115 2119 # returning raw inline data.
2116 2120 # 3) we want to prioritize common and officially supported compression
2117 2121 # engines
2118 2122 #
2119 2123 # It follows that we want to optimize for "decompress compressed data
2120 2124 # when encoded with common and officially supported compression engines"
2121 2125 # case over "raw data" and "data encoded by less common or non-official
2122 2126 # compression engines." That is why we have the inline lookup first
2123 2127 # followed by the compengines lookup.
2124 2128 #
2125 2129 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2126 2130 # compressed chunks. And this matters for changelog and manifest reads.
2127 2131 t = data[0:1]
2128 2132
2129 2133 if t == b'x':
2130 2134 try:
2131 2135 return _zlibdecompress(data)
2132 2136 except zlib.error as e:
2133 2137 raise error.RevlogError(
2134 2138 _(b'revlog decompress error: %s')
2135 2139 % stringutil.forcebytestr(e)
2136 2140 )
2137 2141 # '\0' is more common than 'u' so it goes first.
2138 2142 elif t == b'\0':
2139 2143 return data
2140 2144 elif t == b'u':
2141 2145 return util.buffer(data, 1)
2142 2146
2143 2147 try:
2144 2148 compressor = self._decompressors[t]
2145 2149 except KeyError:
2146 2150 try:
2147 2151 engine = util.compengines.forrevlogheader(t)
2148 2152 compressor = engine.revlogcompressor(self._compengineopts)
2149 2153 self._decompressors[t] = compressor
2150 2154 except KeyError:
2151 2155 raise error.RevlogError(
2152 2156 _(b'unknown compression type %s') % binascii.hexlify(t)
2153 2157 )
2154 2158
2155 2159 return compressor.decompress(data)
2156 2160
2157 2161 def _addrevision(
2158 2162 self,
2159 2163 node,
2160 2164 rawtext,
2161 2165 transaction,
2162 2166 link,
2163 2167 p1,
2164 2168 p2,
2165 2169 flags,
2166 2170 cachedelta,
2167 2171 ifh,
2168 2172 dfh,
2169 2173 alwayscache=False,
2170 2174 deltacomputer=None,
2171 2175 sidedata=None,
2172 2176 ):
2173 2177 """internal function to add revisions to the log
2174 2178
2175 2179 see addrevision for argument descriptions.
2176 2180
2177 2181 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2178 2182
2179 2183 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2180 2184 be used.
2181 2185
2182 2186 invariants:
2183 2187 - rawtext is optional (can be None); if not set, cachedelta must be set.
2184 2188 If both are set, they must correspond to each other.
2185 2189 """
2186 2190 if node == self.nullid:
2187 2191 raise error.RevlogError(
2188 2192 _(b"%s: attempt to add null revision") % self.indexfile
2189 2193 )
2190 2194 if (
2191 2195 node == self.nodeconstants.wdirid
2192 2196 or node in self.nodeconstants.wdirfilenodeids
2193 2197 ):
2194 2198 raise error.RevlogError(
2195 2199 _(b"%s: attempt to add wdir revision") % self.indexfile
2196 2200 )
2197 2201
2198 2202 if self._inline:
2199 2203 fh = ifh
2200 2204 else:
2201 2205 fh = dfh
2202 2206
2203 2207 btext = [rawtext]
2204 2208
2205 2209 curr = len(self)
2206 2210 prev = curr - 1
2207 2211
2208 2212 offset = self._get_data_offset(prev)
2209 2213
2210 2214 if self._concurrencychecker:
2211 2215 if self._inline:
2212 2216 # offset is "as if" it were in the .d file, so we need to add on
2213 2217 # the size of the entry metadata.
2214 2218 self._concurrencychecker(
2215 2219 ifh, self.indexfile, offset + curr * self.index.entry_size
2216 2220 )
2217 2221 else:
2218 2222 # Entries in the .i are a consistent size.
2219 2223 self._concurrencychecker(
2220 2224 ifh, self.indexfile, curr * self.index.entry_size
2221 2225 )
2222 2226 self._concurrencychecker(dfh, self.datafile, offset)
2223 2227
2224 2228 p1r, p2r = self.rev(p1), self.rev(p2)
2225 2229
2226 2230 # full versions are inserted when the needed deltas
2227 2231 # become comparable to the uncompressed text
2228 2232 if rawtext is None:
2229 2233 # need the rawtext size before it was changed by flag processors, which is
2230 2234 # the non-raw size. use revlog explicitly to avoid filelog's extra
2231 2235 # logic that might remove metadata size.
2232 2236 textlen = mdiff.patchedsize(
2233 2237 revlog.size(self, cachedelta[0]), cachedelta[1]
2234 2238 )
2235 2239 else:
2236 2240 textlen = len(rawtext)
2237 2241
2238 2242 if deltacomputer is None:
2239 2243 deltacomputer = deltautil.deltacomputer(self)
2240 2244
2241 2245 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2242 2246
2243 2247 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2244 2248
2245 2249 if sidedata:
2246 2250 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2247 2251 sidedata_offset = offset + deltainfo.deltalen
2248 2252 else:
2249 2253 serialized_sidedata = b""
2250 2254 # Don't store the offset if the sidedata is empty; that way
2251 2255 # empty sidedata is easy to detect and is no different
2252 2256 # from sidedata we add manually.
2253 2257 sidedata_offset = 0
2254 2258
2255 2259 e = (
2256 2260 offset_type(offset, flags),
2257 2261 deltainfo.deltalen,
2258 2262 textlen,
2259 2263 deltainfo.base,
2260 2264 link,
2261 2265 p1r,
2262 2266 p2r,
2263 2267 node,
2264 2268 sidedata_offset,
2265 2269 len(serialized_sidedata),
2266 2270 )
2267 2271
2268 2272 if self.version & 0xFFFF != REVLOGV2:
2269 2273 e = e[:8]
2270 2274
2271 2275 self.index.append(e)
2272 2276 entry = self.index.entry_binary(curr)
2273 2277 if curr == 0:
2274 2278 header = self.index.pack_header(self.version)
2275 2279 entry = header + entry
2276 2280 self._writeentry(
2277 2281 transaction,
2278 2282 ifh,
2279 2283 dfh,
2280 2284 entry,
2281 2285 deltainfo.data,
2282 2286 link,
2283 2287 offset,
2284 2288 serialized_sidedata,
2285 2289 )
2286 2290
2287 2291 rawtext = btext[0]
2288 2292
2289 2293 if alwayscache and rawtext is None:
2290 2294 rawtext = deltacomputer.buildtext(revinfo, fh)
2291 2295
2292 2296 if type(rawtext) == bytes: # only accept immutable objects
2293 2297 self._revisioncache = (node, curr, rawtext)
2294 2298 self._chainbasecache[curr] = deltainfo.chainbase
2295 2299 return curr
2296 2300
2297 2301 def _get_data_offset(self, prev):
2298 2302 """Returns the current offset in the (in-transaction) data file.
2299 2303 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2300 2304 file to store that information: since sidedata can be rewritten to the
2301 2305 end of the data file within a transaction, you can have cases where, for
2302 2306 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2303 2307 to `n - 1`'s sidedata being written after `n`'s data.
2304 2308
2305 2309 TODO cache this in a docket file before getting out of experimental."""
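# Illustration with made-up numbers: if rev 0 has end-of-data 500 with
# sidedata at offset 1000 and size 100 (rewritten late in the transaction),
# and rev 1 has end-of-data 1000 with no sidedata, the next write must
# start at max(1000, 1000 + 100) = 1100 rather than at self.end(tip).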
2306 2310 if self.version & 0xFFFF != REVLOGV2:
2307 2311 return self.end(prev)
2308 2312
2309 2313 offset = 0
2310 2314 for rev, entry in enumerate(self.index):
2311 2315 sidedata_end = entry[8] + entry[9]
2312 2316 # Sidedata for a previous rev has potentially been written after
2313 2317 # this rev's end, so take the max.
2314 2318 offset = max(self.end(rev), offset, sidedata_end)
2315 2319 return offset
2316 2320
2317 2321 def _writeentry(
2318 2322 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2319 2323 ):
2320 2324 # Files opened in a+ mode have inconsistent behavior on various
2321 2325 # platforms. Windows requires that a file positioning call be made
2322 2326 # when the file handle transitions between reads and writes. See
2323 2327 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2324 2328 # platforms, Python or the platform itself can be buggy. Some versions
2325 2329 # of Solaris have been observed to not append at the end of the file
2326 2330 # if the file was seeked to before the end. See issue4943 for more.
2327 2331 #
2328 2332 # We work around this issue by inserting a seek() before writing.
2329 2333 # Note: This is likely not necessary on Python 3. However, because
2330 2334 # the file handle is reused for reads and may be seeked there, we need
2331 2335 # to be careful before changing this.
2332 2336 ifh.seek(0, os.SEEK_END)
2333 2337 if dfh:
2334 2338 dfh.seek(0, os.SEEK_END)
2335 2339
2336 2340 curr = len(self) - 1
2337 2341 if not self._inline:
2338 2342 transaction.add(self.datafile, offset)
2339 2343 transaction.add(self.indexfile, curr * len(entry))
2340 2344 if data[0]:
2341 2345 dfh.write(data[0])
2342 2346 dfh.write(data[1])
2343 2347 if sidedata:
2344 2348 dfh.write(sidedata)
2345 2349 ifh.write(entry)
2346 2350 else:
2347 2351 offset += curr * self.index.entry_size
2348 2352 transaction.add(self.indexfile, offset)
2349 2353 ifh.write(entry)
2350 2354 ifh.write(data[0])
2351 2355 ifh.write(data[1])
2352 2356 if sidedata:
2353 2357 ifh.write(sidedata)
2354 2358 self._enforceinlinesize(transaction, ifh)
2355 2359 nodemaputil.setup_persistent_nodemap(transaction, self)
2356 2360
2357 2361 def addgroup(
2358 2362 self,
2359 2363 deltas,
2360 2364 linkmapper,
2361 2365 transaction,
2362 2366 alwayscache=False,
2363 2367 addrevisioncb=None,
2364 2368 duplicaterevisioncb=None,
2365 2369 ):
2366 2370 """
2367 2371 add a delta group
2368 2372
2369 2373 given a set of deltas, add them to the revision log. the
2370 2374 first delta is against its parent, which should be in our
2371 2375 log, the rest are against the previous delta.
2372 2376
2373 2377 If ``addrevisioncb`` is defined, it will be called with arguments of
2374 2378 this revlog and the node that was added.
2375 2379 """
2376 2380
2377 2381 if self._writinghandles:
2378 2382 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2379 2383
2380 2384 r = len(self)
2381 2385 end = 0
2382 2386 if r:
2383 2387 end = self.end(r - 1)
2384 2388 ifh = self._indexfp(b"a+")
2385 2389 isize = r * self.index.entry_size
2386 2390 if self._inline:
2387 2391 transaction.add(self.indexfile, end + isize)
2388 2392 dfh = None
2389 2393 else:
2390 2394 transaction.add(self.indexfile, isize)
2391 2395 transaction.add(self.datafile, end)
2392 2396 dfh = self._datafp(b"a+")
2393 2397
2394 2398 def flush():
2395 2399 if dfh:
2396 2400 dfh.flush()
2397 2401 ifh.flush()
2398 2402
2399 2403 self._writinghandles = (ifh, dfh)
2400 2404 empty = True
2401 2405
2402 2406 try:
2403 2407 deltacomputer = deltautil.deltacomputer(self)
2404 2408 # loop through our set of deltas
2405 2409 for data in deltas:
2406 2410 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2407 2411 link = linkmapper(linknode)
2408 2412 flags = flags or REVIDX_DEFAULT_FLAGS
2409 2413
2410 2414 rev = self.index.get_rev(node)
2411 2415 if rev is not None:
2412 2416 # this can happen if two branches make the same change
2413 2417 self._nodeduplicatecallback(transaction, rev)
2414 2418 if duplicaterevisioncb:
2415 2419 duplicaterevisioncb(self, rev)
2416 2420 empty = False
2417 2421 continue
2418 2422
2419 2423 for p in (p1, p2):
2420 2424 if not self.index.has_node(p):
2421 2425 raise error.LookupError(
2422 2426 p, self.indexfile, _(b'unknown parent')
2423 2427 )
2424 2428
2425 2429 if not self.index.has_node(deltabase):
2426 2430 raise error.LookupError(
2427 2431 deltabase, self.indexfile, _(b'unknown delta base')
2428 2432 )
2429 2433
2430 2434 baserev = self.rev(deltabase)
2431 2435
2432 2436 if baserev != nullrev and self.iscensored(baserev):
2433 2437 # if base is censored, delta must be full replacement in a
2434 2438 # single patch operation
2435 2439 hlen = struct.calcsize(b">lll")
2436 2440 oldlen = self.rawsize(baserev)
2437 2441 newlen = len(delta) - hlen
2438 2442 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2439 2443 raise error.CensoredBaseError(
2440 2444 self.indexfile, self.node(baserev)
2441 2445 )
2442 2446
2443 2447 if not flags and self._peek_iscensored(baserev, delta, flush):
2444 2448 flags |= REVIDX_ISCENSORED
2445 2449
2446 2450 # We assume consumers of addrevisioncb will want to retrieve
2447 2451 # the added revision, which will require a call to
2448 2452 # revision(). revision() will fast path if there is a cache
2449 2453 # hit. So, we tell _addrevision() to always cache in this case.
2450 2454 # We're only using addgroup() in the context of changegroup
2451 2455 # generation so the revision data can always be handled as raw
2452 2456 # by the flagprocessor.
2453 2457 rev = self._addrevision(
2454 2458 node,
2455 2459 None,
2456 2460 transaction,
2457 2461 link,
2458 2462 p1,
2459 2463 p2,
2460 2464 flags,
2461 2465 (baserev, delta),
2462 2466 ifh,
2463 2467 dfh,
2464 2468 alwayscache=alwayscache,
2465 2469 deltacomputer=deltacomputer,
2466 2470 sidedata=sidedata,
2467 2471 )
2468 2472
2469 2473 if addrevisioncb:
2470 2474 addrevisioncb(self, rev)
2471 2475 empty = False
2472 2476
2473 2477 if not dfh and not self._inline:
2474 2478 # addrevision switched from inline to conventional
2475 2479 # reopen the index
2476 2480 ifh.close()
2477 2481 dfh = self._datafp(b"a+")
2478 2482 ifh = self._indexfp(b"a+")
2479 2483 self._writinghandles = (ifh, dfh)
2480 2484 finally:
2481 2485 self._writinghandles = None
2482 2486
2483 2487 if dfh:
2484 2488 dfh.close()
2485 2489 ifh.close()
2486 2490 return not empty
2487 2491
2488 2492 def iscensored(self, rev):
2489 2493 """Check if a file revision is censored."""
2490 2494 if not self._censorable:
2491 2495 return False
2492 2496
2493 2497 return self.flags(rev) & REVIDX_ISCENSORED
2494 2498
2495 2499 def _peek_iscensored(self, baserev, delta, flush):
2496 2500 """Quickly check if a delta produces a censored revision."""
2497 2501 if not self._censorable:
2498 2502 return False
2499 2503
2500 2504 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2501 2505
2502 2506 def getstrippoint(self, minlink):
2503 2507 """find the minimum rev that must be stripped to strip the linkrev
2504 2508
2505 2509 Returns a tuple containing the minimum rev and a set of all revs that
2506 2510 have linkrevs that will be broken by this strip.
2507 2511 """
2508 2512 return storageutil.resolvestripinfo(
2509 2513 minlink,
2510 2514 len(self) - 1,
2511 2515 self.headrevs(),
2512 2516 self.linkrev,
2513 2517 self.parentrevs,
2514 2518 )
2515 2519
2516 2520 def strip(self, minlink, transaction):
2517 2521 """truncate the revlog on the first revision with a linkrev >= minlink
2518 2522
2519 2523 This function is called when we're stripping revision minlink and
2520 2524 its descendants from the repository.
2521 2525
2522 2526 We have to remove all revisions with linkrev >= minlink, because
2523 2527 the equivalent changelog revisions will be renumbered after the
2524 2528 strip.
2525 2529
2526 2530 So we truncate the revlog on the first of these revisions, and
2527 2531 trust that the caller has saved the revisions that shouldn't be
2528 2532 removed and that it'll re-add them after this truncation.
2529 2533 """
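# For example (hypothetical numbers): if ``rev`` resolves to 5, the
# truncation points registered below are self.start(5) for the data file
# and 5 * entry_size for the index in the non-inline case, or
# self.start(5) + 5 * entry_size for the single inline file.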
2530 2534 if len(self) == 0:
2531 2535 return
2532 2536
2533 2537 rev, _ = self.getstrippoint(minlink)
2534 2538 if rev == len(self):
2535 2539 return
2536 2540
2537 2541 # first truncate the files on disk
2538 2542 end = self.start(rev)
2539 2543 if not self._inline:
2540 2544 transaction.add(self.datafile, end)
2541 2545 end = rev * self.index.entry_size
2542 2546 else:
2543 2547 end += rev * self.index.entry_size
2544 2548
2545 2549 transaction.add(self.indexfile, end)
2546 2550
2547 2551 # then reset internal state in memory to forget those revisions
2548 2552 self._revisioncache = None
2549 2553 self._chaininfocache = util.lrucachedict(500)
2550 2554 self._chunkclear()
2551 2555
2552 2556 del self.index[rev:-1]
2553 2557
2554 2558 def checksize(self):
2555 2559 """Check size of index and data files
2556 2560
2557 2561 return a (dd, di) tuple.
2558 2562 - dd: extra bytes for the "data" file
2559 2563 - di: extra bytes for the "index" file
2560 2564
2561 2565 A healthy revlog will return (0, 0).
2562 2566 """
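# Reading the result (a sketch): dd > 0 means the data file extends past
# where the index says it should end, dd < 0 means it is truncated; di > 0
# typically means a trailing partial index entry, e.g. from an interrupted
# write.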
2563 2567 expected = 0
2564 2568 if len(self):
2565 2569 expected = max(0, self.end(len(self) - 1))
2566 2570
2567 2571 try:
2568 2572 with self._datafp() as f:
2569 2573 f.seek(0, io.SEEK_END)
2570 2574 actual = f.tell()
2571 2575 dd = actual - expected
2572 2576 except IOError as inst:
2573 2577 if inst.errno != errno.ENOENT:
2574 2578 raise
2575 2579 dd = 0
2576 2580
2577 2581 try:
2578 2582 f = self.opener(self.indexfile)
2579 2583 f.seek(0, io.SEEK_END)
2580 2584 actual = f.tell()
2581 2585 f.close()
2582 2586 s = self.index.entry_size
2583 2587 i = max(0, actual // s)
2584 2588 di = actual - (i * s)
2585 2589 if self._inline:
2586 2590 databytes = 0
2587 2591 for r in self:
2588 2592 databytes += max(0, self.length(r))
2589 2593 dd = 0
2590 2594 di = actual - len(self) * s - databytes
2591 2595 except IOError as inst:
2592 2596 if inst.errno != errno.ENOENT:
2593 2597 raise
2594 2598 di = 0
2595 2599
2596 2600 return (dd, di)
2597 2601
2598 2602 def files(self):
2599 2603 res = [self.indexfile]
2600 2604 if not self._inline:
2601 2605 res.append(self.datafile)
2602 2606 return res
2603 2607
2604 2608 def emitrevisions(
2605 2609 self,
2606 2610 nodes,
2607 2611 nodesorder=None,
2608 2612 revisiondata=False,
2609 2613 assumehaveparentrevisions=False,
2610 2614 deltamode=repository.CG_DELTAMODE_STD,
2611 2615 sidedata_helpers=None,
2612 2616 ):
2613 2617 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2614 2618 raise error.ProgrammingError(
2615 2619 b'unhandled value for nodesorder: %s' % nodesorder
2616 2620 )
2617 2621
2618 2622 if nodesorder is None and not self._generaldelta:
2619 2623 nodesorder = b'storage'
2620 2624
2621 2625 if (
2622 2626 not self._storedeltachains
2623 2627 and deltamode != repository.CG_DELTAMODE_PREV
2624 2628 ):
2625 2629 deltamode = repository.CG_DELTAMODE_FULL
2626 2630
2627 2631 return storageutil.emitrevisions(
2628 2632 self,
2629 2633 nodes,
2630 2634 nodesorder,
2631 2635 revlogrevisiondelta,
2632 2636 deltaparentfn=self.deltaparent,
2633 2637 candeltafn=self.candelta,
2634 2638 rawsizefn=self.rawsize,
2635 2639 revdifffn=self.revdiff,
2636 2640 flagsfn=self.flags,
2637 2641 deltamode=deltamode,
2638 2642 revisiondata=revisiondata,
2639 2643 assumehaveparentrevisions=assumehaveparentrevisions,
2640 2644 sidedata_helpers=sidedata_helpers,
2641 2645 )
2642 2646
2643 2647 DELTAREUSEALWAYS = b'always'
2644 2648 DELTAREUSESAMEREVS = b'samerevs'
2645 2649 DELTAREUSENEVER = b'never'
2646 2650
2647 2651 DELTAREUSEFULLADD = b'fulladd'
2648 2652
2649 2653 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2650 2654
2651 2655 def clone(
2652 2656 self,
2653 2657 tr,
2654 2658 destrevlog,
2655 2659 addrevisioncb=None,
2656 2660 deltareuse=DELTAREUSESAMEREVS,
2657 2661 forcedeltabothparents=None,
2658 2662 sidedatacompanion=None,
2659 2663 ):
2660 2664 """Copy this revlog to another, possibly with format changes.
2661 2665
2662 2666 The destination revlog will contain the same revisions and nodes.
2663 2667 However, it may not be bit-for-bit identical due to e.g. delta encoding
2664 2668 differences.
2665 2669
2666 2670 The ``deltareuse`` argument controls how deltas from the existing revlog
2667 2671 are preserved in the destination revlog. The argument can have the
2668 2672 following values:
2669 2673
2670 2674 DELTAREUSEALWAYS
2671 2675 Deltas will always be reused (if possible), even if the destination
2672 2676 revlog would not select the same revisions for the delta. This is the
2673 2677 fastest mode of operation.
2674 2678 DELTAREUSESAMEREVS
2675 2679 Deltas will be reused if the destination revlog would pick the same
2676 2680 revisions for the delta. This mode strikes a balance between speed
2677 2681 and optimization.
2678 2682 DELTAREUSENEVER
2679 2683 Deltas will never be reused. This is the slowest mode of execution.
2680 2684 This mode can be used to recompute deltas (e.g. if the diff/delta
2681 2685 algorithm changes).
2682 2686 DELTAREUSEFULLADD
2683 2687 Revisions will be re-added as if they were new content. This is
2684 2688 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2685 2689 e.g. large file detection and handling.
2686 2690
2687 2691 Delta computation can be slow, so the choice of delta reuse policy can
2688 2692 significantly affect run time.
2689 2693
2690 2694 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2691 2695 two extremes. Deltas will be reused if they are appropriate. But if the
2692 2696 delta could choose a better revision, it will do so. This means if you
2693 2697 are converting a non-generaldelta revlog to a generaldelta revlog,
2694 2698 deltas will be recomputed if the delta's parent isn't a parent of the
2695 2699 revision.
2696 2700
2697 2701 In addition to the delta policy, the ``forcedeltabothparents``
2698 2702 argument controls whether to force computing deltas against both parents
2699 2703 for merges. If None, the destination revlog's existing setting is kept.
2700 2704
2701 2705 If not None, `sidedatacompanion` is a callable that accepts two
2702 2706 arguments:
2703 2707
2704 2708 (srcrevlog, rev)
2705 2709
2706 2710 and returns a quintuple that controls changes to sidedata content from the
2707 2711 old revision to the new clone result:
2708 2712
2709 2713 (dropall, filterout, update, new_flags, dropped_flags)
2710 2714
2711 2715 * if `dropall` is True, all sidedata should be dropped
2712 2716 * `filterout` is a set of sidedata keys that should be dropped
2713 2717 * `update` is a mapping of additional/new key -> value
2714 2718 * new_flags is a bitfield of new flags that the revision should get
2715 2719 * dropped_flags is a bitfield of flags that the revision should no longer have
2716 2720 """
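# A minimal usage sketch (hypothetical ``src``/``dst`` revlogs and an open
# transaction ``tr``), e.g. to recompute every delta during a format
# upgrade:
#
#     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
#
# ``dst`` must be empty and unfiltered, as checked just below.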
2717 2721 if deltareuse not in self.DELTAREUSEALL:
2718 2722 raise ValueError(
2719 2723 _(b'value for deltareuse invalid: %s') % deltareuse
2720 2724 )
2721 2725
2722 2726 if len(destrevlog):
2723 2727 raise ValueError(_(b'destination revlog is not empty'))
2724 2728
2725 2729 if getattr(self, 'filteredrevs', None):
2726 2730 raise ValueError(_(b'source revlog has filtered revisions'))
2727 2731 if getattr(destrevlog, 'filteredrevs', None):
2728 2732 raise ValueError(_(b'destination revlog has filtered revisions'))
2729 2733
2730 2734 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2731 2735 # if possible.
2732 2736 oldlazydelta = destrevlog._lazydelta
2733 2737 oldlazydeltabase = destrevlog._lazydeltabase
2734 2738 oldamd = destrevlog._deltabothparents
2735 2739
2736 2740 try:
2737 2741 if deltareuse == self.DELTAREUSEALWAYS:
2738 2742 destrevlog._lazydeltabase = True
2739 2743 destrevlog._lazydelta = True
2740 2744 elif deltareuse == self.DELTAREUSESAMEREVS:
2741 2745 destrevlog._lazydeltabase = False
2742 2746 destrevlog._lazydelta = True
2743 2747 elif deltareuse == self.DELTAREUSENEVER:
2744 2748 destrevlog._lazydeltabase = False
2745 2749 destrevlog._lazydelta = False
2746 2750
2747 2751 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2748 2752
2749 2753 self._clone(
2750 2754 tr,
2751 2755 destrevlog,
2752 2756 addrevisioncb,
2753 2757 deltareuse,
2754 2758 forcedeltabothparents,
2755 2759 sidedatacompanion,
2756 2760 )
2757 2761
2758 2762 finally:
2759 2763 destrevlog._lazydelta = oldlazydelta
2760 2764 destrevlog._lazydeltabase = oldlazydeltabase
2761 2765 destrevlog._deltabothparents = oldamd
2762 2766
2763 2767 def _clone(
2764 2768 self,
2765 2769 tr,
2766 2770 destrevlog,
2767 2771 addrevisioncb,
2768 2772 deltareuse,
2769 2773 forcedeltabothparents,
2770 2774 sidedatacompanion,
2771 2775 ):
2772 2776 """perform the core duty of `revlog.clone` after parameter processing"""
2773 2777 deltacomputer = deltautil.deltacomputer(destrevlog)
2774 2778 index = self.index
2775 2779 for rev in self:
2776 2780 entry = index[rev]
2777 2781
2778 2782 # Some classes override linkrev to take filtered revs into
2779 2783 # account. Use raw entry from index.
2780 2784 flags = entry[0] & 0xFFFF
2781 2785 linkrev = entry[4]
2782 2786 p1 = index[entry[5]][7]
2783 2787 p2 = index[entry[6]][7]
2784 2788 node = entry[7]
2785 2789
2786 2790 sidedataactions = (False, [], {}, 0, 0)
2787 2791 if sidedatacompanion is not None:
2788 2792 sidedataactions = sidedatacompanion(self, rev)
2789 2793
2790 2794 # (Possibly) reuse the delta from the revlog if allowed and
2791 2795 # the revlog chunk is a delta.
2792 2796 cachedelta = None
2793 2797 rawtext = None
2794 2798 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2795 2799 dropall = sidedataactions[0]
2796 2800 filterout = sidedataactions[1]
2797 2801 update = sidedataactions[2]
2798 2802 new_flags = sidedataactions[3]
2799 2803 dropped_flags = sidedataactions[4]
2800 2804 text, sidedata = self._revisiondata(rev)
2801 2805 if dropall:
2802 2806 sidedata = {}
2803 2807 for key in filterout:
2804 2808 sidedata.pop(key, None)
2805 2809 sidedata.update(update)
2806 2810 if not sidedata:
2807 2811 sidedata = None
2808 2812
2809 2813 flags |= new_flags
2810 2814 flags &= ~dropped_flags
2811 2815
2812 2816 destrevlog.addrevision(
2813 2817 text,
2814 2818 tr,
2815 2819 linkrev,
2816 2820 p1,
2817 2821 p2,
2818 2822 cachedelta=cachedelta,
2819 2823 node=node,
2820 2824 flags=flags,
2821 2825 deltacomputer=deltacomputer,
2822 2826 sidedata=sidedata,
2823 2827 )
2824 2828 else:
2825 2829 if destrevlog._lazydelta:
2826 2830 dp = self.deltaparent(rev)
2827 2831 if dp != nullrev:
2828 2832 cachedelta = (dp, bytes(self._chunk(rev)))
2829 2833
2830 2834 if not cachedelta:
2831 2835 rawtext = self.rawdata(rev)
2832 2836
2833 2837 ifh = destrevlog.opener(
2834 2838 destrevlog.indexfile, b'a+', checkambig=False
2835 2839 )
2836 2840 dfh = None
2837 2841 if not destrevlog._inline:
2838 2842 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2839 2843 try:
2840 2844 destrevlog._addrevision(
2841 2845 node,
2842 2846 rawtext,
2843 2847 tr,
2844 2848 linkrev,
2845 2849 p1,
2846 2850 p2,
2847 2851 flags,
2848 2852 cachedelta,
2849 2853 ifh,
2850 2854 dfh,
2851 2855 deltacomputer=deltacomputer,
2852 2856 )
2853 2857 finally:
2854 2858 if dfh:
2855 2859 dfh.close()
2856 2860 ifh.close()
2857 2861
2858 2862 if addrevisioncb:
2859 2863 addrevisioncb(self, rev, node)
2860 2864
2861 2865 def censorrevision(self, tr, censornode, tombstone=b''):
2862 2866 if (self.version & 0xFFFF) == REVLOGV0:
2863 2867 raise error.RevlogError(
2864 2868 _(b'cannot censor with version %d revlogs') % self.version
2865 2869 )
2866 2870
2867 2871 censorrev = self.rev(censornode)
2868 2872 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2869 2873
2870 2874 if len(tombstone) > self.rawsize(censorrev):
2871 2875 raise error.Abort(
2872 2876 _(b'censor tombstone must be no longer than censored data')
2873 2877 )
2874 2878
2875 2879 # Rewriting the revlog in place is hard. Our strategy for censoring is
2876 2880 # to create a new revlog, copy all revisions to it, then replace the
2877 2881 # revlogs on transaction close.
2878 2882
2879 2883 newindexfile = self.indexfile + b'.tmpcensored'
2880 2884 newdatafile = self.datafile + b'.tmpcensored'
2881 2885
2882 2886 # This is a bit dangerous. We could easily have a mismatch of state.
2883 2887 newrl = revlog(
2884 2888 self.opener,
2885 2889 target=self.target,
2886 2890 indexfile=newindexfile,
2887 2891 datafile=newdatafile,
2888 2892 censorable=True,
2889 2893 )
2890 2894 newrl.version = self.version
2891 2895 newrl._generaldelta = self._generaldelta
2892 2896 newrl._parse_index = self._parse_index
2893 2897
2894 2898 for rev in self.revs():
2895 2899 node = self.node(rev)
2896 2900 p1, p2 = self.parents(node)
2897 2901
2898 2902 if rev == censorrev:
2899 2903 newrl.addrawrevision(
2900 2904 tombstone,
2901 2905 tr,
2902 2906 self.linkrev(censorrev),
2903 2907 p1,
2904 2908 p2,
2905 2909 censornode,
2906 2910 REVIDX_ISCENSORED,
2907 2911 )
2908 2912
2909 2913 if newrl.deltaparent(rev) != nullrev:
2910 2914 raise error.Abort(
2911 2915 _(
2912 2916 b'censored revision stored as delta; '
2913 2917 b'cannot censor'
2914 2918 ),
2915 2919 hint=_(
2916 2920 b'censoring of revlogs is not '
2917 2921 b'fully implemented; please report '
2918 2922 b'this bug'
2919 2923 ),
2920 2924 )
2921 2925 continue
2922 2926
2923 2927 if self.iscensored(rev):
2924 2928 if self.deltaparent(rev) != nullrev:
2925 2929 raise error.Abort(
2926 2930 _(
2927 2931 b'cannot censor due to censored '
2928 2932 b'revision having delta stored'
2929 2933 )
2930 2934 )
2931 2935 rawtext = self._chunk(rev)
2932 2936 else:
2933 2937 rawtext = self.rawdata(rev)
2934 2938
2935 2939 newrl.addrawrevision(
2936 2940 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2937 2941 )
2938 2942
2939 2943 tr.addbackup(self.indexfile, location=b'store')
2940 2944 if not self._inline:
2941 2945 tr.addbackup(self.datafile, location=b'store')
2942 2946
2943 2947 self.opener.rename(newrl.indexfile, self.indexfile)
2944 2948 if not self._inline:
2945 2949 self.opener.rename(newrl.datafile, self.datafile)
2946 2950
2947 2951 self.clearcaches()
2948 2952 self._loadindex()
2949 2953
2950 2954 def verifyintegrity(self, state):
2951 2955 """Verifies the integrity of the revlog.
2952 2956
2953 2957 Yields ``revlogproblem`` instances describing problems that are
2954 2958 found.
2955 2959 """
2956 2960 dd, di = self.checksize()
2957 2961 if dd:
2958 2962 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2959 2963 if di:
2960 2964 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2961 2965
2962 2966 version = self.version & 0xFFFF
2963 2967
2964 2968 # The verifier tells us what version revlog we should be.
2965 2969 if version != state[b'expectedversion']:
2966 2970 yield revlogproblem(
2967 2971 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2968 2972 % (self.indexfile, version, state[b'expectedversion'])
2969 2973 )
2970 2974
2971 2975 state[b'skipread'] = set()
2972 2976 state[b'safe_renamed'] = set()
2973 2977
2974 2978 for rev in self:
2975 2979 node = self.node(rev)
2976 2980
2977 2981 # Verify contents. 4 cases to care about:
2978 2982 #
2979 2983 # common: the most common case
2980 2984 # rename: with a rename
2981 2985 # meta: file content starts with b'\1\n', the metadata
2982 2986 # header defined in filelog.py, but without a rename
2983 2987 # ext: content stored externally
2984 2988 #
2985 2989 # More formally, their differences are shown below:
2986 2990 #
2987 2991 # | common | rename | meta | ext
2988 2992 # -------------------------------------------------------
2989 2993 # flags() | 0 | 0 | 0 | not 0
2990 2994 # renamed() | False | True | False | ?
2991 2995 # rawtext[0:2]=='\1\n'| False | True | True | ?
2992 2996 #
2993 2997 # "rawtext" means the raw text stored in revlog data, which
2994 2998 # could be retrieved by "rawdata(rev)". "text"
2995 2999 # mentioned below is "revision(rev)".
2996 3000 #
2997 3001 # There are 3 different lengths stored physically:
2998 3002 # 1. L1: rawsize, stored in revlog index
2999 3003 # 2. L2: len(rawtext), stored in revlog data
3000 3004 # 3. L3: len(text), stored in revlog data if flags==0, or
3001 3005 # possibly somewhere else if flags!=0
3002 3006 #
3003 3007 # L1 should be equal to L2. L3 could be different from them.
3004 3008 # "text" may or may not affect commit hash depending on flag
3005 3009 # processors (see flagutil.addflagprocessor).
3006 3010 #
3007 3011 # | common | rename | meta | ext
3008 3012 # -------------------------------------------------
3009 3013 # rawsize() | L1 | L1 | L1 | L1
3010 3014 # size() | L1 | L2-LM | L1(*) | L1 (?)
3011 3015 # len(rawtext) | L2 | L2 | L2 | L2
3012 3016 # len(text) | L2 | L2 | L2 | L3
3013 3017 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3014 3018 #
3015 3019 # LM: length of metadata, depending on rawtext
3016 3020 # (*): not ideal, see comment in filelog.size
3017 3021 # (?): could be "- len(meta)" if the resolved content has
3018 3022 # rename metadata
3019 3023 #
3020 3024 # Checks needed to be done:
3021 3025 # 1. length check: L1 == L2, in all cases.
3022 3026 # 2. hash check: depending on flag processor, we may need to
3023 3027 # use either "text" (external), or "rawtext" (in revlog).
3024 3028
3025 3029 try:
3026 3030 skipflags = state.get(b'skipflags', 0)
3027 3031 if skipflags:
3028 3032 skipflags &= self.flags(rev)
3029 3033
3030 3034 _verify_revision(self, skipflags, state, node)
3031 3035
3032 3036 l1 = self.rawsize(rev)
3033 3037 l2 = len(self.rawdata(node))
3034 3038
3035 3039 if l1 != l2:
3036 3040 yield revlogproblem(
3037 3041 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3038 3042 node=node,
3039 3043 )
3040 3044
3041 3045 except error.CensoredNodeError:
3042 3046 if state[b'erroroncensored']:
3043 3047 yield revlogproblem(
3044 3048 error=_(b'censored file data'), node=node
3045 3049 )
3046 3050 state[b'skipread'].add(node)
3047 3051 except Exception as e:
3048 3052 yield revlogproblem(
3049 3053 error=_(b'unpacking %s: %s')
3050 3054 % (short(node), stringutil.forcebytestr(e)),
3051 3055 node=node,
3052 3056 )
3053 3057 state[b'skipread'].add(node)
3054 3058
3055 3059 def storageinfo(
3056 3060 self,
3057 3061 exclusivefiles=False,
3058 3062 sharedfiles=False,
3059 3063 revisionscount=False,
3060 3064 trackedsize=False,
3061 3065 storedsize=False,
3062 3066 ):
3063 3067 d = {}
3064 3068
3065 3069 if exclusivefiles:
3066 3070 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3067 3071 if not self._inline:
3068 3072 d[b'exclusivefiles'].append((self.opener, self.datafile))
3069 3073
3070 3074 if sharedfiles:
3071 3075 d[b'sharedfiles'] = []
3072 3076
3073 3077 if revisionscount:
3074 3078 d[b'revisionscount'] = len(self)
3075 3079
3076 3080 if trackedsize:
3077 3081 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3078 3082
3079 3083 if storedsize:
3080 3084 d[b'storedsize'] = sum(
3081 3085 self.opener.stat(path).st_size for path in self.files()
3082 3086 )
3083 3087
3084 3088 return d
3085 3089
3086 3090 def rewrite_sidedata(self, helpers, startrev, endrev):
3087 3091 if self.version & 0xFFFF != REVLOGV2:
3088 3092 return
3089 3093 # inline revlogs are not yet supported because they suffer from an issue
3090 3094 # when rewriting them (since it's not an append-only operation).
3091 3095 # See issue6485.
3092 3096 assert not self._inline
3093 3097 if not helpers[1] and not helpers[2]:
3094 3098 # Nothing to generate or remove
3095 3099 return
3096 3100
3097 3101 new_entries = []
3098 3102 # append the new sidedata
3099 3103 with self._datafp(b'a+') as fp:
3100 3104 # Maybe this bug still exists, see revlog._writeentry
3101 3105 fp.seek(0, os.SEEK_END)
3102 3106 current_offset = fp.tell()
3103 3107 for rev in range(startrev, endrev + 1):
3104 3108 entry = self.index[rev]
3105 3109 new_sidedata = storageutil.run_sidedata_helpers(
3106 3110 store=self,
3107 3111 sidedata_helpers=helpers,
3108 3112 sidedata={},
3109 3113 rev=rev,
3110 3114 )
3111 3115
3112 3116 serialized_sidedata = sidedatautil.serialize_sidedata(
3113 3117 new_sidedata
3114 3118 )
3115 3119 if entry[8] != 0 or entry[9] != 0:
3116 3120 # rewriting entries that already have sidedata is not
3117 3121 # supported yet, because it introduces garbage data in the
3118 3122 # revlog.
3119 3123 msg = b"Rewriting existing sidedata is not supported yet"
3120 3124 raise error.Abort(msg)
3121 3125 entry = entry[:8]
3122 3126 entry += (current_offset, len(serialized_sidedata))
3123 3127
3124 3128 fp.write(serialized_sidedata)
3125 3129 new_entries.append(entry)
3126 3130 current_offset += len(serialized_sidedata)
3127 3131
3128 3132 # rewrite the new index entries
3129 3133 with self._indexfp(b'w+') as fp:
3130 3134 fp.seek(startrev * self.index.entry_size)
3131 3135 for i, entry in enumerate(new_entries):
3132 3136 rev = startrev + i
3133 3137 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3134 3138 packed = self.index.entry_binary(rev)
3135 3139 if rev == 0:
3136 3140 header = self.index.pack_header(self.version)
3137 3141 packed = header + packed
3138 3142 fp.write(packed)
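
A minimal sketch of the `sidedatacompanion` callback consumed by `_clone` above. The return shape mirrors the default `(False, [], {}, 0, 0)`: a flag to drop all sidedata, keys to filter out, a mapping of keys to add or overwrite, revision flags to set, and revision flags to clear. The callback name is illustrative; `SD_TEST1` is the test key defined in the sidedata module and used by the test extensions further down.

    from mercurial.revlogutils import sidedata as sidedatamod

    def drop_test1_companion(srcrevlog, rev):
        # called once per revision with the source revlog and the rev number
        dropall = False                     # keep the rest of the sidedata dict
        filterout = [sidedatamod.SD_TEST1]  # keys to remove, if present
        update = {}                         # keys to add or overwrite
        new_flags = 0                       # revision flags to OR in
        dropped_flags = 0                   # revision flags to clear
        return (dropall, filterout, update, new_flags, dropped_flags)

Passing such a companion to `clone()` makes `any(sidedataactions)` true, so the rewritten revisions go through the full-add path above and are stored without `SD_TEST1`.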
@@ -1,50 +1,51 b''
1 1 # coding: utf8
2 2 # ext-sidedata-2.py - small extension to test (differently) the sidedata logic
3 3 #
4 4 # Simulates a client for a complex sidedata exchange.
5 5 #
6 6 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import hashlib
14 14 import struct
15 15
16 16 from mercurial.revlogutils import sidedata as sidedatamod
17 from mercurial.revlogutils import constants
17 18
18 19
19 20 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
20 21 sidedata = sidedata.copy()
21 22 if text is None:
22 23 text = revlog.revision(rev)
23 24 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
24 25 return sidedata
25 26
26 27
27 28 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
28 29 sidedata = sidedata.copy()
29 30 if text is None:
30 31 text = revlog.revision(rev)
31 32 sha256 = hashlib.sha256(text).digest()
32 33 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
33 34 return sidedata
34 35
35 36
36 37 def reposetup(ui, repo):
37 38 # Sidedata keys happen to be the same as the categories, easier for testing.
38 for kind in (b'changelog', b'manifest', b'filelog'):
39 for kind in constants.ALL_KINDS:
39 40 repo.register_sidedata_computer(
40 41 kind,
41 42 sidedatamod.SD_TEST1,
42 43 (sidedatamod.SD_TEST1,),
43 44 compute_sidedata_1,
44 45 )
45 46 repo.register_sidedata_computer(
46 47 kind,
47 48 sidedatamod.SD_TEST2,
48 49 (sidedatamod.SD_TEST2,),
49 50 compute_sidedata_2,
50 51 )
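
The change above swaps the hard-coded kind names for `constants.ALL_KINDS`. A sketch of what finer-grained registration could look like, assuming the per-kind constants (e.g. `KIND_FILELOG`) that `ALL_KINDS` aggregates; the computer follows the same contract as `compute_sidedata_1` above, and the argument order matches the `register_sidedata_computer` calls in this diff.

    import struct

    from mercurial.revlogutils import constants
    from mercurial.revlogutils import sidedata as sidedatamod

    def compute_length(repo, revlog, rev, sidedata, text=None):
        # store the raw text length, as compute_sidedata_1 does
        sidedata = sidedata.copy()
        if text is None:
            text = revlog.revision(rev)
        sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
        return sidedata

    def reposetup(ui, repo):
        # register for file revlogs only instead of every kind
        repo.register_sidedata_computer(
            constants.KIND_FILELOG,       # assumed per-kind constant
            sidedatamod.SD_TEST1,         # category
            (sidedatamod.SD_TEST1,),      # keys this computer produces
            compute_length,
        )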
@@ -1,88 +1,89 b''
1 1 # coding: utf8
2 2 # ext-sidedata-3.py - small extension to test (differently still) the sidedata
3 3 # logic
4 4 #
5 5 # Simulates a client for a complex sidedata exchange.
6 6 #
7 7 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
8 8 #
9 9 # This software may be used and distributed according to the terms of the
10 10 # GNU General Public License version 2 or any later version.
11 11
12 12 from __future__ import absolute_import
13 13
14 14 import hashlib
15 15 import struct
16 16
17 17 from mercurial import (
18 18 extensions,
19 19 revlog,
20 20 )
21 21
22 22 from mercurial.revlogutils import sidedata as sidedatamod
23 from mercurial.revlogutils import constants
23 24
24 25
25 26 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
26 27 sidedata = sidedata.copy()
27 28 if text is None:
28 29 text = revlog.revision(rev)
29 30 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
30 31 return sidedata
31 32
32 33
33 34 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
34 35 sidedata = sidedata.copy()
35 36 if text is None:
36 37 text = revlog.revision(rev)
37 38 sha256 = hashlib.sha256(text).digest()
38 39 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
39 40 return sidedata
40 41
41 42
42 43 def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
43 44 sidedata = sidedata.copy()
44 45 if text is None:
45 46 text = revlog.revision(rev)
46 47 sha384 = hashlib.sha384(text).digest()
47 48 sidedata[sidedatamod.SD_TEST3] = struct.pack('>48s', sha384)
48 49 return sidedata
49 50
50 51
51 52 def wrapaddrevision(
52 53 orig, self, text, transaction, link, p1, p2, *args, **kwargs
53 54 ):
54 55 if kwargs.get('sidedata') is None:
55 56 kwargs['sidedata'] = {}
56 57 sd = kwargs['sidedata']
57 58 sd = compute_sidedata_1(None, self, None, sd, text=text)
58 59 kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)
59 60 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
60 61
61 62
62 63 def extsetup(ui):
63 64 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
64 65
65 66
66 67 def reposetup(ui, repo):
67 68 # Sidedata keys happen to be the same as the categories, easier for testing.
68 for kind in (b'changelog', b'manifest', b'filelog'):
69 for kind in constants.ALL_KINDS:
69 70 repo.register_sidedata_computer(
70 71 kind,
71 72 sidedatamod.SD_TEST1,
72 73 (sidedatamod.SD_TEST1,),
73 74 compute_sidedata_1,
74 75 )
75 76 repo.register_sidedata_computer(
76 77 kind,
77 78 sidedatamod.SD_TEST2,
78 79 (sidedatamod.SD_TEST2,),
79 80 compute_sidedata_2,
80 81 )
81 82 repo.register_sidedata_computer(
82 83 kind,
83 84 sidedatamod.SD_TEST3,
84 85 (sidedatamod.SD_TEST3,),
85 86 compute_sidedata_3,
86 87 )
87 88 repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
88 89 repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
@@ -1,81 +1,82 b''
1 1 # coding: utf8
2 2 # ext-sidedata-5.py - small extension to test (differently still) the sidedata
3 3 # logic
4 4 #
5 5 # Simulates a server for a simple sidedata exchange.
6 6 #
7 7 # Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
8 8 #
9 9 # This software may be used and distributed according to the terms of the
10 10 # GNU General Public License version 2 or any later version.
11 11
12 12 from __future__ import absolute_import
13 13
14 14 import hashlib
15 15 import struct
16 16
17 17 from mercurial import (
18 18 extensions,
19 19 revlog,
20 20 )
21 21
22 22
23 23 from mercurial.revlogutils import sidedata as sidedatamod
24 from mercurial.revlogutils import constants
24 25
25 26
26 27 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
27 28 sidedata = sidedata.copy()
28 29 if text is None:
29 30 text = revlog.revision(rev)
30 31 sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
31 32 return sidedata
32 33
33 34
34 35 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
35 36 sidedata = sidedata.copy()
36 37 if text is None:
37 38 text = revlog.revision(rev)
38 39 sha256 = hashlib.sha256(text).digest()
39 40 sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
40 41 return sidedata
41 42
42 43
43 44 def reposetup(ui, repo):
44 45 # Sidedata keys happen to be the same as the categories, easier for testing.
45 for kind in (b'changelog', b'manifest', b'filelog'):
46 for kind in constants.ALL_KINDS:
46 47 repo.register_sidedata_computer(
47 48 kind,
48 49 sidedatamod.SD_TEST1,
49 50 (sidedatamod.SD_TEST1,),
50 51 compute_sidedata_1,
51 52 )
52 53 repo.register_sidedata_computer(
53 54 kind,
54 55 sidedatamod.SD_TEST2,
55 56 (sidedatamod.SD_TEST2,),
56 57 compute_sidedata_2,
57 58 )
58 59
59 60 # We don't register sidedata computers because we don't care within these
60 61 # tests
61 62 repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
62 63 repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
63 64
64 65
65 66 def wrapaddrevision(
66 67 orig, self, text, transaction, link, p1, p2, *args, **kwargs
67 68 ):
68 69 if kwargs.get('sidedata') is None:
69 70 kwargs['sidedata'] = {}
70 71 sd = kwargs['sidedata']
71 72 ## let's store some arbitrary data just for testing
72 73 # text length
73 74 sd[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
74 75 # and sha2 hashes
75 76 sha256 = hashlib.sha256(text).digest()
76 77 sd[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
77 78 return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
78 79
79 80
80 81 def extsetup(ui):
81 82 extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
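
To check what these test extensions actually stored, the sidedata can be read back per revision. A hedged sketch of a small debug script, assuming the public `sidedata(rev)` accessor on revlogs; if that name differs, `_revisiondata(rev)` (used in `_clone` above) exposes the same mapping as its second element.

    import struct

    from mercurial import hg
    from mercurial import ui as uimod
    from mercurial.revlogutils import sidedata as sidedatamod

    repo = hg.repository(uimod.ui.load(), b'.')
    cl = repo.changelog
    for rev in cl:
        sd = cl.sidedata(rev)
        if sidedatamod.SD_TEST1 in sd:
            # SD_TEST1 holds the text length packed as a big-endian uint32
            (length,) = struct.unpack('>I', sd[sidedatamod.SD_TEST1])
            print(rev, 'text length:', length)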