##// END OF EJS Templates
lock: pass "success" boolean to _afterlock callbacks...
Kyle Lippincott -
r44167:4b065b01 default draft
parent child Browse files
Show More
@@ -1,1298 +1,1298 b''
1 1 # __init__.py - remotefilelog extension
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """remotefilelog causes Mercurial to lazily fetch file contents (EXPERIMENTAL)
8 8
9 9 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
10 10 GUARANTEES. This means that repositories created with this extension may
11 11 only be usable with the exact version of this extension/Mercurial that was
12 12 used. The extension attempts to enforce this in order to prevent repository
13 13 corruption.
14 14
15 15 remotefilelog works by fetching file contents lazily and storing them
16 16 in a cache on the client rather than in revlogs. This allows enormous
17 17 histories to be transferred only partially, making them easier to
18 18 operate on.
19 19
20 20 Configs:
21 21
22 22 ``packs.maxchainlen`` specifies the maximum delta chain length in pack files
23 23
24 24 ``packs.maxpacksize`` specifies the maximum pack file size
25 25
26 26 ``packs.maxpackfilecount`` specifies the maximum number of packs in the
27 27 shared cache (trees only for now)
28 28
29 29 ``remotefilelog.backgroundprefetch`` runs prefetch in background when True
30 30
31 31 ``remotefilelog.bgprefetchrevs`` specifies revisions to fetch on commit and
32 32 update, and on other commands that use them. Different from pullprefetch.
33 33
34 34 ``remotefilelog.gcrepack`` does garbage collection during repack when True
35 35
36 36 ``remotefilelog.nodettl`` specifies maximum TTL of a node in seconds before
37 37 it is garbage collected
38 38
39 39 ``remotefilelog.repackonhggc`` runs repack on hg gc when True
40 40
41 41 ``remotefilelog.prefetchdays`` specifies the maximum age of a commit in
42 42 days after which it is no longer prefetched.
43 43
44 44 ``remotefilelog.prefetchdelay`` specifies delay between background
45 45 prefetches in seconds after operations that change the working copy parent
46 46
47 47 ``remotefilelog.data.gencountlimit`` constrains the minimum number of data
48 48 pack files required to be considered part of a generation. In particular,
49 49 minimum number of pack files > gencountlimit.
50 50
51 51 ``remotefilelog.data.generations`` list for specifying the lower bound of
52 52 each generation of the data pack files. For example, list ['100MB','1MB']
53 53 or ['1MB', '100MB'] will lead to three generations: [0, 1MB), [
54 54 1MB, 100MB) and [100MB, infinity).
55 55
56 56 ``remotefilelog.data.maxrepackpacks`` the maximum number of pack files to
57 57 include in an incremental data repack.
58 58
59 59 ``remotefilelog.data.repackmaxpacksize`` the maximum size of a pack file for
60 60 it to be considered for an incremental data repack.
61 61
62 62 ``remotefilelog.data.repacksizelimit`` the maximum total size of pack files
63 63 to include in an incremental data repack.
64 64
65 65 ``remotefilelog.history.gencountlimit`` constrains the minimum number of
66 66 history pack files required to be considered part of a generation. In
67 67 particular, minimum number of pack files > gencountlimit.
68 68
69 69 ``remotefilelog.history.generations`` list for specifying the lower bound of
70 70 each generation of the history pack files. For example, list [
71 71 '100MB', '1MB'] or ['1MB', '100MB'] will lead to three generations: [
72 72 0, 1MB), [1MB, 100MB) and [100MB, infinity).
73 73
74 74 ``remotefilelog.history.maxrepackpacks`` the maximum number of pack files to
75 75 include in an incremental history repack.
76 76
77 77 ``remotefilelog.history.repackmaxpacksize`` the maximum size of a pack file
78 78 for it to be considered for an incremental history repack.
79 79
80 80 ``remotefilelog.history.repacksizelimit`` the maximum total size of pack
81 81 files to include in an incremental history repack.
82 82
83 83 ``remotefilelog.backgroundrepack`` automatically consolidate packs in the
84 84 background
85 85
86 86 ``remotefilelog.cachepath`` path to cache
87 87
88 88 ``remotefilelog.cachegroup`` if set, make cache directory sgid to this
89 89 group
90 90
91 91 ``remotefilelog.cacheprocess`` binary to invoke for fetching file data
92 92
93 93 ``remotefilelog.debug`` turn on remotefilelog-specific debug output
94 94
95 95 ``remotefilelog.excludepattern`` pattern of files to exclude from pulls
96 96
97 97 ``remotefilelog.includepattern`` pattern of files to include in pulls
98 98
99 99 ``remotefilelog.fetchwarning``: message to print when too many
100 100 single-file fetches occur
101 101
102 102 ``remotefilelog.getfilesstep`` number of files to request in a single RPC
103 103
104 104 ``remotefilelog.getfilestype`` if set to 'threaded' use threads to fetch
105 105 files, otherwise use optimistic fetching
106 106
107 107 ``remotefilelog.pullprefetch`` revset for selecting files that should be
108 108 eagerly downloaded rather than lazily
109 109
110 110 ``remotefilelog.reponame`` name of the repo. If set, used to partition
111 111 data from other repos in a shared store.
112 112
113 113 ``remotefilelog.server`` if true, enable server-side functionality
114 114
115 115 ``remotefilelog.servercachepath`` path for caching blobs on the server
116 116
117 117 ``remotefilelog.serverexpiration`` number of days to keep cached server
118 118 blobs
119 119
120 120 ``remotefilelog.validatecache`` if set, check cache entries for corruption
121 121 before returning blobs
122 122
123 123 ``remotefilelog.validatecachelog`` if set, check cache entries for
124 124 corruption before returning metadata
125 125
126 126 """
127 127 from __future__ import absolute_import
128 128
129 129 import os
130 130 import time
131 131 import traceback
132 132
133 133 from mercurial.node import hex
134 134 from mercurial.i18n import _
135 135 from mercurial.pycompat import open
136 136 from mercurial import (
137 137 changegroup,
138 138 changelog,
139 139 cmdutil,
140 140 commands,
141 141 configitems,
142 142 context,
143 143 copies,
144 144 debugcommands as hgdebugcommands,
145 145 dispatch,
146 146 error,
147 147 exchange,
148 148 extensions,
149 149 hg,
150 150 localrepo,
151 151 match,
152 152 merge,
153 153 node as nodemod,
154 154 patch,
155 155 pycompat,
156 156 registrar,
157 157 repair,
158 158 repoview,
159 159 revset,
160 160 scmutil,
161 161 smartset,
162 162 streamclone,
163 163 util,
164 164 )
165 165 from . import (
166 166 constants,
167 167 debugcommands,
168 168 fileserverclient,
169 169 remotefilectx,
170 170 remotefilelog,
171 171 remotefilelogserver,
172 172 repack as repackmod,
173 173 shallowbundle,
174 174 shallowrepo,
175 175 shallowstore,
176 176 shallowutil,
177 177 shallowverifier,
178 178 )
179 179
# ensures debug commands are registered
hgdebugcommands.command

# Tables that Mercurial reads to discover this extension's commands and
# config options; ``command`` / ``configitem`` register entries into them.
cmdtable = {}
command = registrar.command(cmdtable)

configtable = {}
configitem = registrar.configitem(configtable)

configitem(b'remotefilelog', b'debug', default=False)

# cache location and access
configitem(b'remotefilelog', b'reponame', default=b'')
configitem(b'remotefilelog', b'cachepath', default=None)
configitem(b'remotefilelog', b'cachegroup', default=None)
configitem(b'remotefilelog', b'cacheprocess', default=None)
configitem(b'remotefilelog', b'cacheprocess.includepath', default=None)
configitem(b"remotefilelog", b"cachelimit", default=b"1000 GB")

# ``fallbackrepo`` is accepted as an alias of ``fallbackpath``
configitem(
    b'remotefilelog',
    b'fallbackpath',
    default=configitems.dynamicdefault,
    alias=[(b'remotefilelog', b'fallbackrepo')],
)

# cache validation, server mode, background prefetch/repack
configitem(b'remotefilelog', b'validatecachelog', default=None)
configitem(b'remotefilelog', b'validatecache', default=b'on')
configitem(b'remotefilelog', b'server', default=None)
configitem(b'remotefilelog', b'servercachepath', default=None)
configitem(b"remotefilelog", b"serverexpiration", default=30)
configitem(b'remotefilelog', b'backgroundrepack', default=False)
configitem(b'remotefilelog', b'bgprefetchrevs', default=None)
configitem(b'remotefilelog', b'pullprefetch', default=None)
configitem(b'remotefilelog', b'backgroundprefetch', default=False)
configitem(b'remotefilelog', b'prefetchdelay', default=120)
configitem(b'remotefilelog', b'prefetchdays', default=14)

# file fetching
configitem(b'remotefilelog', b'getfilesstep', default=10000)
configitem(b'remotefilelog', b'getfilestype', default=b'optimistic')
configitem(b'remotefilelog', b'batchsize', configitems.dynamicdefault)
configitem(b'remotefilelog', b'fetchwarning', default=b'')

configitem(b'remotefilelog', b'includepattern', default=None)
configitem(b'remotefilelog', b'excludepattern', default=None)

# garbage collection and repack
configitem(b'remotefilelog', b'gcrepack', default=False)
configitem(b'remotefilelog', b'repackonhggc', default=False)
configitem(b'repack', b'chainorphansbysize', default=True, experimental=True)

configitem(b'packs', b'maxpacksize', default=0)
configitem(b'packs', b'maxchainlen', default=1000)

configitem(b'devel', b'remotefilelog.ensurestart', default=False)

# default TTL limit is 30 days
_defaultlimit = 60 * 60 * 24 * 30
configitem(b'remotefilelog', b'nodettl', default=_defaultlimit)

# incremental data-pack repack tuning
# (the stray trailing comma that used to follow this call only built a
# throwaway 1-tuple; it has been dropped)
configitem(b'remotefilelog', b'data.gencountlimit', default=2)
configitem(
    b'remotefilelog', b'data.generations', default=[b'1GB', b'100MB', b'1MB']
)
configitem(b'remotefilelog', b'data.maxrepackpacks', default=50)
configitem(b'remotefilelog', b'data.repackmaxpacksize', default=b'4GB')
configitem(b'remotefilelog', b'data.repacksizelimit', default=b'100MB')

# incremental history-pack repack tuning
configitem(b'remotefilelog', b'history.gencountlimit', default=2)
configitem(b'remotefilelog', b'history.generations', default=[b'100MB'])
configitem(b'remotefilelog', b'history.maxrepackpacks', default=50)
configitem(b'remotefilelog', b'history.repackmaxpacksize', default=b'400MB')
configitem(b'remotefilelog', b'history.repacksizelimit', default=b'100MB')

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'

# Add the shallow-repo requirement to the base set of requirements that
# local repositories declare as supported.
repoclass = localrepo.localrepository
repoclass._basesupported.add(constants.SHALLOWREPO_REQUIREMENT)

# module-level shorthand used by the wrappers in this file
isenabled = shallowutil.isenabled
262 262
263 263
def uisetup(ui):
    """Install the shallow-aware wrappers around Mercurial commands and
    internal functions.

    Everything here is registration: commands in ``commands.table`` and a
    number of core functions are wrapped with the implementations from this
    extension. The wrappers are installed in a fixed order.
    """
    hg.wirepeersetupfuncs.append(fileserverclient.peersetup)

    # 'hg clone' gains a --shallow flag
    cloneentry = extensions.wrapcommand(commands.table, b'clone', cloneshallow)
    shallowflag = (
        b'',
        b'shallow',
        None,
        _(b"create a shallow clone which uses remote file history"),
    )
    cloneentry[1].append(shallowflag)

    for cmdname, cmdwrapper in (
        (b'debugindex', debugcommands.debugindex),
        (b'debugindexdot', debugcommands.debugindexdot),
        (b'log', log),
        (b'pull', pull),
    ):
        extensions.wrapcommand(commands.table, cmdname, cmdwrapper)

    # Prevent 'hg manifest --all'
    def _manifest(orig, ui, repo, *args, **opts):
        wantsall = opts.get('all')
        if isenabled(repo) and wantsall:
            raise error.Abort(_(b"--all is not supported in a shallow repo"))

        return orig(ui, repo, *args, **opts)

    extensions.wrapcommand(commands.table, b"manifest", _manifest)

    # Wrap remotefilelog with lfs code
    def _lfsloaded(loaded=False):
        try:
            lfsmod = extensions.find(b'lfs')
        except KeyError:
            # lfs is not available: nothing to hook up
            return
        if lfsmod:
            lfsmod.wrapfilelog(remotefilelog.remotefilelog)
            fileserverclient._lfsmod = lfsmod

    extensions.afterloaded(b'lfs', _lfsloaded)

    # debugdata needs remotefilelog.len to work
    extensions.wrapcommand(commands.table, b'debugdata', debugdatashallow)

    changegroup.cgpacker = shallowbundle.shallowcg1packer

    # (container, attribute name, wrapper), applied top to bottom
    corewrappers = (
        (
            changegroup,
            b'_addchangegroupfiles',
            shallowbundle.addchangegroupfiles,
        ),
        (changegroup, b'makechangegroup', shallowbundle.makechangegroup),
        (localrepo, b'makestore', storewrapper),
        (exchange, b'pull', exchangepull),
        (merge, b'applyupdates', applyupdates),
        (merge, b'_checkunknownfiles', checkunknownfiles),
        (context.workingctx, b'_checklookup', checklookup),
        (scmutil, b'_findrenames', findrenames),
        (copies, b'_computeforwardmissing', computeforwardmissing),
        (dispatch, b'runcommand', runcommand),
        (repair, b'_collectbrokencsets', _collectbrokencsets),
        (context.changectx, b'filectx', filectx),
        (context.workingctx, b'filectx', workingfilectx),
        (patch, b'trydiff', trydiff),
        (hg, b'verify', _verify),
    )
    for container, funcname, wrapper in corewrappers:
        extensions.wrapfunction(container, funcname, wrapper)
    scmutil.fileprefetchhooks.add(b'remotefilelog', _fileprefetchhook)

    # disappointing hacks below
    extensions.wrapfunction(scmutil, b'getrenamedfn', getrenamedfn)
    extensions.wrapfunction(revset, b'filelog', filelogrevset)
    # re-point the revset symbol table at the (now wrapped) function
    revset.symbols[b'filelog'] = revset.filelog
    extensions.wrapfunction(cmdutil, b'walkfilerevs', walkfilerevs)
344 344
345 345
def cloneshallow(orig, ui, repo, *args, **opts):
    """Wrapper for the ``clone`` command that adds ``--shallow`` support.

    When ``--shallow`` is given, ``exchange.pull`` and the stream-clone
    helpers are wrapped so the new repository is set up as a shallow client
    before data is pulled. Any file service opened on the repositories
    touched during the clone is closed afterwards, even on failure.

    Returns whatever the wrapped clone command returns, so the command's
    result is no longer swallowed by this wrapper.
    """
    if opts.get('shallow'):
        repos = []

        def pull_shallow(orig, self, *args, **kwargs):
            if not isenabled(self):
                repos.append(self.unfiltered())
                # set up the client hooks so the post-clone update works
                setupclient(self.ui, self.unfiltered())

                # setupclient fixed the class on the repo itself
                # but we also need to fix it on the repoview
                if isinstance(self, repoview.repoview):
                    self.__class__.__bases__ = (
                        self.__class__.__bases__[0],
                        self.unfiltered().__class__,
                    )
                self.requirements.add(constants.SHALLOWREPO_REQUIREMENT)
                self._writerequirements()

                # Since setupclient hadn't been called, exchange.pull was not
                # wrapped. So we need to manually invoke our version of it.
                return exchangepull(orig, self, *args, **kwargs)
            else:
                return orig(self, *args, **kwargs)

        extensions.wrapfunction(exchange, b'pull', pull_shallow)

        # Wrap the stream logic to add requirements and to pass include/exclude
        # patterns around.
        def setup_streamout(repo, remote):
            # Replace remote.stream_out with a version that sends file
            # patterns.
            def stream_out_shallow(orig):
                caps = remote.capabilities()
                if constants.NETWORK_CAP_LEGACY_SSH_GETFILES in caps:
                    opts = {}
                    if repo.includepattern:
                        opts['includepattern'] = b'\0'.join(repo.includepattern)
                    if repo.excludepattern:
                        opts['excludepattern'] = b'\0'.join(repo.excludepattern)
                    return remote._callstream(b'stream_out_shallow', **opts)
                else:
                    return orig()

            extensions.wrapfunction(remote, b'stream_out', stream_out_shallow)

        def stream_wrap(orig, op):
            setup_streamout(op.repo, op.remote)
            return orig(op)

        extensions.wrapfunction(
            streamclone, b'maybeperformlegacystreamclone', stream_wrap
        )

        def canperformstreamclone(orig, pullop, bundle2=False):
            # remotefilelog is currently incompatible with the
            # bundle2 flavor of streamclones, so force us to use
            # v1 instead.
            if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
                pullop.remotebundle2caps[b'stream'] = [
                    c for c in pullop.remotebundle2caps[b'stream'] if c != b'v2'
                ]
            if bundle2:
                return False, None
            supported, requirements = orig(pullop, bundle2=bundle2)
            if requirements is not None:
                requirements.add(constants.SHALLOWREPO_REQUIREMENT)
            return supported, requirements

        extensions.wrapfunction(
            streamclone, b'canperformstreamclone', canperformstreamclone
        )

    try:
        # propagate the wrapped command's result to the caller
        return orig(ui, repo, *args, **opts)
    finally:
        # ``repos`` only exists when --shallow was requested
        if opts.get('shallow'):
            for r in repos:
                if util.safehasattr(r, b'fileservice'):
                    r.fileservice.close()
427 427
428 428
def debugdatashallow(orig, *args, **kwds):
    """Run the wrapped command while ``remotefilelog.__len__`` reports 1.

    The original ``__len__`` is put back afterwards, whether or not the
    wrapped command raised.
    """
    filelogcls = remotefilelog.remotefilelog
    savedlen = filelogcls.__len__
    try:
        filelogcls.__len__ = lambda x: 1
        result = orig(*args, **kwds)
    finally:
        filelogcls.__len__ = savedlen
    return result
436 436
437 437
def reposetup(ui, repo):
    """Per-repository setup: install prefetch hooks and enable the client
    or server side of remotefilelog for local repositories.

    Raises RuntimeError when the repository is configured as both a server
    and a shallow client.
    """
    if not repo.local():
        return

    # registered here on purpose: doing this from uisetup does not work
    for hookname in (b'update.prefetch', b'commit.prefetch'):
        ui.setconfig(b'hooks', hookname, wcpprefetch)

    servermode = ui.configbool(b'remotefilelog', b'server')
    clientmode = isenabled(repo)

    if servermode and clientmode:
        raise RuntimeError(b"Cannot be both a server and shallow client.")

    # at most one of the two modes is active past this point
    if clientmode:
        setupclient(ui, repo)
    elif servermode:
        remotefilelogserver.setupserver(ui, repo)
457 457
458 458
def setupclient(ui, repo):
    """Turn ``repo`` into a shallow client repository.

    Does nothing unless ``repo`` is a ``localrepo.localrepository``.
    """
    if isinstance(repo, localrepo.localrepository):
        # Even clients get the server setup since they need to have the
        # wireprotocol endpoints registered.
        remotefilelogserver.onetimesetup(ui)
        onetimeclientsetup(ui)

        shallowrepo.wraprepo(repo)
        repo.store = shallowstore.wrapstore(repo.store)
470 470
471 471
def storewrapper(orig, requirements, path, vfstype):
    """Build the store via ``orig`` and wrap it with the shallow store when
    the shallow-repo requirement is present in ``requirements``."""
    store = orig(requirements, path, vfstype)
    if constants.SHALLOWREPO_REQUIREMENT not in requirements:
        return store
    return shallowstore.wrapstore(store)
478 478
479 479
480 480 # prefetch files before update
def applyupdates(
    orig, repo, actions, wctx, mctx, overwrite, wantfiledata, labels=None
):
    """Prefetch every file listed under the ``b'g'`` action before handing
    over to the wrapped ``applyupdates``."""
    if isenabled(repo):
        mf = mctx.manifest()
        wanted = [
            (fname, hex(mf[fname])) for fname, _args, _msg in actions[b'g']
        ]
        # batch fetch the needed files from the server
        repo.fileservice.prefetch(wanted)
    return orig(
        repo, actions, wctx, mctx, overwrite, wantfiledata, labels=labels
    )
494 494
495 495
496 496 # Prefetch merge checkunknownfiles
def checkunknownfiles(orig, repo, wctx, mctx, force, actions, *args, **kwargs):
    """Prefetch the files that the merge actions ``c``, ``dc``, ``cm`` and
    ``dg`` refer to, then call the wrapped function.

    Files rejected by the sparse matcher (when there is one) are skipped.
    """
    if not isenabled(repo):
        return orig(repo, wctx, mctx, force, actions, *args, **kwargs)

    wanted = []
    insparse = repo.maybesparsematch(mctx.rev())
    for fname, (kind, actionargs, msg) in pycompat.iteritems(actions):
        if insparse and not insparse(fname):
            continue
        if kind == b'dg':
            # for 'dg' the file to fetch is the first action argument
            target = actionargs[0]
        elif kind in (b'c', b'dc', b'cm'):
            target = fname
        else:
            continue
        wanted.append((target, hex(mctx.filenode(target))))
    # batch fetch the needed files from the server
    repo.fileservice.prefetch(wanted)
    return orig(repo, wctx, mctx, force, actions, *args, **kwargs)
512 512
513 513
514 514 # Prefetch files before status attempts to look at their size and contents
def checklookup(orig, self, files):
    """Prefetch ``files`` as found in each parent of this context, then
    call the wrapped ``_checklookup``."""
    repo = self._repo
    if not isenabled(repo):
        return orig(self, files)

    wanted = [
        (fname, hex(parent.filenode(fname)))
        for parent in self._parents
        for fname in files
        if fname in parent
    ]
    # batch fetch the needed files from the server
    repo.fileservice.prefetch(wanted)
    return orig(self, files)
526 526
527 527
528 528 # Prefetch the logic that compares added and removed files for renames
def findrenames(orig, repo, matcher, added, removed, *args, **kwargs):
    """Prefetch the removed files that exist in the manifest of ``.``
    before the wrapped rename detection runs."""
    if isenabled(repo):
        parentmf = repo[b'.'].manifest()
        wanted = [
            (fname, hex(parentmf[fname]))
            for fname in removed
            if fname in parentmf
        ]
        # batch fetch the needed files from the server
        repo.fileservice.prefetch(wanted)
    return orig(repo, matcher, added, removed, *args, **kwargs)
539 539
540 540
541 541 # prefetch files before pathcopies check
def computeforwardmissing(orig, a, b, match=None):
    """Compute the missing files via ``orig``; in a shallow repo with a
    sparse matcher, narrow the result to matching files and prefetch them.

    Returns the (possibly narrowed) collection of missing files.
    """
    missing = orig(a, b, match=match)
    repo = a._repo
    if not isenabled(repo):
        return missing

    targetmf = b.manifest()
    wanted = []
    insparse = repo.maybesparsematch(b.rev())
    if insparse:
        kept = set()
        for fname in missing:
            if not insparse(fname):
                continue
            wanted.append((fname, hex(targetmf[fname])))
            kept.add(fname)
        missing = kept

    # batch fetch the needed files from the server
    # NOTE(review): without a sparse matcher the list stays empty, so
    # nothing is prefetched on that path -- confirm this is intended.
    repo.fileservice.prefetch(wanted)
    return missing
561 561
562 562
563 563 # close cache miss server connection after the command has finished
def runcommand(orig, lui, repo, *args, **kwargs):
    """Run the wrapped command, then close the repo's file service.

    The file service is looked up before the command runs and is closed
    afterwards even if the command raises.
    """
    # repo can be None when running in chg:
    # - at startup, reposetup was called because serve is not norepo
    # - a norepo command like "help" is called
    service = repo.fileservice if repo and isenabled(repo) else None
    try:
        result = orig(lui, repo, *args, **kwargs)
    finally:
        if service:
            service.close()
    return result
576 576
577 577
578 578 # prevent strip from stripping remotefilelogs
def _collectbrokencsets(orig, repo, files, striprev):
    """Call the wrapped function, first dropping every file matched by
    ``repo.shallowmatch`` when the repo is shallow."""
    if not isenabled(repo):
        return orig(repo, files, striprev)
    nonshallow = [fname for fname in files if not repo.shallowmatch(fname)]
    return orig(repo, nonshallow, striprev)
583 583
584 584
585 585 # changectx wrappers
def filectx(orig, self, path, fileid=None, filelog=None):
    """Return a ``remotefilectx`` for paths matched by the shallow matcher,
    and defer to the wrapped ``filectx`` for everything else.

    A missing ``fileid`` is resolved through ``self.filenode(path)`` first.
    """
    if fileid is None:
        fileid = self.filenode(path)
    repo = self._repo
    if not (isenabled(repo) and repo.shallowmatch(path)):
        return orig(self, path, fileid=fileid, filelog=filelog)
    return remotefilectx.remotefilectx(
        repo, path, fileid=fileid, changectx=self, filelog=filelog
    )
594 594
595 595
def workingfilectx(orig, self, path, filelog=None):
    """Return a ``remoteworkingfilectx`` for paths matched by the shallow
    matcher, and defer to the wrapped ``filectx`` for everything else."""
    repo = self._repo
    if not (isenabled(repo) and repo.shallowmatch(path)):
        return orig(self, path, filelog=filelog)
    return remotefilectx.remoteworkingfilectx(
        repo, path, workingctx=self, filelog=filelog
    )
602 602
603 603
604 604 # prefetch required revisions before a diff
def trydiff(
    orig,
    repo,
    revs,
    ctx1,
    ctx2,
    modified,
    added,
    removed,
    copy,
    getfilectx,
    *args,
    **kwargs
):
    """Prefetch both sides of every file about to be diffed, then call the
    wrapped ``trydiff`` with the arguments unchanged."""
    if isenabled(repo):
        wanted = []
        basemf = ctx1.manifest()

        def request(fname, ctx):
            fnode = getfilectx(fname, ctx).filenode()
            # fnode can be None if it's a edited working ctx file
            if fnode:
                wanted.append((fname, hex(fnode)))

        for fname in modified + added + removed:
            # old side: only when the file exists in ctx1's manifest
            if fname in basemf:
                request(fname, ctx1)
            # new side: removed files have nothing to fetch
            if fname not in removed:
                request(fname, ctx2)

        repo.fileservice.prefetch(wanted)

    return orig(
        repo,
        revs,
        ctx1,
        ctx2,
        modified,
        added,
        removed,
        copy,
        getfilectx,
        *args,
        **kwargs
    )
648 648
649 649
650 650 # Prevent verify from processing files
651 651 # a stub for mercurial.hg.verify()
def _verify(orig, repo, level=None):
    """Run the shallow verifier on ``repo`` while holding the repo lock.

    ``orig`` is never called and ``level`` is not forwarded to the
    verifier.
    """
    repolock = repo.lock()
    try:
        verifier = shallowverifier.shallowverifier(repo)
        return verifier.verify()
    finally:
        repolock.release()
658 658
659 659
# Set to True by the first onetimeclientsetup() call so that later calls
# return without doing anything.
clientonetime = False


def onetimeclientsetup(ui):
    """Install the process-wide client wrappers, once.

    Wraps ``remotefilelog.addrawrevision`` and ``changelog.add`` so that
    file revisions whose link is still an integer are held back and only
    written once the changelog entry, and thus its node, exists.
    """
    global clientonetime
    if clientonetime:
        return
    clientonetime = True

    # Don't commit filelogs until we know the commit hash, since the hash
    # is present in the filelog blob.
    # This violates Mercurial's filelog->manifest->changelog write order,
    # but is generally fine for client repos.
    pendingfilecommits = []

    def addrawrevision(
        orig,
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        _metatuple=None,
    ):
        # An integer link means the linknode is not known yet: queue the
        # write and report the node without touching the filelog.
        if isinstance(link, int):
            pendingfilecommits.append(
                (
                    self,
                    rawtext,
                    transaction,
                    link,
                    p1,
                    p2,
                    node,
                    flags,
                    cachedelta,
                    _metatuple,
                )
            )
            return node
        else:
            return orig(
                self,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                node,
                flags,
                cachedelta,
                _metatuple=_metatuple,
            )

    extensions.wrapfunction(
        remotefilelog.remotefilelog, b'addrawrevision', addrawrevision
    )

    def changelogadd(orig, self, *args, **kwargs):
        # Compare the changelog length before and after to tell whether a
        # changeset was actually added.
        oldlen = len(self)
        node = orig(self, *args, **kwargs)
        newlen = len(self)
        if oldlen != newlen:
            # Replay every queued file revision, now with the real linknode.
            for oldargs in pendingfilecommits:
                log, rt, tr, link, p1, p2, n, fl, c, m = oldargs
                linknode = self.node(link)
                if linknode == node:
                    log.addrawrevision(rt, tr, linknode, p1, p2, n, fl, c, m)
                else:
                    raise error.ProgrammingError(
                        b'pending multiple integer revisions are not supported'
                    )
        else:
            # "link" is actually wrong here (it is set to len(changelog))
            # if changelog remains unchanged, skip writing file revisions
            # but still do a sanity check about pending multiple revisions
            if len(set(x[3] for x in pendingfilecommits)) > 1:
                raise error.ProgrammingError(
                    b'pending multiple integer revisions are not supported'
                )
        # The queue is emptied in place because both closures share it.
        del pendingfilecommits[:]
        return node

    extensions.wrapfunction(changelog.changelog, b'add', changelogadd)
748 748
749 749
def getrenamedfn(orig, repo, endrev=None):
    """Return a ``getrenamed(fn, rev)`` callable suited to shallow repos.

    Falls back to ``orig`` when the repo is not shallow or when the
    changeset-centric copy algorithm is in use.
    """
    shallow = isenabled(repo) and not copies.usechangesetcentricalgo(repo)
    if not shallow:
        return orig(repo, endrev)

    # filename -> {rev: rename source or falsy value}
    cache = {}

    def getrenamed(fn, rev):
        '''Return the rename source of fn at changeset rev.

        The first lookup of a file walks its ancestors and caches the
        rename info of every ancestor that has the same path. A lookup
        error yields None.'''
        known = cache.setdefault(fn, {})
        if rev in known:
            return known[rev]

        try:
            fctx = repo[rev].filectx(fn)
            for anc in fctx.ancestors():
                if anc.path() != fn:
                    continue
                source = anc.renamed()
                known[anc.rev()] = source and source[0]

            source = fctx.renamed()
            return source and source[0]
        except error.LookupError:
            return None

    return getrenamed
777 777
778 778
def walkfilerevs(orig, repo, match, follow, revs, fncache):
    """Collect the linkrevs, within the range spanned by ``revs``, of the
    matched files and their ancestors.

    ``fncache`` is filled with the file names seen per linkrev. Raises
    ``cmdutil.FileWalkError`` when not following, and ``error.Abort`` when
    a matched file is not in the parent revision. Returns the set of
    wanted revisions.
    """
    if not isenabled(repo):
        return orig(repo, match, follow, revs, fncache)

    # remotefilelog's can't be walked in rev order, so throw.
    # The caller will see the exception and walk the commit tree instead.
    if not follow:
        raise cmdutil.FileWalkError(b"Cannot walk via filelog")

    wanted = set()
    lowest, highest = min(revs), max(revs)

    parentctx = repo[b'.']
    for filename in match.files():
        if filename not in parentctx:
            raise error.Abort(
                _(b'cannot follow file not in parent revision: "%s"') % filename
            )
        fctx = parentctx[filename]

        rev = fctx.linkrev()
        if lowest <= rev <= highest:
            fncache.setdefault(rev, []).append(filename)
            wanted.add(rev)

        for anc in fctx.ancestors():
            rev = anc.linkrev()
            if lowest <= rev <= highest:
                fncache.setdefault(rev, []).append(anc.path())
                wanted.add(rev)

    return wanted
811 811
812 812
def filelogrevset(orig, repo, subset, x):
    """``filelog(pattern)``
    Changesets connected to the specified filelog.

    For performance reasons, ``filelog()`` does not show every changeset
    that affects the requested file(s). See :hg:`help log` for details. For
    a slower, more accurate result, use ``file()``.
    """

    if not isenabled(repo):
        return orig(repo, subset, x)

    # i18n: "filelog" is a keyword
    pat = revset.getstring(x, _(b"filelog requires a pattern"))
    matcher = match.match(
        repo.root, repo.getcwd(), [pat], default=b'relpath', ctx=repo[None]
    )
    linked = set()

    if match.patkind(pat):
        # partial
        for fname in repo[None]:
            if not matcher(fname):
                continue
            fctx = repo[None].filectx(fname)
            linked.add(fctx.linkrev())
            linked.update(actx.linkrev() for actx in fctx.ancestors())
    else:
        # slow
        for rev in subset:
            ctx = repo[rev]
            touched = ctx.files()
            if any(fname in touched for fname in matcher.files()):
                linked.add(ctx.rev())

    return smartset.baseset([rev for rev in subset if rev in linked])
851 851
852 852
@command(b'gc', [], _(b'hg gc [REPO...]'), norepo=True)
def gc(ui, *args, **opts):
    '''garbage collect the client and server filelog caches
    '''
    caches = set()

    # get the system client cache
    systemcache = shallowutil.getcachepath(ui, allowempty=True)
    if systemcache:
        caches.add(systemcache)

    # get repo client and server cache: $PWD first (when set), then the
    # paths given on the command line
    cwd = ui.environ.get(b'PWD')
    candidates = [cwd] if cwd else []
    candidates.extend(args)

    peers = []
    for candidate in candidates:
        try:
            peer = hg.peer(ui, {}, candidate)
            peers.append(peer)

            peercache = shallowutil.getcachepath(peer.ui, allowempty=True)
            if peercache:
                caches.add(peercache)
        except error.RepoError:
            # paths that raise RepoError are silently skipped
            pass

    # gc client cache
    for cachedir in caches:
        gcclient(ui, cachedir)

    # gc server cache
    for peer in peers:
        remotefilelogserver.gcserver(ui, peer._repo)
890 890
891 891
def gcclient(ui, cachepath):
    """Garbage collect the shared client cache stored under ``cachepath``.

    The ``repos`` file inside the cache directory lists the repositories that
    use the cache. Each listed repository that can still be opened is kept in
    the file; for each one that is a properly configured remotefilelog repo,
    either an incremental repack is run (when both
    ``remotefilelog.repackonhggc`` and ``remotefilelog.gcrepack`` are set) or
    its keep set is accumulated. Finally the shared store is asked to drop
    everything outside the accumulated keep set.

    Emits a warning and returns early when ``cachepath`` has no ``repos``
    file.
    """
    # get list of repos that use this cache
    repospath = os.path.join(cachepath, b'repos')
    if not os.path.exists(repospath):
        ui.warn(_(b"no known cache at %s\n") % cachepath)
        return

    # Use a context manager so the handle is released even if reading fails,
    # and strip only the line terminator: a final entry without a trailing
    # newline must not lose its last character.
    with open(repospath, b'rb') as reposfile:
        repos = {r.rstrip(b'\n') for r in reposfile.readlines()}

    # build list of useful files
    validrepos = []
    keepkeys = set()

    sharedcache = None
    filesrepacked = False

    progress = ui.makeprogress(
        _(b"analyzing repositories"), unit=b"repos", total=len(repos)
    )
    for count, path in enumerate(repos):
        progress.update(count)
        try:
            path = ui.expandpath(os.path.normpath(path))
        except TypeError as e:
            # NOTE(review): formatting an exception with %s into bytes may
            # itself fail on Python 3 unless the exception defines
            # __bytes__ -- confirm.
            ui.warn(_(b"warning: malformed path: %r:%s\n") % (path, e))
            traceback.print_exc()
            continue
        try:
            peer = hg.peer(ui, {}, path)
            repo = peer._repo
        except error.RepoError:
            # The repository cannot be opened any more: drop it from the list.
            continue

        validrepos.append(path)

        # Protect against any repo or config changes that have happened since
        # this repo was added to the repos file. We'd rather this loop succeed
        # and too much be deleted, than the loop fail and nothing gets deleted.
        if not isenabled(repo):
            continue

        if not util.safehasattr(repo, b'name'):
            ui.warn(
                _(b"repo %s is a misconfigured remotefilelog repo\n") % path
            )
            continue

        # If garbage collection on repack and repack on hg gc are enabled
        # then loose files are repacked and garbage collected.
        # Otherwise regular garbage collection is performed.
        repackonhggc = repo.ui.configbool(b'remotefilelog', b'repackonhggc')
        gcrepack = repo.ui.configbool(b'remotefilelog', b'gcrepack')
        if repackonhggc and gcrepack:
            try:
                repackmod.incrementalrepack(repo)
                filesrepacked = True
                continue
            except (IOError, repackmod.RepackAlreadyRunning):
                # If repack cannot be performed due to not enough disk space
                # continue doing garbage collection of loose files w/o repack
                pass

        reponame = repo.name
        if not sharedcache:
            sharedcache = repo.sharedstore

        # Compute a keepset which is not garbage collected
        def keyfn(fname, fnode):
            return fileserverclient.getcachekey(reponame, fname, hex(fnode))

        keepkeys = repackmod.keepset(repo, keyfn=keyfn, lastkeepkeys=keepkeys)

    progress.complete()

    # write list of valid repos back
    oldumask = os.umask(0o002)
    try:
        with open(repospath, b'wb') as reposfile:
            reposfile.writelines([(b"%s\n" % r) for r in validrepos])
    finally:
        # Always restore the process umask, even if the write failed.
        os.umask(oldumask)

    # prune cache
    if sharedcache is not None:
        sharedcache.gc(keepkeys)
    elif not filesrepacked:
        ui.warn(_(b"warning: no valid repos in repofile\n"))
984 984
985 985
def log(orig, ui, repo, *pats, **opts):
    """Wrapper around the log command for remotefilelog repositories.

    Non-shallow repositories go straight to ``orig``. Otherwise, when
    patterns are given, the slow path is forced unless this is a plain
    ``--follow`` without explicit revisions, and a hint to use ``-f`` is
    printed when every pattern names an existing working-copy file.
    """
    if not isenabled(repo):
        return orig(ui, repo, *pats, **opts)

    following = opts.get('follow')
    explicitrevs = opts.get('rev')

    if pats and (not following or explicitrevs):
        # Force slowpath for non-follow patterns and follows that start from
        # non-working-copy-parent revs.
        opts['removed'] = True

    if pats and not following and not explicitrevs:
        # A non-follow log without any revs specified: recommend that the
        # user add -f to speed it up, but only if all patterns are plain
        # files present in the working copy.
        matcher = scmutil.match(repo[b'.'], pats, pycompat.byteskwargs(opts))
        onlyfiles = not matcher.anypats() and all(
            os.path.isfile(repo.wjoin(fname)) for fname in matcher.files()
        )
        if onlyfiles:
            ui.warn(
                _(
                    b"warning: file log can be slow on large repos - "
                    + b"use -f to speed it up\n"
                )
            )

    return orig(ui, repo, *pats, **opts)
1019 1019
1020 1020
def revdatelimit(ui, revset):
    """Update revset so that only changesets no older than 'prefetchdays' days
    are included. The default value is set to 14 days. If 'prefetchdays' is set
    to zero or negative value then date restriction is not applied.

    ``revset`` is a bytes revset expression; the (possibly restricted) bytes
    expression is returned.
    """
    days = ui.configint(b'remotefilelog', b'prefetchdays')
    if days > 0:
        # ``days`` is an int: bytes %-formatting only accepts bytes-like
        # objects for %s on Python 3, so it must be rendered with %d.
        revset = b'(%s) & date(-%d)' % (revset, days)
    return revset
1030 1030
1031 1031
def readytofetch(repo):
    """Tell whether enough time has elapsed since the last background prefetch.

    The timestamp is the mtime of the ``lastprefetch`` file in the repo vfs.
    The file is created if missing; when more than
    ``remotefilelog.prefetchdelay`` seconds have passed since its mtime, the
    mtime is refreshed and True is returned, otherwise False.
    """
    delay = repo.ui.configint(b'remotefilelog', b'prefetchdelay')
    stamp = repo.vfs.join(b'lastprefetch')

    # Opening in append mode guarantees the file exists before we stat it;
    # the with construct is used to avoid race conditions.
    with open(stamp, b'a'):
        lastrun = os.path.getmtime(stamp)
        if (time.time() - lastrun) > delay:
            os.utime(stamp, None)
            return True

    return False
1049 1049
1050 1050
def wcpprefetch(ui, repo, **kwargs):
    """Prefetches in background revisions specified by bgprefetchrevs revset.
    Does background repack if backgroundrepack flag is set in config.

    Nothing is scheduled unless the repo is shallow,
    ``remotefilelog.bgprefetchrevs`` is set and ``readytofetch`` reports that
    the prefetch delay has elapsed. The prefetch itself is registered through
    ``repo._afterlock``.
    """
    shallow = isenabled(repo)
    bgprefetchrevs = ui.config(b'remotefilelog', b'bgprefetchrevs')
    # Note: evaluated unconditionally, so the 'lastprefetch' timestamp file is
    # touched even when the other two conditions are false.
    isready = readytofetch(repo)

    if not (shallow and bgprefetchrevs and isready):
        return

    bgrepack = repo.ui.configbool(b'remotefilelog', b'backgroundrepack')
    # update a revset with a date limit
    bgprefetchrevs = revdatelimit(ui, bgprefetchrevs)

    # _afterlock callbacks receive a "success" boolean; it is not needed here.
    def anon(unused_success):
        # Only start one background prefetch per repo object.
        if util.safehasattr(repo, b'ranprefetch') and repo.ranprefetch:
            return
        repo.ranprefetch = True
        repo.backgroundprefetch(bgprefetchrevs, repack=bgrepack)

    repo._afterlock(anon)
1073 1073
1074 1074
def pull(orig, ui, repo, *pats, **opts):
    """Wrapper around the pull command that prefetches and repacks afterwards.

    After running ``orig``, a shallow repo prefetches the
    ``remotefilelog.pullprefetch`` revset (in the background when
    ``remotefilelog.backgroundprefetch`` is set) and triggers an incremental
    background repack when ``remotefilelog.backgroundrepack`` is set. The
    result of ``orig`` is returned unchanged.
    """
    result = orig(ui, repo, *pats, **opts)
    if not isenabled(repo):
        return result

    # prefetch if it's configured
    revsetexpr = ui.config(b'remotefilelog', b'pullprefetch')
    wantrepack = repo.ui.configbool(b'remotefilelog', b'backgroundrepack')
    inbackground = repo.ui.configbool(b'remotefilelog', b'backgroundprefetch')
    ensurestart = repo.ui.configbool(b'devel', b'remotefilelog.ensurestart')

    # Whether this function must start the repack itself.
    repackhere = wantrepack
    if revsetexpr:
        ui.status(_(b"prefetching file contents\n"))
        revs = scmutil.revrange(repo, [revsetexpr])
        base = repo[b'.'].rev()
        if inbackground:
            # The repack request is forwarded to the background prefetch.
            repo.backgroundprefetch(
                revsetexpr, repack=wantrepack, ensurestart=ensurestart
            )
            repackhere = False
        else:
            repo.prefetch(revs, base=base)

    if repackhere:
        repackmod.backgroundrepack(
            repo, incremental=True, ensurestart=ensurestart
        )

    return result
1105 1105
1106 1106
def exchangepull(orig, repo, remote, *args, **kwargs):
    """Wrapper around the exchange pull that advertises remotefilelog support.

    Remotes exposing ``_callstream`` get the local repo attached as
    ``_localrepo``; other remotes exposing ``getbundle`` have that method
    wrapped so the remotefilelog bundle capability is added to every request.
    """

    def localgetbundle(
        orig, source, heads=None, common=None, bundlecaps=None, **kwargs
    ):
        # Replace a missing/empty capability collection by a fresh set, then
        # add our capability before delegating.
        bundlecaps = bundlecaps or set()
        bundlecaps.add(constants.BUNDLE2_CAPABLITY)
        return orig(
            source, heads=heads, common=common, bundlecaps=bundlecaps, **kwargs
        )

    hascallstream = util.safehasattr(remote, b'_callstream')
    if hascallstream:
        remote._localrepo = repo
    elif util.safehasattr(remote, b'getbundle'):
        extensions.wrapfunction(remote, b'getbundle', localgetbundle)

    return orig(repo, remote, *args, **kwargs)
1126 1126
1127 1127
def _fileprefetchhook(repo, revs, match):
    """Prefetch the file revisions matched by ``match`` in ``revs``.

    Does nothing for non-shallow repos. The working-directory revision
    (``None`` or ``wdirrev``) is skipped. Files excluded by the sparse
    matcher, or absent from the revision's manifest, are not requested.
    """
    if not isenabled(repo):
        return

    wanted = []
    for rev in revs:
        if rev is None or rev == nodemod.wdirrev:
            continue
        ctx = repo[rev]
        manifest = ctx.manifest()
        sparse = repo.maybesparsematch(ctx.rev())
        for path in ctx.walk(match):
            if sparse and not sparse(path):
                continue
            if path in manifest:
                wanted.append((path, hex(manifest[path])))
    repo.fileservice.prefetch(wanted)
1141 1141
1142 1142
@command(
    b'debugremotefilelog',
    [(b'd', b'decompress', None, _(b'decompress the filelog first')),],
    _(b'hg debugremotefilelog <path>'),
    norepo=True,
)
def debugremotefilelog(ui, path, **opts):
    # Thin command entry point: registers the command (usable without a
    # repository) and forwards everything to the debugcommands module.
    return debugcommands.debugremotefilelog(ui, path, **opts)
1151 1151
1152 1152
@command(
    b'verifyremotefilelog',
    [(b'd', b'decompress', None, _(b'decompress the filelogs first')),],
    # The synopsis must name the command as registered above; it previously
    # read "verifyremotefilelogs", which is not a command.
    _(b'hg verifyremotefilelog <directory>'),
    norepo=True,
)
def verifyremotefilelog(ui, path, **opts):
    # Thin command entry point: registers the command (usable without a
    # repository) and forwards everything to the debugcommands module.
    return debugcommands.verifyremotefilelog(ui, path, **opts)
1161 1161
1162 1162
@command(
    b'debugdatapack',
    [
        (b'', b'long', None, _(b'print the long hashes')),
        (b'', b'node', b'', _(b'dump the contents of node'), b'NODE'),
    ],
    _(b'hg debugdatapack <paths>'),
    norepo=True,
)
def debugdatapack(ui, *paths, **opts):
    # Thin command entry point: accepts any number of paths and forwards them,
    # together with the parsed options, to the debugcommands module.
    return debugcommands.debugdatapack(ui, *paths, **opts)
1174 1174
1175 1175
@command(b'debughistorypack', [], _(b'hg debughistorypack <path>'), norepo=True)
def debughistorypack(ui, path, **opts):
    # Thin command entry point. The command declares no options and ``opts``
    # is not forwarded to the implementation.
    return debugcommands.debughistorypack(ui, path)
1179 1179
1180 1180
@command(b'debugkeepset', [], _(b'hg debugkeepset'))
def debugkeepset(ui, repo, **opts):
    # The command is used to measure keepset computation time
    def keyfn(fname, fnode):
        # Cache key for one file revision of this repository.
        return fileserverclient.getcachekey(repo.name, fname, hex(fnode))

    # The computed keep set is discarded: only the computation matters here.
    repackmod.keepset(repo, keyfn)
    return
1189 1189
1190 1190
@command(b'debugwaitonrepack', [], _(b'hg debugwaitonrepack'))
def debugwaitonrepack(ui, repo, **opts):
    # Thin command entry point; only the repo is passed to the implementation.
    return debugcommands.debugwaitonrepack(repo)
1194 1194
1195 1195
@command(b'debugwaitonprefetch', [], _(b'hg debugwaitonprefetch'))
def debugwaitonprefetch(ui, repo, **opts):
    # Thin command entry point; only the repo is passed to the implementation.
    return debugcommands.debugwaitonprefetch(repo)
1199 1199
1200 1200
def resolveprefetchopts(ui, opts):
    """Fill in default ``rev`` and ``base`` values for a prefetch.

    ``opts`` (bytes keys) is modified in place and returned. When no ``rev``
    is given, the default is the union of ``.``, ``draft()`` and the
    ``pullprefetch`` / ``bgprefetchrevs`` configs (when set), restricted by
    ``revdatelimit``. A missing or empty ``base`` becomes ``None``.
    """
    if not opts.get(b'rev'):
        terms = [b'.', b'draft()']
        for name in (b'pullprefetch', b'bgprefetchrevs'):
            configured = ui.config(b'remotefilelog', name, None)
            if configured:
                terms.append(b'(%s)' % configured)

        # join the terms, then update the revset with a date limit
        opts[b'rev'] = [revdatelimit(ui, b'+'.join(terms))]

    if not opts.get(b'base'):
        opts[b'base'] = None

    return opts
1222 1222
1223 1223
@command(
    b'prefetch',
    [
        (b'r', b'rev', [], _(b'prefetch the specified revisions'), _(b'REV')),
        (b'', b'repack', False, _(b'run repack after prefetch')),
        (b'b', b'base', b'', _(b"rev that is assumed to already be local")),
    ]
    + commands.walkopts,
    _(b'hg prefetch [OPTIONS] [FILE...]'),
    helpcategory=command.CATEGORY_MAINTENANCE,
)
def prefetch(ui, repo, *pats, **opts):
    """prefetch file revisions from the server

    Prefetchs file revisions for the specified revs and stores them in the
    local remotefilelog cache. If no rev is specified, the default rev is
    used which is the union of dot, draft, pullprefetch and bgprefetchrev.
    File names or patterns can be used to limit which files are downloaded.

    Return 0 on success.
    """
    opts = pycompat.byteskwargs(opts)
    if not isenabled(repo):
        raise error.Abort(_(b"repo is not shallow"))

    # Fill in the default revset / base before resolving revisions.
    opts = resolveprefetchopts(ui, opts)
    targetrevs = scmutil.revrange(repo, opts.get(b'rev'))
    baserev = opts.get(b'base')
    repo.prefetch(targetrevs, baserev, pats, opts)

    ensurestart = repo.ui.configbool(b'devel', b'remotefilelog.ensurestart')

    if not opts.get(b'repack'):
        return

    # Run repack in background
    repackmod.backgroundrepack(
        repo, incremental=True, ensurestart=ensurestart
    )
1260 1260
1261 1261
@command(
    b'repack',
    [
        (b'', b'background', None, _(b'run in a background process'), None),
        (b'', b'incremental', None, _(b'do an incremental repack'), None),
        (
            b'',
            b'packsonly',
            None,
            _(b'only repack packs (skip loose objects)'),
            None,
        ),
    ],
    _(b'hg repack [OPTIONS]'),
)
def repack_(ui, repo, *pats, **opts):
    # With --background the work is handed to a background process and the
    # command returns immediately.
    if opts.get('background'):
        ensurestart = repo.ui.configbool(b'devel', b'remotefilelog.ensurestart')
        repackmod.backgroundrepack(
            repo,
            incremental=opts.get('incremental'),
            packsonly=opts.get('packsonly', False),
            ensurestart=ensurestart,
        )
        return

    options = {b'packsonly': opts.get('packsonly')}
    incremental = opts.get('incremental')

    try:
        if not incremental:
            repackmod.fullrepack(repo, options=options)
        else:
            repackmod.incrementalrepack(repo, options=options)
    except repackmod.RepackAlreadyRunning as ex:
        # Don't propagate the exception if the repack is already in
        # progress, since we want the command to exit 0.
        repo.ui.warn(b'%s\n' % ex)
@@ -1,2578 +1,2578 b''
1 1 # bundle2.py - generic container format to transmit arbitrary data.
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """Handling of the new bundle2 format
8 8
9 9 The goal of bundle2 is to act as an atomic packet to transmit a set of
10 10 payloads in an application agnostic way. It consists of a sequence of "parts"
11 11 that will be handed to and processed by the application layer.
12 12
13 13
14 14 General format architecture
15 15 ===========================
16 16
17 17 The format is architectured as follow
18 18
19 19 - magic string
20 20 - stream level parameters
21 21 - payload parts (any number)
22 22 - end of stream marker.
23 23
24 24 the Binary format
25 25 ============================
26 26
27 27 All numbers are unsigned and big-endian.
28 28
29 29 stream level parameters
30 30 ------------------------
31 31
32 32 Binary format is as follow
33 33
34 34 :params size: int32
35 35
36 36 The total number of Bytes used by the parameters
37 37
38 38 :params value: arbitrary number of Bytes
39 39
40 40 A blob of `params size` containing the serialized version of all stream level
41 41 parameters.
42 42
43 43 The blob contains a space separated list of parameters. Parameters with value
44 44 are stored in the form `<name>=<value>`. Both name and value are urlquoted.
45 45
46 46 Empty name are obviously forbidden.
47 47
48 48 Name MUST start with a letter. If this first letter is lower case, the
49 49 parameter is advisory and can be safely ignored. However when the first
50 50 letter is capital, the parameter is mandatory and the bundling process MUST
51 51 stop if it is not able to process it.
52 52
53 53 Stream parameters use a simple textual format for two main reasons:
54 54
55 55 - Stream level parameters should remain simple and we want to discourage any
56 56 crazy usage.
57 57 - Textual data allow easy human inspection of a bundle2 header in case of
58 58 troubles.
59 59
60 60 Any Applicative level options MUST go into a bundle2 part instead.
61 61
62 62 Payload part
63 63 ------------------------
64 64
65 65 Binary format is as follow
66 66
67 67 :header size: int32
68 68
69 69 The total number of Bytes used by the part header. When the header is empty
70 70 (size = 0) this is interpreted as the end of stream marker.
71 71
72 72 :header:
73 73
74 74 The header defines how to interpret the part. It contains two piece of
75 75 data: the part type, and the part parameters.
76 76
77 77 The part type is used to route an application level handler, that can
78 78 interpret payload.
79 79
80 80 Part parameters are passed to the application level handler. They are
81 81 meant to convey information that will help the application level object to
82 82 interpret the part payload.
83 83
84 84 The binary format of the header is as follows
85 85
86 86 :typesize: (one byte)
87 87
88 88 :parttype: alphanumerical part name (restricted to [a-zA-Z0-9_:-]*)
89 89
90 90 :partid: A 32bits integer (unique in the bundle) that can be used to refer
91 91 to this part.
92 92
93 93 :parameters:
94 94
95 95 Part's parameter may have arbitrary content, the binary structure is::
96 96
97 97 <mandatory-count><advisory-count><param-sizes><param-data>
98 98
99 99 :mandatory-count: 1 byte, number of mandatory parameters
100 100
101 101 :advisory-count: 1 byte, number of advisory parameters
102 102
103 103 :param-sizes:
104 104
105 105 N couple of bytes, where N is the total number of parameters. Each
106 106 couple contains (<size-of-key>, <size-of-value) for one parameter.
107 107
108 108 :param-data:
109 109
110 110 A blob of bytes from which each parameter key and value can be
111 111 retrieved using the list of size couples stored in the previous
112 112 field.
113 113
114 114 Mandatory parameters comes first, then the advisory ones.
115 115
116 116 Each parameter's key MUST be unique within the part.
117 117
118 118 :payload:
119 119
120 120 payload is a series of `<chunksize><chunkdata>`.
121 121
122 122 `chunksize` is an int32, `chunkdata` are plain bytes (as much as
123 123 `chunksize` says). The payload part is concluded by a zero size chunk.
124 124
125 125 The current implementation always produces either zero or one chunk.
126 126 This is an implementation limitation that will ultimately be lifted.
127 127
128 128 `chunksize` can be negative to trigger special case processing. No such
129 129 processing is in place yet.
130 130
131 131 Bundle processing
132 132 ============================
133 133
134 134 Each part is processed in order using a "part handler". Handler are registered
135 135 for a certain part type.
136 136
137 137 The matching of a part to its handler is case insensitive. The case of the
138 138 part type is used to know if a part is mandatory or advisory. If the Part type
139 139 contains any uppercase char it is considered mandatory. When no handler is
140 140 known for a Mandatory part, the process is aborted and an exception is raised.
141 141 If the part is advisory and no handler is known, the part is ignored. When the
142 142 process is aborted, the full bundle is still read from the stream to keep the
143 143 channel usable. But none of the part read from an abort are processed. In the
144 144 future, dropping the stream may become an option for channel we do not care to
145 145 preserve.
146 146 """
147 147
148 148 from __future__ import absolute_import, division
149 149
150 150 import collections
151 151 import errno
152 152 import os
153 153 import re
154 154 import string
155 155 import struct
156 156 import sys
157 157
158 158 from .i18n import _
159 159 from . import (
160 160 bookmarks,
161 161 changegroup,
162 162 encoding,
163 163 error,
164 164 node as nodemod,
165 165 obsolete,
166 166 phases,
167 167 pushkey,
168 168 pycompat,
169 169 streamclone,
170 170 tags,
171 171 url,
172 172 util,
173 173 )
174 174 from .utils import stringutil
175 175
176 176 urlerr = util.urlerr
177 177 urlreq = util.urlreq
178 178
179 179 _pack = struct.pack
180 180 _unpack = struct.unpack
181 181
182 182 _fstreamparamsize = b'>i'
183 183 _fpartheadersize = b'>i'
184 184 _fparttypesize = b'>B'
185 185 _fpartid = b'>I'
186 186 _fpayloadsize = b'>i'
187 187 _fpartparamcount = b'>BB'
188 188
189 189 preferedchunksize = 32768
190 190
191 191 _parttypeforbidden = re.compile(b'[^a-zA-Z0-9_:-]')
192 192
193 193
def outdebug(ui, message):
    """emit a debug message about the output stream (bundling)

    Nothing is written unless ``devel.bundle2.debug`` is enabled."""
    if not ui.configbool(b'devel', b'bundle2.debug'):
        return
    line = b'bundle2-output: %s\n' % message
    ui.debug(line)
198 198
199 199
def indebug(ui, message):
    """emit a debug message about the input stream (unbundling)

    Nothing is written unless ``devel.bundle2.debug`` is enabled."""
    if not ui.configbool(b'devel', b'bundle2.debug'):
        return
    line = b'bundle2-input: %s\n' % message
    ui.debug(line)
204 204
205 205
def validateparttype(parttype):
    """raise ValueError (carrying the part type) if it has a forbidden char"""
    offending = _parttypeforbidden.search(parttype)
    if offending is not None:
        raise ValueError(parttype)
210 210
211 211
212 212 def _makefpartparamsizes(nbparams):
213 213 """return a struct format to read part parameter sizes
214 214
215 215 The number parameters is variable so we need to build that format
216 216 dynamically.
217 217 """
218 218 return b'>' + (b'BB' * nbparams)
219 219
220 220
221 221 parthandlermapping = {}
222 222
223 223
def parthandler(parttype, params=()):
    """decorator registering a function as the handler of a bundle2 part type

    ``parttype`` is validated immediately; the handler is stored in
    ``parthandlermapping`` under the lower-cased type, and ``params`` (the
    parameters the handler knows about) is attached to it as a frozenset.

    eg::

        @parthandler('myparttype', ('mandatory', 'param', 'handled'))
        def myparttypehandler(...):
            '''process a part of type "my part".'''
            ...
    """
    validateparttype(parttype)

    def _decorator(func):
        # Lookups are done on the lower-cased type; a type may only be
        # registered once.
        key = parttype.lower()
        assert key not in parthandlermapping
        parthandlermapping[key] = func
        func.params = frozenset(params)
        return func

    return _decorator
244 244
245 245
class unbundlerecords(object):
    """chronological log of what happened during an unbundle

    Records are appended with ``records.add('cat', obj)`` where 'cat' names a
    category and obj is any object.

    ``records['cat']`` gives a tuple of every entry stored under 'cat', and
    iterating over the object yields ``('category', obj)`` pairs for all
    entries, oldest first.
    """

    def __init__(self):
        self._categories = {}
        self._sequences = []
        self._replies = {}

    def add(self, category, entry, inreplyto=None):
        """store ``entry`` under ``category``

        When ``inreplyto`` is given, the entry is also recorded in the reply
        records of that part id."""
        bucket = self._categories.get(category)
        if bucket is None:
            bucket = self._categories[category] = []
        bucket.append(entry)
        self._sequences.append((category, entry))
        if inreplyto is not None:
            self.getreplies(inreplyto).add(category, entry)

    def getreplies(self, partid):
        """return (creating it on first use) the reply records of a part"""
        try:
            return self._replies[partid]
        except KeyError:
            replies = self._replies[partid] = unbundlerecords()
            return replies

    def __getitem__(self, cat):
        entries = self._categories.get(cat, ())
        return tuple(entries)

    def __iter__(self):
        return iter(self._sequences)

    def __len__(self):
        return len(self._sequences)

    def __bool__(self):
        return bool(self._sequences)

    __nonzero__ = __bool__
292 292
293 293
class bundleoperation(object):
    """state shared by the handlers while one bundle is being processed

    A fresh object is expected for each bundle processing; it is what the
    processing function returns.

    It currently carries:
    * the repo the bundle is applied to and its ui,
    * a callable returning the transaction to use,
    * the records of what each part did,
    * the reply bundle, when there is one,
    * hook arguments to be handed to the transaction.
    """

    def __init__(self, repo, transactiongetter, captureoutput=True, source=b''):
        self.repo = repo
        self.ui = repo.ui
        self.source = source
        self.captureoutput = captureoutput
        self._gettransaction = transactiongetter
        self.records = unbundlerecords()
        self.reply = None
        self.hookargs = {}
        # carries value that can modify part behavior
        self.modes = {}

    def gettransaction(self):
        """return the transaction, flushing pending hookargs into it"""
        tr = self._gettransaction()

        pending = self.hookargs
        if pending:
            # values already on the transaction supersede the ones that were
            # added to the operation.
            pending.update(tr.hookargs)
            tr.hookargs = pending

        # mark the hookargs as flushed. further attempts to add to
        # hookargs will result in an abort.
        self.hookargs = None

        return tr

    def addhookargs(self, hookargs):
        """queue hook arguments; only allowed before the transaction starts"""
        if self.hookargs is None:
            raise error.ProgrammingError(
                b'attempted to add hookargs to '
                b'operation after transaction started'
            )
        self.hookargs.update(hookargs)
344 344
345 345
class TransactionUnavailable(RuntimeError):
    """Raised when a transaction is requested but none was expected."""

    pass
348 348
349 349
def _notransaction():
    """default method to get a transaction while processing a bundle

    Raise an exception to highlight the fact that no transaction was expected
    to be created"""
    # Never returns: always raises TransactionUnavailable.
    raise TransactionUnavailable()
356 356
357 357
def applybundle(repo, unbundler, tr, source, url=None, **kwargs):
    """Apply ``unbundler`` to ``repo`` inside transaction ``tr``.

    bundle2 unbundlers are run through ``processbundle`` after tagging the
    transaction hookargs (``bundle2``, and ``source`` / ``url`` when provided
    and not already set); anything else is applied as a plain changegroup.
    The bundle operation is returned in both cases.
    """
    # transform me into unbundler.apply() as soon as the freeze is lifted
    if not isinstance(unbundler, unbundle20):
        # the transactiongetter won't be used, but we might as well set it
        op = bundleoperation(repo, lambda: tr, source=source)
        _processchangegroup(op, unbundler, tr, source, url, **kwargs)
        return op

    hookargs = tr.hookargs
    hookargs[b'bundle2'] = b'1'
    for key, value in ((b'source', source), (b'url', url)):
        if value is not None and key not in hookargs:
            hookargs[key] = value
    return processbundle(repo, unbundler, lambda: tr, source=source)
372 372
373 373
class partiterator(object):
    """Context manager yielding the parts of a bundle one at a time.

    Entering returns a generator over ``unbundler.iterparts()``; each part is
    consumed once the caller moves on to the next one. On exit with an
    ordinary exception, the remaining stream is consumed and the exception is
    annotated with bundle2 reply information.
    """

    def __init__(self, repo, op, unbundler):
        self.repo = repo
        self.op = op
        self.unbundler = unbundler
        self.iterator = None
        # number of parts seen so far
        self.count = 0
        # part currently handed to the caller, or None
        self.current = None

    def __enter__(self):
        def func():
            # Parts are numbered from 1 so that self.count is the number of
            # parts seen.
            itr = enumerate(self.unbundler.iterparts(), 1)
            for count, p in itr:
                self.count = count
                self.current = p
                yield p
                # The caller is done with the part: make sure its payload is
                # fully read before moving to the next one.
                p.consume()
                self.current = None

        self.iterator = func()
        return self.iterator

    def __exit__(self, type, exc, tb):
        if not self.iterator:
            return

        # Only gracefully abort in a normal exception situation. User aborts
        # like Ctrl+C throw a KeyboardInterrupt which is not a base Exception,
        # and should not gracefully cleanup.
        if isinstance(exc, Exception):
            # Any exceptions seeking to the end of the bundle at this point are
            # almost certainly related to the underlying stream being bad.
            # And, chances are that the exception we're handling is related to
            # getting in that bad state. So, we swallow the seeking error and
            # re-raise the original error.
            seekerror = False
            try:
                if self.current:
                    # consume the part content to not corrupt the stream.
                    self.current.consume()

                for part in self.iterator:
                    # consume the bundle content
                    part.consume()
            except Exception:
                seekerror = True

            # Small hack to let caller code distinguish exceptions from bundle2
            # processing from processing the old format. This is mostly needed
            # to handle different return codes to unbundle according to the type
            # of bundle. We should probably clean up or drop this return code
            # craziness in a future version.
            exc.duringunbundle2 = True
            salvaged = []
            replycaps = None
            if self.op.reply is not None:
                salvaged = self.op.reply.salvageoutput()
                replycaps = self.op.reply.capabilities
            exc._replycaps = replycaps
            exc._bundle2salvagedoutput = salvaged

            # Re-raising from a variable loses the original stack. So only use
            # that form if we need to.
            if seekerror:
                raise exc

        self.repo.ui.debug(
            b'bundle2-input-bundle: %i parts total\n' % self.count
        )
443 443
444 444
def processbundle(repo, unbundler, transactiongetter=None, op=None, source=b''):
    """Process a bundle, applying its effects to/from a repo

    Each part is handed, in order, to the handler registered for its type.
    An unknown mandatory part aborts the process.

    A prebuilt bundleoperation may be supplied through ``op``; otherwise one
    is created from ``transactiongetter`` (defaulting to a getter that
    refuses to provide a transaction). Passing ``op`` exists so that output
    is properly propagated in case of an error during the unbundling, and
    is expected to go away when output capturing is reworked.

    The bundle operation is returned.
    """
    if op is None:
        getter = _notransaction if transactiongetter is None else transactiongetter
        op = bundleoperation(repo, getter, source=source)
    # todo:
    # - replace this is a init function soon.
    # - exception catching
    unbundler.params
    if repo.ui.debugflag:
        pieces = [b'bundle2-input-bundle:']
        if unbundler.params:
            pieces.append(b' %i params' % len(unbundler.params))
        notr = op._gettransaction is None or op._gettransaction is _notransaction
        pieces.append(b' no-transaction' if notr else b' with-transaction')
        pieces.append(b'\n')
        repo.ui.debug(b''.join(pieces))

    processparts(repo, op, unbundler)

    return op
480 480
481 481
def processparts(repo, op, unbundler):
    """Run ``_processpart`` on every part of ``unbundler``, in order."""
    # partiterator takes care of consuming parts and of cleaning up the
    # stream when a handler raises.
    with partiterator(repo, op, unbundler) as parts:
        for part in parts:
            _processpart(op, part)
486 486
487 487
488 488 def _processchangegroup(op, cg, tr, source, url, **kwargs):
489 489 ret = cg.apply(op.repo, tr, source, url, **kwargs)
490 490 op.records.add(b'changegroup', {b'return': ret,})
491 491 return ret
492 492
493 493
def _gethandler(op, part):
    """Return the handler registered for ``part``, or None to skip the part.

    The handler is looked up in ``parthandlermapping`` by ``part.type`` and
    must know every mandatory parameter of the part. When no suitable handler
    exists, ``error.BundleUnknownFeatureError`` is raised for a mandatory
    part, while an advisory part is ignored (None is returned).

    With ``ui.debugflag`` set, a one-line summary of the part (type, advisory
    flag, parameter counts and lookup status) is always written, whatever the
    outcome.
    """
    status = b'unknown'  # used by debug output
    try:
        handler = parthandlermapping.get(part.type)
        if handler is None:
            status = b'unsupported-type'
            raise error.BundleUnknownFeatureError(parttype=part.type)
        indebug(op.ui, b'found a handler for part %s' % part.type)
        unknownparams = part.mandatorykeys - handler.params
        if unknownparams:
            unknownparams = sorted(unknownparams)
            status = b'unsupported-params (%s)' % b', '.join(unknownparams)
            raise error.BundleUnknownFeatureError(
                parttype=part.type, params=unknownparams
            )
        status = b'supported'
    except error.BundleUnknownFeatureError as exc:
        if part.mandatory:  # mandatory parts
            raise
        indebug(op.ui, b'ignoring unsupported advisory part %s' % exc)
        return  # skip to part processing
    finally:
        # Runs on every exit path (return, raise or fall-through).
        if op.ui.debugflag:
            msg = [b'bundle2-input-part: "%s"' % part.type]
            if not part.mandatory:
                msg.append(b' (advisory)')
            nbmp = len(part.mandatorykeys)
            nbap = len(part.params) - nbmp
            if nbmp or nbap:
                msg.append(b' (params:')
                if nbmp:
                    msg.append(b' %i mandatory' % nbmp)
                if nbap:
                    # Report the advisory count (this used to print the
                    # mandatory count a second time).
                    msg.append(b' %i advisory' % nbap)
                msg.append(b')')
            msg.append(b' %s\n' % status)
            op.ui.debug(b''.join(msg))

    return handler
534 534
535 535
def _processpart(op, part):
    """Run the handler matching ``part``, if there is one.

    When ``_gethandler`` returns None (unsupported advisory part) nothing is
    done.  Otherwise the handler is called with ``(op, part)``.

    If the operation captures output and has a reply bundle, everything the
    handler writes to the ui is buffered and, when non-empty, attached to the
    reply as an advisory ``output`` part referencing the id of ``part``.  The
    buffer is popped even when the handler raises.
    """
    handler = _gethandler(op, part)
    if handler is None:
        return

    # The handler runs outside of the lookup logic in _gethandler(), so an
    # exception raised by the handler itself is never mistaken for a lookup
    # failure.
    capturing = op.captureoutput and op.reply is not None
    if capturing:
        op.ui.pushbuffer(error=True, subproc=True)
    try:
        handler(op, part)
    finally:
        if capturing:
            captured = op.ui.popbuffer()
            if captured:
                replypart = op.reply.newpart(
                    b'output', data=captured, mandatory=False
                )
                replypart.addparam(
                    b'in-reply-to', pycompat.bytestr(part.id), mandatory=False
                )
563 563
564 564
def decodecaps(blob):
    """Turn a bundle2 capabilities blob (bytes) into a dictionary.

    The blob holds one capability per line; empty lines are skipped.  A
    capability may carry comma separated values after an ``=`` sign::

        capability=value1,value2,value3

    Names and values are url-unquoted.  Every capability maps to a list of
    values, which is empty when the line has no ``=``.
    """
    decoded = {}
    for entry in blob.splitlines():
        if not entry:
            continue
        name, sep, rest = entry.partition(b'=')
        rawvalues = rest.split(b',') if sep else ()
        name = urlreq.unquote(name)
        decoded[name] = [urlreq.unquote(item) for item in rawvalues]
    return decoded
587 587
588 588
def encodecaps(caps):
    """Serialize a bundle2 capabilities dictionary into a bytes blob.

    Capabilities are emitted in sorted order, one per line.  Names and values
    are url-quoted; a capability with values is written as
    ``name=value1,value2``, one without values as just ``name``.
    """
    lines = []
    for name in sorted(caps):
        rawvalues = caps[name]
        quotedname = urlreq.quote(name)
        quotedvalues = [urlreq.quote(item) for item in rawvalues]
        if quotedvalues:
            lines.append(b"%s=%s" % (quotedname, b','.join(quotedvalues)))
        else:
            lines.append(quotedname)
    return b'\n'.join(lines)
600 600
601 601
# Known bundle type names and the pair of values associated with each one.
# NOTE(review): the pairs look like (stream header, compression identifier);
# confirm against the code consuming this table.
bundletypes = {
    b"": (b"", b'UN'),  # only when using unbundle on ssh and old http servers
    # since the unification ssh accepts a header but there
    # is no capability signaling it.
    b"HG20": (),  # special-cased below
    b"HG10UN": (b"HG10UN", b'UN'),
    b"HG10BZ": (b"HG10", b'BZ'),
    b"HG10GZ": (b"HG10GZ", b'GZ'),
}

# hgweb uses this list to communicate its preferred type
# (entries are keys of ``bundletypes``, listed in order)
bundlepriority = [b'HG10GZ', b'HG10BZ', b'HG10UN']
614 614
615 615
class bundle20(object):
    """An outgoing bundle2 container.

    Stream level parameters are added with `addparam`, parts with `newpart`
    (or `addpart`).  Once populated, `getchunks` yields the binary chunks
    making up the whole bundle2 stream."""

    _magicstring = b'HG20'

    def __init__(self, ui, capabilities=()):
        self.ui = ui
        self.capabilities = dict(capabilities)
        self._parts = []
        self._params = []
        # payload compression: none until setcompression() says otherwise
        self._compengine = util.compengines.forbundletype(b'UN')
        self._compopts = None
        # If compression is being handled by a consumer of the raw
        # data (e.g. the wire protocol), unsetting this flag tells
        # consumers that the bundle is best left uncompressed.
        self.prefercompressed = True

    def setcompression(self, alg, compopts=None):
        """compress the core of the bundle with <alg>

        Does nothing when <alg> is None or ``UN``.  Otherwise a
        ``Compression`` stream parameter is added and the matching
        compression engine (with ``compopts``) is used by `getchunks`."""
        if alg in (None, b'UN'):
            return
        assert not any(
            pname.lower() == b'compression' for pname, pvalue in self._params
        )
        self.addparam(b'Compression', alg)
        self._compengine = util.compengines.forbundletype(alg)
        self._compopts = compopts

    @property
    def nbparts(self):
        """number of parts currently held by the bundler"""
        return len(self._parts)

    # methods used to defines the bundle2 content
    def addparam(self, name, value=None):
        """register a stream level parameter

        The name must be non-empty and start with an ASCII letter, otherwise
        a ProgrammingError is raised."""
        if not name:
            raise error.ProgrammingError(b'empty parameter name')
        letters = pycompat.bytestr(
            string.ascii_letters  # pytype: disable=wrong-arg-types
        )
        if name[0:1] not in letters:
            raise error.ProgrammingError(
                b'non letter first character: %s' % name
            )
        self._params.append((name, value))

    def addpart(self, part):
        """append an existing part to the container

        The part must not have an id yet; it receives the next free one
        (its position in the container)."""
        assert part.id is None
        part.id = len(self._parts)  # very cheap counter
        self._parts.append(part)

    def newpart(self, typeid, *args, **kwargs):
        """build a ``bundlepart`` and add it to the container right away

        Since the part is registered immediately, any failure to finish
        setting it up after this call should make the whole bundling fail.

        Create the part yourself and use `addpart` when finer control is
        needed."""
        created = bundlepart(typeid, *args, **kwargs)
        self.addpart(created)
        return created

    # methods used to generate the bundle2 stream
    def getchunks(self):
        """yield the binary chunks of the complete bundle2 stream

        Order: magic string, size of the parameter block, the parameter
        block itself (when non-empty), then the parts run through the
        compression engine."""
        magic = self._magicstring
        if self.ui.debugflag:
            summary = [b'bundle2-output-bundle: "%s",' % magic]
            if self._params:
                summary.append(b' (%i params)' % len(self._params))
            summary.append(b' %i parts total\n' % len(self._parts))
            self.ui.debug(b''.join(summary))
        outdebug(self.ui, b'start emission of %s stream' % magic)
        yield magic
        paramblock = self._paramchunk()
        outdebug(self.ui, b'bundle parameter: %s' % paramblock)
        yield _pack(_fstreamparamsize, len(paramblock))
        if paramblock:
            yield paramblock
        compressed = self._compengine.compressstream(
            self._getcorechunk(), self._compopts
        )
        for piece in compressed:
            yield piece

    def _paramchunk(self):
        """return the stream parameters encoded as a single bytes block

        Parameters are url-quoted, written as ``name`` or ``name=value`` and
        separated by single spaces."""
        encoded = []
        for pname, pvalue in self._params:
            item = urlreq.quote(pname)
            if pvalue is not None:
                item = b'%s=%s' % (item, urlreq.quote(pvalue))
            encoded.append(item)
        return b' '.join(encoded)

    def _getcorechunk(self):
        """yield the chunks of every part, then the end of stream marker

        (everything but the magic string and the stream parameters)"""
        outdebug(self.ui, b'start of parts')
        for part in self._parts:
            outdebug(self.ui, b'bundle part: "%s"' % part.type)
            for piece in part.getchunks(ui=self.ui):
                yield piece
        outdebug(self.ui, b'end of bundle')
        # an empty part header closes the stream
        yield _pack(_fpartheadersize, 0)

    def salvageoutput(self):
        """return copies of all the output parts held by the bundle

        Intended for error handling, so that server output is not lost."""
        return [
            part.copy()
            for part in self._parts
            if part.type.startswith(b'output')
        ]
738 738
739 739
class unpackermixin(object):
    """Mixin providing raw bytes and struct reads on top of a stream

    The stream is kept in ``self._fp``."""

    def __init__(self, fp):
        self._fp = fp

    def _unpack(self, format):
        """read and decode one ``struct`` item of <format> from the stream

        Exactly ``struct.calcsize(format)`` bytes are consumed.

        For internal use by the bundle2 protocol only: it manipulates the low
        level stream directly, bundle2 level instructions included.

        Do not build higher-level logic or methods on top of it."""
        size = struct.calcsize(format)
        return _unpack(format, self._readexact(size))

    def _readexact(self, size):
        """return exactly <size> bytes read from the stream

        For internal use by the bundle2 protocol only: it manipulates the low
        level stream directly, bundle2 level instructions included.

        Do not build higher-level logic or methods on top of it."""
        stream = self._fp
        return changegroup.readexactly(stream, size)
766 766
767 767
def getunbundler(ui, fp, magicstring=None):
    """Build the unbundler object matching a bundle magic string.

    When ``magicstring`` is None, its four bytes are read from ``fp``.  The
    first two bytes must be ``HG``; the last two select the unbundler class
    through ``formatmap``.  An ``error.Abort`` is raised for a wrong magic or
    an unknown version.
    """
    if magicstring is None:
        magicstring = changegroup.readexactly(fp, 4)
    magic = magicstring[0:2]
    version = magicstring[2:4]
    if magic != b'HG':
        details = (magic, version)
        ui.debug(
            b"error: invalid magic: %r (version %r), should be 'HG'\n"
            % details
        )
        raise error.Abort(_(b'not a Mercurial bundle'))
    cls = formatmap.get(version)
    if cls is None:
        raise error.Abort(_(b'unknown bundle version %s') % version)
    instance = cls(ui, fp)
    indebug(ui, b'start processing of %s stream' % magicstring)
    return instance
785 785
786 786
class unbundle20(unpackermixin):
    """interpret a bundle2 stream

    This class is fed with a binary stream and yields parts through its
    `iterparts` methods."""

    _magicstring = b'HG20'

    def __init__(self, ui, fp):
        """wrap the stream ``fp``

        Nothing is read from the stream here; parameters and parts are read
        on demand."""
        self.ui = ui
        # no decompression until a stream parameter says otherwise
        self._compengine = util.compengines.forbundletype(b'UN')
        self._compressed = None
        super(unbundle20, self).__init__(fp)

    @util.propertycache
    def params(self):
        """dictionary of stream level parameters

        The parameter block is read from the stream and each parameter is
        processed (see `_processparam`).  Raises ``BundleValueError`` when the
        announced block size is negative."""
        indebug(self.ui, b'reading bundle2 stream parameters')
        params = {}
        paramssize = self._unpack(_fstreamparamsize)[0]
        if paramssize < 0:
            raise error.BundleValueError(
                b'negative bundle param size: %i' % paramssize
            )
        if paramssize:
            params = self._readexact(paramssize)
            params = self._processallparams(params)
        return params

    def _processallparams(self, paramsblock):
        """decode and process a raw block of stream level parameters

        The block is a space separated list of url-quoted ``name`` or
        ``name=value`` items.  Every parameter is passed to `_processparam`;
        the return value maps each name to its value (None when the
        parameter has no value), in stream order."""
        params = util.sortdict()
        for p in paramsblock.split(b' '):
            p = p.split(b'=', 1)
            p = [urlreq.unquote(i) for i in p]
            if len(p) < 2:
                p.append(None)
            self._processparam(*p)
            params[p[0]] = p[1]
        return params

    def _processparam(self, name, value):
        """process a parameter, applying its effect if needed

        Parameter starting with a lower case letter are advisory and will be
        ignored when unknown. Those starting with an upper case letter are
        mandatory and this function will raise a
        ``BundleUnknownFeatureError`` when they are unknown.

        A ``ValueError`` is raised when the name is empty or does not start
        with an ASCII letter.

        Known parameters are those registered in ``b2streamparamsmap``
        (looked up by lower-cased name).
        """
        if not name:
            raise ValueError('empty parameter name')
        if name[0:1] not in pycompat.bytestr(
            string.ascii_letters  # pytype: disable=wrong-arg-types
        ):
            raise ValueError('non letter first character: %s' % name)
        try:
            handler = b2streamparamsmap[name.lower()]
        except KeyError:
            if name[0:1].islower():
                indebug(self.ui, b"ignoring unknown parameter %s" % name)
            else:
                raise error.BundleUnknownFeatureError(params=(name,))
        else:
            handler(self, name, value)

    def _forwardchunks(self):
        """utility to transfer a bundle2 as binary

        This is made necessary by the fact the 'getbundle' command over 'ssh'
        have no way to know then the reply end, relying on the bundle to be
        interpreted to know its end. This is terrible and we are sorry, but we
        needed to move forward to get general delta enabled.

        The stream is re-emitted decompressed, with the compression
        parameter removed from the parameter block.
        """
        yield self._magicstring
        # the parameters must not have been consumed through ``params`` yet
        assert 'params' not in vars(self)
        paramssize = self._unpack(_fstreamparamsize)[0]
        if paramssize < 0:
            raise error.BundleValueError(
                b'negative bundle param size: %i' % paramssize
            )
        if paramssize:
            params = self._readexact(paramssize)
            self._processallparams(params)
            # The payload itself is decompressed below, so drop
            # the compression parameter passed down to compensate.
            outparams = []
            for p in params.split(b' '):
                # parameters may come without a value (no ``=``), so only
                # the name is extracted here
                k = p.split(b'=', 1)[0]
                if k.lower() != b'compression':
                    outparams.append(p)
            outparams = b' '.join(outparams)
            yield _pack(_fstreamparamsize, len(outparams))
            yield outparams
        else:
            yield _pack(_fstreamparamsize, paramssize)
        # From there, payload might need to be decompressed
        self._fp = self._compengine.decompressorreader(self._fp)
        emptycount = 0
        # two consecutive empty sizes (end of a part payload followed by an
        # empty part header) mark the end of the stream
        while emptycount < 2:
            # so we can brainlessly loop
            assert _fpartheadersize == _fpayloadsize
            size = self._unpack(_fpartheadersize)[0]
            yield _pack(_fpartheadersize, size)
            if size:
                emptycount = 0
            else:
                emptycount += 1
                continue
            if size == flaginterrupt:
                continue
            elif size < 0:
                raise error.BundleValueError(
                    b'negative chunk size: %i' % size
                )
            yield self._readexact(size)

    def iterparts(self, seekable=False):
        """yield all parts contained in the stream

        Parts are ``seekableunbundlepart`` instances when ``seekable`` is
        true, ``unbundlepart`` instances otherwise."""
        cls = seekableunbundlepart if seekable else unbundlepart
        # make sure param have been loaded
        self.params
        # From there, payload need to be decompressed
        self._fp = self._compengine.decompressorreader(self._fp)
        indebug(self.ui, b'start extraction of bundle2 parts')
        headerblock = self._readpartheader()
        while headerblock is not None:
            part = cls(self.ui, headerblock, self._fp)
            yield part
            # Ensure part is fully consumed so we can start reading the next
            # part.
            part.consume()

            headerblock = self._readpartheader()
        indebug(self.ui, b'end of bundle2 stream')

    def _readpartheader(self):
        """reads a part header size and return the bytes blob

        returns None if empty

        Raises ``BundleValueError`` when the size is negative."""
        headersize = self._unpack(_fpartheadersize)[0]
        if headersize < 0:
            raise error.BundleValueError(
                b'negative part header size: %i' % headersize
            )
        indebug(self.ui, b'part header size: %i' % headersize)
        if headersize:
            return self._readexact(headersize)
        return None

    def compressed(self):
        """return the compression flag set while processing the parameters

        (None unless a parameter handler changed it)"""
        self.params  # load params
        return self._compressed

    def close(self):
        """close underlying file"""
        if util.safehasattr(self._fp, 'close'):
            return self._fp.close()
945 945
946 946
# Maps the two-byte bundle version (the part of the magic string following
# ``HG``) to the unbundler class in charge of it; used by getunbundler().
formatmap = {b'20': unbundle20}

# Registry of the stream level parameter handlers, keyed by lower-cased
# parameter name; filled through the b2streamparamhandler() decorator.
b2streamparamsmap = {}
950 950
951 951
def b2streamparamhandler(name):
    """register a handler for a stream level parameter

    Returns a decorator storing the decorated function in
    ``b2streamparamsmap`` under ``name`` and returning it unchanged.
    Registering two handlers under the same name is a programming error."""

    def decorator(func):
        # guard against double registration in the registry actually being
        # written to
        assert name not in b2streamparamsmap
        b2streamparamsmap[name] = func
        return func

    return decorator
961 961
962 962
@b2streamparamhandler(b'compression')
def processcompression(unbundler, param, value):
    """handle the compression parameter: install payload decompression

    Raises ``BundleUnknownFeatureError`` when ``value`` is not one of the
    supported bundle compression types.  Otherwise the matching engine is
    stored on the unbundler, which is flagged as compressed whenever a value
    was given."""
    engines = util.compengines
    if value not in engines.supportedbundletypes:
        raise error.BundleUnknownFeatureError(params=(param,), values=(value,))
    unbundler._compengine = engines.forbundletype(value)
    if value is None:
        return
    unbundler._compressed = True
971 971
972 972
class bundlepart(object):
    """A bundle2 part contains application level payload

    The part `type` is used to route the part to the application level
    handler.

    The part payload is contained in ``part.data``. It could be raw bytes or a
    generator of byte chunks.

    You can add parameters to the part using the ``addparam`` method.
    Parameters can be either mandatory (default) or advisory. Remote side
    should be able to safely ignore the advisory ones.

    Both data and parameters cannot be modified after the generation has begun.
    """

    def __init__(
        self,
        parttype,
        mandatoryparams=(),
        advisoryparams=(),
        data=b'',
        mandatory=True,
    ):
        """create a part of type ``parttype``

        ``mandatoryparams`` and ``advisoryparams`` are iterables of
        ``(name, value)`` pairs.  A ``ProgrammingError`` is raised when the
        same name appears twice."""
        validateparttype(parttype)
        self.id = None
        self.type = parttype
        self._data = data
        self._mandatoryparams = list(mandatoryparams)
        self._advisoryparams = list(advisoryparams)
        # checking for duplicated entries
        self._seenparams = set()
        for pname, __ in self._mandatoryparams + self._advisoryparams:
            if pname in self._seenparams:
                raise error.ProgrammingError(b'duplicated params: %s' % pname)
            self._seenparams.add(pname)
        # status of the part's generation:
        # - None: not started,
        # - False: currently generated,
        # - True: generation done.
        self._generated = None
        self.mandatory = mandatory

    def __repr__(self):
        # __repr__ must return a native string; native str formatting works
        # on both Python 2 and Python 3 (bytes formatting of the module and
        # class names raises TypeError on Python 3).
        cls = "%s.%s" % (self.__class__.__module__, self.__class__.__name__)
        return '<%s object at %x; id: %s; type: %s; mandatory: %s>' % (
            cls,
            id(self),
            self.id,
            self.type,
            self.mandatory,
        )

    def copy(self):
        """return a copy of the part

        The new part have the very same content but no partid assigned yet.
        Parts with generated data cannot be copied."""
        # same "is it an iterator" test as getchunks() and _payloadchunks()
        assert not (
            util.safehasattr(self.data, 'next')
            or util.safehasattr(self.data, b'__next__')
        )
        return self.__class__(
            self.type,
            self._mandatoryparams,
            self._advisoryparams,
            self._data,
            self.mandatory,
        )

    # methods used to defines the part content
    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        if self._generated is not None:
            raise error.ReadOnlyPartError(b'part is being generated')
        self._data = data

    @property
    def mandatoryparams(self):
        # make it an immutable tuple to force people through ``addparam``
        return tuple(self._mandatoryparams)

    @property
    def advisoryparams(self):
        # make it an immutable tuple to force people through ``addparam``
        return tuple(self._advisoryparams)

    def addparam(self, name, value=b'', mandatory=True):
        """add a parameter to the part

        If 'mandatory' is set to True, the remote handler must claim support
        for this parameter or the unbundling will be aborted.

        The 'name' and 'value' cannot exceed 255 bytes each.
        (NOTE(review): this limit is not checked here.)

        Raises ``ReadOnlyPartError`` once generation has started and
        ``ValueError`` when ``name`` was already used.
        """
        if self._generated is not None:
            raise error.ReadOnlyPartError(b'part is being generated')
        if name in self._seenparams:
            raise ValueError(b'duplicated params: %s' % name)
        self._seenparams.add(name)
        params = self._advisoryparams
        if mandatory:
            params = self._mandatoryparams
        params.append((name, value))

    # methods used to generates the bundle2 stream
    def getchunks(self, ui):
        """yield the binary chunks of the part: header first, then payload

        A part can only be generated once; a second call raises
        ``ProgrammingError``.  When producing the payload fails, an
        ``error:abort`` part is emitted out of band before the exception is
        re-raised."""
        if self._generated is not None:
            raise error.ProgrammingError(b'part can only be consumed once')
        self._generated = False

        if ui.debugflag:
            msg = [b'bundle2-output-part: "%s"' % self.type]
            if not self.mandatory:
                msg.append(b' (advisory)')
            nbmp = len(self.mandatoryparams)
            nbap = len(self.advisoryparams)
            if nbmp or nbap:
                msg.append(b' (params:')
                if nbmp:
                    msg.append(b' %i mandatory' % nbmp)
                if nbap:
                    # report the advisory count, not the mandatory one
                    msg.append(b' %i advisory' % nbap)
                msg.append(b')')
            if not self.data:
                msg.append(b' empty payload')
            elif util.safehasattr(self.data, 'next') or util.safehasattr(
                self.data, b'__next__'
            ):
                msg.append(b' streamed payload')
            else:
                msg.append(b' %i bytes payload' % len(self.data))
            msg.append(b'\n')
            ui.debug(b''.join(msg))

        #### header
        # the case of the type carries the mandatory/advisory bit
        if self.mandatory:
            parttype = self.type.upper()
        else:
            parttype = self.type.lower()
        outdebug(ui, b'part %s: "%s"' % (pycompat.bytestr(self.id), parttype))
        ## parttype
        header = [
            _pack(_fparttypesize, len(parttype)),
            parttype,
            _pack(_fpartid, self.id),
        ]
        ## parameters
        # count
        manpar = self.mandatoryparams
        advpar = self.advisoryparams
        header.append(_pack(_fpartparamcount, len(manpar), len(advpar)))
        # size
        parsizes = []
        for key, value in manpar:
            parsizes.append(len(key))
            parsizes.append(len(value))
        for key, value in advpar:
            parsizes.append(len(key))
            parsizes.append(len(value))
        paramsizes = _pack(_makefpartparamsizes(len(parsizes) // 2), *parsizes)
        header.append(paramsizes)
        # key, value
        for key, value in manpar:
            header.append(key)
            header.append(value)
        for key, value in advpar:
            header.append(key)
            header.append(value)
        ## finalize header
        try:
            headerchunk = b''.join(header)
        except TypeError:
            raise TypeError(
                'Found a non-bytes trying to '
                'build bundle part header: %r' % header
            )
        outdebug(ui, b'header chunk size: %i' % len(headerchunk))
        yield _pack(_fpartheadersize, len(headerchunk))
        yield headerchunk
        ## payload
        try:
            for chunk in self._payloadchunks():
                outdebug(ui, b'payload chunk size: %i' % len(chunk))
                yield _pack(_fpayloadsize, len(chunk))
                yield chunk
        except GeneratorExit:
            # GeneratorExit means that nobody is listening for our
            # results anyway, so just bail quickly rather than trying
            # to produce an error part.
            ui.debug(b'bundle2-generatorexit\n')
            raise
        except BaseException as exc:
            bexc = stringutil.forcebytestr(exc)
            # backup exception data for later
            ui.debug(
                b'bundle2-input-stream-interrupt: encoding exception %s' % bexc
            )
            tb = sys.exc_info()[2]
            msg = b'unexpected error: %s' % bexc
            interpart = bundlepart(
                b'error:abort', [(b'message', msg)], mandatory=False
            )
            interpart.id = 0
            # a payload size of -1 announces an out of band part
            yield _pack(_fpayloadsize, -1)
            for chunk in interpart.getchunks(ui=ui):
                yield chunk
            outdebug(ui, b'closing payload chunk')
            # abort current part payload
            yield _pack(_fpayloadsize, 0)
            pycompat.raisewithtb(exc, tb)
        # end of payload
        outdebug(ui, b'closing payload chunk')
        yield _pack(_fpayloadsize, 0)
        self._generated = True

    def _payloadchunks(self):
        """yield chunks of a the part payload

        Exists to handle the different methods to provide data to a part:
        iterator data is re-chunked to ``preferedchunksize``, non-empty
        plain data is yielded as a single chunk."""
        # we only support fixed size data now.
        # This will be improved in the future.
        if util.safehasattr(self.data, 'next') or util.safehasattr(
            self.data, b'__next__'
        ):
            buff = util.chunkbuffer(self.data)
            chunk = buff.read(preferedchunksize)
            while chunk:
                yield chunk
                chunk = buff.read(preferedchunksize)
        elif len(self.data):
            yield self.data
1207 1207
# Special value found where a payload chunk size is expected: it signals that
# the regular stream is interrupted and that an out of band part follows
# (see decodepayloadchunks() and unbundle20._forwardchunks()).
flaginterrupt = -1
1209 1209
1210 1210
class interrupthandler(unpackermixin):
    """read a single part and process it with restricted capability

    Used to transmit an exception raised on the producer side during part
    iteration while the consumer is in the middle of reading a part.

    A part processed this way only has access to a ui object,"""

    def __init__(self, ui, fp):
        super(interrupthandler, self).__init__(fp)
        self.ui = ui

    def _readpartheader(self):
        """read a part header size, then return the header bytes blob

        returns None when the size is zero; raises ``BundleValueError`` when
        it is negative"""
        (headersize,) = self._unpack(_fpartheadersize)[0:1]
        if headersize < 0:
            raise error.BundleValueError(
                b'negative part header size: %i' % headersize
            )
        indebug(self.ui, b'part header size: %i\n' % headersize)
        if not headersize:
            return None
        return self._readexact(headersize)

    def __call__(self):
        """read the out of band part, if any, and process it

        The part is fully consumed afterwards, unless processing was stopped
        by SystemExit or KeyboardInterrupt."""
        ui = self.ui
        ui.debug(
            b'bundle2-input-stream-interrupt: opening out of band context\n'
        )
        indebug(ui, b'bundle2 stream interruption, looking for a part.')
        headerblock = self._readpartheader()
        if headerblock is None:
            indebug(ui, b'no part found during interruption.')
            return
        part = unbundlepart(ui, headerblock, self._fp)
        op = interruptoperation(ui)
        try:
            _processpart(op, part)
        except (SystemExit, KeyboardInterrupt):
            # hard abort: leave the stream alone
            raise
        except BaseException:
            part.consume()
            raise
        else:
            part.consume()
        ui.debug(
            b'bundle2-input-stream-interrupt: closing out of band context\n'
        )
1261 1261
1262 1262
class interruptoperation(object):
    """Restricted operation handed to part handlers during an interruption

    Only a ui object is available: there is no reply bundle, output is not
    captured, and any attempt to reach the repository or a transaction
    raises.
    """

    def __init__(self, ui):
        self.captureoutput = False
        self.reply = None
        self.ui = ui

    @property
    def repo(self):
        """always raises ProgrammingError: no repository is available"""
        raise error.ProgrammingError(b'no repo access from stream interruption')

    def gettransaction(self):
        """always raises TransactionUnavailable"""
        raise TransactionUnavailable(b'no repo access from stream interruption')
1280 1280
1281 1281
def decodepayloadchunks(ui, fh):
    """Reads bundle2 part payload data into chunks.

    Part payload data consists of framed chunks. This function takes
    a file handle and emits those chunks.

    Each chunk is preceded by its size.  A size of zero ends the payload, a
    size equal to ``flaginterrupt`` means an out of band part follows (it is
    processed through ``interrupthandler``), and any other negative size
    raises ``BundleValueError``.  ``error.Abort`` is raised when the stream
    ends in the middle of a chunk or of a chunk size.
    """
    dolog = ui.configbool(b'devel', b'bundle2.debug')
    debug = ui.debug

    headerstruct = struct.Struct(_fpayloadsize)
    headersize = headerstruct.size
    unpack = headerstruct.unpack

    readexactly = changegroup.readexactly
    read = fh.read

    chunksize = unpack(readexactly(fh, headersize))[0]
    indebug(ui, b'payload chunk size: %i' % chunksize)

    # changegroup.readexactly() is inlined below for performance.
    while chunksize:
        if chunksize >= 0:
            s = read(chunksize)
            if len(s) < chunksize:
                raise error.Abort(
                    _(b'stream ended unexpectedly (got %d bytes, expected %d)')
                    % (len(s), chunksize)
                )

            yield s
        elif chunksize == flaginterrupt:
            # Interrupt "signal" detected. The regular stream is interrupted
            # and a bundle2 part follows. Consume it.
            interrupthandler(ui, fh)()
        else:
            # %i rather than %s: bytes formatting only accepts bytes-like
            # objects for %s on Python 3
            raise error.BundleValueError(
                b'negative payload chunk size: %i' % chunksize
            )

        s = read(headersize)
        if len(s) < headersize:
            # the read that fell short was for the size field itself
            raise error.Abort(
                _(b'stream ended unexpectedly (got %d bytes, expected %d)')
                % (len(s), headersize)
            )

        chunksize = unpack(s)[0]

        # indebug() inlined for performance.
        if dolog:
            debug(b'bundle2-input: payload chunk size: %i\n' % chunksize)
1336 1336
1337 1337
1338 1338 class unbundlepart(unpackermixin):
1339 1339 """a bundle part read from a bundle"""
1340 1340
def __init__(self, ui, header, fp):
    """set up the part from its raw ``header`` and the stream ``fp``

    The header is decoded right away (see ``_readheader``); the payload is
    left in the stream."""
    super(unbundlepart, self).__init__(fp)
    canseek = util.safehasattr(fp, 'seek')
    self._seekable = canseek and util.safehasattr(fp, b'tell')
    self.ui = ui
    # unbundle state attr
    self._headerdata = header
    self._headeroffset = 0
    self._initialized = False
    self.consumed = False
    # part data, filled in by _readheader()
    self.id = None
    self.type = None
    self.mandatoryparams = None
    self.advisoryparams = None
    self.params = None
    self.mandatorykeys = ()
    self._readheader()
    self._mandatory = None
    self._pos = 0
1362 1362
1363 1363 def _fromheader(self, size):
1364 1364 """return the next <size> byte from the header"""
1365 1365 offset = self._headeroffset
1366 1366 data = self._headerdata[offset : (offset + size)]
1367 1367 self._headeroffset = offset + size
1368 1368 return data
1369 1369
def _unpackheader(self, format):
    """decode one ``struct`` item of <format> from the header

    The number of bytes to take is derived from the format."""
    size = struct.calcsize(format)
    return _unpack(format, self._fromheader(size))
1376 1376
def _initparams(self, mandatoryparams, advisoryparams):
    """internal: store the parameters and build the derived lookups

    Both arguments are iterables of ``(name, value)`` pairs."""
    # tuples, so nobody alters them by mistake
    self.mandatoryparams = tuple(mandatoryparams)
    self.advisoryparams = tuple(advisoryparams)
    # convenience view: mandatory entries first, then advisory ones
    merged = util.sortdict(self.mandatoryparams)
    self.params = merged
    merged.update(self.advisoryparams)
    self.mandatorykeys = frozenset(entry[0] for entry in mandatoryparams)
1386 1386
def _readheader(self):
    """decode the header blob and populate the part attributes

    Sets the type, id, mandatory flag and parameters, then prepares the
    payload stream."""
    typesize = self._unpackheader(_fparttypesize)[0]
    self.type = self._fromheader(typesize)
    indebug(self.ui, b'part type: "%s"' % self.type)
    self.id = self._unpackheader(_fpartid)[0]
    indebug(self.ui, b'part id: "%s"' % pycompat.bytestr(self.id))
    # the mandatory bit is carried by the case of the type: any upper case
    # character makes the part mandatory
    lowered = self.type.lower()
    self.mandatory = self.type != lowered
    self.type = lowered
    ## reading parameters
    # how many of each kind
    mancount, advcount = self._unpackheader(_fpartparamcount)
    total = mancount + advcount
    indebug(self.ui, b'part parameters: %i' % total)
    # flat list of sizes: key, value, key, value, ...
    flatsizes = self._unpackheader(_makefpartparamsizes(total))
    # pair them up again as (key size, value size)
    sizepairs = list(zip(flatsizes[::2], flatsizes[1::2]))
    # mandatory parameters come first, advisory ones after
    mansizes = sizepairs[:mancount]
    advsizes = sizepairs[mancount:]
    # read the actual keys and values, in header order
    manparams = [
        (self._fromheader(keysize), self._fromheader(valuesize))
        for keysize, valuesize in mansizes
    ]
    advparams = [
        (self._fromheader(keysize), self._fromheader(valuesize))
        for keysize, valuesize in advsizes
    ]
    self._initparams(manparams, advparams)
    ## part payload
    self._payloadstream = util.chunkbuffer(self._payloadchunks())
    # the header has been read, remember it
    self._initialized = True
1421 1421
1422 1422 def _payloadchunks(self):
1423 1423 """Generator of decoded chunks in the payload."""
1424 1424 return decodepayloadchunks(self.ui, self._fp)
1425 1425
1426 1426 def consume(self):
1427 1427 """Read the part payload until completion.
1428 1428
1429 1429 By consuming the part data, the underlying stream read offset will
1430 1430 be advanced to the next part (or end of stream).
1431 1431 """
1432 1432 if self.consumed:
1433 1433 return
1434 1434
1435 1435 chunk = self.read(32768)
1436 1436 while chunk:
1437 1437 self._pos += len(chunk)
1438 1438 chunk = self.read(32768)
1439 1439
1440 1440 def read(self, size=None):
1441 1441 """read payload data"""
1442 1442 if not self._initialized:
1443 1443 self._readheader()
1444 1444 if size is None:
1445 1445 data = self._payloadstream.read()
1446 1446 else:
1447 1447 data = self._payloadstream.read(size)
1448 1448 self._pos += len(data)
1449 1449 if size is None or len(data) < size:
1450 1450 if not self.consumed and self._pos:
1451 1451 self.ui.debug(
1452 1452 b'bundle2-input-part: total payload size %i\n' % self._pos
1453 1453 )
1454 1454 self.consumed = True
1455 1455 return data
1456 1456
1457 1457
class seekableunbundlepart(unbundlepart):
    """A bundle2 part in a bundle that is seekable.

    Regular ``unbundlepart`` instances can only be read once. This class
    extends ``unbundlepart`` to enable bi-directional seeking within the
    part.

    Bundle2 part data consists of framed chunks. Offsets when seeking
    refer to the decoded data, not the offsets in the underlying bundle2
    stream.

    To facilitate quickly seeking within the decoded data, instances of this
    class maintain a mapping between offsets in the underlying stream and
    the decoded payload. This mapping will consume memory in proportion
    to the number of chunks within the payload (which almost certainly
    increases in proportion with the size of the part).
    """

    def __init__(self, ui, header, fp):
        # (payload, file) offsets for chunk starts.
        # This must exist before the parent constructor runs, since the
        # parent sets up the payload stream from _payloadchunks().
        self._chunkindex = []

        super(seekableunbundlepart, self).__init__(ui, header, fp)

    def _payloadchunks(self, chunknum=0):
        '''seek to specified chunk and start yielding data'''
        if len(self._chunkindex) == 0:
            assert chunknum == 0, b'Must start with chunk 0'
            self._chunkindex.append((0, self._tellfp()))
        else:
            assert chunknum < len(self._chunkindex), (
                b'Unknown chunk %d' % chunknum
            )
            self._seekfp(self._chunkindex[chunknum][1])

        pos = self._chunkindex[chunknum][0]

        for chunk in decodepayloadchunks(self.ui, self._fp):
            chunknum += 1
            pos += len(chunk)
            # record the start of the next chunk the first time we reach it
            if chunknum == len(self._chunkindex):
                self._chunkindex.append((pos, self._tellfp()))

            yield chunk

    def _findchunk(self, pos):
        '''for a given payload position, return a chunk number and offset'''
        for chunk, (ppos, fpos) in enumerate(self._chunkindex):
            if ppos == pos:
                return chunk, 0
            elif ppos > pos:
                return chunk - 1, pos - self._chunkindex[chunk - 1][0]
        raise ValueError(b'Unknown chunk')

    def _readtoend(self):
        """read and discard the remaining payload

        This deliberately loops over read() instead of calling consume().
        Running the payload generator to its end also extends
        self._chunkindex with every chunk boundary not seen yet.
        """
        chunk = self.read(32768)
        while chunk:
            chunk = self.read(32768)

    def tell(self):
        """return the current offset within the decoded payload"""
        return self._pos

    def seek(self, offset, whence=os.SEEK_SET):
        """move the read position within the decoded payload

        ``whence`` is one of os.SEEK_SET, os.SEEK_CUR or os.SEEK_END. With
        os.SEEK_END the offset is subtracted from the end of the payload.
        Raises ValueError for an unknown ``whence`` or a target outside of
        the payload, and error.Abort if the stream cannot be repositioned.
        """
        if whence == os.SEEK_SET:
            newpos = offset
        elif whence == os.SEEK_CUR:
            newpos = self._pos + offset
        elif whence == os.SEEK_END:
            # the end offset is only known once everything has been read
            if not self.consumed:
                self._readtoend()
            newpos = self._chunkindex[-1][0] - offset
        else:
            raise ValueError(b'Unknown whence value: %r' % (whence,))

        # the target is past what has been indexed so far: index the rest
        if newpos > self._chunkindex[-1][0] and not self.consumed:
            self._readtoend()

        if not 0 <= newpos <= self._chunkindex[-1][0]:
            raise ValueError(b'Offset out of range')

        if self._pos != newpos:
            # restart decoding at the chunk holding the target, then skip
            # the leading bytes of that chunk
            chunk, internaloffset = self._findchunk(newpos)
            self._payloadstream = util.chunkbuffer(self._payloadchunks(chunk))
            adjust = self.read(internaloffset)
            if len(adjust) != internaloffset:
                raise error.Abort(_(b'Seek failed\n'))
            self._pos = newpos

    def _seekfp(self, offset, whence=0):
        """move the underlying file pointer

        This method is meant for internal usage by the bundle2 protocol only.
        They directly manipulate the low level stream including bundle2 level
        instruction.

        Do not use it to implement higher-level logic or methods."""
        if self._seekable:
            return self._fp.seek(offset, whence)
        else:
            raise NotImplementedError(_(b'File pointer is not seekable'))

    def _tellfp(self):
        """return the file offset, or None if file is not seekable

        This method is meant for internal usage by the bundle2 protocol only.
        They directly manipulate the low level stream including bundle2 level
        instruction.

        Do not use it to implement higher-level logic or methods."""
        if self._seekable:
            try:
                return self._fp.tell()
            except IOError as e:
                # the stream turned out not to be seekable after all:
                # remember it and report "no offset"
                if e.errno == errno.ESPIPE:
                    self._seekable = False
                else:
                    raise
        return None
1577 1577
1578 1578
# These are only the static capabilities.
# Check the 'getrepocaps' function for the rest.
#
# Each key is a capability name and each value is a tuple of the values
# listed for that capability (empty when there is nothing to list).
capabilities = {
    b'HG20': (),
    b'bookmarks': (),
    b'error': (b'abort', b'unsupportedcontent', b'pushraced', b'pushkey'),
    b'listkeys': (),
    b'pushkey': (),
    # names of the digests known to util.DIGESTS, sorted
    b'digests': tuple(sorted(util.DIGESTS.keys())),
    # url schemes accepted by the 'remote-changegroup' part handler
    b'remote-changegroup': (b'http', b'https'),
    b'hgtagsfnodes': (),
    b'rev-branch-cache': (),
    b'phases': (b'heads',),
    # removed again by 'getrepocaps' for servers not configured for it
    b'stream': (b'v2',),
}
1594 1594
1595 1595
def getrepocaps(repo, allowpushback=False, role=None):
    """return the bundle2 capabilities for a given repo

    Exists to allow extensions (like evolution) to mutate the capabilities.

    The result is used both by servers advertising their capabilities and
    by clients advertising theirs to servers as part of bundle2 requests.
    ``role`` must be ``b'client'`` or ``b'server'`` and tells which of the
    two applies; anything else raises ``error.ProgrammingError``.
    """
    if role != b'client' and role != b'server':
        raise error.ProgrammingError(b'role argument must be client or server')

    # start from a copy so the module level defaults stay untouched
    repocaps = capabilities.copy()
    incoming = changegroup.supportedincomingversions(repo)
    repocaps[b'changegroup'] = tuple(sorted(incoming))
    if obsolete.isenabled(repo, obsolete.exchangeopt):
        repocaps[b'obsmarkers'] = tuple(b'V%i' % v for v in obsolete.formats)
    if allowpushback:
        repocaps[b'pushback'] = ()
    if repo.ui.config(b'server', b'concurrent-push-mode') == b'check-related':
        repocaps[b'checkheads'] = (b'related',)
    if b'phases' in repo.ui.configlist(b'devel', b'legacy.exchange'):
        repocaps.pop(b'phases')

    # Clients always advertise stream support, because payload support
    # should always be advertised. Servers only do so when configured.
    if role == b'server':
        streamsupported = repo.ui.configbool(
            b'server', b'uncompressed', untrusted=True
        )
        featuresupported = repo.ui.configbool(b'server', b'bundle2.stream')
        if not (streamsupported and featuresupported):
            repocaps.pop(b'stream')

    return repocaps
1636 1636
1637 1637
def bundle2caps(remote):
    """return the bundle capabilities of a peer as dict

    An empty dict is returned when ``remote.capable(b'bundle2')`` gives a
    false value. The empty blob ``b''`` is the exception: it is not treated
    as missing and is still decoded.
    """
    raw = remote.capable(b'bundle2')
    if not raw and raw != b'':
        return {}
    # reuse the value fetched above instead of querying the peer again
    capsblob = urlreq.unquote(raw)
    return decodecaps(capsblob)
1645 1645
1646 1646
def obsmarkersversion(caps):
    """extract the list of supported obsmarkers versions from a bundle2caps dict

    Only the ``b'obsmarkers'`` entries starting with ``b'V'`` are kept; the
    remainder of each such entry is converted to an integer.
    """
    versions = []
    for entry in caps.get(b'obsmarkers', ()):
        if entry.startswith(b'V'):
            versions.append(int(entry[1:]))
    return versions
1652 1652
1653 1653
def writenewbundle(
    ui,
    repo,
    source,
    filename,
    bundletype,
    outgoing,
    opts,
    vfs=None,
    compression=None,
    compopts=None,
):
    """write a bundle of type ``bundletype`` for the ``outgoing`` changesets

    ``HG10*`` types are written through ``writebundle`` from a version 01
    changegroup. ``HG20*`` types get a bundle2 whose parts are selected by
    ``opts``. Any other type raises ``error.ProgrammingError``.

    Returns the value of the underlying writing function.
    """
    if bundletype.startswith(b'HG10'):
        legacycg = changegroup.makechangegroup(repo, outgoing, b'01', source)
        return writebundle(
            ui,
            legacycg,
            filename,
            bundletype,
            vfs=vfs,
            compression=compression,
            compopts=compopts,
        )

    if not bundletype.startswith(b'HG20'):
        raise error.ProgrammingError(b'unknown bundle type: %s' % bundletype)

    # obsolescence markers are only announced when requested in opts
    caps = {b'obsmarkers': (b'V1',)} if b'obsolescence' in opts else {}
    bundler = bundle20(ui, caps)
    bundler.setcompression(compression, compopts)
    _addpartsfromopts(ui, repo, bundler, source, outgoing, opts)

    return changegroup.writechunks(
        ui, bundler.getchunks(), filename, vfs=vfs
    )
1689 1689
1690 1690
def _addpartsfromopts(ui, repo, bundler, source, outgoing, opts):
    """add to ``bundler`` the parts selected by ``opts``

    Recognized ``opts`` keys (with the default used when the key is absent):
    ``changegroup`` (True), ``cg.version`` (picked by
    ``changegroup.safeversion``), ``streamv2`` (False), ``tagsfnodescache``
    (True), ``revbranchcache`` (True), ``obsolescence`` (False) and
    ``phases`` (False).
    """
    # We should eventually reconcile this logic with the one behind
    # 'exchange.getbundle2partsgenerator'.
    #
    # The type of input from 'getbundle' and 'writenewbundle' are a bit
    # different right now. So we keep them separated for now for the sake of
    # simplicity.

    # we might not always want a changegroup in such bundle, for example in
    # stream bundles
    if opts.get(b'changegroup', True):
        version = opts.get(b'cg.version')
        if version is None:
            version = changegroup.safeversion(repo)
        cg = changegroup.makechangegroup(repo, outgoing, version, source)
        cgpart = bundler.newpart(b'changegroup', data=cg.getchunks())
        cgpart.addparam(b'version', cg.version)
        if b'clcount' in cg.extras:
            nbchanges = b'%d' % cg.extras[b'clcount']
            cgpart.addparam(b'nbchanges', nbchanges, mandatory=False)
        # only announce a target phase when secret heads are bundled
        if opts.get(b'phases') and repo.revs(
            b'%ln and secret()', outgoing.missingheads
        ):
            secretphase = b'%d' % phases.secret
            cgpart.addparam(b'targetphase', secretphase, mandatory=False)
        if b'exp-sidedata-flag' in repo.requirements:
            cgpart.addparam(b'exp-sidedata', b'1')

    if opts.get(b'streamv2', False):
        addpartbundlestream2(bundler, repo, stream=True)

    if opts.get(b'tagsfnodescache', True):
        addparttagsfnodescache(repo, bundler, outgoing)

    if opts.get(b'revbranchcache', True):
        addpartrevbranchcache(repo, bundler, outgoing)

    if opts.get(b'obsolescence', False):
        markers = repo.obsstore.relevantmarkers(outgoing.missing)
        buildobsmarkerspart(bundler, markers)

    if opts.get(b'phases', False):
        headsbyphase = phases.subsetphaseheads(repo, outgoing.missing)
        bundler.newpart(
            b'phase-heads', data=phases.binaryencode(headsbyphase)
        )
1738 1738
1739 1739
def addparttagsfnodescache(repo, bundler, outgoing):
    """add a 'hgtagsfnodes' part with the cached .hgtags fnodes of the heads

    The payload is the concatenation of (node, fnode) pairs for each node of
    ``outgoing.missingheads`` that has a cached fnode. No part is added when
    no such entry exists.
    """
    # we include the tags fnode cache for the bundle changeset
    # (as an optional parts)
    fnodescache = tags.hgtagsfnodescache(repo.unfiltered())
    pieces = []

    # .hgtags fnodes are only relevant for head changesets. While we could
    # transfer values for all known nodes, there will likely be little to
    # no benefit.
    #
    # We don't bother using a generator to produce output data because
    # a) we only have 40 bytes per head and even esoteric numbers of heads
    # consume little memory (1M heads is 40MB) b) we don't want to send the
    # part if we don't have entries and knowing if we have entries requires
    # cache lookups.
    for head in outgoing.missingheads:
        # Don't compute missing, as this may slow down serving.
        fnode = fnodescache.getfnode(head, computemissing=False)
        if fnode is None:
            continue
        pieces.append(head)
        pieces.append(fnode)

    if pieces:
        bundler.newpart(b'hgtagsfnodes', data=b''.join(pieces))
1763 1763
1764 1764
def addpartrevbranchcache(repo, bundler, outgoing):
    """add an advisory 'cache:rev-branch-cache' part for ``outgoing.missing``

    For each branch, in sorted order, the payload holds a ``rbcstruct``
    header (branch name length and the size of both node groups), the
    branch name converted with ``encoding.fromlocal``, then both node
    groups, each sorted.
    """
    # we include the rev branch cache for the bundle changeset
    # (as an optional parts)
    rbcache = repo.revbranchcache()
    changelog = repo.unfiltered().changelog
    # branch -> (nodes, closed nodes); the "close" value returned by
    # branchinfo() is used directly as index into the pair
    pernodes = collections.defaultdict(lambda: (set(), set()))
    for node in outgoing.missing:
        branch, close = rbcache.branchinfo(changelog.rev(node))
        pernodes[branch][close].add(node)

    def generate():
        for branch, (nodes, closed) in sorted(pernodes.items()):
            utf8branch = encoding.fromlocal(branch)
            yield rbcstruct.pack(len(utf8branch), len(nodes), len(closed))
            yield utf8branch
            for group in (nodes, closed):
                for n in sorted(group):
                    yield n

    bundler.newpart(b'cache:rev-branch-cache', data=generate(), mandatory=False)
1786 1786
1787 1787
def _formatrequirementsspec(requirements):
    """return the url-quoted, comma separated, sorted requirements

    The ``b"shared"`` requirement is left out of the result.
    """
    kept = sorted(req for req in requirements if req != b"shared")
    return urlreq.quote(b','.join(kept))
1791 1791
1792 1792
def _formatrequirementsparams(requirements):
    """return the quoted ``requirements=`` prefix followed by the spec

    The spec is the one built by ``_formatrequirementsspec``.
    """
    spec = _formatrequirementsspec(requirements)
    prefix = urlreq.quote(b"requirements=")
    return b"%s%s" % (prefix, spec)
1797 1797
1798 1798
def addpartbundlestream2(bundler, repo, **kwargs):
    """add a 'stream2' part to ``bundler`` when stream data is requested

    Nothing happens unless the ``stream`` keyword argument is true. The
    optional ``includepats`` and ``excludepats`` keyword arguments are
    handed to the stream generation.

    Raises ``error.Abort`` when the server does not allow stream generation,
    when patterns are given but narrow stream clones are not enabled, or
    when the repository has obsolescence markers while the bundler
    capabilities list no obsmarkers version.
    """
    if not kwargs.get('stream', False):
        return

    if not streamclone.allowservergeneration(repo):
        raise error.Abort(
            _(
                b'stream data requested but server does not allow '
                b'this feature'
            ),
            hint=_(
                b'well-behaved clients should not be '
                b'requesting stream data from servers not '
                b'advertising it; the client may be buggy'
            ),
        )

    # Stream clones don't compress well. And compression undermines a
    # goal of stream clones, which is to be fast. Communicate the desire
    # to avoid compression to consumers of the bundle.
    bundler.prefercompressed = False

    # get the includes and excludes
    includepats = kwargs.get('includepats')
    excludepats = kwargs.get('excludepats')

    narrowallowed = repo.ui.configbool(
        b'experimental', b'server.stream-narrow-clones'
    )
    wantsnarrow = includepats or excludepats
    if wantsnarrow and not narrowallowed:
        raise error.Abort(_(b'server does not support narrow stream clones'))

    # markers are only streamed when the receiving side lists the exact
    # version used by the local obsstore
    includeobsmarkers = False
    if repo.obsstore:
        remoteversions = obsmarkersversion(bundler.capabilities)
        if not remoteversions:
            raise error.Abort(
                _(
                    b'server has obsolescence markers, but client '
                    b'cannot receive them via stream clone'
                )
            )
        if repo.obsstore._version in remoteversions:
            includeobsmarkers = True

    filecount, bytecount, it = streamclone.generatev2(
        repo, includepats, excludepats, includeobsmarkers
    )
    requirements = _formatrequirementsspec(repo.requirements)
    streampart = bundler.newpart(b'stream2', data=it)
    streampart.addparam(b'bytecount', b'%d' % bytecount, mandatory=True)
    streampart.addparam(b'filecount', b'%d' % filecount, mandatory=True)
    streampart.addparam(b'requirements', requirements, mandatory=True)
1853 1853
1854 1854
def buildobsmarkerspart(bundler, markers):
    """add an obsmarker part to the bundler with <markers>

    Returns the new part, or None when markers is empty (no part is created
    in that case).
    Raises ValueError if the bundler doesn't support any known obsmarker format.
    """
    if not markers:
        return None

    version = obsolete.commonversion(obsmarkersversion(bundler.capabilities))
    if version is None:
        raise ValueError(b'bundler does not support common obsmarker format')
    encoded = obsolete.encodemarkers(markers, True, version=version)
    return bundler.newpart(b'obsmarkers', data=encoded)
1870 1870
1871 1871
def writebundle(
    ui, cg, filename, bundletype, vfs=None, compression=None, compopts=None
):
    """Write a bundle file and return its filename.

    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    bz2 compression can be turned off.
    The bundle file will be deleted in case of errors.

    For the ``HG20`` type the changegroup is wrapped in a bundle2
    'changegroup' part. Any other type is looked up in ``bundletypes``; it
    only accepts version 01 changegroups and no ``compression`` argument.
    """

    if bundletype == b"HG20":
        bundler = bundle20(ui)
        bundler.setcompression(compression, compopts)
        cgpart = bundler.newpart(b'changegroup', data=cg.getchunks())
        cgpart.addparam(b'version', cg.version)
        if b'clcount' in cg.extras:
            nbchanges = b'%d' % cg.extras[b'clcount']
            cgpart.addparam(b'nbchanges', nbchanges, mandatory=False)
        chunks = bundler.getchunks()
    else:
        # compression argument is only for the bundle2 case
        assert compression is None
        if cg.version != b'01':
            raise error.Abort(
                _(b'old bundle types only supports v1 changegroups')
            )
        header, comp = bundletypes[bundletype]
        if comp not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % comp)
        compengine = util.compengines.forbundletype(comp)

        def legacychunks():
            # the type header comes first, followed by the compressed
            # changegroup stream
            yield header
            for chunk in compengine.compressstream(cg.getchunks(), compopts):
                yield chunk

        chunks = legacychunks()

    # parse the changegroup data, otherwise we will block
    # in case of sshrepo because we don't know the end of the stream
    return changegroup.writechunks(ui, chunks, filename, vfs=vfs)
1915 1915
1916 1916
def combinechangegroupresults(op):
    """logic to combine 0 or more addchangegroup results into one

    Each record of ``op.records[b'changegroup']`` contributes its
    ``b'return'`` value (0 when the key is absent). The combined value is:

    - 0 if any individual result is 0,
    - 1 + delta when the accumulated head delta is positive,
    - -1 + delta when the accumulated head delta is negative,
    - 1 otherwise (including when there is no record at all).

    A result above 1 adds ``result - 1`` to the delta and a result below -1
    adds ``result + 1`` to it.
    """
    changedheads = 0
    for record in op.records[b'changegroup']:
        ret = record.get(b'return', 0)
        # If any changegroup result is 0, return 0. Returning right away
        # prevents head changes accumulated from earlier results from
        # overriding that 0.
        if ret == 0:
            return 0
        if ret < -1:
            changedheads += ret + 1
        elif ret > 1:
            changedheads += ret - 1
    if changedheads > 0:
        return 1 + changedheads
    if changedheads < 0:
        return -1 + changedheads
    return 1
1936 1936
1937 1937
@parthandler(
    b'changegroup',
    (
        b'version',
        b'nbchanges',
        b'exp-sidedata',
        b'treemanifest',
        b'targetphase',
    ),
)
def handlechangegroup(op, inpart):
    """apply a changegroup part on the repo

    This is a very early implementation that will need massive rework
    before being inflicted on any end-user.

    Part parameters used here: ``version`` (changegroup version, defaults
    to ``01``), ``nbchanges`` (expected number of changesets),
    ``treemanifest``, ``exp-sidedata`` and ``targetphase``.

    When ``op.reply`` is set, an advisory ``reply:changegroup`` part
    carrying the result is added to it.
    """
    from . import localrepo

    tr = op.gettransaction()
    unpackerversion = inpart.params.get(b'version', b'01')
    # We should raise an appropriate exception here
    cg = changegroup.getunbundler(unpackerversion, inpart, None)
    # the source and url passed here are overwritten by the one contained in
    # the transaction.hookargs argument. So 'bundle2' is a placeholder
    nbchangesets = None
    if b'nbchanges' in inpart.params:
        nbchangesets = int(inpart.params.get(b'nbchanges'))
    # A bundle using tree manifests can only be applied to a repository
    # without that requirement if the repository is still empty; the
    # requirement is then added and written out.
    if (
        b'treemanifest' in inpart.params
        and b'treemanifest' not in op.repo.requirements
    ):
        if len(op.repo.changelog) != 0:
            raise error.Abort(
                _(
                    b"bundle contains tree manifests, but local repo is "
                    b"non-empty and does not use tree manifests"
                )
            )
        op.repo.requirements.add(b'treemanifest')
        op.repo.svfs.options = localrepo.resolvestorevfsoptions(
            op.repo.ui, op.repo.requirements, op.repo.features
        )
        op.repo._writerequirements()

    # a repository using sidedata refuses bundles that do not carry them
    bundlesidedata = bool(b'exp-sidedata' in inpart.params)
    reposidedata = bool(b'exp-sidedata-flag' in op.repo.requirements)
    if reposidedata and not bundlesidedata:
        msg = b"repository is using sidedata but the bundle source do not"
        hint = b'this is currently unsupported'
        raise error.Abort(msg, hint=hint)

    # only forward a target phase when the part provides one
    extrakwargs = {}
    targetphase = inpart.params.get(b'targetphase')
    if targetphase is not None:
        extrakwargs['targetphase'] = int(targetphase)
    ret = _processchangegroup(
        op,
        cg,
        tr,
        b'bundle2',
        b'bundle2',
        expectedtotal=nbchangesets,
        **extrakwargs
    )
    if op.reply is not None:
        # This is definitely not the final form of this
        # return. But one needs to start somewhere.
        part = op.reply.newpart(b'reply:changegroup', mandatory=False)
        part.addparam(
            b'in-reply-to', pycompat.bytestr(inpart.id), mandatory=False
        )
        part.addparam(b'return', b'%i' % ret, mandatory=False)
    # the whole payload must have been read while applying the changegroup
    assert not inpart.read()
2011 2011
2012 2012
# Parameters accepted by the 'remote-changegroup' part: the fixed ones plus
# one 'digest:<name>' entry for each key of util.DIGESTS.
_remotechangegroupparams = tuple(
    [b'url', b'size', b'digests']
    + [b'digest:%s' % k for k in util.DIGESTS.keys()]
)
2017 2017
2018 2018
@parthandler(b'remote-changegroup', _remotechangegroupparams)
def handleremotechangegroup(op, inpart):
    """apply a bundle10 on the repo, given an url and validation information

    All the information about the remote bundle to import are given as
    parameters. The parameters include:
      - url: the url to the bundle10.
      - size: the bundle10 file size. It is used to validate that what was
        retrieved by the client matches the server knowledge about the bundle.
      - digests: a space separated list of the digest types provided as
        parameters.
      - digest:<digest-type>: the hexadecimal representation of the digest with
        that name. Like the size, it is used to validate that what was
        retrieved by the client matches what the server knows about the bundle.

    When multiple digest types are given, all of them are checked.

    Raises error.Abort when a required parameter is missing or invalid, when
    the url scheme is not supported, when the retrieved data is not a
    version 1.0 bundle, or when the final validation fails.
    """
    try:
        raw_url = inpart.params[b'url']
    except KeyError:
        raise error.Abort(_(b'remote-changegroup: missing "%s" param') % b'url')
    parsed_url = util.url(raw_url)
    # only the schemes listed in the static capabilities are accepted
    if parsed_url.scheme not in capabilities[b'remote-changegroup']:
        raise error.Abort(
            _(b'remote-changegroup does not support %s urls')
            % parsed_url.scheme
        )

    try:
        size = int(inpart.params[b'size'])
    except ValueError:
        raise error.Abort(
            _(b'remote-changegroup: invalid value for param "%s"') % b'size'
        )
    except KeyError:
        raise error.Abort(
            _(b'remote-changegroup: missing "%s" param') % b'size'
        )

    # every digest type announced in 'digests' must come with its value
    digests = {}
    for typ in inpart.params.get(b'digests', b'').split():
        param = b'digest:%s' % typ
        try:
            value = inpart.params[param]
        except KeyError:
            raise error.Abort(
                _(b'remote-changegroup: missing "%s" param') % param
            )
        digests[typ] = value

    # wrap the remote stream so that size and digests can be validated
    # once the bundle has been applied
    real_part = util.digestchecker(url.open(op.ui, raw_url), size, digests)

    tr = op.gettransaction()
    from . import exchange

    cg = exchange.readbundle(op.repo.ui, real_part, raw_url)
    if not isinstance(cg, changegroup.cg1unpacker):
        raise error.Abort(
            _(b'%s: not a bundle version 1.0') % util.hidepassword(raw_url)
        )
    ret = _processchangegroup(op, cg, tr, b'bundle2', b'bundle2')
    if op.reply is not None:
        # This is definitely not the final form of this
        # return. But one needs to start somewhere.
        part = op.reply.newpart(b'reply:changegroup')
        part.addparam(
            b'in-reply-to', pycompat.bytestr(inpart.id), mandatory=False
        )
        part.addparam(b'return', b'%i' % ret, mandatory=False)
    try:
        real_part.validate()
    except error.Abort as e:
        # report the failure with the (password free) url of the bundle
        raise error.Abort(
            _(b'bundle at %s is corrupted:\n%s')
            % (util.hidepassword(raw_url), bytes(e))
        )
    # this part is not expected to carry any payload of its own
    assert not inpart.read()
2096 2096
2097 2097
@parthandler(b'reply:changegroup', (b'return', b'in-reply-to'))
def handlereplychangegroup(op, inpart):
    """record the result of a changegroup reported by a reply part

    The integer ``return`` parameter is stored in the ``changegroup``
    records of ``op``, in reply to the part id given by ``in-reply-to``.
    """
    params = inpart.params
    result = int(params[b'return'])
    inreplyto = int(params[b'in-reply-to'])
    op.records.add(b'changegroup', {b'return': result}, inreplyto)
2103 2103
2104 2104
@parthandler(b'check:bookmarks')
def handlecheckbookmarks(op, inpart):
    """check location of bookmarks

    This part is to be used to detect push races regarding bookmarks. It
    contains binary encoded (bookmark, node) tuples. If the local state does
    not match the one in the part, a PushRaced exception is raised.
    """
    bookdata = bookmarks.binarydecode(inpart)

    msgstandard = (
        b'remote repository changed while pushing - please try again '
        b'(bookmark "%s" move from %s to %s)'
    )
    msgmissing = (
        b'remote repository changed while pushing - please try again '
        b'(bookmark "%s" is missing, expected %s)'
    )
    msgexist = (
        b'remote repository changed while pushing - please try again '
        b'(bookmark "%s" set on %s, expected missing)'
    )
    for book, expected in bookdata:
        current = op.repo._bookmarks.get(book)
        if current == expected:
            continue
        if expected is None:
            # the bookmark exists locally while none was expected
            finalmsg = msgexist % (book, nodemod.short(current))
        elif current is None:
            # the expected bookmark does not exist locally
            finalmsg = msgmissing % (book, nodemod.short(expected))
        else:
            finalmsg = msgstandard % (
                book,
                nodemod.short(expected),
                nodemod.short(current),
            )
        raise error.PushRaced(finalmsg)
2141 2141
2142 2142
@parthandler(b'check:heads')
def handlecheckheads(op, inpart):
    """check that head of the repo did not change

    This is used to detect a push race when using unbundle.
    This replaces the "heads" argument of unbundle.

    The payload is read as a sequence of 20 bytes entries, which must match
    the current repository heads or PushRaced is raised."""
    heads = []
    while True:
        h = inpart.read(20)
        if len(h) != 20:
            break
        heads.append(h)
    # the payload must be a whole number of 20 bytes entries
    assert not h
    # Trigger a transaction so that we are guaranteed to have the lock now.
    if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
        op.gettransaction()
    if sorted(heads) != sorted(op.repo.heads()):
        raise error.PushRaced(
            b'remote repository changed while pushing - please try again'
        )
2162 2162
2163 2163
@parthandler(b'check:updated-heads')
def handlecheckupdatedheads(op, inpart):
    """check for race on the heads touched by a push

    This is similar to 'check:heads' but focuses on the heads actually
    updated during the push. If other activities happen on unrelated heads,
    they are ignored.

    This allows servers with high traffic to avoid push contention as long
    as unrelated parts of the graph are involved."""
    heads = []
    while True:
        h = inpart.read(20)
        if len(h) != 20:
            break
        heads.append(h)
    # the payload must be a whole number of 20 bytes entries
    assert not h
    # trigger a transaction so that we are guaranteed to have the lock now.
    if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
        op.gettransaction()

    # gather the heads of every branch
    currentheads = set()
    for branchheads in op.repo.branchmap().iterheads():
        currentheads.update(branchheads)

    if any(h not in currentheads for h in heads):
        raise error.PushRaced(
            b'remote repository changed while pushing - '
            b'please try again'
        )
2194 2194
2195 2195
@parthandler(b'check:phases')
def handlecheckphases(op, inpart):
    """check that phase boundaries of the repository did not change

    This is used to detect a push race.

    The payload decodes to one collection of nodes per phase, indexed by
    phase number. PushRaced is raised for the first node whose current phase
    differs from the one it is listed under.
    """
    phasetonodes = phases.binarydecode(inpart)
    unfi = op.repo.unfiltered()
    torev = unfi.changelog.rev
    phasecache = unfi._phasecache
    msg = (
        b'remote repository changed while pushing - please try again '
        b'(%s is %s expected %s)'
    )
    for expectedphase, nodes in enumerate(phasetonodes):
        for node in nodes:
            actualphase = phasecache.phase(unfi, torev(node))
            if actualphase == expectedphase:
                continue
            raise error.PushRaced(
                msg
                % (
                    nodemod.short(node),
                    phases.phasenames[actualphase],
                    phases.phasenames[expectedphase],
                )
            )
2220 2220
2221 2221
@parthandler(b'output')
def handleoutput(op, inpart):
    """relay output captured on the server to the local ui, line by line"""
    payload = inpart.read()
    for text in payload.splitlines():
        message = _(b'remote: %s\n') % text
        op.ui.status(message)
2227 2227
2228 2228
@parthandler(b'replycaps')
def handlereplycaps(op, inpart):
    """Record that the sender expects a reply bundle

    The payload carries the capabilities to use for that reply. The payload
    is always consumed and decoded; a reply bundle is only created when the
    operation does not have one yet."""
    replycaps = decodecaps(inpart.read())
    if op.reply is not None:
        return
    op.reply = bundle20(op.ui, replycaps)
2237 2237
2238 2238
class AbortFromPart(error.Abort):
    """Abort raised on behalf of a bundle2 part.

    Raised by the 'error:abort' part handler with the message (and optional
    hint) transmitted in that part."""
2241 2241
2242 2242
@parthandler(b'error:abort', (b'message', b'hint'))
def handleerrorabort(op, inpart):
    """Re-raise locally an abort that was transmitted over the wire

    'message' is required; 'hint' is forwarded when present (None otherwise).
    """
    params = inpart.params
    message = params[b'message']
    hint = params.get(b'hint')
    raise AbortFromPart(message, hint=hint)
2249 2249
2250 2250
@parthandler(
    b'error:pushkey',
    (b'namespace', b'key', b'new', b'old', b'ret', b'in-reply-to'),
)
def handleerrorpushkey(op, inpart):
    """Re-raise locally the failure of a mandatory pushkey

    Only the parameters actually present in the part are forwarded to
    error.PushkeyFailed; 'in-reply-to' is required and identifies the failed
    part."""
    params = inpart.params
    fields = (b'namespace', b'key', b'new', b'old', b'ret')
    candidates = ((field, params.get(field)) for field in fields)
    kwargs = {
        field: value for field, value in candidates if value is not None
    }
    raise error.PushkeyFailed(
        params[b'in-reply-to'], **pycompat.strkwargs(kwargs)
    )
2265 2265
2266 2266
@parthandler(b'error:unsupportedcontent', (b'parttype', b'params'))
def handleerrorunsupportedcontent(op, inpart):
    """Re-raise locally an "unknown content" error sent over the wire

    'parttype' is forwarded as-is when present; 'params' is transmitted as a
    NUL-separated list and is split before being forwarded."""
    given = inpart.params
    kwargs = {}

    parttype = given.get(b'parttype')
    if parttype is not None:
        kwargs[b'parttype'] = parttype

    rawparams = given.get(b'params')
    if rawparams is not None:
        kwargs[b'params'] = rawparams.split(b'\0')

    raise error.BundleUnknownFeatureError(**pycompat.strkwargs(kwargs))
2279 2279
2280 2280
@parthandler(b'error:pushraced', (b'message',))
def handleerrorpushraced(op, inpart):
    """Surface a push race reported by the remote as a ResponseError"""
    prefix = _(b'push failed:')
    reason = inpart.params[b'message']
    raise error.ResponseError(prefix, reason)
2285 2285
2286 2286
@parthandler(b'listkeys', (b'namespace',))
def handlelistkeys(op, inpart):
    """record the content of a pushkey namespace shipped in a bundle2

    The decoded keys are stored in the operation records under 'listkeys'
    as a (namespace, keys) pair."""
    namespace = inpart.params[b'namespace']
    decoded = pushkey.decodekeys(inpart.read())
    entry = (namespace, decoded)
    op.records.add(b'listkeys', entry)
2293 2293
2294 2294
@parthandler(b'pushkey', (b'namespace', b'key', b'old', b'new'))
def handlepushkey(op, inpart):
    """perform the pushkey described by the part and report its outcome

    The request and its decoded parameters are recorded under 'pushkey' in
    the operation records. When a reply bundle exists, a 'reply:pushkey'
    part carrying the return value is added to it. A failed pushkey on a
    mandatory part raises error.PushkeyFailed."""
    params = inpart.params
    namespace, key, old, new = [
        pushkey.decode(params[name])
        for name in (b'namespace', b'key', b'old', b'new')
    ]
    # Grab the transaction to ensure that we have the lock before performing
    # the pushkey.
    if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
        op.gettransaction()
    ret = op.repo.pushkey(namespace, key, old, new)
    op.records.add(
        b'pushkey',
        {b'namespace': namespace, b'key': key, b'old': old, b'new': new},
    )
    if op.reply is not None:
        rpart = op.reply.newpart(b'reply:pushkey')
        rpart.addparam(
            b'in-reply-to', pycompat.bytestr(inpart.id), mandatory=False
        )
        rpart.addparam(b'return', b'%i' % ret, mandatory=False)
    if not inpart.mandatory or ret:
        return
    # report the raw (still encoded) parameters of the failed part
    kwargs = {
        name: params[name]
        for name in (b'namespace', b'key', b'new', b'old', b'ret')
        if name in params
    }
    raise error.PushkeyFailed(
        partid=b'%d' % inpart.id, **pycompat.strkwargs(kwargs)
    )
2324 2324
2325 2325
@parthandler(b'bookmarks')
def handlebookmark(op, inpart):
    """transmit bookmark information

    The part contains binary encoded bookmark information.

    The exact behavior of this part can be controlled by the 'bookmarks' mode
    on the bundle operation.

    When mode is 'apply' (the default) the bookmark information is applied as
    is to the unbundling repository. Make sure a 'check:bookmarks' part is
    issued earlier to check for push races in such update. This behavior is
    suitable for pushing.

    When mode is 'records', the information is recorded into the 'bookmarks'
    records of the bundle operation. This behavior is suitable for pulling.

    Any other mode raises error.ProgrammingError.

    With 'server.bookmarks-pushkey-compat' enabled, 'apply' mode also runs
    the 'prepushkey' hook (before applying, may abort) and the 'pushkey' hook
    (once the lock is released) for every bookmark change.
    """
    changes = bookmarks.binarydecode(inpart)

    pushkeycompat = op.repo.ui.configbool(
        b'server', b'bookmarks-pushkey-compat'
    )
    bookmarksmode = op.modes.get(b'bookmarks', b'apply')

    if bookmarksmode == b'apply':
        tr = op.gettransaction()
        bookstore = op.repo._bookmarks
        if pushkeycompat:
            # Build all hook arguments before anything is applied so that
            # 'old' reflects the bookmark values prior to this push.
            allhooks = []
            for book, node in changes:
                hookargs = tr.hookargs.copy()
                hookargs[b'pushkeycompat'] = b'1'
                hookargs[b'namespace'] = b'bookmarks'
                hookargs[b'key'] = book
                hookargs[b'old'] = nodemod.hex(bookstore.get(book, b''))
                hookargs[b'new'] = nodemod.hex(
                    node if node is not None else b''
                )
                allhooks.append(hookargs)

            for hookargs in allhooks:
                op.repo.hook(
                    b'prepushkey', throw=True, **pycompat.strkwargs(hookargs)
                )

        bookstore.applychanges(op.repo, op.gettransaction(), changes)

        if pushkeycompat:

            # _afterlock callbacks receive a "success" boolean; the pushkey
            # hooks do not depend on it.
            def runhook(unused_success):
                for hookargs in allhooks:
                    op.repo.hook(b'pushkey', **pycompat.strkwargs(hookargs))

            op.repo._afterlock(runhook)

    elif bookmarksmode == b'records':
        for book, node in changes:
            record = {b'bookmark': book, b'node': node}
            op.records.add(b'bookmarks', record)
    else:
        raise error.ProgrammingError(
            b'unknown bookmark mode: %s' % bookmarksmode
        )
2389 2389
2390 2390
@parthandler(b'phase-heads')
def handlephases(op, inpart):
    """update the repository phases from the heads listed in the part

    The transaction getter itself (not a transaction) is handed to
    phases.updatephases()."""
    headsbyphase = phases.binarydecode(inpart)
    unfi = op.repo.unfiltered()
    phases.updatephases(unfi, op.gettransaction, headsbyphase)
2396 2396
2397 2397
@parthandler(b'reply:pushkey', (b'return', b'in-reply-to'))
def handlepushkeyreply(op, inpart):
    """record the return value the remote sent back for a pushkey part"""
    params = inpart.params
    result = int(params[b'return'])
    inreplyto = int(params[b'in-reply-to'])
    op.records.add(b'pushkey', {b'return': result}, inreplyto)
2404 2404
2405 2405
@parthandler(b'obsmarkers')
def handleobsmarker(op, inpart):
    """merge the obsolescence markers carried by the part into the repo

    The number of new markers is stored in the 'obsmarkers' records and,
    when a reply bundle exists, sent back in a 'reply:obsmarkers' part.
    An advisory part is silently skipped when the obsstore is read-only."""
    tr = op.gettransaction()
    markerdata = inpart.read()
    if op.ui.config(b'experimental', b'obsmarkers-exchange-debug'):
        size = len(markerdata)
        op.ui.writenoi18n(b'obsmarker-exchange: %i bytes received\n' % size)
    # The mergemarkers call will crash if marker creation is not enabled.
    # we want to avoid this if the part is advisory.
    advisory = not inpart.mandatory
    if advisory and op.repo.obsstore.readonly:
        op.repo.ui.debug(
            b'ignoring obsolescence markers, feature not enabled\n'
        )
        return
    added = op.repo.obsstore.mergemarkers(tr, markerdata)
    op.repo.invalidatevolatilesets()
    op.records.add(b'obsmarkers', {b'new': added})
    if op.reply is None:
        return
    rpart = op.reply.newpart(b'reply:obsmarkers')
    rpart.addparam(
        b'in-reply-to', pycompat.bytestr(inpart.id), mandatory=False
    )
    rpart.addparam(b'new', b'%i' % added, mandatory=False)
2431 2431
2432 2432
@parthandler(b'reply:obsmarkers', (b'new', b'in-reply-to'))
def handleobsmarkerreply(op, inpart):
    """record the 'new' count the remote sent back for an obsmarkers part"""
    params = inpart.params
    newcount = int(params[b'new'])
    inreplyto = int(params[b'in-reply-to'])
    op.records.add(b'obsmarkers', {b'new': newcount}, inreplyto)
2439 2439
2440 2440
@parthandler(b'hgtagsfnodes')
def handlehgtagsfnodes(op, inpart):
    """Store received .hgtags fnodes cache entries in the local repo.

    Payload is pairs of 20 byte changeset nodes and filenodes. Reading stops
    at the first pair that is not fully available; the cache is then written
    out and the number of applied entries is reported on the debug channel.
    """
    # Grab the transaction so we ensure that we have the lock at this point.
    if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
        op.gettransaction()
    cache = tags.hgtagsfnodescache(op.repo.unfiltered())

    applied = 0
    while True:
        node = inpart.read(20)
        fnode = inpart.read(20)
        if len(node) >= 20 and len(fnode) >= 20:
            cache.setfnode(node, fnode)
            applied += 1
            continue
        op.ui.debug(b'ignoring incomplete received .hgtags fnodes data\n')
        break

    cache.write()
    op.ui.debug(b'applied %i hgtags fnodes cache entries\n' % applied)
2464 2464
2465 2465
# Per-branch header of a 'cache:rev-branch-cache' payload: three big-endian
# unsigned 32-bit integers (branch name length, number of open heads, number
# of closed heads).
rbcstruct = struct.Struct(b'>III')


@parthandler(b'cache:rev-branch-cache')
def handlerbc(op, inpart):
    """receive a rev-branch-cache payload and update the local cache

    The payload is a series of records, one per branch, each made of:

    1) a header (see ``rbcstruct``) holding the branch name length, the
       number of open heads and the number of closed heads
    2) the branch name, UTF-8 encoded
    3) open heads nodes (20 bytes each)
    4) closed heads nodes (20 bytes each)

    Reading stops when no more header data is available; the cache is then
    written out.
    """
    rawheader = inpart.read(rbcstruct.size)
    cache = op.repo.revbranchcache()
    cl = op.repo.unfiltered().changelog
    while rawheader:
        namelen, nbopen, nbclosed = rbcstruct.unpack(rawheader)
        utf8branch = inpart.read(namelen)
        branch = encoding.tolocal(utf8branch)
        # open heads come first, closed heads second; the last argument of
        # setdata() is the "closed" marker
        for count, closed in ((nbopen, False), (nbclosed, True)):
            for _i in pycompat.xrange(count):
                node = inpart.read(20)
                rev = cl.rev(node)
                cache.setdata(branch, rev, node, closed)
        rawheader = inpart.read(rbcstruct.size)
    cache.write()
2500 2500
2501 2501
@parthandler(b'pushvars')
def bundle2getvars(op, part):
    '''expose pushed shell variables to hooks on the server

    Nothing happens unless ``push.pushvars.server`` is enabled (an option
    allowing to disable this server-side for security reasons).'''
    if not op.ui.configbool(b'push', b'pushvars.server'):
        return
    # We want pushed variables to have USERVAR_ prepended so we know
    # they came from the --pushvar flag.
    hookargs = {
        b"USERVAR_" + name.upper(): value
        for name, value in part.advisoryparams
    }
    op.addhookargs(hookargs)
2515 2515
2516 2516
@parthandler(b'stream2', (b'requirements', b'filecount', b'bytecount'))
def handlestreamv2bundle(op, part):
    """apply a stream clone (v2) part to the repository

    'requirements' is a URL-quoted, comma separated list; 'filecount' and
    'bytecount' are integers. Aborts if the repository is not empty."""
    params = part.params
    requirements = urlreq.unquote(params[b'requirements']).split(b',')
    filecount = int(params[b'filecount'])
    bytecount = int(params[b'bytecount'])

    repo = op.repo
    if len(repo) != 0:
        raise error.Abort(
            _(b'cannot apply stream clone to non empty repository')
        )

    repo.ui.debug(b'applying stream bundle\n')
    streamclone.applybundlev2(repo, part, filecount, bytecount, requirements)
2531 2531
2532 2532
def widen_bundle(
    bundler, repo, oldmatcher, newmatcher, common, known, cgversion, ellipses
):
    """add to a bundle2 the data needed to widen a narrow clone

    bundler is the bundle to which data should be added
    repo is the localrepository instance
    oldmatcher matches what the client already has
    newmatcher matches what the client needs (including what it already has)
    common is set of common heads between server and client
    known is a set of revs known on the client side (used in ellipses)
    cgversion is the changegroup version to send
    ellipses is boolean value telling whether to send ellipses data or not

    A 'changegroup' part is only added when ``common`` has ancestors in the
    repository. Returns ``bundler`` in every case.
    """
    cl = repo.changelog
    commonnodes = {cl.node(rev) for rev in repo.revs(b"::%ln", common)}
    if not commonnodes:
        return bundler

    # XXX: we should only send the filelogs (and treemanifest). user
    # already has the changelog and manifest
    packer = changegroup.getbundler(
        cgversion,
        repo,
        oldmatcher=oldmatcher,
        matcher=newmatcher,
        fullnodes=commonnodes,
    )
    cgdata = packer.generate(
        {nodemod.nullid},
        list(commonnodes),
        False,
        b'narrow_widen',
        changelog=False,
    )

    part = bundler.newpart(b'changegroup', data=cgdata)
    part.addparam(b'version', cgversion)
    requirements = repo.requirements
    if b'treemanifest' in requirements:
        part.addparam(b'treemanifest', b'1')
    if b'exp-sidedata-flag' in requirements:
        part.addparam(b'exp-sidedata', b'1')

    return bundler
@@ -1,1693 +1,1693 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 util,
30 30 )
31 31
32 32 from .interfaces import repository
33 33
# Delta header layouts, one per changegroup version. The unpacker classes
# below decode them as:
#   v1: node, p1, p2, cs
#   v2: node, p1, p2, deltabase, cs
#   v3: node, p1, p2, deltabase, cs, flags
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")

LFS_REQUIREMENT = b'lfs'

# module-level shortcut used by the chunk readers in this file
readexactly = util.readexactly
41 41
42 42
def getchunk(stream):
    """read the next chunk from stream and return its payload

    A chunk starts with a 4-byte big-endian signed length that includes the
    length field itself. A length of 0 yields an empty payload; any other
    length not greater than 4 aborts as invalid."""
    header = readexactly(stream, 4)
    (length,) = struct.unpack(b">l", header)
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_(b"invalid chunk length %d") % length)
    return b""
52 52
53 53
def chunkheader(length):
    """return the changegroup chunk header for a payload of ``length`` bytes

    The encoded value accounts for the 4 bytes of the header itself."""
    framed = length + 4
    return struct.pack(b">l", framed)
57 57
58 58
def closechunk():
    """return the header of a zero-length changegroup chunk"""
    terminator = struct.pack(b">l", 0)
    return terminator
62 62
63 63
def _fileheader(path):
    """Build the chunk announcing a named path: chunk header, then path."""
    header = chunkheader(len(path))
    return header + path
67 67
68 68
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.

    When writing fails part-way, the file is removed again before the
    exception propagates.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            fh = os.fdopen(fd, "wb")
        elif vfs:
            fh = vfs.open(filename, b"wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, b"wb", 131072)
        # from here on, a failure must remove the file
        cleanup = filename
        for chunk in chunks:
            fh.write(chunk)
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            remove = vfs.unlink if filename and vfs else os.unlink
            remove(cleanup)
102 102
103 103
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    # struct describing the per-delta header, and its size in bytes
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """wrap ``fh`` in a decompressing reader for bundle type ``alg``

        ``alg`` of None means uncompressed (b'UN'). An unsupported type
        aborts. ``extras`` defaults to an empty dict.
        """
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        # b'BZ' is mapped to the b'_truncatedBZ' engine name
        if alg == b'BZ':
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # optional no-argument callable invoked for every non-empty chunk
        # (see _chunklength); used to report progress
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        """tell whether the underlying stream uses a compression engine"""
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        """read ``l`` bytes from the decompressed stream"""
        return self._stream.read(l)

    def seek(self, pos):
        """forward seek() to the decompressed stream"""
        return self._stream.seek(pos)

    def tell(self):
        """forward tell() to the decompressed stream"""
        return self._stream.tell()

    def close(self):
        """close the decompressed stream"""
        return self._stream.close()

    def _chunklength(self):
        """read a chunk header and return the payload length (0 if empty)

        The header is a 4-byte big-endian signed length that counts itself.
        A non-zero length of 4 or less aborts. The progress callback, when
        set, is called for each non-empty chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            # empty chunk: no further file entry
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """normalize a decoded cg1 delta header

        The delta base is not transmitted: it is p1 for the first delta of
        a group (``prevnode`` is None) and the previous node otherwise.
        Flags are always 0.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """read the next delta of the current group

        Returns an empty dict when the group is exhausted, otherwise the
        tuple (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data, otherwise it
        will block in case of sshrepo because it doesn't know the end of the
        stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                # re-emit the payload in slices of at most 2 ** 20 bytes
                pos = 0
                while pos < len(chunk):
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """add the manifest group to the root manifest storage

        ``prog`` is advanced once per non-empty chunk while the group is
        read, then completed.
        """
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'.  url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()

        # ``cl`` is bound further down, before either helper is called
        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            # files touched by the incoming changesets; only its size is
            # used below (passed on as ``efiles``)
            efilesset = set()

            def onchangelog(cl, node):
                efilesset.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efilesset)

            if not cgnodes:
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles
            )

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # some code uses bundle operations for internal purposes. They
            # usually set `ui.quiet` to do this outside of user sight. Since
            # the report of such operations now happens at the end of the
            # transaction, ui.quiet has no direct effect on the output.
            #
            # To preserve this intent we use an inelegant hack: we do not
            # report the change if `quiet` is set. We should probably move to
            # something better, but this is a good first step to allow the "end
            # of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            # net change in the number of heads; new heads that close their
            # branch are not counted as added
            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                # the first changegroup of a transaction sets node/node_last
                # on the transaction itself; later ones only set them on
                # their own copy of the hook arguments
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                # _afterlock callbacks receive a "success" boolean, which
                # these hooks ignore
                def runhooks(unused_success):
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(n)
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        # deltachunk() returns {} once the group is exhausted
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
486 486
487 487
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    The only difference with cg1 is the delta header: cg2 supports
    generaldelta, so the delta base is carried explicitly in the header.
    Everything else about the data is unchanged.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        # the base comes from the header itself, prevnode is not consulted;
        # cg2 has no flags, so 0 is reported
        (node, p1, p2, deltabase, cs) = headertuple
        return node, p1, p2, deltabase, cs, 0
504 504
505 505
class cg3unpacker(cg2unpacker):
    """Unpacker for version 3 changegroup streams.

    cg3 adds revlog flags to the delta header and supports exchanging
    treemanifests; an empty chunk separates the manifest section from the
    file section.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        """Return the six header fields as they appear on the wire.

        The cg3 header already carries flags, so nothing is synthesized;
        ``prevnode`` is unused.
        """
        (node, p1, p2, deltabase, cs, flags) = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the root manifest group, then any directory manifest groups.

        Raises ``error.Abort`` when a directory group adds nothing to its
        storage.
        """
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        # Each non-empty header read here announces one directory
        # manifest group; an empty header ends the manifest section.
        for header in iter(self.filelogheader, {}):
            dirname = header[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % dirname)
            storage = repo.manifestlog.getstorage(dirname)
            if not storage.addgroup(self.deltaiter(), revmap, trp):
                raise error.Abort(_(b"received dir revlog group is empty"))
532 532
533 533
class headerlessfixup(object):
    """File-like reader that serves buffered bytes before the real stream.

    ``h`` holds bytes that logically precede whatever is still unread in
    ``fh``. ``read()`` drains ``h`` first and then falls through to ``fh``.
    """

    def __init__(self, fh, h):
        self._fh = fh
        self._h = h

    def read(self, n):
        """Return ``n`` bytes, taking them from the buffered prefix first."""
        pending = self._h
        if not pending:
            # Prefix exhausted: plain pass-through to the stream.
            return readexactly(self._fh, n)

        data = pending[:n]
        self._h = pending[n:]
        shortfall = n - len(data)
        if shortfall > 0:
            # The prefix ran out mid-read; top up from the stream.
            data += readexactly(self._fh, shortfall)
        return data
546 546
547 547
def _revisiondeltatochunks(delta, headerfn):
    """Serialize one revision delta into changegroup chunks.

    Yields, in order: the chunk header (total payload length), the delta
    header produced by ``headerfn(delta)``, an optional diff prefix, and
    the payload itself.
    """
    # Everything on the wire must be a delta. When the captured revision
    # is a full text rather than a delta, a diff header is invented so
    # that the receiver treats the full text as a rewrite of its base.
    payload = delta.delta
    if payload is not None:
        diffprefix = b''
    else:
        payload = delta.revision
        if delta.basenode == nullid:
            diffprefix = mdiff.trivialdiffheader(len(payload))
        else:
            diffprefix = mdiff.replacediffheader(
                delta.baserevisionsize, len(payload)
            )

    header = headerfn(delta)

    yield chunkheader(len(header) + len(diffprefix) + len(payload))
    yield header
    if diffprefix:
        yield diffprefix
    yield payload
573 573
574 574
575 575 def _sortnodesellipsis(store, nodes, cl, lookup):
576 576 """Sort nodes for changegroup generation."""
577 577 # Ellipses serving mode.
578 578 #
579 579 # In a perfect world, we'd generate better ellipsis-ified graphs
580 580 # for non-changelog revlogs. In practice, we haven't started doing
581 581 # that yet, so the resulting DAGs for the manifestlog and filelogs
582 582 # are actually full of bogus parentage on all the ellipsis
583 583 # nodes. This has the side effect that, while the contents are
584 584 # correct, the individual DAGs might be completely out of whack in
585 585 # a case like 882681bc3166 and its ancestors (back about 10
586 586 # revisions or so) in the main hg repo.
587 587 #
588 588 # The one invariant we *know* holds is that the new (potentially
589 589 # bogus) DAG shape will be valid if we order the nodes in the
590 590 # order that they're introduced in dramatis personae by the
591 591 # changelog, so what we do is we sort the non-changelog histories
592 592 # by the order in which they are used by the changelog.
593 593 key = lambda n: cl.rev(lookup(n))
594 594 return sorted(nodes, key=key)
595 595
596 596
def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    """Compute the parents to advertise for an ellipsis revision.

    ``precomputedellipsis[linkrev]`` gives the ellipsis parents as
    changelog revision numbers; they are translated into revisions of
    ``store`` and returned as nodes.

    Returns ``(p1node, p2node, linknode)``; ``linknode`` is passed through
    unchanged. Raises ``error.Abort`` if a parent cannot be mapped to a
    revision of ``store``.
    """
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Map a changelog revnum onto a revnum of ``store``.

        The ellipsis DAG is expressed in changelog revnums, so for any
        other revlog those numbers have to be translated into something
        local before they can be used as parents.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Breadth-first walk over the ellipsis-ized changelog, looking
        # for a changeset already linked from this revlog. A flat
        # manifest normally needs a single step; filelogs and tree
        # manifest dirlogs may have to skip changesets that never
        # touched them.
        pending = [clrev]
        while pending:
            candidate = pending.pop(0)
            if candidate in clrevtolocalrev:
                return clrevtolocalrev[candidate]

            if candidate in fullclnodes:
                ancestors = cl.parentrevs(candidate)
            elif candidate in precomputedellipsis:
                ancestors = precomputedellipsis[candidate]
            else:
                # An ellipsis whose parents lie outside the current
                # bundle (likely an incremental pull). Fall back to
                # scanning this revlog backwards for a revision linked to
                # the requested changeset. The original author marked
                # this as working for the test suite but not proven for
                # every edge case.
                # NOTE(review): the scan stops before revision 0 -
                # confirm whether skipping it is intentional.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # No parent could be resolved for this node, so abort
                # the changegroup construction.
                raise error.Abort(
                    b'unable to resolve parent while packing %r %r'
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

            pending.extend([pp for pp in ancestors if pp != nullrev])

        return nullrev

    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1 = p2 = nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    return store.node(p1), store.node(p2), linknode
684 684
685 685
def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances. Nothing is yielded when
    ``nodes`` is empty.

    ``store`` is the storage object the revisions are emitted from and
    ``nodes`` the nodes to emit. ``ischangelog`` selects storage order for
    the emitted revisions. ``lookup(node)`` returns the changelog node
    linked to ``node``; each yielded revision gets it as ``linknode``.
    ``forcedeltaparentprev`` forces the "previous revision" delta mode.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).

    When ``ellipses`` is true, ``clrevtolocalrev`` (a dict, updated in
    place), ``fullclnodes`` and ``precomputedellipsis`` must be supplied;
    revisions whose changeset is neither full nor a known ellipsis are
    dropped, and ellipsis revisions get rewritten parents and the
    ellipsis flag.
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        # The message must be bytes: it is %-formatted with the bytes
        # config value and then handed to ui.warn().
        msg = _(b"""config "devel.bundle.delta" as unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    # forcedeltaparentprev wins over the devel config knob.
    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            # Use the linknode recorded during the first pass instead of
            # calling lookup() a second time.
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
826 826
827 827
class cgpacker(object):
    """Produce the byte chunks of a changegroup for a source repository.

    One instance is configured for a single wire format version (delta
    header builder, end-of-manifests marker) and for the narrow, ellipsis
    and shallow options in effect.
    """

    def __init__(
        self,
        repo,
        oldmatcher,
        matcher,
        version,
        builddeltaheader,
        manifestsend,
        forcedeltaparentprev=False,
        bundlecaps=None,
        ellipses=False,
        shallow=False,
        ellipsisroots=None,
        fullnodes=None,
    ):
        """Given a source repo, construct a bundler.

        oldmatcher matches the files the client already has; those are
        left out of the changegroup.

        matcher matches the files to include in the changegroup. Used to
        facilitate sparse changegroups.

        version is the changegroup version identifier stored on the
        instance.

        builddeltaheader is a callable that builds the header of a group
        delta.

        manifestsend is the chunk emitted once all manifests have been
        sent.

        forcedeltaparentprev requests that every delta in a group be made
        against the previous revision. It should only be used for
        compatibility with changegroup version 1.

        bundlecaps is an optional set of capabilities usable while
        building the bundle. Core Mercurial does not read it, but
        extensions rely on it to customize the packer.

        ellipses tells whether ellipsis serving mode is enabled.

        shallow tells whether shallow data might be sent, in which case
        file contents not introduced by the packed changes may have to be
        packed too.

        ellipsisroots maps ellipsis revs to their roots at the changelog
        level.

        fullnodes is the set of changelog nodes which must not be
        ellipsis nodes. It is stored instead of the set of ellipsis nodes
        because it is expected to be much smaller on large histories.
        """
        assert oldmatcher
        assert matcher
        self._oldmatcher = oldmatcher
        self._matcher = matcher

        self.version = version
        self._forcedeltaparentprev = forcedeltaparentprev
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        self._bundlecaps = set() if bundlecaps is None else bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        self._repo = repo

        # Size notes are shown with --verbose but not with --debug.
        ui = self._repo.ui
        if ui.verbose and not ui.debugflag:
            self._verbosenote = ui.note
        else:
            self._verbosenote = lambda s: None

    def generate(
        self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
    ):
        """Yield a sequence of changegroup byte chunks.

        The changelog section comes first, then manifests, then files,
        each terminated by a close chunk. ``changelog=False`` is forwarded
        to ``_generatechangelog()`` as ``generate``. Once every chunk has
        been produced the ``outgoing`` hook runs if ``clnodes`` is
        non-empty.
        """
        repo = self._repo
        cl = repo.changelog
        headerfn = self._builddeltaheader

        self._verbosenote(_(b'uncompressed size of bundle content:\n'))
        size = 0

        clstate, deltas = self._generatechangelog(
            cl, clnodes, generate=changelog
        )
        for delta in deltas:
            for chunk in _revisiondeltatochunks(delta, headerfn):
                size += len(chunk)
                yield chunk

        size += len(closechunk())
        yield closechunk()

        self._verbosenote(_(b'%8.i (changelog)\n') % size)

        clrevorder = clstate[b'clrevorder']
        manifests = clstate[b'manifests']
        changedfiles = clstate[b'changedfiles']

        # The linkrev sent for a manifest or file revision must point at
        # the first changeset that introduced it. The fast path is usually
        # the safer one because filelogs are walked in revlog order.
        #
        # On the slow path a generaldelta manifest revlog may be walked in
        # the "wrong" order; 'clrevorder' exists to still pick the right
        # linkrev (see fix in cc0ff93d0c0c).
        #
        # On the fast path only a reordered changelog would be a problem.
        # The changelog never uses generaldelta and is never reordered;
        # that case is handled by taking the slow path, which already has
        # the 'clrevorder' logic (also fixed in cc0ff93d0c0c).

        # Treemanifests do not work with fastpathlinkrev either, because
        # the directory nodes to send along with the files would not be
        # discovered. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            b'treemanifest' not in repo.requirements
        )

        fnodes = {}  # needed file nodes

        size = 0
        manifestgroups = self.generatemanifests(
            commonrevs,
            clrevorder,
            fastpathlinkrev,
            manifests,
            fnodes,
            source,
            clstate[b'clrevtomanifestrev'],
        )

        for tree, deltas in manifestgroups:
            if tree:
                # Directory manifests only exist in changegroup 03.
                assert self.version == b'03'
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                for chunk in _revisiondeltatochunks(delta, headerfn):
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_(b'%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [
                (self._repo.manifestlog[n].read(), lr)
                for (n, lr) in pycompat.iteritems(manifests)
            ]

        manifests.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        filegroups = self.generatefiles(
            changedfiles,
            commonrevs,
            source,
            mfdicts,
            fastpathlinkrev,
            fnodes,
            clrevs,
        )

        for path, deltas in filegroups:
            header = _fileheader(path)
            # The size reported for files is per file, so start over.
            size = len(header)
            yield header

            for delta in deltas:
                for chunk in _revisiondeltatochunks(delta, headerfn):
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_(b'%8.i %s\n') % (size, path))

        yield closechunk()

        if clnodes:
            repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)

    def _generatechangelog(self, cl, nodes, generate=True):
        """Generate data for changelog chunks.

        Returns a 2-tuple ``(state, deltas)``. ``state`` is a dict with the
        keys ``clrevorder``, ``manifests``, ``changedfiles`` and
        ``clrevtomanifestrev``; ``deltas`` is an iterable of revision
        deltas for the changelog.

        When ``generate`` is False and ellipsis mode is off, the state is
        filled in immediately and ``deltas`` is empty. In every other case
        the state is filled in as a side effect of consuming ``deltas``,
        so it is not complete until the iterable has been exhausted.
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # Nothing will be emitted: walk the nodes in storage order and
            # collect the state directly.
            for node in sorted(nodes, key=cl.rev):
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest
                # version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if not self._ellipses:
                # record the first changeset introducing this manifest
                # version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)
                return x

            # Only update manifests if x is going to be sent. Otherwise we
            # end up with bogus linkrevs specified for manifests and we
            # skip some manifest nodes that we should otherwise have sent.
            if (
                x in self._fullclnodes
                or cl.rev(x) in self._precomputedellipsis
            ):
                manifestnode = c.manifest
                # Record the first changeset introducing this manifest
                # version.
                manifests.setdefault(manifestnode, x)
                # Set this narrow-specific dict so we have the lowest
                # manifest revnum to look up for this cl revnum. (Part of
                # mapping changelog ellipsis parents to manifest ellipsis
                # parents)
                clrevtomanifestrev.setdefault(
                    cl.rev(x), mfl.rev(manifestnode)
                )
            # We can't trust the changed files list in the changeset if
            # the client requested a shallow clone.
            if self._isshallow:
                changedfiles.update(mfl[c.manifest].read().keys())
            else:
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
        )

        return state, gen

    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
    ):
        """Yield ``(tree, deltas)`` pairs, one per manifest storage to send.

        ``tree`` is ``b''`` for the root manifest and a directory path for
        tree manifests. ``deltas`` is an iterable of revision deltas.
        ``fnodes`` is filled in as a side effect when not on the fast
        path.

        `source` is unused here, but is used by extensions like
        remotefilelog to change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        # Work list: tree name -> {manifest node: changelog node}. The
        # lookup callback below adds subdirectories as it meets them.
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return (
                    manifests.__getitem__  # pytype: disable=unsupported-operands
                )

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fnodes gets populated with the relevant file nodes
                   (this callback is only used when fastpathlinkrev is
                   off)
                2) When treemanifests are in use, collects treemanifest
                   nodes to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        bucket = tmfnodes.setdefault(tree + p + b'/', {})
                    else:
                        bucket = fnodes.setdefault(tree + p, {})
                    # Keep whichever changeset comes first in clrevorder.
                    known = bucket.setdefault(n, clnode)
                    if clrevorder[clnode] < clrevorder[known]:
                        bucket[n] = clnode
                return clnode

            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if should_visit:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)
            else:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even
                # if we are not going to send it because a side-effect is
                # that it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []

    def _prunemanifests(self, store, nodes, commonrevs):
        """Return the manifest nodes of ``store`` that are worth sending.

        Outside ellipsis mode a copy of ``nodes`` is returned unfiltered.
        In ellipsis mode the result is a list of the nodes whose linkrev
        is not in ``commonrevs``.
        """
        if not self._ellipses:
            # In non-ellipses case and large repositories, it is better to
            # prevent calling of store.rev and store.linkrev on a lot of
            # nodes as compared to sending some extra data
            return nodes.copy()
        # This is split out as a separate method to allow filtering
        # commonrevs in extension code.
        #
        # TODO(augie): this shouldn't be required, instead we should
        # make filtering of revisions to send delegated to the store
        # layer.
        torev, tolinkrev = store.rev, store.linkrev
        return [n for n in nodes if tolinkrev(torev(n)) not in commonrevs]

    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
    ):
        """Yield ``(filename, deltas)`` for every file with revisions to send.

        Only files accepted by the matcher and rejected by the old matcher
        are considered, in sorted order. Files with no file node left
        after filtering against ``commonrevs`` are skipped. Raises
        ``error.Abort`` when a file has empty or missing storage.
        """
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:
            # Slow path: linknodes were collected while sending manifests.
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            # Fast path: trust the linkrevs stored in the filelog itself.
            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict(
                    (fnode(r), cln(lr)) for r, lr in revs if lr in clrevs
                )

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also
            # include those file nodes that are in the manifests we sent
            # but weren't introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            # The mapping is per file; reset it before it is refilled.
            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above
            # in the fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            yield fname, deltas

        progress.complete()
1369 1369
1370 1370
def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Build a ``cgpacker`` that emits version 01 changegroups.

    Version 01 headers carry no delta base, so deltas are forced to be
    made against the previous revision. No end-of-manifests chunk is
    sent.
    """

    def builddeltaheader(d):
        # cg1 header fields: node, both parents and the link node.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1399 1399
1400 1400
def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Build a ``cgpacker`` that emits version 02 changegroups.

    Version 02 headers name the delta base explicitly. No
    end-of-manifests chunk is sent.
    """

    def builddeltaheader(d):
        # cg2 header fields: node, both parents, delta base, link node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1428 1428
1429 1429
def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Return a ``cgpacker`` configured to emit version ``03`` changegroups.

    All arguments are forwarded unchanged to ``cgpacker``; only the version
    tag, the delta header layout and the manifest terminator are fixed here.
    """

    def builddeltaheader(d):
        # The version 03 header packs node, both parents, the delta base,
        # the linknode and the revision flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        # Unlike the 01/02 factories, the manifest section is terminated
        # with an explicit close chunk.
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1457 1457
1458 1458
# Changegroup version -> (packer factory, unpacker class).
_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta.
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests.
    b'03': (_makecg3packer, cg3unpacker),
}
1466 1466
1467 1467
def allsupportedversions(repo):
    """Return the set of changegroup versions usable with ``repo``.

    Version ``03`` is only offered when an experimental config knob or a
    repository requirement calls for it.
    """
    supported = set(_packermap)

    # We keep version 03 when we need to exchange treemanifest data.
    #
    # Versions 01 and 02 are kept as well, because a repo can contain both
    # normal and tree manifests at the same time, so using an older version
    # to pull data (or even to push a subset of history) remains viable.
    wantv03 = bool(
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or b'treemanifest' in repo.requirements
    )

    if b'exp-sidedata-flag' in repo.requirements:
        wantv03 = True
        # Don't attempt to use 01/02 until we do sidedata cleaning.
        supported.discard(b'01')
        supported.discard(b'02')

    if not wantv03:
        supported.discard(b'03')
    return supported
1492 1492
1493 1493
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to ``repo``."""
    incoming = allsupportedversions(repo)
    return incoming
1497 1497
1498 1498
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be created from ``repo``."""
    versions = allsupportedversions(repo)

    # Requirements that make versions 01 and 02 unusable for output:
    #
    # - treemanifest: 01 and 02 support only flat manifests and it is just
    #   too expensive to convert between flat and tree manifests on the fly.
    #   Since tree manifests are hashed differently, all of history would
    #   have to be converted, so we don't even pretend to support them.
    # - narrow: 01 and 02 don't support revlog flags, which are needed for
    #   stripping and unbundling to work.
    # - LFS: 01 and 02 don't support revlog flags, which are needed to mark
    #   LFS entries with REVIDX_EXTSTORED.
    needscg3 = (
        b'treemanifest',
        repository.NARROW_REQUIREMENT,
        LFS_REQUIREMENT,
    )
    if any(req in repo.requirements for req in needscg3):
        versions.discard(b'01')
        versions.discard(b'02')

    return versions
1522 1522
1523 1523
def localversion(repo):
    """Return the highest changegroup version ``repo`` can produce.

    Intended for bundles that are only consumed locally, such as those from
    strip and shelve, and temporary bundles.
    """
    candidates = supportedoutgoingversions(repo)
    return max(candidates)
1528 1528
1529 1529
def safeversion(repo):
    """Return the smallest version clients of ``repo`` can be assumed to support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so 01 is not considered for generaldelta repos.
    """
    candidates = supportedoutgoingversions(repo)
    if b'generaldelta' in repo.requirements:
        candidates.discard(b'01')
    assert candidates
    return min(candidates)
1539 1539
1540 1540
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Return a packer for changegroup ``version`` bound to ``repo``.

    ``matcher`` defaults to a match-everything matcher and ``oldmatcher`` to
    a match-nothing matcher. Raises ``error.ProgrammingError`` when a sparse
    matcher is combined with version 01, and ``error.Abort`` when ellipsis
    nodes are requested with version 01 or 02.
    """
    assert version in supportedoutgoingversions(repo)

    matcher = matchmod.always() if matcher is None else matcher
    oldmatcher = matchmod.never() if oldmatcher is None else oldmatcher

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        msg = _(
            b'ellipsis nodes require at least cg3 on client and server, '
            b'but negotiated version %s'
        )
        raise error.Abort(msg % version)

    # Requested files could include files not in the local store, so
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    makepacker = _packermap[version][0]
    return makepacker(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1588 1588
1589 1589
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker registered for changegroup ``version``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1592 1592
1593 1593
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, in debug mode, which) changesets were found."""
    ui = repo.ui
    if ui.verbose or source == b'bundle':
        ui.status(_(b"%d changesets found\n") % len(nodes))
    if not ui.debugflag:
        return
    ui.debug(b"list of changesets:\n")
    for node in nodes:
        ui.debug(b"%s\n" % hex(node))
1601 1601
1602 1602
def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    """Generate a changegroup for ``outgoing`` and return it as an unbundler.

    The chunks produced by ``makestream`` are wrapped in a chunkbuffer and
    handed to the unpacker for ``version``, together with the number of
    missing changesets as the ``clcount`` extra.
    """
    cgstream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    buffered = util.chunkbuffer(cgstream)
    extras = {b'clcount': len(outgoing.missing)}
    return getunbundler(version, buffered, None, extras)
1620 1620
1621 1621
def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
):
    """Return the chunk generator for a changegroup covering ``outgoing``.

    ``outgoing`` supplies the common revisions (``common``), the changesets
    to send (``missing``) and their heads (``missingheads``). The
    ``preoutgoing`` hook is run (and may abort) before generation starts.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    # Work on a sorted copy: sorting ``outgoing.missingheads`` in place would
    # silently reorder a list that belongs to the caller.
    heads = sorted(outgoing.missingheads)
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    # NOTE(review): ``repo`` was rebound to the unfiltered repo above, so the
    # ``filtername is None`` test presumably always holds here -- confirm.
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1648 1648
1649 1649
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file revlog groups of ``source`` to ``repo``.

    ``needfiles`` maps a filename to the collection of file nodes that are
    expected; entries are removed as the nodes arrive. Returns a
    ``(revisions, files)`` tuple counting what was added.

    Raises ``error.Abort`` when a group is empty, a delta base is censored,
    an unexpected node is received, or an expected node is still missing
    once every group has been processed.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
    # filelogheader() returns an empty dict once there are no more files.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        fname = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % fname)
        progress.increment()
        flog = repo.file(fname)
        oldlen = len(flog)
        try:
            deltas = source.deltaiter()
            if not flog.addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        newlen = len(flog)
        revisions += newlen - oldlen

        if fname not in needfiles:
            continue
        # Tick off every newly added node; anything we did not ask for is
        # an error.
        needs = needfiles[fname]
        for rev in pycompat.xrange(oldlen, newlen):
            node = flog.node(rev)
            if node not in needs:
                raise error.Abort(_(b"received spurious file revlog entry"))
            needs.remove(node)
        if not needs:
            del needfiles[fname]
    progress.complete()

    # Whatever is still listed must already be present in the local store.
    for fname, needs in pycompat.iteritems(needfiles):
        flog = repo.file(fname)
        for node in needs:
            try:
                flog.rev(node)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (fname, hex(node))
                )

    return revisions, files
@@ -1,3744 +1,3744 b''
1 1 # localrepo.py - read/write repository class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import random
14 14 import sys
15 15 import time
16 16 import weakref
17 17
18 18 from .i18n import _
19 19 from .node import (
20 20 bin,
21 21 hex,
22 22 nullid,
23 23 nullrev,
24 24 short,
25 25 )
26 26 from .pycompat import (
27 27 delattr,
28 28 getattr,
29 29 )
30 30 from . import (
31 31 bookmarks,
32 32 branchmap,
33 33 bundle2,
34 34 changegroup,
35 35 color,
36 36 context,
37 37 dirstate,
38 38 dirstateguard,
39 39 discovery,
40 40 encoding,
41 41 error,
42 42 exchange,
43 43 extensions,
44 44 filelog,
45 45 hook,
46 46 lock as lockmod,
47 47 match as matchmod,
48 48 merge as mergemod,
49 49 mergeutil,
50 50 namespaces,
51 51 narrowspec,
52 52 obsolete,
53 53 pathutil,
54 54 phases,
55 55 pushkey,
56 56 pycompat,
57 57 repoview,
58 58 revset,
59 59 revsetlang,
60 60 scmutil,
61 61 sparse,
62 62 store as storemod,
63 63 subrepoutil,
64 64 tags as tagsmod,
65 65 transaction,
66 66 txnutil,
67 67 util,
68 68 vfs as vfsmod,
69 69 )
70 70
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75
76 76 from .utils import (
77 77 procutil,
78 78 stringutil,
79 79 )
80 80
81 81 from .revlogutils import constants as revlogconst
82 82
# Short module-level aliases.
release = lockmod.release
urlerr = util.urlerr
urlreq = util.urlreq

# set of (path, vfs-location) tuples. vfs-location is:
# - 'plain' for vfs relative paths
# - '' for svfs relative paths
_cachedfiles = set()
91 91
92 92
class _basefilecache(scmutil.filecache):
    """filecache variant that always operates on the unfiltered repo."""

    def __get__(self, repo, type=None):
        if repo is None:
            return self
        # Proxy to the unfiltered __dict__ since a filtered repo has no
        # entry of its own.
        unfi = repo.unfiltered()
        cached = unfi.__dict__
        if self.sname in cached:
            return cached[self.sname]
        return super(_basefilecache, self).__get__(unfi, type)

    def set(self, repo, value):
        unfi = repo.unfiltered()
        return super(_basefilecache, self).set(unfi, value)
110 110
111 111
class repofilecache(_basefilecache):
    """filecache for files in .hg but outside of .hg/store"""

    def __init__(self, *paths):
        super(repofilecache, self).__init__(*paths)
        # Register every tracked path as living in the plain (.hg) vfs.
        _cachedfiles.update((path, b'plain') for path in paths)

    def join(self, obj, fname):
        hgvfs = obj.vfs
        return hgvfs.join(fname)
122 122
123 123
class storecache(_basefilecache):
    """filecache for files in the store"""

    def __init__(self, *paths):
        super(storecache, self).__init__(*paths)
        # An empty location marks the path as relative to the store vfs.
        _cachedfiles.update((path, b'') for path in paths)

    def join(self, obj, fname):
        storejoin = obj.sjoin
        return storejoin(fname)
134 134
135 135
class mixedrepostorecache(_basefilecache):
    """filecache for a mix of files in .hg/store and outside"""

    def __init__(self, *pathsandlocations):
        # scmutil.filecache only uses the path for passing back into our
        # join(), so we can safely pass a list of paths and locations.
        super(mixedrepostorecache, self).__init__(*pathsandlocations)
        _cachedfiles.update(pathsandlocations)

    def join(self, obj, fnameandlocation):
        fname, location = fnameandlocation
        if location == b'plain':
            return obj.vfs.join(fname)
        if location != b'':
            raise error.ProgrammingError(
                b'unexpected location: %s' % location
            )
        return obj.sjoin(fname)
155 155
156 156
def isfilecached(repo, name):
    """Tell whether ``repo`` already holds a cached "name" filecache property.

    Returns a ``(cachedobj-or-None, iscached)`` tuple.
    """
    entry = repo.unfiltered()._filecache.get(name)
    if entry:
        return entry.obj, True
    return None, False
166 166
167 167
class unfilteredpropertycache(util.propertycache):
    """propertycache whose value lives on the unfiltered repo only"""

    def __get__(self, repo, type=None):
        unfi = repo.unfiltered()
        if unfi is not repo:
            # Filtered view: go through the unfiltered repo's attribute.
            return getattr(unfi, self.name)
        return super(unfilteredpropertycache, self).__get__(unfi)
176 176
177 177
class filteredpropertycache(util.propertycache):
    """propertycache that must take filtering into account"""

    def cachevalue(self, obj, value):
        # Store through object.__setattr__ directly, under the property name.
        attrname = self.name
        object.__setattr__(obj, attrname, value)
183 183
184 184
def hasunfilteredcache(repo, name):
    """Tell whether the unfiltered repo has a cached value for <name>."""
    unfi = repo.unfiltered()
    return name in vars(unfi)
188 188
189 189
def unfilteredmethod(orig):
    """Decorator for methods that must always run on the unfiltered repo."""

    def wrapper(repo, *args, **kwargs):
        unfi = repo.unfiltered()
        return orig(unfi, *args, **kwargs)

    return wrapper
197 197
198 198
# Default capability set handed to localpeer.
moderncaps = {
    b'lookup',
    b'branchmap',
    b'pushkey',
    b'known',
    b'getbundle',
    b'unbundle',
}
# Capability set used by locallegacypeer.
legacycaps = moderncaps | {b'changegroupsubset'}
208 208
209 209
@interfaceutil.implementer(repository.ipeercommandexecutor)
class localcommandexecutor(object):
    """Command executor that calls peer methods directly, in-process."""

    def __init__(self, peer):
        self._peer = peer
        # Set by sendcommands() and close() respectively; callcommand()
        # refuses to run once either one is set.
        self._sent = False
        self._closed = False

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, exctb):
        self.close()

    def callcommand(self, command, args):
        """Run ``command`` on the peer and return an already-resolved future.

        The future carries either the method's result or the exception it
        raised.
        """
        guards = (
            (self._sent, b'callcommand() cannot be used after sendcommands()'),
            (self._closed, b'callcommand() cannot be used after close()'),
        )
        for tripped, message in guards:
            if tripped:
                raise error.ProgrammingError(message)

        # We don't need to support anything fancy. Just call the named
        # method on the peer and return a resolved future.
        method = getattr(self._peer, pycompat.sysstr(command))

        future = pycompat.futures.Future()

        try:
            result = method(**pycompat.strkwargs(args))
        except Exception:
            pycompat.future_set_exception_info(future, sys.exc_info()[1:])
        else:
            future.set_result(result)

        return future

    def sendcommands(self):
        self._sent = True

    def close(self):
        self._closed = True
254 254
255 255
@interfaceutil.implementer(repository.ipeercommands)
class localpeer(repository.peer):
    '''peer for a local repo; reflects only the most recent API'''

    def __init__(self, repo, caps=None):
        super(localpeer, self).__init__()

        if caps is None:
            caps = moderncaps.copy()
        # The peer talks to the 'served' view of the repository.
        self._repo = repo.filtered(b'served')
        self.ui = repo.ui
        self._caps = repo._restrictcapabilities(caps)

    # Begin of _basepeer interface.

    def url(self):
        return self._repo.url()

    def local(self):
        return self._repo

    def peer(self):
        return self

    def canpush(self):
        return True

    def close(self):
        self._repo.close()

    # End of _basepeer interface.

    # Begin of _basewirecommands interface.

    def branchmap(self):
        return self._repo.branchmap()

    def capabilities(self):
        return self._caps

    def clonebundles(self):
        return self._repo.tryread(b'clonebundles.manifest')

    def debugwireargs(self, one, two, three=None, four=None, five=None):
        """Used to test argument passing over the wire"""
        optional = (
            pycompat.bytestr(three),
            pycompat.bytestr(four),
            pycompat.bytestr(five),
        )
        return b"%s %s %s %s %s" % ((one, two) + optional)

    def getbundle(
        self, source, heads=None, common=None, bundlecaps=None, **kwargs
    ):
        """Return an unbundler over the bundle chunks for this request."""
        chunks = exchange.getbundlechunks(
            self._repo,
            source,
            heads=heads,
            common=common,
            bundlecaps=bundlecaps,
            **kwargs
        )[1]
        cb = util.chunkbuffer(chunks)

        if not exchange.bundle2requested(bundlecaps):
            return changegroup.getunbundler(b'01', cb, None)

        # When requesting a bundle2, getbundle returns a stream to make the
        # wire level function happier. We need to build a proper object
        # from it in local peer.
        return bundle2.getunbundler(self.ui, cb)

    def heads(self):
        return self._repo.heads()

    def known(self, nodes):
        return self._repo.known(nodes)

    def listkeys(self, namespace):
        return self._repo.listkeys(namespace)

    def lookup(self, key):
        return self._repo.lookup(key)

    def pushkey(self, namespace, key, old, new):
        return self._repo.pushkey(namespace, key, old, new)

    def stream_out(self):
        raise error.Abort(_(b'cannot perform stream clone against local peer'))

    def unbundle(self, bundle, heads, url):
        """apply a bundle on a repo

        This function handles the repo locking itself."""
        try:
            try:
                bundle = exchange.readbundle(self.ui, bundle, None)
                ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
                if util.safehasattr(ret, b'getchunks'):
                    # This is a bundle20 object, turn it into an unbundler.
                    # This little dance should be dropped eventually when
                    # the API is finally improved.
                    stream = util.chunkbuffer(ret.getchunks())
                    ret = bundle2.getunbundler(self.ui, stream)
                return ret
            except Exception as exc:
                # If the exception contains output salvaged from a bundle2
                # reply, we need to make sure it is printed before
                # continuing to fail. So we build a bundle2 with such output
                # and consume it directly.
                #
                # This is not very elegant but allows a "simple" solution
                # for issue4594.
                salvaged = getattr(exc, '_bundle2salvagedoutput', ())
                if salvaged:
                    bundler = bundle2.bundle20(self._repo.ui)
                    for part in salvaged:
                        bundler.addpart(part)
                    stream = util.chunkbuffer(bundler.getchunks())
                    unbundler = bundle2.getunbundler(self.ui, stream)
                    bundle2.processbundle(self._repo, unbundler)
                raise
        except error.PushRaced as exc:
            raise error.ResponseError(
                _(b'push failed:'), stringutil.forcebytestr(exc)
            )

    # End of _basewirecommands interface.

    # Begin of peer interface.

    def commandexecutor(self):
        return localcommandexecutor(self)

    # End of peer interface.
393 393
394 394
@interfaceutil.implementer(repository.ipeerlegacycommands)
class locallegacypeer(localpeer):
    '''peer extension which implements legacy methods too; used for tests with
    restricted capabilities'''

    def __init__(self, repo):
        super(locallegacypeer, self).__init__(repo, caps=legacycaps)

    # Begin of baselegacywirecommands interface.

    def between(self, pairs):
        return self._repo.between(pairs)

    def branches(self, nodes):
        return self._repo.branches(nodes)

    def changegroup(self, nodes, source):
        """Return a version 01 changegroup from ``nodes`` up to all heads."""
        repo = self._repo
        outgoing = discovery.outgoing(
            repo, missingroots=nodes, missingheads=repo.heads()
        )
        return changegroup.makechangegroup(repo, outgoing, b'01', source)

    def changegroupsubset(self, bases, heads, source):
        """Return a version 01 changegroup from ``bases`` up to ``heads``."""
        repo = self._repo
        outgoing = discovery.outgoing(
            repo, missingroots=bases, missingheads=heads
        )
        return changegroup.makechangegroup(repo, outgoing, b'01', source)

    # End of baselegacywirecommands interface.
424 424
425 425
# Increment the sub-version when the revlog v2 format changes to lock out old
# clients.
REVLOGV2_REQUIREMENT = b'exp-revlogv2.1'

# A repository with the sparserevlog feature will have delta chains that
# can spread over a larger span. Sparse reading cuts these large spans into
# pieces, so that each piece isn't too big.
# Without the sparserevlog capability, reading from the repository could use
# huge amounts of memory, because the whole span would be read at once,
# including all the intermediate revisions that aren't pertinent for the
# chain. This is why once a repository has enabled sparse-read, it becomes
# required.
SPARSEREVLOG_REQUIREMENT = b'sparserevlog'

# A repository with the sidedataflag requirement allows storing extra
# information for a revision without altering its original hash.
SIDEDATA_REQUIREMENT = b'exp-sidedata-flag'

# A repository with the copies-sidedata-changeset requirement will store
# copies related information in changeset's sidedata.
COPIESSDC_REQUIREMENT = b'exp-copies-sidedata-changeset'

# Functions receiving (ui, features) that extensions can register to impact
# the ability to load repositories with custom requirements. Only
# functions defined in loaded extensions are called.
#
# The function receives a set of requirement strings that the repository
# is capable of opening. Functions will typically add elements to the
# set to reflect that the extension knows how to handle those requirements.
featuresetupfuncs = set()
455 455
456 456
def makelocalrepository(baseui, path, intents=None):
    """Create a local repository object.

    Given the arguments needed to construct a local repository, this
    function performs the early repository loading steps (reading the
    ``.hg/requires`` and ``.hg/hgrc`` files), validates that the repository
    can be opened, derives a type suitable for representing it, and returns
    an instance of that type.

    The returned object conforms to the ``repository.completelocalrepository``
    interface.

    The repository type is derived by calling a series of factory functions,
    one per aspect/interface of the final repository, as listed in
    ``REPO_INTERFACES``. Each factory produces a type implementing a specific
    interface; the returned types are combined into a new type, which is
    then instantiated to represent the local repository.

    The factory functions each receive various state that may be consulted
    as part of deriving a type.

    Extensions should wrap these factory functions to customize repository
    type creation. Note that an extension's wrapped function may be called
    even if that extension is not loaded for the repo being constructed.
    Extensions should check if their ``__name__`` appears in the
    ``extensionmodulenames`` set passed to the factory function and no-op if
    not.
    """
    ui = baseui.copy()
    # Prevent copying repo configuration.
    ui.copy = baseui.copy

    # Working directory VFS rooted at repository root.
    wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)

    # Main VFS for the .hg/ directory.
    hgpath = wdirvfs.join(b'.hg')
    hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)

    # The .hg/ path should exist and should be a directory. All other
    # cases are errors.
    if not hgvfs.isdir():
        try:
            hgvfs.stat()
        except OSError as e:
            # Only "does not exist" is turned into the RepoError below.
            if e.errno != errno.ENOENT:
                raise

        raise error.RepoError(_(b'repository %s not found') % path)

    # The .hg/requires file contains a newline-delimited list of
    # features/capabilities the opener (us) must have in order to use
    # the repository. This file was introduced in Mercurial 0.9.2,
    # which means very old repositories may not have one. We assume
    # a missing file translates to no requirements.
    try:
        requirements = set(hgvfs.read(b'requires').splitlines())
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        requirements = set()

    # The .hg/hgrc file may load extensions or contain config options
    # that influence repository construction. Attempt to load it and
    # process any new extensions that it may have pulled in.
    if loadhgrc(ui, wdirvfs, hgvfs, requirements):
        afterhgrcload(ui, wdirvfs, hgvfs, requirements)
        extensions.loadall(ui)
        extensions.populateui(ui)

    # Set of module names of extensions loaded for this repository.
    extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}

    supportedrequirements = gathersupportedrequirements(ui)

    # We first validate that the requirements are known.
    ensurerequirementsrecognized(requirements, supportedrequirements)

    # Then we validate that the known set is reasonable to use together.
    ensurerequirementscompatible(ui, requirements)

    # TODO there are unhandled edge cases related to opening repositories
    # with shared storage. If storage is shared, we should also test for
    # requirements compatibility in the pointed-to repo. This entails loading
    # the .hg/hgrc in that repo, as that repo may load extensions needed to
    # open it. This is a bit complicated because we don't want the other hgrc
    # to overwrite settings in this hgrc.
    #
    # This bug is somewhat mitigated by the fact that we copy the
    # .hg/requires file when sharing repos. But if a requirement is added
    # after the share is performed, thereby introducing a new requirement for
    # the opener, we will not see that and could encounter a run-time error
    # interacting with that shared store since it has an unknown-to-us
    # requirement.

    # At this point, we know we should be capable of opening the repository.
    # Now get on with doing that.

    features = set()

    # The "store" part of the repository holds versioned data. How it is
    # accessed is determined by various requirements. The ``shared`` or
    # ``relshared`` requirements indicate the store lives in the path
    # contained in the ``.hg/sharedpath`` file. This is an absolute path for
    # ``shared`` and relative to ``.hg/`` for ``relshared``.
    isshared = b'shared' in requirements or b'relshared' in requirements
    if isshared:
        sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
        if b'relshared' in requirements:
            sharedpath = hgvfs.join(sharedpath)

        sharedvfs = vfsmod.vfs(sharedpath, realpath=True)

        if not sharedvfs.exists():
            raise error.RepoError(
                _(b'.hg/sharedpath points to nonexistent directory %s')
                % sharedvfs.base
            )

        features.add(repository.REPO_FEATURE_SHARED_STORAGE)

        storebasepath = sharedvfs.base
        cachepath = sharedvfs.join(b'cache')
    else:
        storebasepath = hgvfs.base
        cachepath = hgvfs.join(b'cache')
    # The working-copy cache always lives under this repo's own .hg/.
    wcachepath = hgvfs.join(b'wcache')

    # The store has changed over time and the exact layout is dictated by
    # requirements. The store interface abstracts differences across all
    # of them.
    store = makestore(
        requirements,
        storebasepath,
        lambda base: vfsmod.vfs(base, cacheaudited=True),
    )
    hgvfs.createmode = store.createmode

    storevfs = store.vfs
    storevfs.options = resolvestorevfsoptions(ui, requirements, features)

    # The cache vfs is used to manage cache files.
    cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
    cachevfs.createmode = store.createmode
    # The wcache vfs is used to manage cache files related to the working
    # copy.
    wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
    wcachevfs.createmode = store.createmode

    # Now resolve the type for the repository object. We do this by
    # repeatedly calling a factory function to produce types for specific
    # aspects of the repo's operation. The aggregate returned types are used
    # as base classes for a dynamically-derived type, which will represent
    # our new repository.

    bases = []
    extrastate = {}

    for iface, fn in REPO_INTERFACES:
        # We pass all potentially useful state to give extensions tons of
        # flexibility.
        typ = fn()(
            ui=ui,
            intents=intents,
            requirements=requirements,
            features=features,
            wdirvfs=wdirvfs,
            hgvfs=hgvfs,
            store=store,
            storevfs=storevfs,
            storeoptions=storevfs.options,
            cachevfs=cachevfs,
            wcachevfs=wcachevfs,
            extensionmodulenames=extensionmodulenames,
            extrastate=extrastate,
            baseclasses=bases,
        )

        if not isinstance(typ, type):
            raise error.ProgrammingError(
                b'unable to construct type for %s' % iface
            )

        bases.append(typ)

    # type() allows you to use characters in type names that wouldn't be
    # recognized as Python symbols in source code. We abuse that to add
    # rich information about our constructed repo.
    reqlist = b','.join(sorted(requirements))
    name = pycompat.sysstr(b'derivedrepo:%s<%s>' % (wdirvfs.base, reqlist))

    cls = type(name, tuple(bases), {})

    return cls(
        baseui=baseui,
        ui=ui,
        origroot=path,
        wdirvfs=wdirvfs,
        hgvfs=hgvfs,
        requirements=requirements,
        supportedrequirements=supportedrequirements,
        sharedpath=storebasepath,
        store=store,
        cachevfs=cachevfs,
        wcachevfs=wcachevfs,
        features=features,
        intents=intents,
    )
665 665
666 666
def loadhgrc(ui, wdirvfs, hgvfs, requirements):
    """Read the repository's ``hgrc`` into ``ui``.

    Called while a repository is being opened so that per-repository
    configuration is available to the rest of the opening process.

    Returns ``True`` when the config file was read and ``False`` when
    reading it raised ``IOError``.

    ``requirements`` is not used here. Extensions may monkeypatch this
    function to change how per-repo configs are loaded, e.g. to pull
    configs from alternate files or sources.
    """
    try:
        hgrcpath = hgvfs.join(b'hgrc')
        ui.readconfig(hgrcpath, root=wdirvfs.base)
    except IOError:
        # Nothing could be read: report that no extra config was loaded.
        return False
    return True
684 684
685 685
def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
    """Hook run right after ``.hg/hgrc`` has been loaded.

    Runs during repository loading, after the .hg/hgrc file is loaded and
    before per-repo extensions are loaded. It can validate configs or add
    options (including extensions) based on requirements.

    The implementation here enables, with source ``autoload``, every
    extension tied to a present requirement unless the ``extensions``
    section already has an entry for it.
    """
    # Requirement -> extensions to enable automatically when it is present.
    autoextensions = {
        b'largefiles': [b'largefiles'],
        b'lfs': [b'lfs'],
    }

    for requirement in sorted(autoextensions):
        if requirement in requirements:
            for extname in autoextensions[requirement]:
                if ui.hasconfig(b'extensions', extname):
                    # Respect whatever the user configured explicitly.
                    continue
                ui.setconfig(b'extensions', extname, b'', source=b'autoload')
710 710
711 711
def gathersupportedrequirements(ui):
    """Compute the full set of requirements this process recognizes.

    The result starts from ``localrepository._basesupported``, is extended
    by the ``featuresetupfuncs`` callbacks whose module belongs to an
    extension loaded for ``ui``, and finally gains the requirements derived
    from the registered compression engines.
    """
    supported = set(localrepository._basesupported)

    # Only run feature setup functions coming from extensions that are
    # relevant to this ui instance.
    loadedmodules = {mod.__name__ for _name, mod in extensions.extensions(ui)}
    for setupfn in featuresetupfuncs:
        if setupfn.__module__ in loadedmodules:
            setupfn(ui, supported)

    # Requirements derived from registered compression engines.
    for name in util.compengines:
        engine = util.compengines[name]
        if not (engine.available() and engine.revlogheader()):
            continue
        supported.add(b'exp-compression-%s' % name)
        if engine.name() == b'zstd':
            supported.add(b'revlog-compression-zstd')

    return supported
734 734
735 735
def ensurerequirementsrecognized(requirements, supported):
    """Validate that a set of local requirements is recognized.

    ``requirements`` is the set of requirements to check and ``supported``
    is the set of requirements the currently loaded code recognizes.

    Returns ``None`` when every requirement is present in ``supported``.

    Raises ``error.RequirementError`` when an unrecognized requirement is
    empty or does not start with an alphanumeric character (the requires
    file is then reported as corrupt), or when any requirement is unknown.
    """
    missing = set()

    for requirement in requirements:
        if requirement in supported:
            continue

        # Slice rather than index so the check behaves the same on
        # Python 2 and Python 3 bytestrings.
        if not requirement or not requirement[0:1].isalnum():
            raise error.RequirementError(_(b'.hg/requires file is corrupt'))

        missing.add(requirement)

    if missing:
        raise error.RequirementError(
            _(b'repository requires features unknown to this Mercurial: %s')
            % b' '.join(sorted(missing)),
            hint=_(
                b'see https://mercurial-scm.org/wiki/MissingRequirement '
                b'for more information'
            ),
        )
765 765
766 766
def ensurerequirementscompatible(ui, requirements):
    """Check that recognized requirements can be used together.

    Some requirements conflict with others or need config options that are
    not enabled. This runs while a repository is being opened to make sure
    the requirement set is usable with the current configuration.

    Extensions can monkeypatch this function to perform additional
    checking.

    ``error.RepoError`` should be raised on failure.
    """
    if b'exp-sparse' not in requirements:
        return
    if sparse.enabled:
        return

    raise error.RepoError(
        _(
            b'repository is using sparse feature but '
            b'sparse is not enabled; enable the '
            b'"sparse" extensions to access'
        )
    )
788 788
789 789
def makestore(requirements, path, vfstype):
    """Build the store object matching the repository requirements.

    Without ``store`` a ``basicstore`` is used; with ``store`` but without
    ``fncache`` an ``encodedstore``; otherwise a ``fncachestore`` whose last
    argument tells whether ``dotencode`` is required.
    """
    if b'store' not in requirements:
        return storemod.basicstore(path, vfstype)

    if b'fncache' not in requirements:
        return storemod.encodedstore(path, vfstype)

    usedotencode = b'dotencode' in requirements
    return storemod.fncachestore(path, vfstype, usedotencode)
801 801
802 802
def resolvestorevfsoptions(ui, requirements, features):
    """Compute the options handed to the store vfs opener.

    Returns a dict, keyed by bytestrings, that influences the behavior of
    the storage layer.
    """
    options = {}

    if b'treemanifest' in requirements:
        options[b'treemanifest'] = True

    # experimental config: format.manifestcachesize
    cachesize = ui.configint(b'format', b'manifestcachesize')
    if cachesize is not None:
        options[b'manifestcachesize'] = cachesize

    # Unless a requirement says otherwise, the repo must be assumed to use
    # revlog version 0. That format is so old that opener options would not
    # do anything meaningful for it, so none are resolved in that case.
    hasmodernrevlog = (
        b'revlogv1' in requirements or REVLOGV2_REQUIREMENT in requirements
    )
    if hasmodernrevlog:
        options.update(resolverevlogstorevfsoptions(ui, requirements, features))
    else:
        # explicitly mark repo as using revlogv0
        options[b'revlogv0'] = True

    if COPIESSDC_REQUIREMENT in requirements:
        options[b'copies-storage'] = b'changeset-sidedata'
    elif ui.config(b'experimental', b'copies.write-to') in (
        b'changeset-only',
        b'compatibility',
    ):
        options[b'copies-storage'] = b'extra'

    return options
837 837
838 838
def resolverevlogstorevfsoptions(ui, requirements, features):
    """Compute the revlog-specific opener options.

    Combines ``requirements`` with ``format``, ``storage`` and
    ``experimental`` config values read from ``ui`` and returns them as a
    dict keyed by bytestrings. Raises ``error.Abort`` when a configured
    zlib or zstd compression level is outside its accepted range.
    """
    options = {b'flagprocessors': {}}

    if b'revlogv1' in requirements:
        options[b'revlogv1'] = True
    if REVLOGV2_REQUIREMENT in requirements:
        options[b'revlogv2'] = True

    if b'generaldelta' in requirements:
        options[b'generaldelta'] = True

    # experimental config: format.chunkcachesize
    cachesize = ui.configint(b'format', b'chunkcachesize')
    if cachesize is not None:
        options[b'chunkcachesize'] = cachesize

    options[b'deltabothparents'] = ui.configbool(
        b'storage', b'revlog.optimize-delta-parent-choice'
    )

    reusedelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
    reusedeltabase = False
    if reusedelta:
        reusedeltabase = ui.configbool(
            b'storage', b'revlog.reuse-external-delta-parent'
        )
        if reusedeltabase is None:
            # No explicit setting: derive it from the generaldelta config.
            reusedeltabase = not scmutil.gddeltaconfig(ui)
    options[b'lazydelta'] = reusedelta
    options[b'lazydeltabase'] = reusedeltabase

    maxspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
    if maxspan >= 0:
        options[b'maxdeltachainspan'] = maxspan

    mmapthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
    if mmapthreshold is not None:
        options[b'mmapindexthreshold'] = mmapthreshold

    sparseread = ui.configbool(b'experimental', b'sparse-read')
    densitythreshold = float(
        ui.config(b'experimental', b'sparse-read.density-threshold')
    )
    mingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
    options[b'with-sparse-read'] = sparseread
    options[b'sparse-read-density-threshold'] = densitythreshold
    options[b'sparse-read-min-gap-size'] = mingapsize

    usesparserevlog = SPARSEREVLOG_REQUIREMENT in requirements
    options[b'sparse-revlog'] = usesparserevlog
    if usesparserevlog:
        # The sparse-revlog requirement also turns generaldelta on.
        options[b'generaldelta'] = True

    options[b'side-data'] = SIDEDATA_REQUIREMENT in requirements

    # experimental config: format.maxchainlen
    defaultchainlen = (
        revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH if usesparserevlog else None
    )
    chainlen = ui.configint(b'format', b'maxchainlen', defaultchainlen)
    if chainlen is not None:
        options[b'maxchainlen'] = chainlen

    # Several compression engine requirements may co-exist because, strictly
    # speaking, revlog seems to support mixed compression styles.
    #
    # The compression used for new entries will be "the last one".
    compressionprefixes = (b'revlog-compression-', b'exp-compression-')
    for requirement in requirements:
        if requirement.startswith(compressionprefixes):
            options[b'compengine'] = requirement.split(b'-', 2)[2]

    zliblevel = ui.configint(b'storage', b'revlog.zlib.level')
    options[b'zlib.level'] = zliblevel
    if zliblevel is not None and not (0 <= zliblevel <= 9):
        msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
        raise error.Abort(msg % zliblevel)

    zstdlevel = ui.configint(b'storage', b'revlog.zstd.level')
    options[b'zstd.level'] = zstdlevel
    if zstdlevel is not None and not (0 <= zstdlevel <= 22):
        msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
        raise error.Abort(msg % zstdlevel)

    if repository.NARROW_REQUIREMENT in requirements:
        options[b'enableellipsis'] = True

    return options
931 931
932 932
def makemain(**kwargs):
    """Return the type implementing ``ilocalrepositorymain``.

    The keyword arguments are accepted but not consulted.
    """
    maintype = localrepository
    return maintype
936 936
937 937
@interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
class revlogfilestorage(object):
    """File storage when using revlogs."""

    def file(self, path):
        """Return the ``filelog`` for ``path``.

        A single leading ``/`` in ``path`` is dropped before the filelog is
        created on the store vfs.
        """
        # ``path[0] == b'/'`` never matches on Python 3, where indexing a
        # bytestring yields an int, and it raises IndexError for an empty
        # path. ``startswith`` behaves correctly in both cases.
        if path.startswith(b'/'):
            path = path[1:]

        return filelog.filelog(self.svfs, path)
947 947
948 948
@interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
class revlognarrowfilestorage(object):
    """File storage when using revlogs and narrow files."""

    def file(self, path):
        """Return the ``narrowfilelog`` for ``path``.

        A single leading ``/`` in ``path`` is dropped before the filelog is
        created on the store vfs with the store narrow matcher.
        """
        # ``path[0] == b'/'`` never matches on Python 3, where indexing a
        # bytestring yields an int, and it raises IndexError for an empty
        # path. ``startswith`` behaves correctly in both cases.
        if path.startswith(b'/'):
            path = path[1:]

        return filelog.narrowfilelog(self.svfs, path, self._storenarrowmatch)
958 958
959 959
def makefilestorage(requirements, features, **kwargs):
    """Return the type implementing ``ilocalrepositoryfilestorage``.

    Registers the revlog file storage and stream clone features in
    ``features`` and picks the narrow-aware storage type when the narrow
    requirement is present.
    """
    for feature in (
        repository.REPO_FEATURE_REVLOG_FILE_STORAGE,
        repository.REPO_FEATURE_STREAM_CLONE,
    ):
        features.add(feature)

    isnarrow = repository.NARROW_REQUIREMENT in requirements
    return revlognarrowfilestorage if isnarrow else revlogfilestorage
969 969
970 970
# Repository interfaces paired with the factory producing a type for each.
# ``makelocalrepository()`` calls them in order to iteratively derive the
# final type of a local repository instance. Every factory is wrapped in a
# lambda so the module-level function is looked up at call time instead of
# being referenced here, which lets those functions be wrapped.
REPO_INTERFACES = [
    (repository.ilocalrepositorymain, lambda: makemain),
    (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
]
980 980
981 981
982 982 @interfaceutil.implementer(repository.ilocalrepositorymain)
983 983 class localrepository(object):
984 984 """Main class for representing local repositories.
985 985
986 986 All local repositories are instances of this class.
987 987
988 988 Constructed on its own, instances of this class are not usable as
989 989 repository objects. To obtain a usable repository object, call
990 990 ``hg.repository()``, ``localrepo.instance()``, or
991 991 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
992 992 ``instance()`` adds support for creating new repositories.
993 993 ``hg.repository()`` adds more extension integration, including calling
994 994 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
995 995 used.
996 996 """
997 997
998 998 # obsolete experimental requirements:
999 999 # - manifestv2: An experimental new manifest format that allowed
1000 1000 # for stem compression of long paths. Experiment ended up not
1001 1001 # being successful (repository sizes went up due to worse delta
1002 1002 # chains), and the code was deleted in 4.6.
1003 1003 supportedformats = {
1004 1004 b'revlogv1',
1005 1005 b'generaldelta',
1006 1006 b'treemanifest',
1007 1007 COPIESSDC_REQUIREMENT,
1008 1008 REVLOGV2_REQUIREMENT,
1009 1009 SIDEDATA_REQUIREMENT,
1010 1010 SPARSEREVLOG_REQUIREMENT,
1011 1011 bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT,
1012 1012 }
1013 1013 _basesupported = supportedformats | {
1014 1014 b'store',
1015 1015 b'fncache',
1016 1016 b'shared',
1017 1017 b'relshared',
1018 1018 b'dotencode',
1019 1019 b'exp-sparse',
1020 1020 b'internal-phase',
1021 1021 }
1022 1022
1023 1023 # list of prefix for file which can be written without 'wlock'
1024 1024 # Extensions should extend this list when needed
1025 1025 _wlockfreeprefix = {
1026 1026 # We might consider requiring 'wlock' for the next
1027 1027 # two, but pretty much all the existing code assume
1028 1028 # wlock is not needed so we keep them excluded for
1029 1029 # now.
1030 1030 b'hgrc',
1031 1031 b'requires',
1032 1032 # XXX cache is a complicated business someone
1033 1033 # should investigate this in depth at some point
1034 1034 b'cache/',
1035 1035 # XXX shouldn't be dirstate covered by the wlock?
1036 1036 b'dirstate',
1037 1037 # XXX bisect was still a bit too messy at the time
1038 1038 # this changeset was introduced. Someone should fix
1039 1039 # the remaining bit and drop this line
1040 1040 b'bisect.state',
1041 1041 }
1042 1042
def __init__(
    self,
    baseui,
    ui,
    origroot,
    wdirvfs,
    hgvfs,
    requirements,
    supportedrequirements,
    sharedpath,
    store,
    cachevfs,
    wcachevfs,
    features,
    intents=None,
):
    """Initialize a local repository from already-resolved components.

    Most callers should obtain a repository object through
    ``hg.repository()``, ``localrepo.instance()`` or
    ``localrepo.makelocalrepository()`` instead of calling this directly.

    Arguments:

    baseui
       ``ui.ui`` instance that the ``ui`` argument was based off of.

    ui
       ``ui.ui`` instance for use by the repository.

    origroot
       ``bytes`` path to working directory root of this repository.

    wdirvfs
       ``vfs.vfs`` rooted at the working directory.

    hgvfs
       ``vfs.vfs`` rooted at .hg/

    requirements
       ``set`` of bytestrings representing repository opening requirements.

    supportedrequirements
       ``set`` of bytestrings representing repository requirements that we
       know how to open. May be a superset of ``requirements``.

    sharedpath
       ``bytes`` defining the path to the storage base directory. Points to
       a ``.hg/`` directory somewhere.

    store
       ``store.basicstore`` (or derived) instance providing access to
       versioned storage.

    cachevfs
       ``vfs.vfs`` used for cache files.

    wcachevfs
       ``vfs.vfs`` used for cache files related to the working copy.

    features
       ``set`` of bytestrings defining features/capabilities of this
       instance.

    intents
       ``set`` of system strings indicating what this repo will be used
       for. Not stored by this constructor.
    """

    def lockchecksenabled():
        # Either devel option turns on the lock-checking vfs wards.
        return self.ui.configbool(
            b'devel', b'all-warnings'
        ) or self.ui.configbool(b'devel', b'check-locks')

    self.baseui = baseui
    self.ui = ui
    self.origroot = origroot

    # vfs rooted at working directory.
    self.wvfs = wdirvfs
    self.root = wdirvfs.base

    # vfs rooted at .hg/. Used to access most non-store paths.
    self.vfs = hgvfs
    self.path = hgvfs.base

    self.requirements = requirements
    self.supported = supportedrequirements
    self.sharedpath = sharedpath
    self.store = store
    self.cachevfs = cachevfs
    self.wcachevfs = wcachevfs
    self.features = features

    self.filtername = None

    if lockchecksenabled():
        self.vfs.audit = self._getvfsward(self.vfs.audit)

    # Callbacks used to shape the phases when no phase data were found.
    # Each has the form: func(repo, roots) --> processed root.
    # Extensions fill this list during repo setup.
    self._phasedefaults = []

    color.setup(self.ui)

    self.spath = self.store.path
    self.svfs = self.store.vfs
    self.sjoin = self.store.join
    if lockchecksenabled():
        if util.safehasattr(self.svfs, b'vfs'):
            # this is a filtervfs: ward the vfs it wraps
            wardedvfs = self.svfs.vfs
        else:
            # standard vfs
            wardedvfs = self.svfs
        wardedvfs.audit = self._getsvfsward(wardedvfs.audit)

    self._dirstatevalidatewarned = False

    self._branchcaches = branchmap.BranchMapCache()
    self._revbranchcache = None
    self._filterpats = {}
    self._datafilters = {}
    self._transref = None
    self._lockref = None
    self._wlockref = None

    # Cache for various files under .hg/ that tracks file changes (used by
    # the filecache decorator).
    #
    # Maps a property name to its util.filecacheentry
    self._filecache = {}

    # Holds sets of revisions to be filtered. It should be cleared when
    # something might have changed the filter value:
    # - new changesets,
    # - phase change,
    # - new obsolescence marker,
    # - working directory parent change,
    # - bookmark changes
    self.filteredrevcache = {}

    # post-dirstate-status hooks
    self._postdsstatus = []

    # generic mapping between names and nodes
    self.names = namespaces.namespaces()

    # Key to signature value.
    self._sparsesignaturecache = {}
    # Signature to cached matcher instance.
    self._sparsematchercache = {}

    self._extrafilterid = repoview.extrafilter(ui)

    if COPIESSDC_REQUIREMENT in self.requirements:
        self.filecopiesmode = b'changeset-sidedata'
    else:
        self.filecopiesmode = None
1191 1191
1192 1192 def _getvfsward(self, origfunc):
1193 1193 """build a ward for self.vfs"""
1194 1194 rref = weakref.ref(self)
1195 1195
1196 1196 def checkvfs(path, mode=None):
1197 1197 ret = origfunc(path, mode=mode)
1198 1198 repo = rref()
1199 1199 if (
1200 1200 repo is None
1201 1201 or not util.safehasattr(repo, b'_wlockref')
1202 1202 or not util.safehasattr(repo, b'_lockref')
1203 1203 ):
1204 1204 return
1205 1205 if mode in (None, b'r', b'rb'):
1206 1206 return
1207 1207 if path.startswith(repo.path):
1208 1208 # truncate name relative to the repository (.hg)
1209 1209 path = path[len(repo.path) + 1 :]
1210 1210 if path.startswith(b'cache/'):
1211 1211 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1212 1212 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1213 1213 if path.startswith(b'journal.') or path.startswith(b'undo.'):
1214 1214 # journal is covered by 'lock'
1215 1215 if repo._currentlock(repo._lockref) is None:
1216 1216 repo.ui.develwarn(
1217 1217 b'write with no lock: "%s"' % path,
1218 1218 stacklevel=3,
1219 1219 config=b'check-locks',
1220 1220 )
1221 1221 elif repo._currentlock(repo._wlockref) is None:
1222 1222 # rest of vfs files are covered by 'wlock'
1223 1223 #
1224 1224 # exclude special files
1225 1225 for prefix in self._wlockfreeprefix:
1226 1226 if path.startswith(prefix):
1227 1227 return
1228 1228 repo.ui.develwarn(
1229 1229 b'write with no wlock: "%s"' % path,
1230 1230 stacklevel=3,
1231 1231 config=b'check-locks',
1232 1232 )
1233 1233 return ret
1234 1234
1235 1235 return checkvfs
1236 1236
1237 1237 def _getsvfsward(self, origfunc):
1238 1238 """build a ward for self.svfs"""
1239 1239 rref = weakref.ref(self)
1240 1240
1241 1241 def checksvfs(path, mode=None):
1242 1242 ret = origfunc(path, mode=mode)
1243 1243 repo = rref()
1244 1244 if repo is None or not util.safehasattr(repo, b'_lockref'):
1245 1245 return
1246 1246 if mode in (None, b'r', b'rb'):
1247 1247 return
1248 1248 if path.startswith(repo.sharedpath):
1249 1249 # truncate name relative to the repository (.hg)
1250 1250 path = path[len(repo.sharedpath) + 1 :]
1251 1251 if repo._currentlock(repo._lockref) is None:
1252 1252 repo.ui.develwarn(
1253 1253 b'write with no lock: "%s"' % path, stacklevel=4
1254 1254 )
1255 1255 return ret
1256 1256
1257 1257 return checksvfs
1258 1258
def close(self):
    """Write out the caches held by this repository."""
    self._writecaches()
1261 1261
1262 1262 def _writecaches(self):
1263 1263 if self._revbranchcache:
1264 1264 self._revbranchcache.write()
1265 1265
1266 1266 def _restrictcapabilities(self, caps):
1267 1267 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1268 1268 caps = set(caps)
1269 1269 capsblob = bundle2.encodecaps(
1270 1270 bundle2.getrepocaps(self, role=b'client')
1271 1271 )
1272 1272 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1273 1273 return caps
1274 1274
def _writerequirements(self):
    """Write ``self.requirements`` through the repository vfs."""
    hgvfs = self.vfs
    scmutil.writerequires(hgvfs, self.requirements)
1277 1277
1278 1278 # Don't cache auditor/nofsauditor, or you'll end up with reference cycle:
1279 1279 # self -> auditor -> self._checknested -> self
1280 1280
@property
def auditor(self):
    """Path auditor rooted at the working directory.

    A new auditor is built on every access. It is only used by
    context.workingctx.match in order to detect files in subrepos.
    """
    nestedcheck = self._checknested
    return pathutil.pathauditor(self.root, callback=nestedcheck)
1286 1286
@property
def nofsauditor(self):
    """Path auditor built with ``realfs=False`` and ``cached=True``.

    A new auditor is built on every access. It is only used by
    context.basectx.match in order to detect files in subrepos.
    """
    nestedcheck = self._checknested
    return pathutil.pathauditor(
        self.root, callback=nestedcheck, realfs=False, cached=True
    )
1294 1294
1295 1295 def _checknested(self, path):
1296 1296 """Determine if path is a legal nested repository."""
1297 1297 if not path.startswith(self.root):
1298 1298 return False
1299 1299 subpath = path[len(self.root) + 1 :]
1300 1300 normsubpath = util.pconvert(subpath)
1301 1301
1302 1302 # XXX: Checking against the current working copy is wrong in
1303 1303 # the sense that it can reject things like
1304 1304 #
1305 1305 # $ hg cat -r 10 sub/x.txt
1306 1306 #
1307 1307 # if sub/ is no longer a subrepository in the working copy
1308 1308 # parent revision.
1309 1309 #
1310 1310 # However, it can of course also allow things that would have
1311 1311 # been rejected before, such as the above cat command if sub/
1312 1312 # is a subrepository now, but was a normal directory before.
1313 1313 # The old path auditor would have rejected by mistake since it
1314 1314 # panics when it sees sub/.hg/.
1315 1315 #
1316 1316 # All in all, checking against the working copy seems sensible
1317 1317 # since we want to prevent access to nested repositories on
1318 1318 # the filesystem *now*.
1319 1319 ctx = self[None]
1320 1320 parts = util.splitpath(subpath)
1321 1321 while parts:
1322 1322 prefix = b'/'.join(parts)
1323 1323 if prefix in ctx.substate:
1324 1324 if prefix == normsubpath:
1325 1325 return True
1326 1326 else:
1327 1327 sub = ctx.sub(prefix)
1328 1328 return sub.checknested(subpath[len(prefix) + 1 :])
1329 1329 else:
1330 1330 parts.pop()
1331 1331 return False
1332 1332
def peer(self):
    """Return a new ``localpeer`` wrapping this repository."""
    # not cached to avoid reference cycle
    return localpeer(self)
1335 1335
def unfiltered(self):
    """Return the unfiltered version of the repository.

    This implementation returns the repository itself; it is intended to be
    overwritten by filtered repo."""
    return self
1341 1341
def filtered(self, name, visibilityexceptions=None):
    """Return a filtered version of a repository

    ``name`` identifies the requested view, and the returned repoview is
    set "exactly" to that view.

    Filtering is never applied recursively: calling
    `repo.filtered("served")` returns a repoview using the "served" view
    whatever view `repo` itself was using. In other words, there is always
    only one level of `repoview` "filtering".

    When an extra filter id is set and ``name`` contains no ``%``, the id is
    appended to ``name`` after a ``%`` separator.
    """
    extraid = self._extrafilterid
    if extraid is not None and b'%' not in name:
        name += b'%' + extraid

    viewtype = repoview.newtype(self.unfiltered().__class__)
    return viewtype(self, name, visibilityexceptions)
1359 1359
@mixedrepostorecache(
    (b'bookmarks', b'plain'),
    (b'bookmarks.current', b'plain'),
    (b'bookmarks', b''),
    (b'00changelog.i', b''),
)
def _bookmarks(self):
    """Return the bookmark store, loaded against a fresh changelog."""
    # The files involved in a transaction cannot be written atomically
    # (with the current repository format), which leaves room for a race:
    #
    # 1) changelog content A is read
    # 2) outside transaction updates the changelog to content B
    # 3) outside transaction updates the bookmark file referring to B
    # 4) bookmarks file content is read and filtered against changelog-A
    #
    # When this happens, bookmarks against nodes missing from A are
    # dropped.
    #
    # This is not great during a read, and it is worse during a write
    # because the bookmarks to the "unknown" nodes are dropped for good.
    # Writes happen within locks, though, and that locking makes a race
    # free consistent read possible: data read from disk before locking is
    # "invalidated" right after the locks are taken. These invalidations
    # are "light": the `filecache` mechanism keeps the data in memory and
    # reuses it if the underlying files did not change, which avoids
    # parsing the same data several times.
    #
    # Unfortunately, in the case described above the files tracked by the
    # bookmarks file cache might not have changed while the in-memory
    # content is still "wrong", because an older changelog content was used
    # to process the on-disk data. After locking, the changelog would be
    # refreshed but `_bookmarks` would be preserved. Adding `00changelog.i`
    # to the list of tracked files is not enough: by the time the content
    # of `_bookmarks` is built in (4), the changelog file has already
    # diverged from the content used for loading `changelog` in (1).
    #
    # To prevent the issue, the changelog is explicitly reloaded while
    # computing `_bookmarks`. The data race can still happen without the
    # lock (with a narrower window), but it no longer goes undetected
    # during the lock time refresh.
    #
    # The new schedule is as follows:
    #
    # 1) filecache logic detects that `_bookmarks` needs to be computed
    # 2) cachestat for `bookmarks` and `changelog` are captured (for book)
    # 3) `changelog` filecache is forced to be tested
    # 4) cachestat for `changelog` are captured (for changelog)
    # 5) `_bookmarks` is computed and cached
    #
    # Step (3) ensures the changelog is at least as recent as the cache
    # stat computed in (1). As a result, at locking time:
    # * if the changelog did not change since (1) -> the data is reused
    # * otherwise -> the bookmarks get refreshed.
    self._refreshchangelog()
    store = bookmarks.bmstore(self)
    return store
1417 1417
1418 1418 def _refreshchangelog(self):
1419 1419 """make sure the in memory changelog match the on-disk one"""
1420 1420 if 'changelog' in vars(self) and self.currenttransaction() is None:
1421 1421 del self.changelog
1422 1422
1423 1423 @property
1424 1424 def _activebookmark(self):
1425 1425 return self._bookmarks.active
1426 1426
1427 1427 # _phasesets depend on changelog. what we need is to call
1428 1428 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1429 1429 # can't be easily expressed in filecache mechanism.
@storecache(b'phaseroots', b'00changelog.i')
def _phasecache(self):
    """Build the phase cache using the registered phase defaults."""
    defaults = self._phasedefaults
    return phases.phasecache(self, defaults)
1433 1433
@storecache(b'obsstore')
def obsstore(self):
    """Build the obsolescence store for this repository."""
    ui = self.ui
    return obsolete.makestore(ui, self)
1437 1437
@storecache(b'00changelog.i')
def changelog(self):
    """Load the changelog from the store.

    The store is told whether pending data may exist for this root.
    """
    openchangelog = self.store.changelog
    maypending = txnutil.mayhavepending(self.root)
    return openchangelog(maypending)
1441 1441
@storecache(b'00manifest.i')
def manifestlog(self):
    """Load the manifest log from the store with the store narrow matcher."""
    openmanifestlog = self.store.manifestlog
    return openmanifestlog(self, self._storenarrowmatch)
1445 1445
@repofilecache(b'dirstate')
def dirstate(self):
    """Build the dirstate through the ``_makedirstate()`` extension point."""
    made = self._makedirstate()
    return made
1449 1449
def _makedirstate(self):
    """Extension point for wrapping the dirstate per-repo."""

    def sparsematchfn():
        # Evaluated on demand so the matcher is only built when asked for.
        return sparse.matcher(self)

    return dirstate.dirstate(
        self.vfs, self.ui, self.root, self._dirstatevalidate, sparsematchfn
    )
1457 1457
1458 1458 def _dirstatevalidate(self, node):
1459 1459 try:
1460 1460 self.changelog.rev(node)
1461 1461 return node
1462 1462 except error.LookupError:
1463 1463 if not self._dirstatevalidatewarned:
1464 1464 self._dirstatevalidatewarned = True
1465 1465 self.ui.warn(
1466 1466 _(b"warning: ignoring unknown working parent %s!\n")
1467 1467 % short(node)
1468 1468 )
1469 1469 return nullid
1470 1470
@storecache(narrowspec.FILENAME)
def narrowpats(self):
    """Matcher patterns of this repository's narrowspec.

    A tuple of (includes, excludes).
    """
    patterns = narrowspec.load(self)
    return patterns
1478 1478
@storecache(narrowspec.FILENAME)
def _storenarrowmatch(self):
    """Matcher built from the narrowspec patterns.

    An always-matcher is returned when the narrow requirement is absent.
    """
    if repository.NARROW_REQUIREMENT in self.requirements:
        includes, excludes = self.narrowpats
        return narrowspec.match(
            self.root, include=includes, exclude=excludes
        )
    return matchmod.always()
1485 1485
@storecache(narrowspec.FILENAME)
def _narrowmatch(self):
    """Return the narrowspec matcher after checking the working copy spec.

    ``matchmod.always()`` is returned when the narrow requirement is not
    among ``self.requirements``; otherwise
    ``narrowspec.checkworkingcopynarrowspec`` runs before the matcher is
    built from ``self.narrowpats``.
    """
    if repository.NARROW_REQUIREMENT in self.requirements:
        narrowspec.checkworkingcopynarrowspec(self)
        include, exclude = self.narrowpats
        return narrowspec.match(self.root, include=include, exclude=exclude)
    return matchmod.always()
1493 1493
def narrowmatch(self, match=None, includeexact=False):
    """Matcher corresponding to the repo's narrowspec.

    If `match` is given, then that will be intersected with the narrow
    matcher.

    If `includeexact` is True, then any exact matches from `match` will
    be included even if they're outside the narrowspec.
    """
    if not match:
        return self._narrowmatch

    if includeexact and not self._narrowmatch.always():
        # do not exclude explicitly-specified paths so that they can
        # be warned later on
        exact = matchmod.exact(match.files())
        widened = matchmod.unionmatcher([self._narrowmatch, exact])
        return matchmod.intersectmatchers(match, widened)

    return matchmod.intersectmatchers(match, self._narrowmatch)
1512 1512
def setnarrowpats(self, newincludes, newexcludes):
    """Save new narrowspec patterns, then invalidate this repo's caches.

    ``self.invalidate`` is called with ``clearfilecache=True``.
    """
    narrowspec.save(self, newincludes, newexcludes)
    self.invalidate(clearfilecache=True)
1516 1516
@util.propertycache
def _quick_access_changeid(self):
    """A helper dictionary for __getitem__ calls.

    It maps the symbols we can recognise right away, without further
    processing, to their ``(rev, node)`` pair.
    """
    nullentry = (nullrev, nullid)
    return {
        b'null': nullentry,
        nullrev: nullentry,
        nullid: nullentry,
    }
1529 1529
def __getitem__(self, changeid):
    """Return the context object(s) designated by ``changeid``.

    ``None`` yields a working context, an existing ``context.basectx`` is
    returned unchanged, and a slice yields a list of contexts. Other values
    are resolved to a ``(rev, node)`` pair and wrapped in a
    ``context.changectx``.

    Raises error.FilteredRepoLookupError or error.RepoLookupError when the
    lookup fails, error.Abort when a 20-byte id is unknown but is one of
    the dirstate parents, and error.ProgrammingError for an unsupported
    changeid.
    """
    # special cases first
    if changeid is None:
        return context.workingctx(self)
    if isinstance(changeid, context.basectx):
        return changeid

    # multiple revisions
    if isinstance(changeid, slice):
        # wdirrev isn't contiguous so the slice shouldn't include it
        ctxs = []
        for i in pycompat.xrange(*changeid.indices(len(self))):
            if i not in self.changelog.filteredrevs:
                ctxs.append(self[i])
        return ctxs

    # values that can be answered without any lookup
    shortcut = self._quick_access_changeid.get(changeid)
    if shortcut is not None:
        rev, node = shortcut
        return context.changectx(self, rev, node, maybe_filtered=False)
    if changeid == b'tip':
        node = self.changelog.tip()
        rev = self.changelog.rev(node)
        return context.changectx(self, rev, node)

    # arbitrary values
    try:
        if isinstance(changeid, int):
            node = self.changelog.node(changeid)
            rev = changeid
        elif changeid == b'.':
            # this is a hack to delay/avoid loading obsmarkers
            # when we know that '.' won't be hidden
            node = self.dirstate.p1()
            rev = self.unfiltered().changelog.rev(node)
        elif len(changeid) == 20:
            try:
                node = changeid
                rev = self.changelog.rev(changeid)
            except error.FilteredLookupError:
                # the outer handler formats changeid into its message
                changeid = hex(changeid)
                raise
            except LookupError:
                # check if it might have come from damaged dirstate
                #
                # XXX we could avoid the unfiltered if we had a recognizable
                # exception for filtered changeset access
                if (
                    self.local()
                    and changeid in self.unfiltered().dirstate.parents()
                ):
                    msg = _(b"working directory has unknown parent '%s'!")
                    raise error.Abort(msg % short(changeid))
                # the outer handler formats changeid into its message
                changeid = hex(changeid)
                raise

        elif len(changeid) == 40:
            node = bin(changeid)
            rev = self.changelog.rev(node)
        else:
            raise error.ProgrammingError(
                b"unsupported changeid '%s' of type %s"
                % (changeid, pycompat.bytestr(type(changeid)))
            )

        return context.changectx(self, rev, node)

    except (error.FilteredIndexError, error.FilteredLookupError):
        raise error.FilteredRepoLookupError(
            _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
        )
    except (IndexError, LookupError):
        raise error.RepoLookupError(
            _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
        )
    except error.WdirUnsupported:
        return context.workingctx(self)
1608 1608
def __contains__(self, changeid):
    """True if the given changeid exists

    error.AmbiguousPrefixLookupError is raised if an ambiguous node
    specified.
    """
    try:
        self[changeid]
    except error.RepoLookupError:
        return False
    return True
1620 1620
def __nonzero__(self):
    """A repository object is always truthy."""
    return True


# Python 3 spelling of the same hook
__bool__ = __nonzero__
1625 1625
def __len__(self):
    """Return the length of the unfiltered changelog."""
    # no need to pay the cost of repoview.changelog
    return len(self.unfiltered().changelog)
1630 1630
def __iter__(self):
    """Iterate over ``self.changelog``."""
    cl = self.changelog
    return iter(cl)
1633 1633
def revs(self, expr, *args):
    """Find revisions matching a revset.

    The revset is specified as a string ``expr`` that may contain
    %-formatting to escape certain types. See ``revsetlang.formatspec``.

    Revset aliases from the configuration are not expanded. To expand
    user aliases, consider calling ``scmutil.revrange()`` or
    ``repo.anyrevs([expr], user=True)``.

    Returns a revset.abstractsmartset, which is a list-like interface
    that contains integer revisions.
    """
    matcher = revset.makematcher(revsetlang.spectree(expr, *args))
    return matcher(self)
1649 1649
def set(self, expr, *args):
    """Find revisions matching a revset and emit changectx instances.

    This is a convenience wrapper around ``revs()`` that iterates the
    result and is a generator of changectx instances.

    Revset aliases from the configuration are not expanded. To expand
    user aliases, consider calling ``scmutil.revrange()``.
    """
    matched = self.revs(expr, *args)
    for rev in matched:
        yield self[rev]
1661 1661
def anyrevs(self, specs, user=False, localalias=None):
    """Find revisions matching one of the given revsets.

    Revset aliases from the configuration are not expanded by default. To
    expand user aliases, specify ``user=True``. To provide some local
    definitions overriding user aliases, set ``localalias`` to
    ``{name: definitionstring}``.
    """
    # the lone 'null' spec is answered directly
    if specs == [b'null']:
        return revset.baseset([nullrev])

    if not user:
        matcher = revset.matchany(None, specs, localalias=localalias)
    else:
        matcher = revset.matchany(
            self.ui,
            specs,
            lookup=revset.lookupfn(self),
            localalias=localalias,
        )
    return matcher(self)
1682 1682
def url(self):
    """Return ``b'file:'`` followed by the repository root."""
    root = self.root
    return b'file:' + root
1685 1685
def hook(self, name, throw=False, **args):
    """Call a hook, passing this repo instance.

    This is a convenience method to aid invoking hooks. Extensions likely
    won't call this unless they have registered a custom hook or are
    replacing code that is expected to call a hook.
    """
    runhook = hook.hook
    return runhook(self.ui, self, name, throw, **args)
1694 1694
@filteredpropertycache
def _tagscache(self):
    """Returns a tagscache object that contains various tags related
    caches."""

    # This simplifies its cache management by having one decorated
    # function (this one) and the rest simply fetch things from it.
    class tagscache(object):
        def __init__(self):
            # These two define the set of tags for this repository. tags
            # maps tag name to node; tagtypes maps tag name to 'global' or
            # 'local'. (Global tags are defined by .hgtags across all
            # heads, and local tags are defined in .hg/localtags.)
            # They constitute the in-memory cache of tags.
            self.tags = None
            self.tagtypes = None

            # filled in lazily by nodetags() and tagslist()
            self.nodetagscache = None
            self.tagslist = None

    holder = tagscache()
    holder.tags, holder.tagtypes = self._findtags()
    return holder
1717 1717
def tags(self):
    """Return a mapping of tag to node.

    Tags whose node the changelog cannot resolve are left out.
    """
    if self.changelog.filteredrevs:
        alltags, unusedtypes = self._findtags()
    else:
        alltags = self._tagscache.tags

    rev = self.changelog.rev
    known = {}
    for name, node in pycompat.iteritems(alltags):
        try:
            rev(node)
        except (error.LookupError, ValueError):
            # ignore tags to unknown nodes
            continue
        known[name] = node
    return known
1734 1734
def _findtags(self):
    """Do the hard work of finding tags.

    Return a pair of dicts (tags, tagtypes) where tags maps tag name to
    node, and tagtypes maps tag name to a string like 'global' or 'local'.
    Subclasses or extensions are free to add their own tags, but should
    be aware that the returned dicts will be retained for the duration of
    the localrepo object.
    """

    # XXX what tagtype should subclasses/extensions use?  Currently
    # mq and bookmarks add tags, but do not set the tagtype at all.
    # Should each extension invent its own tag type?  Should there
    # be one tagtype for all such "virtual" tags?  Or is the status
    # quo fine?

    # map tag name to (node, hist)
    alltags = tagsmod.findglobaltags(self.ui, self)
    # map tag name to tag type
    tagtypes = {tag: b'global' for tag in alltags}

    tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)

    # Build the return dicts.  Have to re-encode tag names because
    # the tags module always uses UTF-8 (in order not to lose info
    # writing to the cache), but the rest of Mercurial wants them in
    # local encoding.
    tolocal = encoding.tolocal
    tags = {}
    for name, (node, hist) in pycompat.iteritems(alltags):
        if node == nullid:
            continue
        tags[tolocal(name)] = node
    tags[b'tip'] = self.changelog.tip()

    localtypes = {
        tolocal(name): value for name, value in pycompat.iteritems(tagtypes)
    }
    return (tags, localtypes)
1772 1772
def tagtype(self, tagname):
    """Return the type of the given tag.

    The result can be:

    'local'  : a local tag
    'global' : a global tag
    None     : tag does not exist
    """
    types = self._tagscache.tagtypes
    return types.get(tagname)
1783 1783
def tagslist(self):
    """Return a list of (tag, node) pairs ordered by revision.

    The list is stored on ``self._tagscache`` and rebuilt whenever the
    stored value is falsy.
    """
    if not self._tagscache.tagslist:
        byrev = sorted(
            (self.changelog.rev(node), name, node)
            for name, node in pycompat.iteritems(self.tags())
        )
        self._tagscache.tagslist = [(name, node) for rev, name, node in byrev]

    return self._tagscache.tagslist
1793 1793
def nodetags(self, node):
    """Return the tags associated with a node.

    The node -> sorted tag names mapping is stored on ``self._tagscache``
    and rebuilt whenever the stored value is falsy.
    """
    if not self._tagscache.nodetagscache:
        bynode = {}
        for name, tagnode in pycompat.iteritems(self._tagscache.tags):
            bynode.setdefault(tagnode, []).append(name)
        for names in pycompat.itervalues(bynode):
            names.sort()
        self._tagscache.nodetagscache = bynode
    return self._tagscache.nodetagscache.get(node, [])
1804 1804
def nodebookmarks(self, node):
    """Return the list of bookmarks pointing to the specified node."""
    marks = self._bookmarks
    return marks.names(node)
1808 1808
def branchmap(self):
    """Returns a dictionary {branch: [branchheads]} with branchheads
    ordered by increasing revision number."""
    caches = self._branchcaches
    return caches[self]
1813 1813
@unfilteredmethod
def revbranchcache(self):
    """Return the rev-branch cache, creating it when the stored one is falsy."""
    cache = self._revbranchcache
    if not cache:
        cache = branchmap.revbranchcache(self.unfiltered())
        self._revbranchcache = cache
    return cache
1819 1819
def branchtip(self, branch, ignoremissing=False):
    """Return the tip node for a given branch.

    If ignoremissing is True, then this method will not raise an error.
    This is helpful for callers that only expect None for a missing branch
    (e.g. namespace).
    """
    try:
        return self.branchmap().branchtip(branch)
    except KeyError:
        if ignoremissing:
            return None
        raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
1835 1835
def lookup(self, key):
    """Resolve ``key`` through ``scmutil.revsymbol`` and return its node.

    Raises error.RepoLookupError when the resolved context has no node.
    """
    node = scmutil.revsymbol(self, key).node()
    if node is not None:
        return node
    raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
1841 1841
def lookupbranch(self, key):
    """Return the branch designated by ``key``.

    ``key`` is returned unchanged when the branchmap has a branch of that
    name; otherwise it is resolved as a revision symbol and that
    revision's branch is returned.
    """
    if not self.branchmap().hasbranch(key):
        return scmutil.revsymbol(self, key).branch()
    return key
1847 1847
def known(self, nodes):
    """Return a list of booleans, one per entry of ``nodes``.

    An entry is True when the changelog index has a revision for the node
    and that revision is not in the changelog's ``filteredrevs``.
    """
    cl = self.changelog
    get_rev = cl.index.get_rev
    filtered = cl.filteredrevs

    def isknown(node):
        rev = get_rev(node)
        return rev is not None and rev not in filtered

    return [isknown(n) for n in nodes]
1858 1858
def local(self):
    """Return this repository object itself."""
    return self
1861 1861
def publishing(self):
    """Return the boolean value of the ``phases.publish`` config option."""
    # it's safe (and desirable) to trust the publish flag unconditionally
    # so that we don't finalize changes shared between users via ssh or nfs
    ui = self.ui
    return ui.configbool(b'phases', b'publish', untrusted=True)
1866 1866
def cancopy(self):
    """Tell whether this repository can be copied.

    False when ``local()`` is falsy, True when the repo is not
    publishing; otherwise True only if the ``visible`` filtered view has
    no filtered revisions.
    """
    # so statichttprepo's override of local() works
    if self.local():
        if self.publishing():
            # if publishing we can't copy if there is filtered content
            visible = self.filtered(b'visible')
            return not visible.changelog.filteredrevs
        return True
    return False
1875 1875
def shared(self):
    """The type of shared repository (None if not shared)."""
    return b'store' if self.sharedpath != self.path else None
1881 1881
def wjoin(self, f, *insidef):
    """Join ``f`` (and any further components) onto the repository root."""
    join = self.vfs.reljoin
    return join(self.root, f, *insidef)
1884 1884
def setparents(self, p1, p2=nullid):
    """Set the dirstate parents and fix up copy records.

    Runs inside ``self.dirstate.parentchange()``. Copy records returned by
    ``dirstate.setparents`` are re-applied for files missing from ``p1``
    whose source is in ``p1``. When ``p2`` is ``nullid``, remaining copy
    records with neither file nor source in ``p1`` are cleared.
    """
    ds = self.dirstate
    with ds.parentchange():
        copies = ds.setparents(p1, p2)
        pctx = self[p1]
        if copies:
            # Adjust copy records, the dirstate cannot do it, it
            # requires access to parents manifests. Preserve them
            # only for entries added to first parent.
            for dest in copies:
                if dest in pctx:
                    continue
                source = copies[dest]
                if source in pctx:
                    ds.copy(source, dest)
        if p2 == nullid:
            for dest, source in sorted(ds.copies().items()):
                if dest not in pctx and source not in pctx:
                    ds.copy(None, dest)
1900 1900
def filectx(self, path, changeid=None, fileid=None, changectx=None):
    """Return a ``context.filectx`` for ``path``.

    changeid must be a changeset revision, if specified.
    fileid can be a file revision or node.
    """
    fctx = context.filectx(self, path, changeid, fileid, changectx=changectx)
    return fctx
1907 1907
def getcwd(self):
    """Return the dirstate's ``getcwd()`` result."""
    ds = self.dirstate
    return ds.getcwd()
1910 1910
def pathto(self, f, cwd=None):
    """Return the dirstate's ``pathto(f, cwd)`` result."""
    ds = self.dirstate
    return ds.pathto(f, cwd)
1913 1913
def _loadfilter(self, filter):
    """Return the list of ``(matcher, function, params)`` for a filter section.

    The list is built from the ``filter`` config section on first use and
    stored in ``self._filterpats``. Entries whose command is ``b'!'`` are
    skipped. A command starting with a name in ``self._datafilters`` uses
    that registered function; anything else goes through
    ``procutil.filter``.
    """
    if filter in self._filterpats:
        return self._filterpats[filter]

    loaded = []
    for pat, cmd in self.ui.configitems(filter):
        if cmd == b'!':
            continue
        mf = matchmod.match(self.root, b'', [pat])
        fn = None
        params = cmd
        for name, filterfn in pycompat.iteritems(self._datafilters):
            if cmd.startswith(name):
                fn = filterfn
                params = cmd[len(name) :].lstrip()
                break
        if not fn:

            def fn(s, c, **kwargs):
                return procutil.filter(s, c)

            fn.__name__ = 'commandfilter'
        # Wrap old filters not supporting keyword arguments
        if not pycompat.getargspec(fn)[2]:
            oldfn = fn

            # bind oldfn as a default so later iterations can't rebind it
            def fn(s, c, oldfn=oldfn, **kwargs):
                return oldfn(s, c)

            fn.__name__ = 'compat-' + oldfn.__name__
        loaded.append((mf, fn, params))

    self._filterpats[filter] = loaded
    return self._filterpats[filter]
1939 1939
def _filter(self, filterpats, filename, data):
    """Run ``data`` through the first filter whose matcher accepts ``filename``.

    ``filterpats`` is a sequence of ``(matcher, function, command)``. Only
    the first matching entry is applied; ``data`` is returned unchanged
    when nothing matches.
    """
    for matcher, func, cmd in filterpats:
        if not matcher(filename):
            continue
        label = cmd or pycompat.sysbytes(func.__name__)
        self.ui.debug(b"filtering %s through %s\n" % (filename, label))
        return func(data, cmd, ui=self.ui, repo=self, filename=filename)

    return data
1951 1951
@unfilteredpropertycache
def _encodefilterpats(self):
    """Filter patterns loaded from the ``encode`` config section."""
    pats = self._loadfilter(b'encode')
    return pats
1955 1955
@unfilteredpropertycache
def _decodefilterpats(self):
    """Filter patterns loaded from the ``decode`` config section."""
    pats = self._loadfilter(b'decode')
    return pats
1959 1959
def adddatafilter(self, name, filter):
    """Register ``filter`` under ``name`` in ``self._datafilters``."""
    registry = self._datafilters
    registry[name] = filter
1962 1962
def wread(self, filename):
    """Read ``filename`` from the working directory through the encode filters.

    For a symlink the link target is used as the data, otherwise the file
    content.
    """
    wvfs = self.wvfs
    if wvfs.islink(filename):
        raw = wvfs.readlink(filename)
    else:
        raw = wvfs.read(filename)
    return self._filter(self._encodefilterpats, filename, raw)
1969 1969
def wwrite(self, filename, data, flags, backgroundclose=False, **kwargs):
    """Write ``data`` into ``filename`` in the working directory.

    ``data`` first goes through the decode filters. With ``b'l'`` in
    ``flags`` a symlink pointing at the data is created; otherwise the
    file is written and its exec flag set according to ``b'x'`` in
    ``flags``.

    This returns length of written (maybe decoded) data.
    """
    decoded = self._filter(self._decodefilterpats, filename, data)
    if b'l' in flags:
        self.wvfs.symlink(decoded, filename)
        return len(decoded)

    self.wvfs.write(
        filename, decoded, backgroundclose=backgroundclose, **kwargs
    )
    self.wvfs.setflags(filename, False, b'x' in flags)
    return len(decoded)
1987 1987
def wwritedata(self, filename, data):
    """Return ``data`` run through the decode filters for ``filename``."""
    pats = self._decodefilterpats
    return self._filter(pats, filename, data)
1990 1990
def currenttransaction(self):
    """Return the current transaction, or None if none exists."""
    tr = self._transref() if self._transref else None
    if tr and tr.running():
        return tr
    return None
2001 2001
def transaction(self, desc, report=None):
    """Open a store transaction, or nest inside the one already running.

    ``desc`` names the transaction: it is given to the transaction object
    as ``name`` and to hooks as ``txnname``. ``report`` is an optional
    callable handed to the transaction for reporting; ``self.ui.warn`` is
    used when it is falsy.

    When ``currenttransaction()`` returns a transaction, the result of its
    ``nest()`` is returned instead of opening a new one.

    Raises error.ProgrammingError when ``devel.all-warnings`` or
    ``devel.check-locks`` is set and ``self._currentlock(self._lockref)``
    is None, and error.RepoError when a ``journal`` file already exists in
    the store.
    """
    if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
        b'devel', b'check-locks'
    ):
        if self._currentlock(self._lockref) is None:
            raise error.ProgrammingError(b'transaction requires locking')
    tr = self.currenttransaction()
    if tr is not None:
        return tr.nest(name=desc)

    # abort here if the journal already exists
    if self.svfs.exists(b"journal"):
        raise error.RepoError(
            _(b"abandoned transaction found"),
            hint=_(b"run 'hg recover' to clean up transaction"),
        )

    idbase = b"%.40f#%f" % (random.random(), time.time())
    ha = hex(hashlib.sha1(idbase).digest())
    txnid = b'TXN:' + ha
    self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)

    self._writejournal(desc)
    renames = [(vfs, x, undoname(x)) for vfs, x in self._journalfiles()]
    if report:
        rp = report
    else:
        rp = self.ui.warn
    vfsmap = {b'plain': self.vfs, b'store': self.svfs}  # root of .hg/
    # we must avoid cyclic reference between repo and transaction.
    reporef = weakref.ref(self)
    # Code to track tag movement
    #
    # Since tags are all handled as file content, it is actually quite hard
    # to track these movement from a code perspective. So we fallback to a
    # tracking at the repository level. One could envision to track changes
    # to the '.hgtags' file through changegroup apply but that fails to
    # cope with case where transaction expose new heads without changegroup
    # being involved (eg: phase movement).
    #
    # For now, We gate the feature behind a flag since this likely comes
    # with performance impacts. The current code run more often than needed
    # and do not use caches as much as it could.  The current focus is on
    # the behavior of the feature so we disable it by default. The flag
    # will be removed when we are happy with the performance impact.
    #
    # Once this feature is no longer experimental move the following
    # documentation to the appropriate help section:
    #
    # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
    # tags (new or changed or deleted tags). In addition the details of
    # these changes are made available in a file at:
    #     ``REPOROOT/.hg/changes/tags.changes``.
    # Make sure you check for HG_TAG_MOVED before reading that file as it
    # might exist from a previous transaction even if no tag were touched
    # in this one. Changes are recorded in a line base format::
    #
    #   <action> <hex-node> <tag-name>\n
    #
    # Actions are defined as follow:
    #   "-R": tag is removed,
    #   "+A": tag is added,
    #   "-M": tag is moved (old value),
    #   "+M": tag is moved (new value),
    tracktags = lambda x: None
    # experimental config: experimental.hook-track-tags
    shouldtracktags = self.ui.configbool(
        b'experimental', b'hook-track-tags'
    )
    if desc != b'strip' and shouldtracktags:
        oldheads = self.changelog.headrevs()

        def tracktags(tr2):
            repo = reporef()
            oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
            newheads = repo.changelog.headrevs()
            newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
            # notes: we compare lists here.
            # As we do it only once buiding set would not be cheaper
            changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
            if changes:
                tr2.hookargs[b'tag_moved'] = b'1'
                with repo.vfs(
                    b'changes/tags.changes', b'w', atomictemp=True
                ) as changesfile:
                    # note: we do not register the file to the transaction
                    # because we needs it to still exist on the transaction
                    # is close (for txnclose hooks)
                    tagsmod.writediff(changesfile, changes)

    def validate(tr2):
        """will run pre-closing hooks"""
        # XXX the transaction API is a bit lacking here so we take a hacky
        # path for now
        #
        # We cannot add this as a "pending" hooks since the 'tr.hookargs'
        # dict is copied before these run. In addition we needs the data
        # available to in memory hooks too.
        #
        # Moreover, we also need to make sure this runs before txnclose
        # hooks and there is no "pending" mechanism that would execute
        # logic only if hooks are about to run.
        #
        # Fixing this limitation of the transaction is also needed to track
        # other families of changes (bookmarks, phases, obsolescence).
        #
        # This will have to be fixed before we remove the experimental
        # gating.
        tracktags(tr2)
        repo = reporef()

        singleheadopt = (b'experimental', b'single-head-per-branch')
        singlehead = repo.ui.configbool(*singleheadopt)
        if singlehead:
            singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
            accountclosed = singleheadsub.get(
                b"account-closed-heads", False
            )
            scmutil.enforcesinglehead(repo, tr2, desc, accountclosed)
        if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
            for name, (old, new) in sorted(
                tr.changes[b'bookmarks'].items()
            ):
                args = tr.hookargs.copy()
                args.update(bookmarks.preparehookargs(name, old, new))
                repo.hook(
                    b'pretxnclose-bookmark',
                    throw=True,
                    **pycompat.strkwargs(args)
                )
        if hook.hashook(repo.ui, b'pretxnclose-phase'):
            cl = repo.unfiltered().changelog
            for rev, (old, new) in tr.changes[b'phases'].items():
                args = tr.hookargs.copy()
                node = hex(cl.node(rev))
                args.update(phases.preparehookargs(node, old, new))
                repo.hook(
                    b'pretxnclose-phase',
                    throw=True,
                    **pycompat.strkwargs(args)
                )

        repo.hook(
            b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
        )

    def releasefn(tr, success):
        repo = reporef()
        if repo is None:
            # If the repo has been GC'd (and this release function is being
            # called from transaction.__del__), there's not much we can do,
            # so just leave the unfinished transaction there and let the
            # user run `hg recover`.
            return
        if success:
            # this should be explicitly invoked here, because
            # in-memory changes aren't written out at closing
            # transaction, if tr.addfilegenerator (via
            # dirstate.write or so) isn't invoked while
            # transaction running
            repo.dirstate.write(None)
        else:
            # discard all changes (including ones already written
            # out) in this transaction
            #
            # use the dereferenced repo, not the enclosing ``self``, so
            # this callback does not close over the repository
            narrowspec.restorebackup(repo, b'journal.narrowspec')
            narrowspec.restorewcbackup(repo, b'journal.narrowspec.dirstate')
            repo.dirstate.restorebackup(None, b'journal.dirstate')

            repo.invalidate(clearfilecache=True)

    tr = transaction.transaction(
        rp,
        self.svfs,
        vfsmap,
        b"journal",
        b"undo",
        aftertrans(renames),
        self.store.createmode,
        validator=validate,
        releasefn=releasefn,
        checkambigfiles=_cachedfiles,
        name=desc,
    )
    tr.changes[b'origrepolen'] = len(self)
    tr.changes[b'obsmarkers'] = set()
    tr.changes[b'phases'] = {}
    tr.changes[b'bookmarks'] = {}

    tr.hookargs[b'txnid'] = txnid
    tr.hookargs[b'txnname'] = desc
    # note: writing the fncache only during finalize mean that the file is
    # outdated when running hooks. As fncache is used for streaming clone,
    # this is not expected to break anything that happen during the hooks.
    tr.addfinalize(b'flush-fncache', self.store.write)

    def txnclosehook(tr2):
        """To be run if transaction is successful, will schedule a hook run
        """
        # Don't reference tr2 in hook() so we don't hold a reference.
        # This reduces memory consumption when there are multiple
        # transactions per lock. This can likely go away if issue5045
        # fixes the function accumulation.
        hookargs = tr2.hookargs

        # _afterlock callbacks receive a "success" boolean; it is not
        # needed here.
        def hookfunc(unused_success):
            repo = reporef()
            if hook.hashook(repo.ui, b'txnclose-bookmark'):
                bmchanges = sorted(tr.changes[b'bookmarks'].items())
                for name, (old, new) in bmchanges:
                    args = tr.hookargs.copy()
                    args.update(bookmarks.preparehookargs(name, old, new))
                    repo.hook(
                        b'txnclose-bookmark',
                        throw=False,
                        **pycompat.strkwargs(args)
                    )

            if hook.hashook(repo.ui, b'txnclose-phase'):
                cl = repo.unfiltered().changelog
                phasemv = sorted(tr.changes[b'phases'].items())
                for rev, (old, new) in phasemv:
                    args = tr.hookargs.copy()
                    node = hex(cl.node(rev))
                    args.update(phases.preparehookargs(node, old, new))
                    repo.hook(
                        b'txnclose-phase',
                        throw=False,
                        **pycompat.strkwargs(args)
                    )

            repo.hook(
                b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
            )

        reporef()._afterlock(hookfunc)

    tr.addfinalize(b'txnclose-hook', txnclosehook)
    # Include a leading "-" to make it happen before the transaction summary
    # reports registered via scmutil.registersummarycallback() whose names
    # are 00-txnreport etc. That way, the caches will be warm when the
    # callbacks run.
    tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))

    def txnaborthook(tr2):
        """To be run if transaction is aborted
        """
        reporef().hook(
            b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
        )

    tr.addabort(b'txnabort-hook', txnaborthook)
    # avoid eager cache invalidation. in-memory data should be identical
    # to stored data if transaction has no error.
    tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
    self._transref = weakref.ref(tr)
    scmutil.registersummarycallback(self, tr, desc)
    return tr
2259 2259
def _journalfiles(self):
    """Return the ``(vfs, filename)`` pairs that make up the journal."""
    store = self.svfs
    plain = self.vfs
    return (
        (store, b'journal'),
        (store, b'journal.narrowspec'),
        (plain, b'journal.narrowspec.dirstate'),
        (plain, b'journal.dirstate'),
        (plain, b'journal.branch'),
        (plain, b'journal.desc'),
        (bookmarks.bookmarksvfs(self), b'journal.bookmarks'),
        (store, b'journal.phaseroots'),
    )
2271 2271
def undofiles(self):
    """Return ``(vfs, undo name)`` pairs, one per journal file."""
    pairs = []
    for vfs, journalname in self._journalfiles():
        pairs.append((vfs, undoname(journalname)))
    return pairs
2274 2274
@unfilteredmethod
def _writejournal(self, desc):
    """Write the ``journal.*`` backup files.

    Backs up the dirstate and narrowspec, then writes the current branch,
    a description line (repo length and ``desc``), and copies of the
    bookmarks and phaseroots files.
    """
    self.dirstate.savebackup(None, b'journal.dirstate')
    narrowspec.savewcbackup(self, b'journal.narrowspec.dirstate')
    narrowspec.savebackup(self, b'journal.narrowspec')

    branch = encoding.fromlocal(self.dirstate.branch())
    self.vfs.write(b"journal.branch", branch)

    description = b"%d\n%s\n" % (len(self), desc)
    self.vfs.write(b"journal.desc", description)

    bookmarksvfs = bookmarks.bookmarksvfs(self)
    currentmarks = bookmarksvfs.tryread(b"bookmarks")
    bookmarksvfs.write(b"journal.bookmarks", currentmarks)

    phaseroots = self.svfs.tryread(b"phaseroots")
    self.svfs.write(b"journal.phaseroots", phaseroots)
2289 2289
def recover(self):
    """Roll back an interrupted transaction, if a journal exists.

    Returns True when a journal was found and rolled back, False (after
    a warning) when there was nothing to recover.
    """
    with self.lock():
        if not self.svfs.exists(b"journal"):
            self.ui.warn(_(b"no interrupted transaction available\n"))
            return False

        self.ui.status(_(b"rolling back interrupted transaction\n"))
        vfsmap = {b'': self.svfs, b'plain': self.vfs}
        transaction.rollback(
            self.svfs,
            vfsmap,
            b"journal",
            self.ui.warn,
            checkambigfiles=_cachedfiles,
        )
        self.invalidate()
        return True
2310 2310
def rollback(self, dryrun=False, force=False):
    """Roll back the last transaction if undo information exists.

    Takes wlock then lock, and delegates to _rollback() under a
    dirstateguard. Returns the result of _rollback(), or 1 (after a
    warning) when there is no ``undo`` file in the store.
    """
    wlk = lk = guard = None
    try:
        wlk = self.wlock()
        lk = self.lock()
        if not self.svfs.exists(b"undo"):
            self.ui.warn(_(b"no rollback information available\n"))
            return 1

        guard = dirstateguard.dirstateguard(self, b'rollback')
        return self._rollback(dryrun, force, guard)
    finally:
        release(guard, lk, wlk)
2325 2325
@unfilteredmethod  # Until we get smarter cache management
def _rollback(self, dryrun, force, dsguard):
    """Undo the last transaction using the ``undo.*`` files.

    rollback() calls this with wlock and lock held and passes the
    dirstateguard it created as ``dsguard``.

    Returns 0, both for a dry run (only the status message is printed)
    and for a completed rollback. Raises error.Abort when the transaction
    to undo is a commit, the working directory parent is not tip, and
    ``force`` is not set.
    """
    ui = self.ui
    try:
        # undo.desc holds: old repository length, description and an
        # optional detail line
        args = self.vfs.read(b'undo.desc').splitlines()
        (oldlen, desc, detail) = (int(args[0]), args[1], None)
        if len(args) >= 3:
            detail = args[2]
        oldtip = oldlen - 1

        if detail and ui.verbose:
            msg = _(
                b'repository tip rolled back to revision %d'
                b' (undo %s: %s)\n'
            ) % (oldtip, desc, detail)
        else:
            msg = _(
                b'repository tip rolled back to revision %d (undo %s)\n'
            ) % (oldtip, desc)
    except IOError:
        # undo.desc could not be read: roll back without a description
        msg = _(b'rolling back unknown transaction\n')
        desc = None

    if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
        raise error.Abort(
            _(
                b'rollback of last commit while not checked out '
                b'may lose data'
            ),
            hint=_(b'use -f to force'),
        )

    ui.status(msg)
    if dryrun:
        return 0

    # remember the working directory parents before the store changes
    parents = self.dirstate.parents()
    self.destroying()
    vfsmap = {b'plain': self.vfs, b'': self.svfs}
    transaction.rollback(
        self.svfs, vfsmap, b'undo', ui.warn, checkambigfiles=_cachedfiles
    )
    bookmarksvfs = bookmarks.bookmarksvfs(self)
    if bookmarksvfs.exists(b'undo.bookmarks'):
        bookmarksvfs.rename(
            b'undo.bookmarks', b'bookmarks', checkambig=True
        )
    if self.svfs.exists(b'undo.phaseroots'):
        self.svfs.rename(b'undo.phaseroots', b'phaseroots', checkambig=True)
    self.invalidate()

    # the working directory state is only restored when one of the old
    # parents no longer exists in the changelog
    has_node = self.changelog.index.has_node
    parentgone = any(not has_node(p) for p in parents)
    if parentgone:
        # prevent dirstateguard from overwriting already restored one
        dsguard.close()

        narrowspec.restorebackup(self, b'undo.narrowspec')
        narrowspec.restorewcbackup(self, b'undo.narrowspec.dirstate')
        self.dirstate.restorebackup(None, b'undo.dirstate')
        try:
            branch = self.vfs.read(b'undo.branch')
            self.dirstate.setbranch(encoding.tolocal(branch))
        except IOError:
            ui.warn(
                _(
                    b'named branch could not be reset: '
                    b'current branch is still \'%s\'\n'
                )
                % self.dirstate.branch()
            )

        # from here on "parents" holds revision numbers, used only for
        # the status message
        parents = tuple([p.rev() for p in self[None].parents()])
        if len(parents) > 1:
            ui.status(
                _(
                    b'working directory now based on '
                    b'revisions %d and %d\n'
                )
                % parents
            )
        else:
            ui.status(
                _(b'working directory now based on revision %d\n') % parents
            )
        mergemod.mergestate.clean(self, self[b'.'].node())

    # TODO: if we know which new heads may result from this rollback, pass
    # them to destroy(), which will prevent the branchhead cache from being
    # invalidated.
    self.destroyed()
    return 0
2418 2418
def _buildcacheupdater(self, newtransaction):
    """called during transaction to build the callback updating cache

    The method lives on the repository so that extensions can augment
    the logic; the transaction being created is passed in for that
    purpose (it is not used here).
    """
    # only a weak reference is captured, to avoid a cyclic reference
    # between repo and transaction.
    weakrepo = weakref.ref(self)

    def updater(tr):
        weakrepo().updatecaches(tr)

    return updater
2434 2434
@unfilteredmethod
def updatecaches(self, tr=None, full=False):
    """warm appropriate caches

    When called after a transaction closed, that transaction is given as
    'tr' and can be used to update only the caches relevant to its
    changes.

    With 'full' set, every cache this function knows about is brought
    up to date, including the ones that are normally loaded lazily.
    """
    stripping = tr is not None and tr.hookargs.get(b'source') == b'strip'
    if stripping:
        # During strip, many caches are invalid but
        # later call to `destroyed` will refresh them.
        return

    if tr is None or tr.changes[b'origrepolen'] < len(self):
        # accessing the 'served' branchmap should refresh all the others,
        self.ui.debug(b'updating the branch cache\n')
        self.filtered(b'served').branchmap()
        self.filtered(b'served.hidden').branchmap()

    if not full:
        return

    unfiltered = self.unfiltered()
    revbranch = unfiltered.revbranchcache()
    for rev in unfiltered.changelog:
        revbranch.branchinfo(rev)
    revbranch.write()

    # ensure the working copy parents are in the manifestfulltextcache
    for parentctx in self[b'.'].parents():
        parentctx.manifest()  # accessing the manifest is enough

    # accessing fnode cache warms the cache
    tagsmod.fnoderevs(self.ui, unfiltered, unfiltered.changelog.revs())
    # accessing tags warm the cache
    self.tags()
    self.filtered(b'served').tags()

    # The `full` arg is documented as updating even the lazily-loaded
    # caches immediately, so we're forcing a write to cause these caches
    # to be warmed up even if they haven't explicitly been requested
    # yet (if they've never been used by hg, they won't ever have been
    # written, even if they're a subset of another kind of cache that
    # *has* been used).
    for filtername in repoview.filtertable:
        view = self.filtered(filtername)
        view.branchmap().write(view)
2483 2483
def invalidatecaches(self):
    """Drop the tags, branch, volatile-set and sparse-signature caches."""
    # can't use delattr on proxy, so remove the entry from the instance
    # dictionary directly (a no-op when the tags cache was never set)
    self.__dict__.pop('_tagscache', None)

    self._branchcaches.clear()
    self.invalidatevolatilesets()
    self._sparsesignaturecache.clear()
2493 2493
def invalidatevolatilesets(self):
    """Clear the filtered-revision cache and the obsolescence caches."""
    revcache = self.filteredrevcache
    revcache.clear()
    obsolete.clearobscaches(self)
2497 2497
def invalidatedirstate(self):
    '''Invalidates the dirstate, causing the next call to dirstate
    to check if it was modified since the last time it was read,
    rereading it if it has.

    This differs from dirstate.invalidate() in that it does not always
    reread the dirstate. Use dirstate.invalidate() if you want to
    explicitly read the dirstate again (i.e. restoring it to a previous
    known good state).'''
    if not hasunfilteredcache(self, 'dirstate'):
        return

    dirstate = self.dirstate
    for name in dirstate._filecache:
        try:
            delattr(dirstate, name)
        except AttributeError:
            # that file-cached attribute is not set on the dirstate
            pass
    delattr(self.unfiltered(), 'dirstate')
2514 2514
def invalidate(self, clearfilecache=False):
    '''Invalidates both store and non-store parts other than dirstate

    If a transaction is running, invalidation of store is omitted,
    because discarding in-memory changes might cause inconsistency
    (e.g. incomplete fncache causes unintentional failure, but
    redundant one doesn't).

    With ``clearfilecache`` set, the invalidated entries are also removed
    from ``_filecache``.
    '''
    unfi = self.unfiltered()  # all file caches are stored unfiltered
    for name in list(self._filecache):
        # dirstate is invalidated separately in invalidatedirstate()
        if name == b'dirstate':
            continue
        if name == b'changelog':
            # The changelog object may store unwritten revisions. We don't
            # want to lose them.
            # TODO: Solve the problem instead of working around it.
            if self.currenttransaction() and self.changelog._delayed:
                continue

        if clearfilecache:
            del self._filecache[name]
        try:
            delattr(unfi, name)
        except AttributeError:
            pass

    self.invalidatecaches()
    if self.currenttransaction():
        return
    # TODO: Changing contents of store outside transaction
    # causes inconsistency. We should make in-memory store
    # changes detectable, and abort if changed.
    self.store.invalidatecaches()
2550 2550
def invalidateall(self):
    '''Fully invalidates both store and non-store parts, causing the
    subsequent operation to reread any outside changes.

    This simply chains invalidate() and invalidatedirstate().'''
    # extension should hook this to invalidate its caches
    self.invalidate()
    self.invalidatedirstate()
2557 2557
@unfilteredmethod
def _refreshfilecachestats(self, tr):
    """Reload stats of cached files so that they are flagged as valid

    Entries for the dirstate, and entries whose attribute is not set on
    this object, are left alone. ``tr`` is not used.
    """
    loaded = self.__dict__
    for key, entry in self._filecache.items():
        attrname = pycompat.sysstr(key)
        if attrname != 'dirstate' and attrname in loaded:
            entry.refresh()
2566 2566
def _lock(
    self,
    vfs,
    lockname,
    wait,
    releasefn,
    acquirefn,
    desc,
    inheritchecker=None,
    parentenvvar=None,
):
    """Acquire ``lockname`` on ``vfs`` through lockmod.trylock().

    When ``wait`` is true the timeouts come from the ``ui.timeout`` and
    ``ui.timeout.warn`` settings; otherwise both are 0. The remaining
    arguments are forwarded to trylock() unchanged. Returns the lock
    object produced by trylock().
    """
    # the contents of parentenvvar are used by the underlying lock to
    # determine whether it can be inherited
    if parentenvvar is None:
        parentlock = None
    else:
        parentlock = encoding.environ.get(parentenvvar)

    if wait:
        timeout = self.ui.configint(b"ui", b"timeout")
        warntimeout = self.ui.configint(b"ui", b"timeout.warn")
    else:
        timeout = warntimeout = 0
    # internal config: ui.signal-safe-lock
    signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')

    return lockmod.trylock(
        self.ui,
        vfs,
        lockname,
        timeout,
        warntimeout,
        releasefn=releasefn,
        acquirefn=acquirefn,
        desc=desc,
        inheritchecker=inheritchecker,
        parentlock=parentlock,
        signalsafe=signalsafe,
    )
2606 2606
def _afterlock(self, callback):
    """add a callback to be run when the repository is fully unlocked

    The callback will be executed when the outermost lock is released
    (with wlock being higher level than 'lock').

    ``callback`` must accept one positional "success" boolean. When
    neither lock is currently held it is called immediately with True.
    """
    # wlock is looked at first: it is the higher level lock
    for ref in (self._wlockref, self._lockref):
        l = ref and ref()
        if l and l.held:
            # defer: queue the callback on the held lock's postrelease list
            l.postrelease.append(callback)
            break
    else:  # no lock have been found.
        callback(True)
2619 2619
def lock(self, wait=True):
    '''Lock the repository store (.hg/store) and return the lock; a weak
    reference to it is kept on the repository. Use this before modifying
    the store (e.g. committing or stripping). If you are opening a
    transaction, get a lock as well.

    If the store lock is already held, it is re-acquired through its
    lock() method and returned.

    If both 'lock' and 'wlock' must be acquired, ensure you always acquire
    'wlock' first to avoid a dead-lock hazard.'''
    held = self._currentlock(self._lockref)
    if held is not None:
        held.lock()
        return held

    description = _(b'repository %s') % self.origroot
    newlock = self._lock(
        vfs=self.svfs,
        lockname=b"lock",
        wait=wait,
        releasefn=None,
        acquirefn=self.invalidate,
        desc=description,
    )
    self._lockref = weakref.ref(newlock)
    return newlock
2642 2642
def _wlockchecktransaction(self):
    """Raise LockInheritanceContractViolation if a transaction is open."""
    if self.currenttransaction() is None:
        return
    raise error.LockInheritanceContractViolation(
        b'wlock cannot be inherited in the middle of a transaction'
    )
2648 2648
def wlock(self, wait=True):
    '''Lock the non-store parts of the repository (everything under
    .hg except .hg/store) and return the lock; a weak reference to it is
    kept on the repository.

    Use this before modifying files in .hg.

    If the wlock is already held, it is re-acquired through its lock()
    method and returned.

    If both 'lock' and 'wlock' must be acquired, ensure you always acquire
    'wlock' first to avoid a dead-lock hazard.'''
    wref = self._wlockref
    current = wref and wref()
    if current is not None and current.held:
        current.lock()
        return current

    # We do not need to check for non-waiting lock acquisition. Such
    # acquisition would not cause dead-lock as they would just fail.
    if wait:
        checklocks = self.ui.configbool(
            b'devel', b'all-warnings'
        ) or self.ui.configbool(b'devel', b'check-locks')
        if checklocks and self._currentlock(self._lockref) is not None:
            self.ui.develwarn(b'"wlock" acquired after "lock"')

    def unlock():
        # release function: drop the dirstate if a parent change is still
        # pending, write it out otherwise
        if not self.dirstate.pendingparentchange():
            self.dirstate.write(None)
        else:
            self.dirstate.invalidate()

        self._filecache[b'dirstate'].refresh()

    newlock = self._lock(
        self.vfs,
        b"wlock",
        wait,
        unlock,
        self.invalidatedirstate,
        _(b'working directory of %s') % self.origroot,
        inheritchecker=self._wlockchecktransaction,
        parentenvvar=b'HG_WLOCK_LOCKER',
    )
    self._wlockref = weakref.ref(newlock)
    return newlock
2691 2691
def _currentlock(self, lockref):
    """Returns the lock if it's held, or None if it's not.

    ``lockref`` is either None or a callable (such as a weak reference)
    returning the lock object or None.
    """
    candidate = None if lockref is None else lockref()
    if candidate is not None and candidate.held:
        return candidate
    return None
2700 2700
def currentwlock(self):
    """Returns the wlock if it's held, or None if it's not."""
    wref = self._wlockref
    return self._currentlock(wref)
2704 2704
def _filecommit(
    self,
    fctx,
    manifest1,
    manifest2,
    linkrev,
    tr,
    changelist,
    includecopymeta,
):
    """
    commit an individual file as part of a larger transaction

    ``fctx`` is the file context to commit, ``manifest1``/``manifest2``
    are the manifests of the two parents, and ``linkrev`` and ``tr`` are
    handed to the filelog when a new revision is added.
    ``includecopymeta`` controls whether copy information is stored in
    the filelog metadata.

    The file name is appended to ``changelist`` when the file is
    considered changed (content, copy metadata or flags).

    Returns the file node to record in the manifest: a reused node, the
    node of the newly added filelog revision, or the (possibly swapped)
    first parent when nothing was added.
    """

    fname = fctx.path()
    fparent1 = manifest1.get(fname, nullid)
    fparent2 = manifest2.get(fname, nullid)
    if isinstance(fctx, context.filectx):
        # the context already refers to a filelog entry; reuse it when it
        # is one of the parents
        node = fctx.filenode()
        if node in [fparent1, fparent2]:
            self.ui.debug(b'reusing %s filelog entry\n' % fname)
            if (
                fparent1 != nullid
                and manifest1.flags(fname) != fctx.flags()
            ) or (
                fparent2 != nullid
                and manifest2.flags(fname) != fctx.flags()
            ):
                # only the flags differ from a parent
                changelist.append(fname)
            return node

    flog = self.file(fname)
    meta = {}
    cfname = fctx.copysource()
    if cfname and cfname != fname:
        # Mark the new revision of this file as a copy of another
        # file.  This copy data will effectively act as a parent
        # of this new revision.  If this is a merge, the first
        # parent will be the nullid (meaning "look up the copy data")
        # and the second one will be the other parent.  For example:
        #
        # 0 --- 1 --- 3   rev1 changes file foo
        #   \       /     rev2 renames foo to bar and changes it
        #    \- 2 -/      rev3 should have bar with all changes and
        #                      should record that bar descends from
        #                      bar in rev2 and foo in rev1
        #
        # this allows this merge to succeed:
        #
        # 0 --- 1 --- 3   rev4 reverts the content change from rev2
        #   \       /     merging rev3 and rev4 should use bar@rev2
        #    \- 2 --- 4        as the merge base
        #

        cnode = manifest1.get(cfname)
        newfparent = fparent2

        if manifest2:  # branch merge
            if fparent2 == nullid or cnode is None:  # copied on remote side
                if cfname in manifest2:
                    cnode = manifest2[cfname]
                    newfparent = fparent1

        # Here, we used to search backwards through history to try to find
        # where the file copy came from if the source of a copy was not in
        # the parent directory. However, this doesn't actually make sense to
        # do (what does a copy from something not in your working copy even
        # mean?) and it causes bugs (eg, issue4476). Instead, we will warn
        # the user that copy information was dropped, so if they didn't
        # expect this outcome it can be fixed, but this is the correct
        # behavior in this circumstance.

        if cnode:
            self.ui.debug(
                b" %s: copy %s:%s\n" % (fname, cfname, hex(cnode))
            )
            if includecopymeta:
                meta[b"copy"] = cfname
                meta[b"copyrev"] = hex(cnode)
            # first parent becomes nullid: "look up the copy data"
            fparent1, fparent2 = nullid, newfparent
        else:
            self.ui.warn(
                _(
                    b"warning: can't find ancestor for '%s' "
                    b"copied from '%s'!\n"
                )
                % (fname, cfname)
            )

    elif fparent1 == nullid:
        # file only present in the second parent
        fparent1, fparent2 = fparent2, nullid
    elif fparent2 != nullid:
        # is one parent an ancestor of the other?
        fparentancestors = flog.commonancestorsheads(fparent1, fparent2)
        if fparent1 in fparentancestors:
            fparent1, fparent2 = fparent2, nullid
        elif fparent2 in fparentancestors:
            fparent2 = nullid

    # is the file changed?
    text = fctx.data()
    if fparent2 != nullid or flog.cmp(fparent1, text) or meta:
        changelist.append(fname)
        return flog.add(text, meta, tr, linkrev, fparent1, fparent2)
    # are just the flags changed during merge?
    elif fname in manifest1 and manifest1.flags(fname) != fctx.flags():
        changelist.append(fname)

    return fparent1
2814 2814
def checkcommitpatterns(self, wctx, match, status, fail):
    """check for commit arguments that aren't committable

    Only exact and prefix matchers are inspected. ``fail(f, msg)`` is
    called for every explicitly listed file that cannot be committed.
    """
    if not (match.isexact() or match.prefix()):
        return

    matched = set(status.modified + status.added + status.removed)

    for pattern in match.files():
        f = self.dirstate.normalize(pattern)
        if f == b'.' or f in matched or f in wctx.substate:
            continue
        if f in status.deleted:
            fail(f, _(b'file not found!'))
        # Is it a directory that exists or used to exist?
        if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
            prefix = f + b'/'
            if not any(mf.startswith(prefix) for mf in matched):
                fail(f, _(b"no match under directory!"))
        elif f not in self.dirstate:
            fail(f, _(b"file not tracked!"))
2836 2836
@unfilteredmethod
def commit(
    self,
    text=b"",
    user=None,
    date=None,
    match=None,
    force=False,
    editor=False,
    extra=None,
):
    """Add a new revision to current repository.

    Revision information is gathered from the working directory,
    match can be used to filter the committed files. If editor is
    supplied, it is called to get a commit message.

    Returns the node of the new changeset, or None when there is nothing
    to commit and empty commits are not allowed. Raises error.Abort for
    a partial merge commit, a merge with missing files, or (unless
    ``force`` is set) a pattern that cannot be committed.
    """
    if extra is None:
        extra = {}

    def fail(f, msg):
        raise error.Abort(b'%s: %s' % (f, msg))

    if not match:
        match = matchmod.always()

    if not force:
        match.bad = fail

    # lock() for recent changelog (see issue4368)
    with self.wlock(), self.lock():
        wctx = self[None]
        merge = len(wctx.parents()) > 1

        if not force and merge and not match.always():
            raise error.Abort(
                _(
                    b'cannot partially commit a merge '
                    b'(do not specify files or patterns)'
                )
            )

        status = self.status(match=match, clean=force)
        if force:
            status.modified.extend(
                status.clean
            )  # mq may commit clean files

        # check subrepos
        subs, commitsubs, newstate = subrepoutil.precommit(
            self.ui, wctx, status, match, force=force
        )

        # make sure all explicit patterns are matched
        if not force:
            self.checkcommitpatterns(wctx, match, status, fail)

        cctx = context.workingcommitctx(
            self, status, text, user, date, extra
        )

        # internal config: ui.allowemptycommit
        allowemptycommit = (
            wctx.branch() != wctx.p1().branch()
            or extra.get(b'close')
            or merge
            or cctx.files()
            or self.ui.configbool(b'ui', b'allowemptycommit')
        )
        if not allowemptycommit:
            return None

        if merge and cctx.deleted():
            raise error.Abort(_(b"cannot commit merge with missing files"))

        ms = mergemod.mergestate.read(self)
        mergeutil.checkunresolved(ms)

        if editor:
            cctx._text = editor(self, cctx, subs)
        edited = text != cctx._text

        # Save commit message in case this transaction gets rolled back
        # (e.g. by a pretxncommit hook).  Leave the content alone on
        # the assumption that the user will use the same editor again.
        msgfn = self.savecommitmessage(cctx._text)

        # commit subs and write new state
        if subs:
            uipathfn = scmutil.getuipathfn(self)
            for s in sorted(commitsubs):
                sub = wctx.sub(s)
                self.ui.status(
                    _(b'committing subrepository %s\n')
                    % uipathfn(subrepoutil.subrelpath(sub))
                )
                sr = sub.commit(cctx._text, user, date)
                newstate[s] = (newstate[s][0], sr)
            subrepoutil.writestate(self, newstate)

        p1, p2 = self.dirstate.parents()
        # hooks get hex parents; the second one is empty when there is none
        hookp1 = hex(p1)
        hookp2 = hex(p2) if p2 != nullid else b''
        try:
            self.hook(
                b"precommit", throw=True, parent1=hookp1, parent2=hookp2
            )
            with self.transaction(b'commit'):
                ret = self.commitctx(cctx, True)
                # update bookmarks, dirstate and mergestate
                bookmarks.update(self, [p1, p2], ret)
                cctx.markcommitted(ret)
                ms.reset()
        except:  # re-raises
            if edited:
                self.ui.write(
                    _(b'note: commit message saved in %s\n') % msgfn
                )
            raise

    def commithook(unused_success):
        # _afterlock callbacks receive a "success" boolean; the commit
        # hook does not need it.
        # hack for command that use a temporary commit (eg: histedit)
        # temporary commit got stripped before hook release
        if self.changelog.hasnode(ret):
            self.hook(
                b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
            )

    # run the "commit" hook once the repository is fully unlocked
    self._afterlock(commithook)
    return ret
2966 2966
2967 2967 @unfilteredmethod
2968 2968 def commitctx(self, ctx, error=False, origctx=None):
2969 2969 """Add a new revision to current repository.
2970 2970 Revision information is passed via the context argument.
2971 2971
2972 2972 ctx.files() should list all files involved in this commit, i.e.
2973 2973 modified/added/removed files. On merge, it may be wider than the
2974 2974 ctx.files() to be committed, since any file nodes derived directly
2975 2975 from p1 or p2 are excluded from the committed ctx.files().
2976 2976
2977 2977 origctx is for convert to work around the problem that bug
2978 2978 fixes to the files list in changesets change hashes. For
2979 2979 convert to be the identity, it can pass an origctx and this
2980 2980 function will use the same files list when it makes sense to
2981 2981 do so.
2982 2982 """
2983 2983
2984 2984 p1, p2 = ctx.p1(), ctx.p2()
2985 2985 user = ctx.user()
2986 2986
2987 2987 if self.filecopiesmode == b'changeset-sidedata':
2988 2988 writechangesetcopy = True
2989 2989 writefilecopymeta = True
2990 2990 writecopiesto = None
2991 2991 else:
2992 2992 writecopiesto = self.ui.config(b'experimental', b'copies.write-to')
2993 2993 writefilecopymeta = writecopiesto != b'changeset-only'
2994 2994 writechangesetcopy = writecopiesto in (
2995 2995 b'changeset-only',
2996 2996 b'compatibility',
2997 2997 )
2998 2998 p1copies, p2copies = None, None
2999 2999 if writechangesetcopy:
3000 3000 p1copies = ctx.p1copies()
3001 3001 p2copies = ctx.p2copies()
3002 3002 filesadded, filesremoved = None, None
3003 3003 with self.lock(), self.transaction(b"commit") as tr:
3004 3004 trp = weakref.proxy(tr)
3005 3005
3006 3006 if ctx.manifestnode():
3007 3007 # reuse an existing manifest revision
3008 3008 self.ui.debug(b'reusing known manifest\n')
3009 3009 mn = ctx.manifestnode()
3010 3010 files = ctx.files()
3011 3011 if writechangesetcopy:
3012 3012 filesadded = ctx.filesadded()
3013 3013 filesremoved = ctx.filesremoved()
3014 3014 elif ctx.files():
3015 3015 m1ctx = p1.manifestctx()
3016 3016 m2ctx = p2.manifestctx()
3017 3017 mctx = m1ctx.copy()
3018 3018
3019 3019 m = mctx.read()
3020 3020 m1 = m1ctx.read()
3021 3021 m2 = m2ctx.read()
3022 3022
3023 3023 # check in files
3024 3024 added = []
3025 3025 changed = []
3026 3026 removed = list(ctx.removed())
3027 3027 linkrev = len(self)
3028 3028 self.ui.note(_(b"committing files:\n"))
3029 3029 uipathfn = scmutil.getuipathfn(self)
3030 3030 for f in sorted(ctx.modified() + ctx.added()):
3031 3031 self.ui.note(uipathfn(f) + b"\n")
3032 3032 try:
3033 3033 fctx = ctx[f]
3034 3034 if fctx is None:
3035 3035 removed.append(f)
3036 3036 else:
3037 3037 added.append(f)
3038 3038 m[f] = self._filecommit(
3039 3039 fctx,
3040 3040 m1,
3041 3041 m2,
3042 3042 linkrev,
3043 3043 trp,
3044 3044 changed,
3045 3045 writefilecopymeta,
3046 3046 )
3047 3047 m.setflag(f, fctx.flags())
3048 3048 except OSError:
3049 3049 self.ui.warn(
3050 3050 _(b"trouble committing %s!\n") % uipathfn(f)
3051 3051 )
3052 3052 raise
3053 3053 except IOError as inst:
3054 3054 errcode = getattr(inst, 'errno', errno.ENOENT)
3055 3055 if error or errcode and errcode != errno.ENOENT:
3056 3056 self.ui.warn(
3057 3057 _(b"trouble committing %s!\n") % uipathfn(f)
3058 3058 )
3059 3059 raise
3060 3060
3061 3061 # update manifest
3062 3062 removed = [f for f in removed if f in m1 or f in m2]
3063 3063 drop = sorted([f for f in removed if f in m])
3064 3064 for f in drop:
3065 3065 del m[f]
3066 3066 if p2.rev() != nullrev:
3067 3067
3068 3068 @util.cachefunc
3069 3069 def mas():
3070 3070 p1n = p1.node()
3071 3071 p2n = p2.node()
3072 3072 cahs = self.changelog.commonancestorsheads(p1n, p2n)
3073 3073 if not cahs:
3074 3074 cahs = [nullrev]
3075 3075 return [self[r].manifest() for r in cahs]
3076 3076
3077 3077 def deletionfromparent(f):
3078 3078 # When a file is removed relative to p1 in a merge, this
3079 3079 # function determines whether the absence is due to a
3080 3080 # deletion from a parent, or whether the merge commit
3081 3081 # itself deletes the file. We decide this by doing a
3082 3082 # simplified three way merge of the manifest entry for
3083 3083 # the file. There are two ways we decide the merge
3084 3084 # itself didn't delete a file:
3085 3085 # - neither parent (nor the merge) contain the file
3086 3086 # - exactly one parent contains the file, and that
3087 3087 # parent has the same filelog entry as the merge
3088 3088 # ancestor (or all of them if there two). In other
3089 3089 # words, that parent left the file unchanged while the
3090 3090 # other one deleted it.
3091 3091 # One way to think about this is that deleting a file is
3092 3092 # similar to emptying it, so the list of changed files
3093 3093 # should be similar either way. The computation
3094 3094 # described above is not done directly in _filecommit
3095 3095 # when creating the list of changed files, however
3096 3096 # it does something very similar by comparing filelog
3097 3097 # nodes.
3098 3098 if f in m1:
3099 3099 return f not in m2 and all(
3100 3100 f in ma and ma.find(f) == m1.find(f)
3101 3101 for ma in mas()
3102 3102 )
3103 3103 elif f in m2:
3104 3104 return all(
3105 3105 f in ma and ma.find(f) == m2.find(f)
3106 3106 for ma in mas()
3107 3107 )
3108 3108 else:
3109 3109 return True
3110 3110
3111 3111 removed = [f for f in removed if not deletionfromparent(f)]
3112 3112
3113 3113 files = changed + removed
3114 3114 md = None
3115 3115 if not files:
3116 3116 # if no "files" actually changed in terms of the changelog,
3117 3117 # try hard to detect unmodified manifest entry so that the
3118 3118 # exact same commit can be reproduced later on convert.
3119 3119 md = m1.diff(m, scmutil.matchfiles(self, ctx.files()))
3120 3120 if not files and md:
3121 3121 self.ui.debug(
3122 3122 b'not reusing manifest (no file change in '
3123 3123 b'changelog, but manifest differs)\n'
3124 3124 )
3125 3125 if files or md:
3126 3126 self.ui.note(_(b"committing manifest\n"))
3127 3127 # we're using narrowmatch here since it's already applied at
3128 3128 # other stages (such as dirstate.walk), so we're already
3129 3129 # ignoring things outside of narrowspec in most cases. The
3130 3130 # one case where we might have files outside the narrowspec
3131 3131 # at this point is merges, and we already error out in the
3132 3132 # case where the merge has files outside of the narrowspec,
3133 3133 # so this is safe.
3134 3134 mn = mctx.write(
3135 3135 trp,
3136 3136 linkrev,
3137 3137 p1.manifestnode(),
3138 3138 p2.manifestnode(),
3139 3139 added,
3140 3140 drop,
3141 3141 match=self.narrowmatch(),
3142 3142 )
3143 3143
3144 3144 if writechangesetcopy:
3145 3145 filesadded = [
3146 3146 f for f in changed if not (f in m1 or f in m2)
3147 3147 ]
3148 3148 filesremoved = removed
3149 3149 else:
3150 3150 self.ui.debug(
3151 3151 b'reusing manifest from p1 (listed files '
3152 3152 b'actually unchanged)\n'
3153 3153 )
3154 3154 mn = p1.manifestnode()
3155 3155 else:
3156 3156 self.ui.debug(b'reusing manifest from p1 (no file change)\n')
3157 3157 mn = p1.manifestnode()
3158 3158 files = []
3159 3159
3160 3160 if writecopiesto == b'changeset-only':
3161 3161 # If writing only to changeset extras, use None to indicate that
3162 3162 # no entry should be written. If writing to both, write an empty
3163 3163 # entry to prevent the reader from falling back to reading
3164 3164 # filelogs.
3165 3165 p1copies = p1copies or None
3166 3166 p2copies = p2copies or None
3167 3167 filesadded = filesadded or None
3168 3168 filesremoved = filesremoved or None
3169 3169
3170 3170 if origctx and origctx.manifestnode() == mn:
3171 3171 files = origctx.files()
3172 3172
3173 3173 # update changelog
3174 3174 self.ui.note(_(b"committing changelog\n"))
3175 3175 self.changelog.delayupdate(tr)
3176 3176 n = self.changelog.add(
3177 3177 mn,
3178 3178 files,
3179 3179 ctx.description(),
3180 3180 trp,
3181 3181 p1.node(),
3182 3182 p2.node(),
3183 3183 user,
3184 3184 ctx.date(),
3185 3185 ctx.extra().copy(),
3186 3186 p1copies,
3187 3187 p2copies,
3188 3188 filesadded,
3189 3189 filesremoved,
3190 3190 )
3191 3191 xp1, xp2 = p1.hex(), p2 and p2.hex() or b''
3192 3192 self.hook(
3193 3193 b'pretxncommit',
3194 3194 throw=True,
3195 3195 node=hex(n),
3196 3196 parent1=xp1,
3197 3197 parent2=xp2,
3198 3198 )
3199 3199 # set the new commit in its proper phase
3200 3200 targetphase = subrepoutil.newcommitphase(self.ui, ctx)
3201 3201 if targetphase:
3202 3202 # retracting the boundary does not alter the parent changesets.
3203 3203 # if a parent has a higher phase, the resulting phase will
3204 3204 # be compliant anyway
3205 3205 #
3206 3206 # if minimal phase was 0 we don't need to retract anything
3207 3207 phases.registernew(self, tr, targetphase, [n])
3208 3208 return n
3209 3209
@unfilteredmethod
def destroying(self):
    """Tell the repository that nodes are about to be destroyed.

    Intended for strip and rollback, so that there is one common place
    for whatever must happen before history is destroyed.

    It is mostly useful for saving in-memory state that is waiting to be
    flushed when the current lock is released. Because a call to
    destroyed() is imminent, the repo will be invalidated, which would
    leave those changes in memory (waiting for the next unlock) or make
    them vanish completely.
    """
    # When the same lock is used to commit and then strip, the phasecache
    # is left dirty after the commit. The strip invalidates the repo and
    # those pending changes would disappear, so write them out now -- but
    # only if a phasecache has actually been loaded on this instance.
    if '_phasecache' not in vars(self):
        return
    self._phasecache.write()
3227 3227
@unfilteredmethod
def destroyed(self):
    """Tell the repository that nodes have been destroyed.

    Intended for strip and rollback, so that there is one common place
    for whatever must happen after history is destroyed.
    """
    # Scenario being handled:
    #   1) nodes are destroyed, which calls this method (e.g. strip)
    #   2) the phasecache is used afterwards (e.g. commit)
    #
    # Step 2) would fail because the phasecache still references removed
    # nodes. The options are to drop the phasecache from the filecache (so
    # it is reloaded on next access) or to filter the removed nodes right
    # away and write the updated cache; the latter is done here.
    self._phasecache.filterunknown(self)
    self._phasecache.write()

    # refresh all repository caches
    self.updatecaches()

    # Make sure the persistent tag cache is up to date. Doing it now
    # means the tag cache only has to worry about destroyed heads
    # immediately after a strip/rollback, which in turn guarantees that
    # "cachetip == currenttip" (comparing both rev and node) always means
    # no nodes have been added or destroyed.

    # XXX this is suboptimal when qrefresh'ing: we strip the current
    # head, refresh the tag cache, then immediately add a new head.
    # But I think doing it this way is necessary for the "instant
    # tag cache retrieval" case to work.
    self.invalidate()
3259 3259
def status(
    self,
    node1=b'.',
    node2=None,
    match=None,
    ignored=False,
    clean=False,
    unknown=False,
    listsubrepos=False,
):
    """Convenience wrapper that calls ``self[node1].status(node2, ...)``.

    All remaining arguments are forwarded positionally, in declaration
    order, to the ``status`` method of the context looked up for *node1*.
    """
    ctx1 = self[node1]
    return ctx1.status(node2, match, ignored, clean, unknown, listsubrepos)
3274 3274
def addpostdsstatus(self, ps):
    """Register a callback to run within the wlock, at the point where
    status fixups happen.

    When status completes, ``callback(wctx, status)`` is invoked with the
    wlock held, unless the dirstate changed from underneath or the wlock
    could not be grabbed.

    Callbacks should not capture and reuse a cached copy of the dirstate
    -- it might change in the meantime. They should reach the dirstate
    through ``wctx.repo().dirstate`` instead.

    The list is emptied after each status run, so extensions must add
    their callback again every time dirstate.status is called. Extensions
    should also avoid calling this for statuses that do not involve the
    dirstate.
    """
    # The list lives on the repo for uniqueness reasons: it is really
    # managed by the workingctx, but that object is not unique per repo.
    callbacks = self._postdsstatus
    callbacks.append(ps)
3296 3296
def postdsstatus(self):
    """Return the list of post-dirstate-status hooks (used by workingctx).

    The list object itself is returned, not a copy.
    """
    hooks = self._postdsstatus
    return hooks
3300 3300
def clearpostdsstatus(self):
    """Empty the post-dirstate-status hook list (used by workingctx).

    The list is cleared in place, so existing references to it observe
    the change.
    """
    self._postdsstatus[:] = []
3304 3304
def heads(self, start=None):
    """Return head nodes, ordered from highest to lowest revision.

    Without *start*, the changelog's head revisions are reversed and
    converted to nodes. With *start*, ``changelog.heads(start)`` is
    sorted by revision number in descending order.
    """
    if start is not None:
        found = self.changelog.heads(start)
        # sort the output in rev descending order
        return sorted(found, key=self.changelog.rev, reverse=True)

    cl = self.changelog
    return [cl.node(rev) for rev in reversed(cl.headrevs())]
3314 3314
def branchheads(self, branch=None, start=None, closed=False):
    """Return a (possibly filtered) list of heads for the given branch.

    Heads are returned in topological order, from newest to oldest.
    If branch is None, the dirstate branch is used.
    If start is not None, only heads reachable from start are returned.
    If closed is True, heads marked as closed are returned as well.
    An unknown branch yields an empty list.
    """
    if branch is None:
        branch = self[None].branch()

    branchmap = self.branchmap()
    if not branchmap.hasbranch(branch):
        return []

    # the cache returns heads ordered lowest to highest
    bheads = list(reversed(branchmap.branchheads(branch, closed=closed)))
    if start is None:
        return bheads

    # keep only the heads that can be reached from start
    reachable = set(self.changelog.nodesbetween([start], bheads)[2])
    return [head for head in bheads if head in reachable]
3335 3335
def branches(self, nodes):
    """Describe the linear run of history below each of *nodes*.

    For every starting node, first parents are followed until a node is
    found whose second parent is not null or whose first parent is null.
    A tuple ``(start, stop, stop_p1, stop_p2)`` is recorded for it.

    If *nodes* is empty, the changelog tip is used.
    """
    if not nodes:
        nodes = [self.changelog.tip()]

    result = []
    for start in nodes:
        current = start
        while True:
            parents = self.changelog.parents(current)
            if parents[1] != nullid or parents[0] == nullid:
                result.append((start, current, parents[0], parents[1]))
                break
            current = parents[0]
    return result
3349 3349
def between(self, pairs):
    """Sample first-parent history between each ``(top, bottom)`` pair.

    For every pair, first parents are followed from *top* until *bottom*
    or the null node is reached. The nodes found 1, 2, 4, 8, ... steps
    below *top* are collected. One list per pair is returned, in order.
    """
    samples = []

    for top, bottom in pairs:
        node = top
        picked = []
        distance = 0
        nextpick = 1

        while node != bottom and node != nullid:
            parent = self.changelog.parents(node)[0]
            if distance == nextpick:
                picked.append(node)
                nextpick *= 2
            node = parent
            distance += 1

        samples.append(picked)

    return samples
3368 3368
def checkpush(self, pushop):
    """Hook point for extra pre-push checks; does nothing by default.

    Extensions can override this function if additional checks have to
    be performed before pushing, or call it if they override the push
    command.
    """
    return None
3374 3374
@unfilteredpropertycache
def prepushoutgoinghooks(self):
    """Return a fresh ``util.hooks`` collection for pre-push hooks.

    The registered hooks take a pushop (carrying the repo, remote and
    outgoing attributes) and are called before pushing changesets.
    """
    hooks = util.hooks()
    return hooks
3381 3381
def pushkey(self, namespace, key, old, new):
    """Update *key* in *namespace* from *old* to *new*, running hooks.

    The ``prepushkey`` hook runs first, with the hookargs of the current
    transaction (if any) plus namespace/key/old/new. If it aborts, the
    abort message (and hint, when present) is written to stderr and
    False is returned without pushing.

    Otherwise the result of ``pushkey.push()`` is returned, and the
    ``pushkey`` hook is scheduled through ``self._afterlock``.
    """
    try:
        tr = self.currenttransaction()
        hookargs = {}
        if tr is not None:
            hookargs.update(tr.hookargs)
        hookargs = pycompat.strkwargs(hookargs)
        hookargs['namespace'] = namespace
        hookargs['key'] = key
        hookargs['old'] = old
        hookargs['new'] = new
        self.hook(b'prepushkey', throw=True, **hookargs)
    except error.HookAbort as exc:
        self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
        if exc.hint:
            self.ui.write_err(_(b"(%s)\n") % exc.hint)
        return False
    self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
    ret = pushkey.push(self, namespace, key, old, new)

    # Post-release callbacks are invoked with a "success" boolean (see
    # lock.release), so the callback has to accept one argument even
    # though this hook runs regardless of the outcome.
    def runhook(unused_success):
        self.hook(
            b'pushkey',
            namespace=namespace,
            key=key,
            old=old,
            new=new,
            ret=ret,
        )

    self._afterlock(runhook)
    return ret
3414 3414
def listkeys(self, namespace):
    """Return the pushkey values of *namespace*, running hooks around it.

    ``prelistkeys`` runs first with ``throw=True``; ``listkeys`` runs
    afterwards and receives the values that were read.
    """
    self.hook(b'prelistkeys', throw=True, namespace=namespace)
    self.ui.debug(b'listing keys for "%s"\n' % namespace)

    values = pushkey.list(self, namespace)

    self.hook(b'listkeys', namespace=namespace, values=values)
    return values
3421 3421
def debugwireargs(self, one, two, three=None, four=None, five=None):
    """Echo the arguments back, space separated.

    Used to test argument passing over the wire. The three optional
    arguments are converted with ``pycompat.bytestr`` first.
    """
    optional = tuple(pycompat.bytestr(arg) for arg in (three, four, five))
    return b"%s %s %s %s %s" % ((one, two) + optional)
3431 3431
def savecommitmessage(self, text):
    """Write *text* to ``last-message.txt`` through ``self.vfs``.

    Returns ``self.pathto()`` of the written file's name with the
    repository root prefix (and the separator after it) stripped.
    """
    fp = self.vfs(b'last-message.txt', b'wb')
    try:
        fp.write(text)
    finally:
        # always close the file, even when the write fails
        fp.close()

    prefixlen = len(self.root) + 1
    return self.pathto(fp.name[prefixlen:])
3439 3439
3440 3440
# used to avoid circular references so destructors work
def aftertrans(files):
    """Build a callback that renames each ``(vfs, src, dest)`` in *files*.

    The entries are snapshotted as tuples when this function is called;
    nothing is renamed until the returned callable is invoked.
    """
    renamefiles = [tuple(entry) for entry in files]

    def a():
        for vfs, src, dest in renamefiles:
            # if src and dest refer to a same file, vfs.rename is a no-op,
            # leaving both src and dest on disk. delete dest to make sure
            # the rename couldn't be such a no-op.
            vfs.tryunlink(dest)
            try:
                vfs.rename(src, dest)
            except OSError:
                # journal file does not yet exist
                continue

    return a
3457 3457
3458 3458
def undoname(fn):
    """Map a journal file path to the matching undo file path.

    Only the first occurrence of ``journal`` in the final path component
    is replaced by ``undo``; the directory part is kept as is.
    """
    dirname, basename = os.path.split(fn)
    assert basename.startswith(b'journal')
    undobase = basename.replace(b'journal', b'undo', 1)
    return os.path.join(dirname, undobase)
3463 3463
3464 3464
def instance(ui, path, create, intents=None, createopts=None):
    """Open the local repository at *path*, creating it first if asked.

    *path* is converted with ``util.urllocalpath``. When *create* is
    true, ``createrepository()`` is run with *createopts* before opening.
    """
    localpath = util.urllocalpath(path)
    if create:
        createrepository(ui, localpath, createopts=createopts)

    repo = makelocalrepository(ui, localpath, intents=intents)
    return repo
3471 3471
3472 3472
def islocal(path):
    """Return True unconditionally; *path* is not inspected."""
    return True
3475 3475
3476 3476
def defaultcreateopts(ui, createopts=None):
    """Populate the default creation options for a repository.

    A dictionary of explicitly requested creation options can be passed
    in; it is copied, not modified. A missing ``backend`` key is filled
    from the ``storage.new-repo-backend`` config.
    """
    opts = dict(createopts) if createopts else {}

    if b'backend' in opts:
        return opts

    # experimental config: storage.new-repo-backend
    opts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
    return opts
3490 3490
3491 3491
def newreporequirements(ui, createopts):
    """Determine the set of requirements for a new local repository.

    Extensions can wrap this function to specify custom requirements for
    new repositories.

    Raises ``error.ProgrammingError`` when *createopts* has no
    ``backend`` key and ``error.Abort`` for an unsupported backend or an
    unavailable compression engine.
    """
    # A repo created from a shared repository copies the requirements of
    # its source and only adds the marker for the sharing flavor.
    if b'sharedrepo' in createopts:
        requirements = set(createopts[b'sharedrepo'].requirements)
        requirements.add(
            b'relshared' if createopts.get(b'sharedrelative') else b'shared'
        )
        return requirements

    if b'backend' not in createopts:
        raise error.ProgrammingError(
            b'backend key not present in createopts; '
            b'was defaultcreateopts() called?'
        )

    backend = createopts[b'backend']
    if backend != b'revlogv1':
        raise error.Abort(
            _(
                b'unable to determine repository requirements for '
                b'storage backend: %s'
            )
            % backend
        )

    requirements = {b'revlogv1'}
    if ui.configbool(b'format', b'usestore'):
        requirements.add(b'store')
        if ui.configbool(b'format', b'usefncache'):
            requirements.add(b'fncache')
            if ui.configbool(b'format', b'dotencode'):
                requirements.add(b'dotencode')

    compengine = ui.config(b'format', b'revlog-compression')
    if compengine not in util.compengines:
        raise error.Abort(
            _(
                b'compression engine %s defined by '
                b'format.revlog-compression not available'
            )
            % compengine,
            hint=_(
                b'run "hg debuginstall" to list available '
                b'compression engines'
            ),
        )

    # zlib is the historical default and doesn't need an explicit
    # requirement.
    if compengine == b'zstd':
        requirements.add(b'revlog-compression-zstd')
    elif compengine != b'zlib':
        requirements.add(b'exp-compression-%s' % compengine)

    if scmutil.gdinitconfig(ui):
        requirements.add(b'generaldelta')
        if ui.configbool(b'format', b'sparse-revlog'):
            requirements.add(SPARSEREVLOG_REQUIREMENT)

    # experimental config: format.exp-use-side-data
    if ui.configbool(b'format', b'exp-use-side-data'):
        requirements.add(SIDEDATA_REQUIREMENT)
    # experimental config: format.exp-use-copies-side-data-changeset
    if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
        requirements.add(SIDEDATA_REQUIREMENT)
        requirements.add(COPIESSDC_REQUIREMENT)
    if ui.configbool(b'experimental', b'treemanifest'):
        requirements.add(b'treemanifest')

    revlogv2 = ui.config(b'experimental', b'revlogv2')
    if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
        requirements.remove(b'revlogv1')
        # generaldelta is implied by revlogv2.
        requirements.discard(b'generaldelta')
        requirements.add(REVLOGV2_REQUIREMENT)
    # experimental config: format.internal-phase
    if ui.configbool(b'format', b'internal-phase'):
        requirements.add(b'internal-phase')

    if createopts.get(b'narrowfiles'):
        requirements.add(repository.NARROW_REQUIREMENT)

    if createopts.get(b'lfs'):
        requirements.add(b'lfs')

    if ui.configbool(b'format', b'bookmarks-in-store'):
        requirements.add(bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT)

    return requirements
3587 3587
3588 3588
def filterknowncreateopts(ui, createopts):
    """Filter a dict of repo creation options against the known options.

    Receives a dict of repo creation options and returns a dict holding
    only the options that we don't know how to handle.

    This function is called as part of repository creation. If the
    returned dict contains any items, repository creation will not be
    allowed, as it means there was a request to create a repository with
    options not recognized by loaded code.

    Extensions can wrap this function to filter out creation options
    they know how to handle.
    """
    known = {
        b'backend',
        b'lfs',
        b'narrowfiles',
        b'sharedrepo',
        b'sharedrelative',
        b'shareditems',
        b'shallowfilestore',
    }

    unknown = {}
    for name, value in createopts.items():
        if name not in known:
            unknown[name] = value
    return unknown
3614 3614
3615 3615
def createrepository(ui, path, createopts=None):
    """Create a new repository in a vfs.

    ``path`` path to the new repo's working directory.
    ``createopts`` options for the new repository.

    The following keys for ``createopts`` are recognized:

    backend
       The storage backend to use.
    lfs
       Repository will be created with ``lfs`` requirement. The lfs
       extension will automatically be loaded when the repository is
       accessed.
    narrowfiles
       Set up repository to support narrow file storage.
    sharedrepo
       Repository object from which storage should be shared.
    sharedrelative
       Boolean indicating if the path to the shared repo should be
       stored as relative. By default, the pointer to the "parent" repo
       is stored as an absolute path.
    shareditems
       Set of items to share to the new repository (in addition to
       storage).
    shallowfilestore
       Indicates that storage for files should be shallow (not all
       ancestor revisions are known).

    Raises ``error.Abort`` for unknown creation options or when a
    relative shared path cannot be computed, and ``error.RepoError``
    when ``.hg`` already exists under *path*.
    """
    createopts = defaultcreateopts(ui, createopts=createopts)

    unknownopts = filterknowncreateopts(ui, createopts)
    if not isinstance(unknownopts, dict):
        raise error.ProgrammingError(
            b'filterknowncreateopts() did not return a dict'
        )
    if unknownopts:
        raise error.Abort(
            _(
                b'unable to create repository because of unknown '
                b'creation option: %s'
            )
            % b', '.join(sorted(unknownopts)),
            hint=_(b'is a required extension not loaded?'),
        )

    requirements = newreporequirements(ui, createopts=createopts)

    wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
    hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
    if hgvfs.exists():
        raise error.RepoError(_(b'repository %s already exists') % path)

    # createopts is not modified below, so this can be computed once
    isshared = b'sharedrepo' in createopts

    if isshared:
        sharedpath = createopts[b'sharedrepo'].sharedpath

        if createopts.get(b'sharedrelative'):
            try:
                sharedpath = os.path.relpath(sharedpath, hgvfs.base)
            except (IOError, ValueError) as e:
                # ValueError is raised on Windows if the drive letters
                # differ on each path.
                raise error.Abort(
                    _(b'cannot calculate relative path'),
                    hint=stringutil.forcebytestr(e),
                )

    if not wdirvfs.exists():
        wdirvfs.makedirs()

    hgvfs.makedir(notindexed=True)
    if not isshared:
        hgvfs.mkdir(b'cache')
    hgvfs.mkdir(b'wcache')

    if b'store' in requirements and not isshared:
        hgvfs.mkdir(b'store')

        # We create an invalid changelog outside the store so very old
        # Mercurial versions (which didn't know about the requirements
        # file) encounter an error on reading the changelog. This
        # effectively locks out old clients and prevents them from
        # mucking with a repo in an unknown format.
        #
        # The revlog header has version 2, which won't be recognized by
        # such old clients.
        hgvfs.append(
            b'00changelog.i',
            b'\0\0\0\2 dummy changelog to prevent using the old repo '
            b'layout',
        )

    scmutil.writerequires(hgvfs, requirements)

    # Write out file telling readers where to find the shared store.
    if isshared:
        hgvfs.write(b'sharedpath', sharedpath)

    if createopts.get(b'shareditems'):
        shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
        hgvfs.write(b'shared', shared)
3718 3718
3719 3719
def poisonrepository(repo):
    """Poison a repository instance so it can no longer be used.

    The instance is closed, then its class is swapped for one whose
    attribute lookups all raise ``error.ProgrammingError`` -- except for
    ``close``, which stays callable and does nothing.
    """
    # Perform any cleanup on the instance.
    repo.close()

    # The strategy is to replace the type of the object with one that
    # turns every attribute lookup into an error.
    #
    # close() has to stay available because some constructors of repos
    # call close() on repo references.
    class poisonedrepository(object):
        def __getattribute__(self, item):
            if item != 'close':
                raise error.ProgrammingError(
                    b'repo instances should not be used after unshare'
                )

            return object.__getattribute__(self, item)

        def close(self):
            pass

    # We may have a repoview, which intercepts __setattr__. So be sure
    # we operate at the lowest level possible.
    object.__setattr__(repo, '__class__', poisonedrepository)
@@ -1,444 +1,445 b''
1 1 # lock.py - simple advisory locking scheme for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import contextlib
11 11 import errno
12 12 import os
13 13 import signal
14 14 import socket
15 15 import time
16 16 import warnings
17 17
18 18 from .i18n import _
19 19 from .pycompat import getattr
20 20
21 21 from . import (
22 22 encoding,
23 23 error,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28 from .utils import procutil
29 29
30 30
def _getlockprefix():
    """Return a string which is used to differentiate pid namespaces

    It's useful to detect "dead" processes and remove stale locks with
    confidence. Typically it's just the hostname. On Linux, the inode
    number of ``/proc/self/ns/pid`` is appended when it can be read.
    """
    prefix = encoding.strtolocal(socket.gethostname())
    if not pycompat.sysplatform.startswith(b'linux'):
        return prefix

    try:
        prefix += b'/%x' % os.stat(b'/proc/self/ns/pid').st_ino
    except OSError as ex:
        # a missing or unreadable namespace entry just means the plain
        # hostname is used; anything else is unexpected
        if ex.errno not in (errno.ENOENT, errno.EACCES, errno.ENOTDIR):
            raise
    return prefix
46 46
47 47
@contextlib.contextmanager
def _delayedinterrupt():
    """Block signal interrupt while doing something critical

    This makes sure that the code block wrapped by this context manager
    won't be interrupted. Signals caught while the block runs are
    recorded, and the first one is re-raised once the block is done.

    For Windows developers: It appears not possible to guard time.sleep()
    from CTRL_C_EVENT, so please don't use time.sleep() to test if this is
    working.
    """
    assertedsigs = []
    blocked = False
    orighandlers = {}

    def raiseinterrupt(num):
        sigint = getattr(signal, 'SIGINT', None)
        if num == sigint or num == getattr(signal, 'CTRL_C_EVENT', None):
            raise KeyboardInterrupt
        raise error.SignalInterrupt

    def catchterm(num, frame):
        if not blocked:
            raiseinterrupt(num)
        else:
            # remember the signal; it is re-raised after the block
            assertedsigs.append(num)

    signames = [
        b'CTRL_C_EVENT',
        b'SIGINT',
        b'SIGBREAK',
        b'SIGHUP',
        b'SIGTERM',
    ]

    try:
        # save handlers first so they can be restored even if a setup is
        # interrupted between signal.signal() and orighandlers[] =.
        for name in signames:
            num = getattr(signal, name, None)
            if num and num not in orighandlers:
                orighandlers[num] = signal.getsignal(num)
        try:
            for num in orighandlers:
                signal.signal(num, catchterm)
        except ValueError:
            pass  # in a thread? no luck

        blocked = True
        yield
    finally:
        # no simple way to reliably restore all signal handlers because
        # any loops, recursive function calls, except blocks, etc. can be
        # interrupted. so instead, make catchterm() raise interrupt.
        blocked = False
        try:
            for num, handler in orighandlers.items():
                signal.signal(num, handler)
        except ValueError:
            pass  # in a thread?

    # re-raise interrupt exception if any, which may be shadowed by a new
    # interrupt occurred while re-raising the first one
    if assertedsigs:
        raiseinterrupt(assertedsigs[0])
114 114
def trylock(ui, vfs, lockname, timeout, warntimeout, *args, **kwargs):
    """Return an acquired lock or raise a LockHeld exception.

    This function is responsible for issuing warnings and/or debug
    messages about the held lock while trying to acquire it.

    Acquisition is retried once per second. When the lock is still held
    after ``timeout`` retries, ``error.LockHeld`` is raised with
    ``errno.ETIMEDOUT``. ``warntimeout`` is the number of retries after
    which the "waiting for lock" message goes to ``ui.warn``; the message
    goes to ``ui.debug`` on the first failure when both ``timeout`` and
    ``warntimeout`` are set. Extra positional and keyword arguments are
    passed to the ``lock`` constructor.

    The number of retries is stored on the returned lock as ``delay``,
    and the lock's ``acquirefn`` (if any) is called before returning.
    """

    def printwarning(printer, locker):
        """issue the usual "waiting on lock" message through any channel"""
        # l.desc is converted with bytestr in both branches: bytes
        # %-formatting rejects non-bytes values such as a desc of None.
        desc = pycompat.bytestr(l.desc)
        # show more details for new-style locks
        if b':' in locker:
            host, pid = locker.split(b":", 1)
            msg = _(
                b"waiting for lock on %s held by process %r on host %r\n"
            ) % (
                desc,
                pycompat.bytestr(pid),
                pycompat.bytestr(host),
            )
        else:
            msg = _(b"waiting for lock on %s held by %r\n") % (
                desc,
                pycompat.bytestr(locker),
            )
        printer(msg)

    l = lock(vfs, lockname, 0, *args, dolock=False, **kwargs)

    # retry counts at which the debug / warning messages are emitted;
    # -1 never matches and therefore disables the message
    debugidx = 0 if (warntimeout and timeout) else -1
    warningidx = 0
    if not timeout:
        warningidx = -1
    elif warntimeout:
        warningidx = warntimeout

    delay = 0
    while True:
        try:
            l._trylock()
            break
        except error.LockHeld as inst:
            if delay == debugidx:
                printwarning(ui.debug, inst.locker)
            if delay == warningidx:
                printwarning(ui.warn, inst.locker)
            if timeout <= delay:
                raise error.LockHeld(
                    errno.ETIMEDOUT, inst.filename, l.desc, inst.locker
                )
            time.sleep(1)
            delay += 1

    l.delay = delay
    if l.delay:
        if 0 <= warningidx <= l.delay:
            ui.warn(_(b"got lock after %d seconds\n") % l.delay)
        else:
            ui.debug(b"got lock after %d seconds\n" % l.delay)
    if l.acquirefn:
        l.acquirefn()
    return l
175 175
176 176
class lock(object):
    '''An advisory lock held by one process to control access to a set
    of files.  Non-cooperating processes or incorrectly written scripts
    can ignore Mercurial's locking scheme and stomp all over the
    repository, so don't do that.

    Typically used via localrepository.lock() to lock the repository
    store (.hg/store/) or localrepository.wlock() to lock everything
    else under .hg/.

    Callables appended to ``postrelease`` are called with one boolean
    argument, ``success``, after the final release.'''

    # lock is symlink on platforms that support it, file on others.

    # symlink is used because create of directory entry and contents
    # are atomic even over nfs.

    # old-style lock: symlink to pid
    # new-style lock: symlink to hostname:pid

    _host = None

    def __init__(
        self,
        vfs,
        fname,
        timeout=-1,
        releasefn=None,
        acquirefn=None,
        desc=None,
        inheritchecker=None,
        parentlock=None,
        signalsafe=True,
        dolock=True,
    ):
        self.vfs = vfs
        self.f = fname
        # nesting count; 0 means the lock is not held by this object
        self.held = 0
        self.timeout = timeout
        self.releasefn = releasefn
        self.acquirefn = acquirefn
        self.desc = desc
        self._inheritchecker = inheritchecker
        self.parentlock = parentlock
        self._parentheld = False
        self._inherited = False
        if signalsafe:
            self._maybedelayedinterrupt = _delayedinterrupt
        else:
            self._maybedelayedinterrupt = util.nullcontextmanager
        # callbacks run as callback(success) after the final release
        self.postrelease = []
        self.pid = self._getpid()
        if dolock:
            self.delay = self.lock()
            if self.acquirefn:
                self.acquirefn()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # the block succeeded only if no exception is propagating
        success = all(a is None for a in (exc_type, exc_value, exc_tb))
        self.release(success=success)

    def __del__(self):
        if self.held:
            warnings.warn(
                "use lock.release instead of del lock",
                category=DeprecationWarning,
                stacklevel=2,
            )

            # ensure the lock will be removed
            # even if recursive locking did occur
            self.held = 1

        self.release()

    def _getpid(self):
        # wrapper around procutil.getpid() to make testing easier
        return procutil.getpid()

    def lock(self):
        """Acquire the lock, retrying once per second.

        Returns the number of retries that were needed. A negative
        ``self.timeout`` retries forever; otherwise ``error.LockHeld``
        with ``errno.ETIMEDOUT`` is raised once the retries run out.
        """
        timeout = self.timeout
        while True:
            try:
                self._trylock()
                return self.timeout - timeout
            except error.LockHeld as inst:
                if timeout != 0:
                    time.sleep(1)
                    if timeout > 0:
                        timeout -= 1
                    continue
                raise error.LockHeld(
                    errno.ETIMEDOUT, inst.filename, self.desc, inst.locker
                )

    def _trylock(self):
        """Try to take the lock once, without waiting.

        Taking a lock that is already held by this object just bumps the
        nesting count. Raises ``error.LockHeld`` when somebody else holds
        the lock and ``error.LockUnavailable`` when the lock file cannot
        be created for any reason other than already existing.
        """
        if self.held:
            self.held += 1
            return
        if lock._host is None:
            lock._host = _getlockprefix()
        lockname = b'%s:%d' % (lock._host, self.pid)
        retry = 5
        while not self.held and retry:
            retry -= 1
            try:
                with self._maybedelayedinterrupt():
                    self.vfs.makelock(lockname, self.f)
                    self.held = 1
            except (OSError, IOError) as why:
                if why.errno == errno.EEXIST:
                    locker = self._readlock()
                    if locker is None:
                        # the lock vanished in the meantime; try again
                        continue

                    # special case where a parent process holds the lock -- this
                    # is different from the pid being different because we do
                    # want the unlock and postrelease functions to be called,
                    # but the lockfile to not be removed.
                    if locker == self.parentlock:
                        self._parentheld = True
                        self.held = 1
                        return
                    locker = self._testlock(locker)
                    if locker is not None:
                        raise error.LockHeld(
                            errno.EAGAIN,
                            self.vfs.join(self.f),
                            self.desc,
                            locker,
                        )
                else:
                    raise error.LockUnavailable(
                        why.errno, why.strerror, why.filename, self.desc
                    )

        if not self.held:
            # use empty locker to mean "busy for frequent lock/unlock
            # by many processes"
            raise error.LockHeld(
                errno.EAGAIN, self.vfs.join(self.f), self.desc, b""
            )

    def _readlock(self):
        """read lock and return its value

        Returns None if no lock exists, pid for old-style locks, and host:pid
        for new-style locks.
        """
        try:
            return self.vfs.readlock(self.f)
        except (OSError, IOError) as why:
            if why.errno == errno.ENOENT:
                return None
            raise

    def _lockshouldbebroken(self, locker):
        """Return True if *locker* names a dead process on this host.

        Only new-style ``host:pid`` values whose host equals
        ``lock._host`` and whose pid fails ``procutil.testpid`` qualify.
        """
        if locker is None:
            return False
        try:
            host, pid = locker.split(b":", 1)
        except ValueError:
            return False
        if host != lock._host:
            return False
        try:
            pid = int(pid)
        except ValueError:
            return False
        if procutil.testpid(pid):
            return False
        return True

    def _testlock(self, locker):
        """Break the lock if *locker* is dead; return the live locker.

        Returns None when the stale lock file was removed.
        """
        if not self._lockshouldbebroken(locker):
            return locker

        # if locker dead, break lock.  must do this with another lock
        # held, or can race and break valid lock.
        try:
            with lock(self.vfs, self.f + b'.break', timeout=0):
                locker = self._readlock()
                if not self._lockshouldbebroken(locker):
                    return locker
                self.vfs.unlink(self.f)
        except error.LockError:
            return locker

    def testlock(self):
        """return id of locker if lock is valid, else None.

        If old-style lock, we cannot tell what machine locker is on.
        with new-style lock, if locker is on this machine, we can
        see if locker is alive.  If locker is on this machine but
        not alive, we can safely break lock.

        The lock file is only deleted when None is returned.

        """
        locker = self._readlock()
        return self._testlock(locker)

    @contextlib.contextmanager
    def inherit(self):
        """context for the lock to be inherited by a Mercurial subprocess.

        Yields a string that will be recognized by the lock in the subprocess.
        Communicating this string to the subprocess needs to be done separately
        -- typically by an environment variable.
        """
        if not self.held:
            raise error.LockInheritanceContractViolation(
                b'inherit can only be called while lock is held'
            )
        if self._inherited:
            raise error.LockInheritanceContractViolation(
                b'inherit cannot be called while lock is already inherited'
            )
        if self._inheritchecker is not None:
            self._inheritchecker()
        if self.releasefn:
            self.releasefn()
        if self._parentheld:
            lockname = self.parentlock
        else:
            lockname = b'%s:%d' % (lock._host, self.pid)
        self._inherited = True
        try:
            yield lockname
        finally:
            if self.acquirefn:
                self.acquirefn()
            self._inherited = False

    def release(self, success=True):
        """release the lock and execute callback function if any

        If the lock has been acquired multiple times, the actual release is
        delayed to the last release call.

        ``success`` is passed to every ``postrelease`` callback;
        ``__exit__`` passes False when an exception is propagating out
        of the ``with`` block.
        """
        if self.held > 1:
            self.held -= 1
        elif self.held == 1:
            self.held = 0
            if self._getpid() != self.pid:
                # we forked, and are not the parent
                return
            try:
                if self.releasefn:
                    self.releasefn()
            finally:
                if not self._parentheld:
                    try:
                        self.vfs.unlink(self.f)
                    except OSError:
                        pass
            # The postrelease functions typically assume the lock is not held
            # at all.
            if not self._parentheld:
                for callback in self.postrelease:
                    callback(success)
                # Prevent double usage and help clear cycles.
                self.postrelease = None
440 441
def release(*locks):
    """Call ``release()`` on each given lock, in order, skipping None."""
    for held in locks:
        if held is None:
            continue
        held.release()
@@ -1,2269 +1,2273 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 error,
25 25 mdiff,
26 26 pathutil,
27 27 policy,
28 28 pycompat,
29 29 revlog,
30 30 util,
31 31 )
32 32 from .interfaces import (
33 33 repository,
34 34 util as interfaceutil,
35 35 )
36 36
37 37 parsers = policy.importmod('parsers')
38 38 propertycache = util.propertycache
39 39
40 40 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
41 41 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
42 42
43 43
44 44 def _parse(data):
45 45 # This method does a little bit of excessive-looking
46 46 # precondition checking. This is so that the behavior of this
47 47 # class exactly matches its C counterpart to try and help
48 48 # prevent surprise breakage for anyone that develops against
49 49 # the pure version.
50 50 if data and data[-1:] != b'\n':
51 51 raise ValueError(b'Manifest did not end in a newline.')
52 52 prev = None
53 53 for l in data.splitlines():
54 54 if prev is not None and prev > l:
55 55 raise ValueError(b'Manifest lines not in sorted order.')
56 56 prev = l
57 57 f, n = l.split(b'\0')
58 58 if len(n) > 40:
59 59 yield f, bin(n[:40]), n[40:]
60 60 else:
61 61 yield f, bin(n), b''
62 62
63 63
def _text(it):
    """Serialize ``(filename, node, flags)`` entries to manifest text.

    Every entry becomes one ``filename NUL hexnode flags LF`` line. The
    collected filenames are passed to ``_checkforbidden`` before the joined
    text is returned.
    """
    names = []
    chunks = []
    for name, node, flags in it:
        names.append(name)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        chunks.append(b"%s\0%s%s\n" % (name, hex(node), flags))

    _checkforbidden(names)
    return b''.join(chunks)
75 75
76 76
class lazymanifestiter(object):
    """Iterator yielding the filename of each entry of a lazy manifest.

    Entries are fetched by index through ``lm._get``; iteration stops when
    that call raises IndexError.
    """

    def __init__(self, lm):
        self.pos = 0
        self.lm = lm

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        self.pos += 1
        if pos == -1:
            # ``data`` is an entry tuple whose first element is the name
            return data[0]
        # ``data`` is the raw text: the name runs from ``pos`` to the NUL
        return data[pos : data.find(b'\x00', pos)]

    __next__ = next
98 98
99 99
class lazymanifestiterentries(object):
    """Iterator yielding ``(filename, hash, flags)`` for a lazy manifest.

    Entries are fetched by index through ``lm._get``; iteration stops when
    that call raises IndexError.
    """

    def __init__(self, lm):
        self.lm = lm
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        index = self.pos
        try:
            data, pos = self.lm._get(index)
        except IndexError:
            raise StopIteration
        if pos == -1:
            # ``data`` is already a complete entry tuple
            self.pos = index + 1
            return data
        # ``data`` is the raw text: decode the line that starts at ``pos``
        zeropos = data.find(b'\x00', pos)
        hashval = unhexlify(data, self.lm.extrainfo[index], zeropos + 1, 40)
        flags = self.lm._getflags(data, index, zeropos)
        self.pos = index + 1
        return (data[pos:zeropos], hashval, flags)

    __next__ = next
123 123
124 124
def unhexlify(data, extra, pos, length):
    """Decode ``length`` hex characters of ``data`` starting at ``pos``.

    When ``extra`` is non-zero its low 8 bits are appended to the decoded
    hash as one additional byte.

    Returns the binary hash as a byte string.
    """
    node = bin(data[pos : pos + length])
    if extra:
        # struct.pack gives a one-byte byte string on Python 2 and 3 alike;
        # chr() returns text on Python 3 and cannot be added to bytes.
        node += struct.pack(b'>B', extra & 0xFF)
    return node
130 130
131 131
132 132 def _cmp(a, b):
133 133 return (a > b) - (a < b)
134 134
135 135
136 136 class _lazymanifest(object):
137 137 """A pure python manifest backed by a byte string. It is supplimented with
138 138 internal lists as it is modified, until it is compacted back to a pure byte
139 139 string.
140 140
141 141 ``data`` is the initial manifest data.
142 142
143 143 ``positions`` is a list of offsets, one per manifest entry. Positive
144 144 values are offsets into ``data``, negative values are offsets into the
145 145 ``extradata`` list. When an entry is removed, its entry is dropped from
146 146 ``positions``. The values are encoded such that when walking the list and
147 147 indexing into ``data`` or ``extradata`` as appropriate, the entries are
148 148 sorted by filename.
149 149
150 150 ``extradata`` is a list of (key, hash, flags) for entries that were added or
151 151 modified since the manifest was created or compacted.
152 152 """
153 153
154 154 def __init__(
155 155 self,
156 156 data,
157 157 positions=None,
158 158 extrainfo=None,
159 159 extradata=None,
160 160 hasremovals=False,
161 161 ):
162 162 if positions is None:
163 163 self.positions = self.findlines(data)
164 164 self.extrainfo = [0] * len(self.positions)
165 165 self.data = data
166 166 self.extradata = []
167 167 self.hasremovals = False
168 168 else:
169 169 self.positions = positions[:]
170 170 self.extrainfo = extrainfo[:]
171 171 self.extradata = extradata[:]
172 172 self.data = data
173 173 self.hasremovals = hasremovals
174 174
175 175 def findlines(self, data):
176 176 if not data:
177 177 return []
178 178 pos = data.find(b"\n")
179 179 if pos == -1 or data[-1:] != b'\n':
180 180 raise ValueError(b"Manifest did not end in a newline.")
181 181 positions = [0]
182 182 prev = data[: data.find(b'\x00')]
183 183 while pos < len(data) - 1 and pos != -1:
184 184 positions.append(pos + 1)
185 185 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
186 186 if nexts < prev:
187 187 raise ValueError(b"Manifest lines not in sorted order.")
188 188 prev = nexts
189 189 pos = data.find(b"\n", pos + 1)
190 190 return positions
191 191
192 192 def _get(self, index):
193 193 # get the position encoded in pos:
194 194 # positive number is an index in 'data'
195 195 # negative number is in extrapieces
196 196 pos = self.positions[index]
197 197 if pos >= 0:
198 198 return self.data, pos
199 199 return self.extradata[-pos - 1], -1
200 200
201 201 def _getkey(self, pos):
202 202 if pos >= 0:
203 203 return self.data[pos : self.data.find(b'\x00', pos + 1)]
204 204 return self.extradata[-pos - 1][0]
205 205
206 206 def bsearch(self, key):
207 207 first = 0
208 208 last = len(self.positions) - 1
209 209
210 210 while first <= last:
211 211 midpoint = (first + last) // 2
212 212 nextpos = self.positions[midpoint]
213 213 candidate = self._getkey(nextpos)
214 214 r = _cmp(key, candidate)
215 215 if r == 0:
216 216 return midpoint
217 217 else:
218 218 if r < 0:
219 219 last = midpoint - 1
220 220 else:
221 221 first = midpoint + 1
222 222 return -1
223 223
224 224 def bsearch2(self, key):
225 225 # same as the above, but will always return the position
226 226 # done for performance reasons
227 227 first = 0
228 228 last = len(self.positions) - 1
229 229
230 230 while first <= last:
231 231 midpoint = (first + last) // 2
232 232 nextpos = self.positions[midpoint]
233 233 candidate = self._getkey(nextpos)
234 234 r = _cmp(key, candidate)
235 235 if r == 0:
236 236 return (midpoint, True)
237 237 else:
238 238 if r < 0:
239 239 last = midpoint - 1
240 240 else:
241 241 first = midpoint + 1
242 242 return (first, False)
243 243
244 244 def __contains__(self, key):
245 245 return self.bsearch(key) != -1
246 246
247 247 def _getflags(self, data, needle, pos):
248 248 start = pos + 41
249 249 end = data.find(b"\n", start)
250 250 if end == -1:
251 251 end = len(data) - 1
252 252 if start == end:
253 253 return b''
254 254 return self.data[start:end]
255 255
256 256 def __getitem__(self, key):
257 257 if not isinstance(key, bytes):
258 258 raise TypeError(b"getitem: manifest keys must be a bytes.")
259 259 needle = self.bsearch(key)
260 260 if needle == -1:
261 261 raise KeyError
262 262 data, pos = self._get(needle)
263 263 if pos == -1:
264 264 return (data[1], data[2])
265 265 zeropos = data.find(b'\x00', pos)
266 266 assert 0 <= needle <= len(self.positions)
267 267 assert len(self.extrainfo) == len(self.positions)
268 268 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
269 269 flags = self._getflags(data, needle, zeropos)
270 270 return (hashval, flags)
271 271
272 272 def __delitem__(self, key):
273 273 needle, found = self.bsearch2(key)
274 274 if not found:
275 275 raise KeyError
276 276 cur = self.positions[needle]
277 277 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
278 278 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
279 279 if cur >= 0:
280 280 # This does NOT unsort the list as far as the search functions are
281 281 # concerned, as they only examine lines mapped by self.positions.
282 282 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
283 283 self.hasremovals = True
284 284
285 285 def __setitem__(self, key, value):
286 286 if not isinstance(key, bytes):
287 287 raise TypeError(b"setitem: manifest keys must be a byte string.")
288 288 if not isinstance(value, tuple) or len(value) != 2:
289 289 raise TypeError(
290 290 b"Manifest values must be a tuple of (node, flags)."
291 291 )
292 292 hashval = value[0]
293 293 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
294 294 raise TypeError(b"node must be a 20-byte byte string")
295 295 flags = value[1]
296 296 if len(hashval) == 22:
297 297 hashval = hashval[:-1]
298 298 if not isinstance(flags, bytes) or len(flags) > 1:
299 299 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
300 300 needle, found = self.bsearch2(key)
301 301 if found:
302 302 # put the item
303 303 pos = self.positions[needle]
304 304 if pos < 0:
305 305 self.extradata[-pos - 1] = (key, hashval, value[1])
306 306 else:
307 307 # just don't bother
308 308 self.extradata.append((key, hashval, value[1]))
309 309 self.positions[needle] = -len(self.extradata)
310 310 else:
311 311 # not found, put it in with extra positions
312 312 self.extradata.append((key, hashval, value[1]))
313 313 self.positions = (
314 314 self.positions[:needle]
315 315 + [-len(self.extradata)]
316 316 + self.positions[needle:]
317 317 )
318 318 self.extrainfo = (
319 319 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
320 320 )
321 321
322 322 def copy(self):
323 323 # XXX call _compact like in C?
324 324 return _lazymanifest(
325 325 self.data,
326 326 self.positions,
327 327 self.extrainfo,
328 328 self.extradata,
329 329 self.hasremovals,
330 330 )
331 331
332 332 def _compact(self):
333 333 # hopefully not called TOO often
334 334 if len(self.extradata) == 0 and not self.hasremovals:
335 335 return
336 336 l = []
337 337 i = 0
338 338 offset = 0
339 339 self.extrainfo = [0] * len(self.positions)
340 340 while i < len(self.positions):
341 341 if self.positions[i] >= 0:
342 342 cur = self.positions[i]
343 343 last_cut = cur
344 344
345 345 # Collect all contiguous entries in the buffer at the current
346 346 # offset, breaking out only for added/modified items held in
347 347 # extradata, or a deleted line prior to the next position.
348 348 while True:
349 349 self.positions[i] = offset
350 350 i += 1
351 351 if i == len(self.positions) or self.positions[i] < 0:
352 352 break
353 353
354 354 # A removed file has no positions[] entry, but does have an
355 355 # overwritten first byte. Break out and find the end of the
356 356 # current good entry/entries if there is a removed file
357 357 # before the next position.
358 358 if (
359 359 self.hasremovals
360 360 and self.data.find(b'\n\x00', cur, self.positions[i])
361 361 != -1
362 362 ):
363 363 break
364 364
365 365 offset += self.positions[i] - cur
366 366 cur = self.positions[i]
367 367 end_cut = self.data.find(b'\n', cur)
368 368 if end_cut != -1:
369 369 end_cut += 1
370 370 offset += end_cut - cur
371 371 l.append(self.data[last_cut:end_cut])
372 372 else:
373 373 while i < len(self.positions) and self.positions[i] < 0:
374 374 cur = self.positions[i]
375 375 t = self.extradata[-cur - 1]
376 376 l.append(self._pack(t))
377 377 self.positions[i] = offset
378 378 if len(t[1]) > 20:
379 379 self.extrainfo[i] = ord(t[1][21])
380 380 offset += len(l[-1])
381 381 i += 1
382 382 self.data = b''.join(l)
383 383 self.hasremovals = False
384 384 self.extradata = []
385 385
386 386 def _pack(self, d):
387 387 return d[0] + b'\x00' + hex(d[1][:20]) + d[2] + b'\n'
388 388
389 389 def text(self):
390 390 self._compact()
391 391 return self.data
392 392
393 393 def diff(self, m2, clean=False):
394 394 '''Finds changes between the current manifest and m2.'''
395 395 # XXX think whether efficiency matters here
396 396 diff = {}
397 397
398 398 for fn, e1, flags in self.iterentries():
399 399 if fn not in m2:
400 400 diff[fn] = (e1, flags), (None, b'')
401 401 else:
402 402 e2 = m2[fn]
403 403 if (e1, flags) != e2:
404 404 diff[fn] = (e1, flags), e2
405 405 elif clean:
406 406 diff[fn] = None
407 407
408 408 for fn, e2, flags in m2.iterentries():
409 409 if fn not in self:
410 410 diff[fn] = (None, b''), (e2, flags)
411 411
412 412 return diff
413 413
414 414 def iterentries(self):
415 415 return lazymanifestiterentries(self)
416 416
417 417 def iterkeys(self):
418 418 return lazymanifestiter(self)
419 419
420 420 def __iter__(self):
421 421 return lazymanifestiter(self)
422 422
423 423 def __len__(self):
424 424 return len(self.positions)
425 425
426 426 def filtercopy(self, filterfn):
427 427 # XXX should be optimized
428 428 c = _lazymanifest(b'')
429 429 for f, n, fl in self.iterentries():
430 430 if filterfn(f):
431 431 c[f] = n, fl
432 432 return c
433 433
434 434
try:
    # Rebind the name to the ``lazymanifest`` provided by the ``parsers``
    # module (loaded through policy.importmod above) when it has one;
    # otherwise the pure-Python class defined above stays in use.
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    pass
439 439
440 440
@interfaceutil.implementer(repository.imanifestdict)
class manifestdict(object):
    """Flat manifest mapping filenames to nodes, with per-file flags.

    All storage is delegated to a ``_lazymanifest`` kept in ``self._lm``.
    """

    def __init__(self, data=b''):
        self._lm = _lazymanifest(data)

    def __getitem__(self, key):
        return self._lm[key][0]

    def find(self, key):
        """Return the ``(node, flags)`` pair stored for ``key``."""
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __nonzero__(self):
        # nonzero is covered by the __len__ function, but implementing it here
        # makes it easier for extensions to override.
        return len(self._lm) != 0

    __bool__ = __nonzero__

    def __setitem__(self, key, node):
        # keep whatever flags the file already has (none for a new file)
        self._lm[key] = node, self.flags(key, b'')

    def __contains__(self, key):
        if key is None:
            return False
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return self._lm.__iter__()

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        return list(self.iterkeys())

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match:
            m1 = self.matches(match)
            m2 = m2.matches(match)
            return m1.filesnotin(m2)
        diff = self.diff(m2)
        files = set(
            filepath
            for filepath, hashflags in pycompat.iteritems(diff)
            if hashflags[1][0] is None
        )
        return files

    @propertycache
    def _dirs(self):
        return pathutil.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        '''Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files.'''
        files = match.files()
        return len(files) < 100 and (
            match.isexact()
            or (match.prefix() and all(fn in self for fn in files))
        )

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                # An exact matcher may list files that are not tracked
                # here; skip them, exactly as matches() does on its own
                # fast path.
                if fn in self:
                    yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            m = manifestdict()
            lm = self._lm
            for fn in match.files():
                if fn in lm:
                    m._lm[fn] = lm[fn]
            return m

        m = manifestdict()
        m._lm = self._lm.filtercopy(match)
        return m

    def diff(self, m2, match=None, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          match: optional matcher; when given, both manifests are filtered
            through it before being compared.
          clean: if true, include files unchanged between these manifests
            with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        if match:
            m1 = self.matches(match)
            m2 = m2.matches(match)
            return m1.diff(m2, clean=clean)
        return self._lm.diff(m2._lm, clean)

    def setflag(self, key, flag):
        self._lm[key] = self[key], flag

    def get(self, key, default=None):
        try:
            return self._lm[key][0]
        except KeyError:
            return default

    def flags(self, key, default=b''):
        try:
            return self._lm[key][1]
        except KeyError:
            return default

    def copy(self):
        c = manifestdict()
        c._lm = self._lm.copy()
        return c

    def items(self):
        return (x[:2] for x in self._lm.iterentries())

    def iteritems(self):
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self):
        return self._lm.iterentries()

    def text(self):
        # most likely uses native version
        return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as a bytearray and a list of changes
        relative to that text, compute a delta that can be used by revlog.

        ``changes`` is iterated as ``(filename, todelete)`` pairs.
        Returns ``(arraytext, deltatext)``.
        """
        delta = []
        dstart = None
        dend = None
        dline = [b""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # start/end delimit the existing line for f, or are both
                # the insertion point when f is not in the base text
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = b"%s\0%s%s\n" % (f, hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                            _(b"failed to remove %s from manifest") % f
                        )
                    l = b""
                if dstart is not None and dstart <= start and dend >= start:
                    # touches the pending delta chunk: extend it
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    # flush the pending chunk and start a new one
                    if dstart is not None:
                        delta.append([dstart, dend, b"".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, b"".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = bytearray(self.text())
            deltatext = mdiff.textdiff(
                util.buffer(base), util.buffer(arraytext)
            )

        return arraytext, deltatext
676 676
677 677
678 678 def _msearch(m, s, lo=0, hi=None):
679 679 '''return a tuple (start, end) that says where to find s within m.
680 680
681 681 If the string is found m[start:end] are the line containing
682 682 that string. If start == end the string was not found and
683 683 they indicate the proper sorted insertion point.
684 684
685 685 m should be a buffer, a memoryview or a byte string.
686 686 s is a byte string'''
687 687
688 688 def advance(i, c):
689 689 while i < lenm and m[i : i + 1] != c:
690 690 i += 1
691 691 return i
692 692
693 693 if not s:
694 694 return (lo, lo)
695 695 lenm = len(m)
696 696 if not hi:
697 697 hi = lenm
698 698 while lo < hi:
699 699 mid = (lo + hi) // 2
700 700 start = mid
701 701 while start > 0 and m[start - 1 : start] != b'\n':
702 702 start -= 1
703 703 end = advance(start, b'\0')
704 704 if bytes(m[start:end]) < s:
705 705 # we know that after the null there are 40 bytes of sha1
706 706 # this translates to the bisect lo = mid + 1
707 707 lo = advance(end + 40, b'\n') + 1
708 708 else:
709 709 # this translates to the bisect hi = mid
710 710 hi = start
711 711 end = advance(lo, b'\0')
712 712 found = m[lo:end]
713 713 if s == found:
714 714 # we know that after the null there are 40 bytes of sha1
715 715 end = advance(end + 40, b'\n')
716 716 return (lo, end + 1)
717 717 else:
718 718 return (lo, lo)
719 719
720 720
721 721 def _checkforbidden(l):
722 722 """Check filenames for illegal characters."""
723 723 for f in l:
724 724 if b'\n' in f or b'\r' in f:
725 725 raise error.StorageError(
726 726 _(b"'\\n' and '\\r' disallowed in filenames: %r")
727 727 % pycompat.bytestr(f)
728 728 )
729 729
730 730
731 731 # apply the changes collected during the bisect loop to our addlist
732 732 # return a delta suitable for addrevision
733 733 def _addlistdelta(addlist, x):
734 734 # for large addlist arrays, building a new array is cheaper
735 735 # than repeatedly modifying the existing one
736 736 currentposition = 0
737 737 newaddlist = bytearray()
738 738
739 739 for start, end, content in x:
740 740 newaddlist += addlist[currentposition:start]
741 741 if content:
742 742 newaddlist += bytearray(content)
743 743
744 744 currentposition = end
745 745
746 746 newaddlist += addlist[currentposition:]
747 747
748 748 deltatext = b"".join(
749 749 struct.pack(b">lll", start, end, len(content)) + content
750 750 for start, end, content in x
751 751 )
752 752 return deltatext, newaddlist
753 753
754 754
755 755 def _splittopdir(f):
756 756 if b'/' in f:
757 757 dir, subpath = f.split(b'/', 1)
758 758 return dir + b'/', subpath
759 759 else:
760 760 return b'', f
761 761
762 762
# Shared do-nothing callable. treemanifest stores it in its _loadfunc and
# _copyfunc slots to mean "nothing pending" and tests for it with ``is``,
# so it has to stay a single module-level object.
_noop = lambda s: None
764 764
765 765
766 766 class treemanifest(object):
def __init__(self, dir=b'', text=b''):
    """Create an empty tree for directory ``dir``.

    When ``text`` is non-empty it is parsed via ``self.parse`` and the
    instance is marked dirty. The subtree reader handed to ``parse``
    always raises AssertionError.
    """
    self._dir = dir
    self._node = nullid
    self._loadfunc = _noop
    self._copyfunc = _noop
    self._dirty = False
    self._dirs = {}
    self._lazydirs = {}
    # Using _lazymanifest here is a little slower than plain old dicts
    self._files = {}
    self._flags = {}
    if not text:
        return

    def rejectsubtree(subdir, subm):
        raise AssertionError(
            b'treemanifest constructor only accepts flat manifests'
        )

    self.parse(text, rejectsubtree)
    self._dirty = True  # Mark flat manifest dirty after parsing
787 787
788 788 def _subpath(self, path):
789 789 return self._dir + path
790 790
def _loadalllazy(self):
    """Read every pending entry of ``_lazydirs`` into ``_dirs``.

    Each entry is ``(path, node, readsubtree, docopy)``; the subtree that
    ``readsubtree(path, node)`` returns is copied first when ``docopy`` is
    set. ``_lazydirs`` is emptied afterwards.
    """
    loaded = self._dirs
    for name, (path, node, readsubtree, docopy) in pycompat.iteritems(
        self._lazydirs
    ):
        subtree = readsubtree(path, node)
        loaded[name] = subtree.copy() if docopy else subtree
    self._lazydirs = {}
801 801
802 802 def _loadlazy(self, d):
803 803 v = self._lazydirs.get(d)
804 804 if v:
805 805 path, node, readsubtree, docopy = v
806 806 if docopy:
807 807 self._dirs[d] = readsubtree(path, node).copy()
808 808 else:
809 809 self._dirs[d] = readsubtree(path, node)
810 810 del self._lazydirs[d]
811 811
812 812 def _loadchildrensetlazy(self, visit):
813 813 if not visit:
814 814 return None
815 815 if visit == b'all' or visit == b'this':
816 816 self._loadalllazy()
817 817 return None
818 818
819 819 loadlazy = self._loadlazy
820 820 for k in visit:
821 821 loadlazy(k + b'/')
822 822 return visit
823 823
def _loaddifflazy(self, t1, t2):
    """load items in t1 and t2 if they're needed for diffing.

    The criteria currently is:
    - if it's not present in _lazydirs in either t1 or t2, load it in the
      other (it may already be loaded or it may not exist, doesn't matter)
    - if it's present in _lazydirs in both, compare the nodeid; if it
      differs, load it in both
    """
    lazy1 = t1._lazydirs
    lazy2 = t2._lazydirs
    # Decide everything first: loading mutates the _lazydirs dicts.
    pending = []
    for name, entry1 in pycompat.iteritems(lazy1):
        entry2 = lazy2.get(name)
        if entry2 and entry2[1] == entry1[1]:
            # lazy on both sides with the same node: nothing to diff
            continue
        pending.append(name)
    pending.extend(name for name in lazy2 if name not in lazy1)

    for name in pending:
        t1._loadlazy(name)
        t2._loadlazy(name)
845 845
def __len__(self):
    """Return the number of files in this tree and all of its subtrees.

    This loads every lazy subtree.
    """
    self._load()
    nfiles = len(self._files)
    self._loadalllazy()
    return nfiles + sum(m.__len__() for m in self._dirs.values())
853 853
854 854 def __nonzero__(self):
855 855 # Faster than "__len() != 0" since it avoids loading sub-manifests
856 856 return not self._isempty()
857 857
858 858 __bool__ = __nonzero__
859 859
def _isempty(self):
    """Return True when neither this tree nor any subtree holds a file."""
    self._load()  # for consistency; already loaded by all callers
    # See if we can skip loading everything.
    if self._files:
        return False
    if any(not m._isempty() for m in self._dirs.values()):
        return False
    # Nothing found among what is loaded: the lazy subtrees decide.
    self._loadalllazy()
    return all(m._isempty() for m in self._dirs.values())
869 869
def __repr__(self):
    """Return a debugging description of this tree as a native string.

    It shows the directory, the hex node, whether the tree is loaded
    (``_loadfunc`` is the ``_noop`` sentinel), the dirty flag and the
    object id.
    """
    # The two booleans use %r: on Python 3 a bytes template cannot format
    # a bool with %s. On Python 2 the output is the same as before.
    text = b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>' % (
        self._dir,
        hex(self._node),
        bool(self._loadfunc is _noop),
        self._dirty,
        id(self),
    )
    if not isinstance(text, str):
        # Python 3 requires __repr__ to return str, not bytes.
        text = text.decode('latin-1')
    return text
881 881
882 882 def dir(self):
883 883 '''The directory that this tree manifest represents, including a
884 884 trailing '/'. Empty string for the repo root directory.'''
885 885 return self._dir
886 886
887 887 def node(self):
888 888 '''This node of this instance. nullid for unsaved instances. Should
889 889 be updated when the instance is read or written from a revlog.
890 890 '''
891 891 assert not self._dirty
892 892 return self._node
893 893
894 894 def setnode(self, node):
895 895 self._node = node
896 896 self._dirty = False
897 897
def iterentries(self):
    """Yield ``(path, node, flags)`` for every file, recursing into all
    subtrees, in sorted order of the entry names at each level."""
    self._load()
    self._loadalllazy()
    entries = list(self._dirs.items())
    entries.extend(self._files.items())
    entries.sort()
    for name, value in entries:
        if name not in self._files:
            # a subtree: delegate to it
            for entry in value.iterentries():
                yield entry
            continue
        yield self._subpath(name), value, self._flags.get(name, b'')
909 909
def items(self):
    """Yield ``(path, node)`` for every file, recursing into all
    subtrees, in sorted order of the entry names at each level."""
    self._load()
    self._loadalllazy()
    entries = list(self._dirs.items())
    entries.extend(self._files.items())
    entries.sort()
    for name, value in entries:
        if name not in self._files:
            # a subtree: delegate to it
            for path, subnode in pycompat.iteritems(value):
                yield path, subnode
            continue
        yield self._subpath(name), value
921 921
922 922 iteritems = items
923 923
def iterkeys(self):
    """Yield the path of every file, recursing into all subtrees, in
    sorted order of the entry names at each level."""
    self._load()
    self._loadalllazy()
    names = list(self._dirs)
    names.extend(self._files)
    names.sort()
    for name in names:
        if name not in self._files:
            # a subtree: iterating it yields its own file paths
            for path in self._dirs[name]:
                yield path
            continue
        yield self._subpath(name)
933 933
934 934 def keys(self):
935 935 return list(self.iterkeys())
936 936
937 937 def __iter__(self):
938 938 return self.iterkeys()
939 939
def __contains__(self, f):
    """Return whether file path ``f`` is in this tree (False for None)."""
    if f is None:
        return False
    self._load()
    dir, subpath = _splittopdir(f)
    if not dir:
        return f in self._files

    # the path lives in a subtree: load it if pending, then ask it
    self._loadlazy(dir)
    if dir not in self._dirs:
        return False
    return self._dirs[dir].__contains__(subpath)
954 954
def get(self, f, default=None):
    """Return the node stored for path ``f``, or ``default``."""
    self._load()
    dir, subpath = _splittopdir(f)
    if not dir:
        return self._files.get(f, default)

    # the path lives in a subtree: load it if pending, then ask it
    self._loadlazy(dir)
    if dir not in self._dirs:
        return default
    return self._dirs[dir].get(subpath, default)
966 966
def __getitem__(self, f):
    """Return the node stored for path ``f``; KeyError when absent."""
    self._load()
    dir, subpath = _splittopdir(f)
    if not dir:
        return self._files[f]

    # the path lives in a subtree: load it if pending, then ask it
    self._loadlazy(dir)
    return self._dirs[dir].__getitem__(subpath)
976 976
def flags(self, f):
    """Return the flags stored for path ``f``, or ``b''`` when there are
    none or the path is unknown."""
    self._load()
    dir, subpath = _splittopdir(f)
    if dir:
        # the path lives in a subtree: load it if pending, then ask it
        self._loadlazy(dir)
        if dir not in self._dirs:
            return b''
        return self._dirs[dir].flags(subpath)

    if f in self._lazydirs or f in self._dirs:
        return b''
    return self._flags.get(f, b'')
990 990
def find(self, f):
    """Return ``(node, flags)`` for path ``f``; KeyError when absent."""
    self._load()
    dir, subpath = _splittopdir(f)
    if not dir:
        return self._files[f], self._flags.get(f, b'')

    # the path lives in a subtree: load it if pending, then ask it
    self._loadlazy(dir)
    return self._dirs[dir].find(subpath)
1000 1000
def __delitem__(self, f):
    """Remove path ``f`` and mark this tree dirty.

    A subtree left empty by the removal is dropped from ``_dirs``.
    """
    self._load()
    dir, subpath = _splittopdir(f)
    if dir:
        self._loadlazy(dir)

        subtree = self._dirs[dir]
        subtree.__delitem__(subpath)
        # If the directory is now empty, remove it
        if subtree._isempty():
            del self._dirs[dir]
    else:
        del self._files[f]
        self._flags.pop(f, None)
    self._dirty = True
1016 1016
def __setitem__(self, f, n):
    """Store node ``n`` for path ``f`` and mark this tree dirty.

    Missing intermediate subtrees are created. At most the first 21
    bytes of ``n`` are kept.
    """
    assert n is not None
    self._load()
    dir, subpath = _splittopdir(f)
    if not dir:
        self._files[f] = n[:21]  # to match manifestdict's behavior
    else:
        self._loadlazy(dir)
        if dir not in self._dirs:
            self._dirs[dir] = treemanifest(self._subpath(dir))
        subtree = self._dirs[dir]
        subtree.__setitem__(subpath, n)
    self._dirty = True
1029 1029
def _load(self):
    """Run the pending load function, or else the pending copy function.

    The slot is reset to ``_noop`` before the function is called with this
    tree, so each one runs at most once.
    """
    pending = self._loadfunc
    if pending is not _noop:
        self._loadfunc = _noop
        pending(self)
        return
    pending = self._copyfunc
    if pending is not _noop:
        self._copyfunc = _noop
        pending(self)
1037 1037
def setflag(self, f, flags):
    """Set the flags (symlink, executable) for path f."""
    self._load()
    dir, subpath = _splittopdir(f)
    if not dir:
        self._flags[f] = flags
    else:
        # create the subtree if needed, then let it record the flags
        self._loadlazy(dir)
        if dir not in self._dirs:
            self._dirs[dir] = treemanifest(self._subpath(dir))
        subtree = self._dirs[dir]
        subtree.setflag(subpath, flags)
    self._dirty = True
1050 1050
def copy(self):
    """Return a copy of this tree.

    If this tree already has a pending copy function, the new tree shares
    it. Otherwise a copy function is built; it runs at once when this tree
    has no pending load function, and is deferred on the new tree when it
    does.
    """
    copy = treemanifest(self._dir)
    copy._node = self._node
    copy._dirty = self._dirty
    if self._copyfunc is not _noop:
        copy._copyfunc = self._copyfunc
        return copy

    def _copyfunc(s):
        self._load()
        # subtrees that are still lazy are flagged to be copied on load
        s._lazydirs = {
            d: (p, n, r, True)
            for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
        }
        targetdirs = s._dirs
        for name, subtree in pycompat.iteritems(self._dirs):
            targetdirs[name] = subtree.copy()
        s._files = dict.copy(self._files)
        s._flags = dict.copy(self._flags)

    if self._loadfunc is _noop:
        _copyfunc(copy)
    else:
        copy._copyfunc = _copyfunc
    return copy
1076 1076
def filesnotin(self, m2, match=None):
    """Set of files in this manifest that are not in the other.

    When ``match`` is given and does not match everything, both manifests
    are first narrowed with ``matches(match)`` and compared afterwards.
    Subtrees whose ``_node`` is equal and that are both clean are skipped
    without being loaded.
    """
    if match and not match.always():
        m1 = self.matches(match)
        m2 = m2.matches(match)
        return m1.filesnotin(m2)

    files = set()

    def _filesnotin(t1, t2):
        # Identical, unmodified subtrees cannot contribute any file.
        if t1._node == t2._node and not t1._dirty and not t2._dirty:
            return
        t1._load()
        t2._load()
        self._loaddifflazy(t1, t2)
        for d, m1 in pycompat.iteritems(t1._dirs):
            if d in t2._dirs:
                m2 = t2._dirs[d]
                _filesnotin(m1, m2)
            else:
                # The whole directory is missing from t2.
                files.update(m1.iterkeys())

        for fn in t1._files:
            if fn not in t2._files:
                files.add(t1._subpath(fn))

    _filesnotin(self, m2)
    return files
1105 1105
@propertycache
def _alldirs(self):
    """``pathutil.dirs`` built from this manifest; cached by propertycache."""
    return pathutil.dirs(self)
1109 1109
def dirs(self):
    """Return the ``_alldirs`` value of this manifest."""
    return self._alldirs
1112 1112
def hasdir(self, dir):
    """Return True if directory ``dir`` is present in this manifest.

    ``dir`` is given without a trailing slash. When it has a leading
    directory component the question is forwarded to that sub-manifest
    (False if there is none). Otherwise ``dir + b'/'`` is looked up among
    both the loaded (``_dirs``) and the not-yet-loaded (``_lazydirs``)
    sub-directories.
    """
    self._load()
    topdir, subdir = _splittopdir(dir)
    if topdir:
        self._loadlazy(topdir)
        if topdir in self._dirs:
            return self._dirs[topdir].hasdir(subdir)
        return False
    dirslash = dir + b'/'
    return dirslash in self._dirs or dirslash in self._lazydirs
1123 1123
def walk(self, match):
    """Generates matching file names.

    Equivalent to manifest.matches(match).iterkeys(), but without creating
    an entirely new manifest.

    It also reports nonexistent files by marking them bad with match.bad():
    every name in ``match.files()`` that was neither yielded nor is a
    directory of this manifest is passed to ``match.bad(name, None)``.
    """
    if match.always():
        for f in iter(self):
            yield f
        return

    fset = set(match.files())

    for fn in self._walk(match):
        if fn in fset:
            # specified pattern is the exact name
            fset.remove(fn)
        yield fn

    # for dirstate.walk, files=[''] means "walk the whole tree".
    # follow that here, too
    fset.discard(b'')

    for fn in sorted(fset):
        if not self.hasdir(fn):
            match.bad(fn, None)
1152 1152
def _walk(self, match):
    """Recursively generates matching file names for walk().

    ``match.visitchildrenset()`` is consulted for this directory first; a
    false result prunes the whole subtree. Entries (files and
    sub-directories together) are visited in sorted order.
    """
    visit = match.visitchildrenset(self._dir[:-1])
    if not visit:
        return

    # yield this dir's files and walk its submanifests
    self._load()
    visit = self._loadchildrensetlazy(visit)
    for p in sorted(list(self._dirs) + list(self._files)):
        if p in self._files:
            fullp = self._subpath(p)
            if match(fullp):
                yield fullp
        else:
            # Keys of _dirs end with '/', entries of ``visit`` do not.
            if not visit or p[:-1] in visit:
                for f in self._dirs[p]._walk(match):
                    yield f
1171 1171
def matches(self, match):
    """Generate a new manifest filtered by the match argument.

    A matcher that matches everything yields a plain ``copy()``; anything
    else is handled by ``_matches()``.
    """
    if match.always():
        return self.copy()

    return self._matches(match)
1178 1178
def _matches(self, match):
    """Recursively generate a new manifest filtered by the match argument.

    The returned treemanifest is marked dirty whenever it is non-empty. If
    ``match.visitchildrenset()`` answers ``b'all'`` for this directory a
    plain ``copy()`` is returned instead.
    """

    visit = match.visitchildrenset(self._dir[:-1])
    if visit == b'all':
        return self.copy()
    ret = treemanifest(self._dir)
    if not visit:
        return ret

    self._load()
    for fn in self._files:
        # While visitchildrenset *usually* lists only subdirs, this is
        # actually up to the matcher and may have some files in the set().
        # If visit == 'this', we should obviously look at the files in this
        # directory; if visit is a set, and fn is in it, we should inspect
        # fn (but no need to inspect things not in the set).
        if visit != b'this' and fn not in visit:
            continue
        fullp = self._subpath(fn)
        # visitchildrenset isn't perfect, we still need to call the regular
        # matcher code to further filter results.
        if not match(fullp):
            continue
        ret._files[fn] = self._files[fn]
        if fn in self._flags:
            ret._flags[fn] = self._flags[fn]

    visit = self._loadchildrensetlazy(visit)
    for d, subm in pycompat.iteritems(self._dirs):
        # Keys of _dirs end with '/', entries of ``visit`` do not.
        if visit and d[:-1] not in visit:
            continue
        m = subm._matches(match)
        if not m._isempty():
            ret._dirs[d] = m

    if not ret._isempty():
        ret._dirty = True
    return ret
1219 1219
def diff(self, m2, match=None, clean=False):
    """Finds changes between the current manifest and m2.

    Args:
      m2: the manifest to which this manifest should be compared.
      match: optional matcher; when given and not matching everything,
        both manifests are narrowed with ``matches(match)`` first.
      clean: if true, include files unchanged between these manifests
        with a None value in the returned dictionary.

    The result is returned as a dict with filename as key and
    values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
    nodeid in the current/other manifest and fl1/fl2 is the flag
    in the current/other manifest. Where the file does not exist,
    the nodeid will be None and the flags will be the empty
    string.
    """
    if match and not match.always():
        m1 = self.matches(match)
        m2 = m2.matches(match)
        return m1.diff(m2, clean=clean)
    result = {}
    # Stand-in for a directory that exists on one side only.
    emptytree = treemanifest()

    def _iterativediff(t1, t2, stack):
        """compares two tree manifests and append new tree-manifests which
        needs to be compared to stack"""
        if t1._node == t2._node and not t1._dirty and not t2._dirty:
            return
        t1._load()
        t2._load()
        self._loaddifflazy(t1, t2)

        for d, m1 in pycompat.iteritems(t1._dirs):
            m2 = t2._dirs.get(d, emptytree)
            stack.append((m1, m2))

        for d, m2 in pycompat.iteritems(t2._dirs):
            if d not in t1._dirs:
                stack.append((emptytree, m2))

        for fn, n1 in pycompat.iteritems(t1._files):
            fl1 = t1._flags.get(fn, b'')
            n2 = t2._files.get(fn, None)
            fl2 = t2._flags.get(fn, b'')
            if n1 != n2 or fl1 != fl2:
                result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
            elif clean:
                result[t1._subpath(fn)] = None

        for fn, n2 in pycompat.iteritems(t2._files):
            if fn not in t1._files:
                fl2 = t2._flags.get(fn, b'')
                result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

    # Explicit work stack instead of recursion.
    stackls = []
    _iterativediff(self, m2, stackls)
    while stackls:
        t1, t2 = stackls.pop()
        # stackls is populated in the function call
        _iterativediff(t1, t2, stackls)
    return result
1280 1280
def unmodifiedsince(self, m2):
    """Return True if both manifests are clean and share the same node."""
    return not self._dirty and not m2._dirty and self._node == m2._node
1283 1283
def parse(self, text, readsubtree):
    """Populate this manifest from the entries produced by ``_parse(text)``.

    Each entry is a ``(path, node, flags)`` triple and is handled in one of
    three ways:

    - flag ``b't'``: a sub-directory; recorded in ``_lazydirs`` as
      ``(full path, node, readsubtree, False)``.
    - path containing ``b'/'``: stored via ``__setitem__``/``setflag`` so
      that it lands in the right sub-manifest.
    - anything else: written straight into ``_files``/``_flags``.
    """
    selflazy = self._lazydirs
    subpath = self._subpath
    for f, n, fl in _parse(text):
        if fl == b't':
            f = f + b'/'
            # False below means "doesn't need to be copied" and can use the
            # cached value from readsubtree directly.
            selflazy[f] = (subpath(f), n, readsubtree, False)
        elif b'/' in f:
            # This is a flat manifest, so use __setitem__ and setflag rather
            # than assigning directly to _files and _flags, so we can
            # assign a path in a subdirectory, and to mark dirty (compared
            # to nullid).
            self[f] = n
            if fl:
                self.setflag(f, fl)
        else:
            # Assigning to _files and _flags avoids marking as dirty,
            # and should be a little faster.
            self._files[f] = n
            if fl:
                self._flags[f] = fl
1307 1307
def text(self):
    """Get the full data of this manifest as a bytestring."""
    self._load()
    return _text(self.iterentries())
1312 1312
def dirtext(self):
    """Get the full data of this directory as a bytestring. Make sure that
    any submanifests have been written first, so their nodeids are correct.

    Only the immediate children are serialized. Sub-directories, loaded or
    still lazy, are emitted without their trailing slash and with the
    ``b't'`` flag.
    """
    self._load()
    flags = self.flags
    lazydirs = [
        (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
    ]
    dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
    files = [(f, self._files[f], flags(f)) for f in self._files]
    return _text(sorted(dirs + files + lazydirs))
1325 1325
def read(self, gettext, readsubtree):
    """Arrange for this manifest to be parsed lazily.

    Nothing is read here. A loader is installed as ``_loadfunc``; when it
    is eventually invoked it parses ``gettext()`` (passing ``readsubtree``
    on to ``parse``) and clears the dirty flag.
    """

    def _load_for_read(s):
        s.parse(gettext(), readsubtree)
        s._dirty = False

    self._loadfunc = _load_for_read
1332 1332
def writesubtrees(self, m1, m2, writesubtree, match):
    """Invoke ``writesubtree`` for the sub-manifests selected by ``match``.

    For every sub-manifest in ``_dirs`` the matching directory nodes are
    looked up in the parent manifests ``m1`` and ``m2``, using the lazy
    entry when the parent has not loaded that directory. The callback is
    then invoked as ``writesubtree(subm, subp1, subp2, match)``. The two
    parent nodes are swapped when the first one equals ``nullid``.
    """
    self._load()  # for consistency; should never have any effect here
    m1._load()
    m2._load()
    emptytree = treemanifest()

    def getnode(m, d):
        ld = m._lazydirs.get(d)
        if ld:
            # Second element of a lazy entry is the node (see parse()).
            return ld[1]
        return m._dirs.get(d, emptytree)._node

    # let's skip investigating things that `match` says we do not need.
    visit = match.visitchildrenset(self._dir[:-1])
    visit = self._loadchildrensetlazy(visit)
    if visit == b'this' or visit == b'all':
        visit = None
    for d, subm in pycompat.iteritems(self._dirs):
        if visit and d[:-1] not in visit:
            continue
        subp1 = getnode(m1, d)
        subp2 = getnode(m2, d)
        if subp1 == nullid:
            subp1, subp2 = subp2, subp1
        writesubtree(subm, subp1, subp2, match)
1358 1358
def walksubtrees(self, matcher=None):
    """Returns an iterator of the subtrees of this manifest, including this
    manifest itself.

    If `matcher` is provided, it only returns subtrees that match. A
    directory for which ``matcher.visitdir()`` is false is pruned together
    with everything below it.
    """
    if matcher and not matcher.visitdir(self._dir[:-1]):
        return
    if not matcher or matcher(self._dir[:-1]):
        yield self

    self._load()
    # OPT: use visitchildrenset to avoid loading everything.
    self._loadalllazy()
    for d, subm in pycompat.iteritems(self._dirs):
        for subtree in subm.walksubtrees(matcher=matcher):
            yield subtree
1376 1376
1377 1377
class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    File consists of entries, up to EOF:

     - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    The on-disk file is only touched once ``_opener`` has been set; until
    then ``read()`` and ``write()`` are no-ops. Every accessor loads the
    file lazily on first use.
    """

    _file = b'manifestfulltextcache'

    def __init__(self, max):
        super(manifestfulltextcache, self).__init__(max)
        # True when the in-memory state differs from what is on disk.
        self._dirty = False
        # True once the on-disk file has been loaded (or found missing).
        self._read = False
        # Callable used to open the cache file; None disables persistence.
        self._opener = None

    def read(self):
        """Load the persisted entries, at most once per ``clear()``."""
        if self._read or self._opener is None:
            return

        try:
            with self._opener(self._file) as fp:
                # Use the parent's __setitem__ so that loading neither
                # recurses into read() nor marks the cache dirty.
                setitem = super(manifestfulltextcache, self).__setitem__
                # ignore trailing data, this is a cache, corruption is skipped
                while True:
                    node = fp.read(20)
                    if len(node) < 20:
                        break
                    try:
                        size = struct.unpack(b'>L', fp.read(4))[0]
                    except struct.error:
                        break
                    value = bytearray(fp.read(size))
                    if len(value) != size:
                        break
                    setitem(node, value)
        except IOError:
            # the file is allowed to be missing
            pass

        self._read = True
        self._dirty = False

    def write(self):
        """Persist the cache if it is dirty and an opener is configured."""
        if not self._dirty or self._opener is None:
            return
        # rotate backwards to the first used node
        with self._opener(
            self._file, b'w', atomictemp=True, checkambig=True
        ) as fp:
            node = self._head.prev
            while True:
                if node.key in self._cache:
                    fp.write(node.key)
                    fp.write(struct.pack(b'>L', len(node.value)))
                    fp.write(node.value)
                if node is self._head:
                    break
                node = node.prev

    def __len__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__len__()

    def __contains__(self, k):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__contains__(k)

    def __iter__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__iter__()

    def __getitem__(self, k):
        if not self._read:
            self.read()
        # the cache lru order can change on read
        setdirty = self._cache.get(k) is not self._head
        value = super(manifestfulltextcache, self).__getitem__(k)
        if setdirty:
            self._dirty = True
        return value

    def __setitem__(self, k, v):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__setitem__(k, v)
        self._dirty = True

    def __delitem__(self, k):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__delitem__(k)
        self._dirty = True

    def get(self, k, default=None):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).get(k, default=default)

    def clear(self, clear_persisted_data=False):
        """Empty the in-memory cache.

        With ``clear_persisted_data`` the (now empty) cache is also written
        out, replacing the persisted file. In both cases the next access
        re-reads the file.
        """
        super(manifestfulltextcache, self).clear()
        if clear_persisted_data:
            self._dirty = True
            self.write()
        self._read = False
1489 1489
1490 1490
# an upper bound of what we expect from compression
# (real-life value seems to be "3")
MAXCOMPRESSION = 3
1494 1494
1495 1495
@interfaceutil.implementer(repository.imanifeststorage)
class manifestrevlog(object):
    """A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.

    Most methods simply forward to the wrapped ``revlog.revlog`` instance
    kept in ``_revlog``.
    """

    def __init__(
        self,
        opener,
        tree=b'',
        dirlogcache=None,
        indexfile=None,
        treemanifest=False,
    ):
        """Constructs a new manifest revlog

        `indexfile` - used by extensions to have two manifests at once, like
        when transitioning between flat manifests and treemanifests.

        `treemanifest` - used to indicate this is a tree manifest revlog. Opener
        options can also be used to make this a tree manifest revlog. The opener
        option takes precedence, so if it is set to True, we ignore whatever
        value is passed in to the constructor.
        """
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        optiontreemanifest = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get(b'manifestcachesize', cachesize)
            optiontreemanifest = opts.get(b'treemanifest', False)

        self._treeondisk = optiontreemanifest or treemanifest

        self._fulltextcache = manifestfulltextcache(cachesize)

        if tree:
            assert self._treeondisk, b'opts is %r' % opts

        if indexfile is None:
            indexfile = b'00manifest.i'
            if tree:
                indexfile = b"meta/" + tree + indexfile

        self.tree = tree

        # The dirlogcache is kept on the root manifest log
        if tree:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {b'': self}

        self._revlog = revlog.revlog(
            opener,
            indexfile,
            # only root indexfile is cached
            checkambig=not bool(tree),
            mmaplargeindex=True,
            upperboundcomp=MAXCOMPRESSION,
        )

        self.index = self._revlog.index
        self.version = self._revlog.version
        self._generaldelta = self._revlog._generaldelta

    def _setupmanifestcachehooks(self, repo):
        """Persist the manifestfulltextcache on lock release"""
        if not util.safehasattr(repo, b'_wlockref'):
            return

        self._fulltextcache._opener = repo.wcachevfs
        if repo._currentlock(repo._wlockref) is None:
            return

        # Weak references keep the callback from pinning either object.
        reporef = weakref.ref(repo)
        manifestrevlogref = weakref.ref(self)

        def persistmanifestcache(success):
            # Repo is in an unknown state, do not persist.
            if not success:
                return

            repo = reporef()
            self = manifestrevlogref()
            if repo is None or self is None:
                return
            if repo.manifestlog.getstorage(b'') is not self:
                # there's a different manifest in play now, abort
                return
            self._fulltextcache.write()

        repo._afterlock(persistmanifestcache)

    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self, clear_persisted_data=False):
        """Drop the revlog caches, the fulltext cache and the dirlog cache."""
        self._revlog.clearcaches()
        self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
        self._dirlogcache = {self.tree: self}

    def dirlog(self, d):
        """Return the manifestrevlog for directory ``d``, creating it on
        first use and remembering it in the shared dirlog cache."""
        if d:
            assert self._treeondisk
        if d not in self._dirlogcache:
            mfrevlog = manifestrevlog(
                self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
            )
            self._dirlogcache[d] = mfrevlog
        return self._dirlogcache[d]

    def add(
        self,
        m,
        transaction,
        link,
        p1,
        p2,
        added,
        removed,
        readtree=None,
        match=None,
    ):
        """Store manifest ``m`` as a new revision and return its node.

        ``added``/``removed`` are only consulted on the fast-delta path.
        ``readtree`` and ``match`` are required when writing tree manifests
        on the slow path. The resulting fulltext is put into the fulltext
        cache, except when the revision was written through ``_addtree``.
        """
        if p1 in self.fulltextcache and util.safehasattr(m, b'fastdelta'):
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge(
                [(x, False) for x in sorted(added)],
                [(x, True) for x in sorted(removed)],
            )

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self._revlog.rev(p1), deltatext
            text = util.buffer(arraytext)
            n = self._revlog.addrevision(
                text, transaction, link, p1, p2, cachedelta
            )
        else:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            if self._treeondisk:
                assert readtree, b"readtree must be set for treemanifest writes"
                assert match, b"match must be specified for treemanifest writes"
                m1 = readtree(self.tree, p1)
                m2 = readtree(self.tree, p2)
                n = self._addtree(
                    m, transaction, link, m1, m2, readtree, match=match
                )
                arraytext = None
            else:
                text = m.text()
                n = self._revlog.addrevision(text, transaction, link, p1, p2)
                arraytext = bytearray(text)

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2, readtree, match):
        """Write tree manifest ``m`` (sub-manifests first) and return its
        node. ``m1`` and ``m2`` are the parent tree manifests."""
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if self.tree != b'' and (
            m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
        ):
            return m.node()

        def writesubtree(subm, subp1, subp2, match):
            sublog = self.dirlog(subm.dir())
            sublog.add(
                subm,
                transaction,
                link,
                subp1,
                subp2,
                None,
                None,
                readtree=readtree,
                match=match,
            )

        m.writesubtrees(m1, m2, writesubtree, match)
        text = m.dirtext()
        n = None
        if self.tree != b'':
            # Double-check whether contents are unchanged to one parent
            if text == m1.dirtext():
                n = m1.node()
            elif text == m2.dirtext():
                n = m2.node()

        if not n:
            n = self._revlog.addrevision(
                text, transaction, link, m1.node(), m2.node()
            )

        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, value):
        return self._revlog.lookup(value)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def parents(self, node):
        return self._revlog.parents(node)

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def checksize(self):
        return self._revlog.checksize()

    def revision(self, node, _df=None, raw=False):
        return self._revlog.revision(node, _df=_df, raw=raw)

    def rawdata(self, node, _df=None):
        return self._revlog.rawdata(node, _df=_df)

    def revdiff(self, rev1, rev2):
        return self._revlog.revdiff(rev1, rev2)

    def cmp(self, node, text):
        return self._revlog.cmp(node, text)

    def deltaparent(self, rev):
        return self._revlog.deltaparent(rev)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
        )

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        return self._revlog.addgroup(
            deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
        )

    def rawsize(self, rev):
        return self._revlog.rawsize(rev)

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def files(self):
        return self._revlog.files()

    def clone(self, tr, destrevlog, **kwargs):
        """Clone into ``destrevlog``, which must itself be a manifestrevlog."""
        if not isinstance(destrevlog, manifestrevlog):
            raise error.ProgrammingError(b'expected manifestrevlog to clone()')

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    @property
    def indexfile(self):
        return self._revlog.indexfile

    @indexfile.setter
    def indexfile(self, value):
        self._revlog.indexfile = value

    @property
    def opener(self):
        return self._revlog.opener

    @opener.setter
    def opener(self, value):
        self._revlog.opener = value
1814 1818
1815 1819
@interfaceutil.implementer(repository.imanifestlog)
class manifestlog(object):
    """A collection class representing the collection of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""

    def __init__(self, opener, repo, rootstore, narrowmatch):
        usetreemanifest = False
        cachesize = 4

        # Opener options, when present, override the defaults above.
        opts = getattr(opener, 'options', None)
        if opts is not None:
            usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
            cachesize = opts.get(b'manifestcachesize', cachesize)

        self._treemanifests = usetreemanifest

        self._rootstore = rootstore
        self._rootstore._setupmanifestcachehooks(repo)
        self._narrowmatch = narrowmatch

        # A cache of the manifestctx or treemanifestctx for each directory
        self._dirmancache = {}
        self._dirmancache[b''] = util.lrucachedict(cachesize)

        self._cachesize = cachesize

    def __getitem__(self, node):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.
        """
        return self.get(b'', node)

    def get(self, tree, node, verify=True):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.

        `verify` - if True an exception will be thrown if the node is not in
                   the revlog

        Directories rejected by the narrow matcher yield an
        ``excludeddirmanifestctx``. Asking for a non-root ``tree`` when the
        root store is not a tree manifest raises ``error.Abort``. Results
        for any node other than ``nullid`` are cached per directory.
        """
        if node in self._dirmancache.get(tree, ()):
            return self._dirmancache[tree][node]

        if not self._narrowmatch.always():
            if not self._narrowmatch.visitdir(tree[:-1]):
                return excludeddirmanifestctx(tree, node)
        if tree:
            if self._rootstore._treeondisk:
                if verify:
                    # Side-effect is LookupError is raised if node doesn't
                    # exist.
                    self.getstorage(tree).rev(node)

                m = treemanifestctx(self, tree, node)
            else:
                raise error.Abort(
                    _(
                        b"cannot ask for manifest directory '%s' in a flat "
                        b"manifest"
                    )
                    % tree
                )
        else:
            if verify:
                # Side-effect is LookupError is raised if node doesn't exist.
                self._rootstore.rev(node)

            if self._treemanifests:
                m = treemanifestctx(self, b'', node)
            else:
                m = manifestctx(self, node)

        if node != nullid:
            mancache = self._dirmancache.get(tree)
            if not mancache:
                mancache = util.lrucachedict(self._cachesize)
                self._dirmancache[tree] = mancache
            mancache[node] = m
        return m

    def getstorage(self, tree):
        """Return the storage object for directory ``tree``."""
        return self._rootstore.dirlog(tree)

    def clearcaches(self, clear_persisted_data=False):
        """Drop the per-directory context cache and the root store caches."""
        self._dirmancache.clear()
        self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)

    def rev(self, node):
        return self._rootstore.rev(node)
1909 1913
1910 1914
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memmanifestctx(object):
    """An in-memory, writable flat manifest that is not stored yet."""

    def __init__(self, manifestlog):
        self._manifestlog = manifestlog
        self._manifestdict = manifestdict()

    def _storage(self):
        """Return the root manifest storage of the owning manifestlog."""
        return self._manifestlog.getstorage(b'')

    def new(self):
        """Return a fresh, empty memmanifestctx on the same manifestlog."""
        return memmanifestctx(self._manifestlog)

    def copy(self):
        """Return a new memmanifestctx holding a copy of this manifest."""
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    def read(self):
        """Return the underlying manifestdict (not a copy)."""
        return self._manifestdict

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        """Add this manifest to the root storage and return the result of
        the storage's ``add()`` call."""
        return self._storage().add(
            self._manifestdict,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            match=match,
        )
1942 1946
1943 1947
@interfaceutil.implementer(repository.imanifestrevisionstored)
class manifestctx(object):
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """

    def __init__(self, manifestlog, node):
        self._manifestlog = manifestlog
        # Parsed manifestdict; filled in lazily by read().
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self):
        """Return the root manifest storage of the owning manifestlog."""
        return self._manifestlog.getstorage(b'')

    def node(self):
        return self._node

    def new(self):
        """Return a fresh, empty memmanifestctx on the same manifestlog."""
        return memmanifestctx(self._manifestlog)

    def copy(self):
        """Return a memmanifestctx holding a copy of this manifest."""
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def read(self):
        """Return the manifestdict for this revision, parsing it on first use.

        The fulltext comes from the storage's fulltext cache when present;
        otherwise it is read from the storage and added to that cache.
        """
        if self._data is None:
            if self._node == nullid:
                self._data = manifestdict()
            else:
                store = self._storage()
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = manifestdict(text)
        return self._data

    def readfast(self, shallow=False):
        """Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, nothing changes since this is a flat manifest.
        """
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta()
        return self.read()

    def readdelta(self, shallow=False):
        """Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        Changing the value of `shallow` has no effect on flat manifests.
        """
        store = self._storage()
        r = store.rev(self._node)
        d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
        return manifestdict(d)

    def find(self, key):
        return self.read().find(key)
2024 2028
2025 2029
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memtreemanifestctx(object):
    """A writable context wrapping a treemanifest held on the instance."""

    def __init__(self, manifestlog, dir=b''):
        self._manifestlog = manifestlog
        self._dir = dir
        # Starts out as an empty treemanifest; copy() replaces it.
        self._treemanifest = treemanifest()

    def _storage(self):
        """Return the storage the manifestlog provides for ``b''``."""
        return self._manifestlog.getstorage(b'')

    def new(self, dir=b''):
        """Return a fresh memtreemanifestctx for ``dir``."""
        return memtreemanifestctx(self._manifestlog, dir=dir)

    def copy(self):
        """Return a new context for the same dir with a copied manifest."""
        duplicate = memtreemanifestctx(self._manifestlog, dir=self._dir)
        duplicate._treemanifest = self._treemanifest.copy()
        return duplicate

    def read(self):
        """Return the treemanifest held by this context."""
        return self._treemanifest

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        """Hand the held treemanifest to the storage's ``add()`` method.

        A ``readtree`` callback that loads ``(dir, node)`` through the
        manifestlog is passed along by keyword, as is ``match``.
        """

        def readtree(dir, node):
            return self._manifestlog.get(dir, node).read()

        storage = self._storage()
        return storage.add(
            self._treemanifest,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            readtree=readtree,
            match=match,
        )
2062 2066
2063 2067
@interfaceutil.implementer(repository.imanifestrevisionstored)
class treemanifestctx(object):
    """A single stored revision of one directory's manifest.

    The parsed contents are produced on the first call to read() and kept
    on the instance for later calls.
    """

    def __init__(self, manifestlog, dir, node):
        self._manifestlog = manifestlog
        self._dir = dir
        self._node = node
        # Parsed manifest; filled in lazily by read().
        self._data = None

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so
        # that we can instantiate treemanifestctx objects for directories we
        # don't have on disk.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self):
        """Return the storage for this directory.

        When the manifestlog's narrow matcher is not always-true and does
        not visit this directory, an excludedmanifestrevlog is returned
        instead of real storage.
        """
        narrowmatch = self._manifestlog._narrowmatch
        if not narrowmatch.always() and not narrowmatch.visitdir(
            self._dir[:-1]
        ):
            return excludedmanifestrevlog(self._dir)
        return self._manifestlog.getstorage(self._dir)

    def read(self):
        """Return the parsed manifest, parsing and caching it if needed."""
        if self._data is not None:
            return self._data

        store = self._storage()
        if self._node == nullid:
            self._data = treemanifest()
        # TODO accessing non-public API
        elif store._treeondisk:
            tree = treemanifest(dir=self._dir)

            def gettext():
                return store.revision(self._node)

            def readsubtree(dir, subm):
                # Set verify to False since we need to be able to create
                # subtrees for trees that don't exist on disk.
                return self._manifestlog.get(dir, subm, verify=False).read()

            tree.read(gettext, readsubtree)
            tree.setnode(self._node)
            self._data = tree
        else:
            if self._node in store.fulltextcache:
                text = pycompat.bytestr(store.fulltextcache[self._node])
            else:
                text = store.revision(self._node)
                # Remember the full text (as a bytearray) for later readers.
                store.fulltextcache[self._node] = bytearray(text)
            self._data = treemanifest(dir=self._dir, text=text)

        return self._data

    def node(self):
        """Return the node this context was created for."""
        return self._node

    def new(self, dir=b''):
        """Return a fresh memtreemanifestctx for ``dir``."""
        return memtreemanifestctx(self._manifestlog, dir=dir)

    def copy(self):
        """Return a memtreemanifestctx holding a copy of this manifest."""
        duplicate = memtreemanifestctx(self._manifestlog, dir=self._dir)
        duplicate._treemanifest = self.read().copy()
        return duplicate

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def readdelta(self, shallow=False):
        """Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to
        read if the revlog delta is already p1.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest,
        i.e. the subdirectory will be reported among files and distinguished
        only by its 't' flag.
        """
        store = self._storage()
        if shallow:
            rev = store.rev(self._node)
            delta = store.revdiff(store.deltaparent(rev), rev)
            return manifestdict(mdiff.patchtext(delta))

        # Need to perform a slow delta
        baserev = store.deltaparent(store.rev(self._node))
        m0 = self._manifestlog.get(self._dir, store.node(baserev)).read()
        m1 = self.read()
        md = treemanifest(dir=self._dir)
        changes = m0.diff(m1)
        for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(changes):
            if not n1:
                # No node on the new side: nothing to record for this file.
                continue
            md[f] = n1
            if fl1:
                md.setflag(f, fl1)
        return md

    def readfast(self, shallow=False):
        """Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can
        be read quickly.

        If `shallow` is True, it only returns the entries from this manifest,
        and not any submanifests.
        """
        store = self._storage()
        rev = store.rev(self._node)
        deltaparent = store.deltaparent(rev)
        if deltaparent != nullrev and deltaparent in store.parentrevs(rev):
            return self.readdelta(shallow=shallow)

        if not shallow:
            return self.read()
        return manifestdict(store.revision(self._node))

    def find(self, key):
        """Delegate to ``find(key)`` on the parsed manifest."""
        return self.read().find(key)
2183 2187
2184 2188
class excludeddir(treemanifest):
    """Stand-in for a directory that is excluded from the repository.

    With narrowing active on a repository that uses treemanifests,
    some of the directory revlogs will be excluded from the resulting
    clone. This is a huge storage win for clients, but means we need
    some sort of pseudo-manifest to surface to internals so we can
    detect a merge conflict outside the narrowspec. That's what this
    class is: it stands in for a directory whose node is known, but
    whose contents are unknown.
    """

    def __init__(self, dir, node):
        super(excludeddir, self).__init__(dir)
        self._node = node
        # Add an empty file, which will be included by iterators and such,
        # appearing as the directory itself (i.e. something like "dir/")
        emptyname = b''
        self._files[emptyname] = node
        self._flags[emptyname] = b't'

    def copy(self):
        """Return this same instance rather than a duplicate.

        Manifests outside the narrowspec should never be modified, so avoid
        copying. This makes a noticeable difference when there are very many
        directories outside the narrowspec. Also, it makes sense for the
        copy to be of the same type as the original, which would not happen
        with the super type's copy().
        """
        return self
2212 2216
2213 2217
class excludeddirmanifestctx(treemanifestctx):
    """context wrapper for excludeddir - see that docstring for rationale"""

    def __init__(self, dir, node):
        # Only the directory and node are recorded; the parent initializer
        # is intentionally not invoked here.
        self._dir = dir
        self._node = node

    def read(self):
        """Return a new excludeddir for this context's dir and node."""
        return excludeddir(self._dir, self._node)

    def write(self, *args, **kwargs):
        """Always raise ProgrammingError: excluded dirs are never written.

        Keyword arguments are accepted (and ignored) so that a call using
        keywords such as ``match=`` reports the intended ProgrammingError
        rather than a TypeError about an unexpected keyword.
        """
        raise error.ProgrammingError(
            b'attempt to write manifest from excluded dir %s' % self._dir
        )
2228 2232
2229 2233
class excludedmanifestrevlog(manifestrevlog):
    """Stand-in for excluded treemanifest revlogs.

    When narrowing is active on a treemanifest repository, we'll have
    references to directories we can't see due to the revlog being
    skipped. This class exists to conform to the manifestrevlog
    interface for those directories and proactively prevent writes to
    outside the narrowspec.
    """

    def __init__(self, dir):
        # Only the directory is recorded; the parent initializer is not
        # invoked here.
        self._dir = dir

    def __len__(self):
        msg = b'attempt to get length of excluded dir %s' % self._dir
        raise error.ProgrammingError(msg)

    def rev(self, node):
        msg = b'attempt to get rev from excluded dir %s' % self._dir
        raise error.ProgrammingError(msg)

    def linkrev(self, node):
        msg = b'attempt to get linkrev from excluded dir %s' % self._dir
        raise error.ProgrammingError(msg)

    def node(self, rev):
        msg = b'attempt to get node from excluded dir %s' % self._dir
        raise error.ProgrammingError(msg)

    def add(self, *args, **kwargs):
        # We should never write entries in dirlogs outside the narrow clone.
        # However, the method still gets called from writesubtree() in
        # _addtree(), so we need to handle it. We should possibly make that
        # avoid calling add() with a clean manifest (_dirty is always False
        # in excludeddir instances).
        return None
@@ -1,316 +1,316 b''
1 1 from __future__ import absolute_import
2 2
3 3 import copy
4 4 import errno
5 5 import tempfile
6 6 import types
7 7 import unittest
8 8
9 9 import silenttestrunner
10 10
11 11 from mercurial import (
12 12 encoding,
13 13 error,
14 14 lock,
15 15 vfs as vfsmod,
16 16 )
17 17
# Name of the lock file used by every test in this module.
testlockname = b'testlock'

# work around http://bugs.python.org/issue1515
# The handler is only installed when the copy module has no deepcopy
# handler registered for bound methods.
if types.MethodType not in copy._deepcopy_dispatch:

    def _deepcopy_method(x, memo):
        boundself = copy.deepcopy(x.__self__, memo)
        return type(x)(x.__func__, boundself, x.im_class)

    copy._deepcopy_dispatch[types.MethodType] = _deepcopy_method
27 27
28 28
class lockwrapper(lock.lock):
    """A lock whose reported pid is shifted by a fixed offset."""

    def __init__(self, pidoffset, *args, **kwargs):
        # lock.lock.__init__() calls lock(), so the pidoffset assignment needs
        # to be earlier
        self._pidoffset = pidoffset
        super(lockwrapper, self).__init__(*args, **kwargs)

    def _getpid(self):
        """Return the parent class's pid plus this wrapper's offset."""
        realpid = super(lockwrapper, self)._getpid()
        return realpid + self._pidoffset
38 38
39 39
class teststate(object):
    """Records which lock callbacks fired and asserts on them.

    An instance creates locks through makelock(), wiring its own
    acquirefn/releasefn/postreleasefn methods in as callbacks, and offers
    assertion helpers that report through the given test case.
    """

    def __init__(self, testcase, dir, pidoffset=0):
        self._testcase = testcase
        self._acquirecalled = False
        self._releasecalled = False
        self._postreleasecalled = False
        self.vfs = vfsmod.vfs(dir, audit=False)
        self._pidoffset = pidoffset

    def makelock(self, *args, **kwargs):
        """Create a lockwrapper on this state's vfs and return it.

        Extra positional and keyword arguments are forwarded to the
        lockwrapper constructor. postreleasefn is appended to the new
        lock's ``postrelease`` list.
        """
        newlock = lockwrapper(
            self._pidoffset,
            self.vfs,
            testlockname,
            releasefn=self.releasefn,
            acquirefn=self.acquirefn,
            *args,
            **kwargs
        )
        newlock.postrelease.append(self.postreleasefn)
        return newlock

    def acquirefn(self):
        self._acquirecalled = True

    def releasefn(self):
        self._releasecalled = True

    def postreleasefn(self, success):
        # ``success`` is accepted but its value is not inspected; only the
        # fact that the callback ran is recorded.
        self._postreleasecalled = True

    def assertacquirecalled(self, called):
        self._testcase.assertEqual(
            self._acquirecalled,
            called,
            'expected acquire to be %s but was actually %s'
            % (self._tocalled(called), self._tocalled(self._acquirecalled),),
        )

    def resetacquirefn(self):
        """Forget that acquirefn was called."""
        self._acquirecalled = False

    def assertreleasecalled(self, called):
        self._testcase.assertEqual(
            self._releasecalled,
            called,
            'expected release to be %s but was actually %s'
            % (self._tocalled(called), self._tocalled(self._releasecalled),),
        )

    def assertpostreleasecalled(self, called):
        self._testcase.assertEqual(
            self._postreleasecalled,
            called,
            'expected postrelease to be %s but was actually %s'
            % (
                self._tocalled(called),
                self._tocalled(self._postreleasecalled),
            ),
        )

    def assertlockexists(self, exists):
        """Assert whether the lock file is present on this state's vfs."""
        actual = self.vfs.lexists(testlockname)
        self._testcase.assertEqual(
            actual,
            exists,
            'expected lock to %s but actually did %s'
            % (self._toexists(exists), self._toexists(actual),),
        )

    def _tocalled(self, called):
        """Return the wording used in messages for a called/not-called flag."""
        return 'called' if called else 'not called'

    def _toexists(self, exists):
        """Return the wording used in messages for an exists/missing flag."""
        return 'exist' if exists else 'not exist'
121 121
122 122
class testlock(unittest.TestCase):
    # Each test builds its own teststate in a fresh temporary directory
    # created under the current working directory.

    def testlock(self):
        # Plain acquire followed by release.
        state = teststate(self, tempfile.mkdtemp(dir=encoding.getcwd()))
        lock = state.makelock()
        state.assertacquirecalled(True)
        lock.release()
        state.assertreleasecalled(True)
        state.assertpostreleasecalled(True)
        state.assertlockexists(False)

    def testrecursivelock(self):
        state = teststate(self, tempfile.mkdtemp(dir=encoding.getcwd()))
        lock = state.makelock()
        state.assertacquirecalled(True)

        state.resetacquirefn()
        lock.lock()
        # recursive lock should not call acquirefn again
        state.assertacquirecalled(False)

        lock.release()  # brings lock refcount down from 2 to 1
        state.assertreleasecalled(False)
        state.assertpostreleasecalled(False)
        state.assertlockexists(True)

        lock.release()  # releases the lock
        state.assertreleasecalled(True)
        state.assertpostreleasecalled(True)
        state.assertlockexists(False)

    def testlockfork(self):
        state = teststate(self, tempfile.mkdtemp(dir=encoding.getcwd()))
        lock = state.makelock()
        state.assertacquirecalled(True)

        # fake a fork
        forklock = copy.copy(lock)
        forklock._pidoffset = 1
        forklock.release()
        state.assertreleasecalled(False)
        state.assertpostreleasecalled(False)
        state.assertlockexists(True)

        # release the actual lock
        lock.release()
        state.assertreleasecalled(True)
        state.assertpostreleasecalled(True)
        state.assertlockexists(False)

    def testinheritlock(self):
        d = tempfile.mkdtemp(dir=encoding.getcwd())
        parentstate = teststate(self, d)
        parentlock = parentstate.makelock()
        parentstate.assertacquirecalled(True)

        # set up lock inheritance
        with parentlock.inherit() as lockname:
            parentstate.assertreleasecalled(True)
            parentstate.assertpostreleasecalled(False)
            parentstate.assertlockexists(True)

            childstate = teststate(self, d, pidoffset=1)
            childlock = childstate.makelock(parentlock=lockname)
            childstate.assertacquirecalled(True)

            childlock.release()
            childstate.assertreleasecalled(True)
            childstate.assertpostreleasecalled(False)
            childstate.assertlockexists(True)

            parentstate.resetacquirefn()

        parentstate.assertacquirecalled(True)

        parentlock.release()
        parentstate.assertreleasecalled(True)
        parentstate.assertpostreleasecalled(True)
        parentstate.assertlockexists(False)

    def testmultilock(self):
        d = tempfile.mkdtemp(dir=encoding.getcwd())
        state0 = teststate(self, d)
        lock0 = state0.makelock()
        state0.assertacquirecalled(True)

        with lock0.inherit() as lock0name:
            state0.assertreleasecalled(True)
            state0.assertpostreleasecalled(False)
            state0.assertlockexists(True)

            state1 = teststate(self, d, pidoffset=1)
            lock1 = state1.makelock(parentlock=lock0name)
            state1.assertacquirecalled(True)

            # from within lock1, acquire another lock
            with lock1.inherit() as lock1name:
                # since the file on disk is lock0's this should have the same
                # name
                self.assertEqual(lock0name, lock1name)

                state2 = teststate(self, d, pidoffset=2)
                lock2 = state2.makelock(parentlock=lock1name)
                state2.assertacquirecalled(True)

                lock2.release()
                state2.assertreleasecalled(True)
                state2.assertpostreleasecalled(False)
                state2.assertlockexists(True)

                state1.resetacquirefn()

            state1.assertacquirecalled(True)

            lock1.release()
            state1.assertreleasecalled(True)
            state1.assertpostreleasecalled(False)
            state1.assertlockexists(True)

        lock0.release()

    def testinheritlockfork(self):
        d = tempfile.mkdtemp(dir=encoding.getcwd())
        parentstate = teststate(self, d)
        parentlock = parentstate.makelock()
        parentstate.assertacquirecalled(True)

        # set up lock inheritance
        with parentlock.inherit() as lockname:
            childstate = teststate(self, d, pidoffset=1)
            childlock = childstate.makelock(parentlock=lockname)
            childstate.assertacquirecalled(True)

            # fork the child lock
            forkchildlock = copy.copy(childlock)
            forkchildlock._pidoffset += 1
            forkchildlock.release()
            childstate.assertreleasecalled(False)
            childstate.assertpostreleasecalled(False)
            childstate.assertlockexists(True)

            # release the child lock
            childlock.release()
            childstate.assertreleasecalled(True)
            childstate.assertpostreleasecalled(False)
            childstate.assertlockexists(True)

        parentlock.release()

    def testinheritcheck(self):
        d = tempfile.mkdtemp(dir=encoding.getcwd())
        state = teststate(self, d)

        def check():
            raise error.LockInheritanceContractViolation('check failed')

        lock = state.makelock(inheritchecker=check)
        state.assertacquirecalled(True)

        with self.assertRaises(error.LockInheritanceContractViolation):
            with lock.inherit():
                pass

        lock.release()

    def testfrequentlockunlock(self):
        """This tests whether lock acquisition fails as expected, even if
        (1) lock can't be acquired (makelock fails by EEXIST), and
        (2) locker info can't be read in (readlock fails by ENOENT) while
        retrying 5 times.
        """

        d = tempfile.mkdtemp(dir=encoding.getcwd())
        state = teststate(self, d)

        def emulatefrequentlock(*args):
            raise OSError(errno.EEXIST, "File exists")

        def emulatefrequentunlock(*args):
            raise OSError(errno.ENOENT, "No such file or directory")

        state.vfs.makelock = emulatefrequentlock
        state.vfs.readlock = emulatefrequentunlock

        try:
            state.makelock(timeout=0)
            self.fail("unexpected lock acquisition")
        except error.LockHeld as why:
            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b) which only reports "False is not true".
            self.assertEqual(why.errno, errno.ETIMEDOUT)
            self.assertEqual(why.locker, b"")
            state.assertlockexists(False)
313 313
314 314
# Run this module's tests through silenttestrunner when executed as a script.
if __name__ == '__main__':
    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now