##// END OF EJS Templates
store: introduce a EntryFile object to actually access file info...
marmoute -
r51365:9fdc28e2 default
parent child Browse files
Show More
@@ -1,696 +1,698
1 1 # narrowcommands.py - command modifications for narrowhg extension
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import itertools
9 9 import os
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial.node import (
13 13 hex,
14 14 short,
15 15 )
16 16 from mercurial import (
17 17 bundle2,
18 18 cmdutil,
19 19 commands,
20 20 discovery,
21 21 encoding,
22 22 error,
23 23 exchange,
24 24 extensions,
25 25 hg,
26 26 narrowspec,
27 27 pathutil,
28 28 pycompat,
29 29 registrar,
30 30 repair,
31 31 repoview,
32 32 requirements,
33 33 sparse,
34 34 util,
35 35 wireprototypes,
36 36 )
37 37 from mercurial.utils import (
38 38 urlutil,
39 39 )
40 40
41 41 table = {}
42 42 command = registrar.command(table)
43 43
44 44
45 45 def setup():
46 46 """Wraps user-facing mercurial commands with narrow-aware versions."""
47 47
48 48 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
49 49 entry[1].append(
50 50 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
51 51 )
52 52 entry[1].append(
53 53 (
54 54 b'',
55 55 b'depth',
56 56 b'',
57 57 _(b"limit the history fetched by distance from heads"),
58 58 )
59 59 )
60 60 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
61 61 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
62 62 if b'sparse' not in extensions.enabled():
63 63 entry[1].append(
64 64 (b'', b'include', [], _(b"specifically fetch this file/directory"))
65 65 )
66 66 entry[1].append(
67 67 (
68 68 b'',
69 69 b'exclude',
70 70 [],
71 71 _(b"do not fetch this file/directory, even if included"),
72 72 )
73 73 )
74 74
75 75 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
76 76 entry[1].append(
77 77 (
78 78 b'',
79 79 b'depth',
80 80 b'',
81 81 _(b"limit the history fetched by distance from heads"),
82 82 )
83 83 )
84 84
85 85 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
86 86
87 87
88 88 def clonenarrowcmd(orig, ui, repo, *args, **opts):
89 89 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
90 90 opts = pycompat.byteskwargs(opts)
91 91 wrappedextraprepare = util.nullcontextmanager()
92 92 narrowspecfile = opts[b'narrowspec']
93 93
94 94 if narrowspecfile:
95 95 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
96 96 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
97 97 try:
98 98 fdata = util.readfile(filepath)
99 99 except IOError as inst:
100 100 raise error.Abort(
101 101 _(b"cannot read narrowspecs from '%s': %s")
102 102 % (filepath, encoding.strtolocal(inst.strerror))
103 103 )
104 104
105 105 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
106 106 if profiles:
107 107 raise error.ConfigError(
108 108 _(
109 109 b"cannot specify other files using '%include' in"
110 110 b" narrowspec"
111 111 )
112 112 )
113 113
114 114 narrowspec.validatepatterns(includes)
115 115 narrowspec.validatepatterns(excludes)
116 116
117 117 # narrowspec is passed so we should assume that user wants narrow clone
118 118 opts[b'narrow'] = True
119 119 opts[b'include'].extend(includes)
120 120 opts[b'exclude'].extend(excludes)
121 121
122 122 if opts[b'narrow']:
123 123
124 124 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
125 125 orig(pullop, kwargs)
126 126
127 127 if opts.get(b'depth'):
128 128 kwargs[b'depth'] = opts[b'depth']
129 129
130 130 wrappedextraprepare = extensions.wrappedfunction(
131 131 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
132 132 )
133 133
134 134 with wrappedextraprepare:
135 135 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
136 136
137 137
138 138 def pullnarrowcmd(orig, ui, repo, *args, **opts):
139 139 """Wraps pull command to allow modifying narrow spec."""
140 140 wrappedextraprepare = util.nullcontextmanager()
141 141 if requirements.NARROW_REQUIREMENT in repo.requirements:
142 142
143 143 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
144 144 orig(pullop, kwargs)
145 145 if opts.get('depth'):
146 146 kwargs[b'depth'] = opts['depth']
147 147
148 148 wrappedextraprepare = extensions.wrappedfunction(
149 149 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
150 150 )
151 151
152 152 with wrappedextraprepare:
153 153 return orig(ui, repo, *args, **opts)
154 154
155 155
156 156 def archivenarrowcmd(orig, ui, repo, *args, **opts):
157 157 """Wraps archive command to narrow the default includes."""
158 158 if requirements.NARROW_REQUIREMENT in repo.requirements:
159 159 repo_includes, repo_excludes = repo.narrowpats
160 160 includes = set(opts.get('include', []))
161 161 excludes = set(opts.get('exclude', []))
162 162 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
163 163 includes, excludes, repo_includes, repo_excludes
164 164 )
165 165 if includes:
166 166 opts['include'] = includes
167 167 if excludes:
168 168 opts['exclude'] = excludes
169 169 return orig(ui, repo, *args, **opts)
170 170
171 171
172 172 def pullbundle2extraprepare(orig, pullop, kwargs):
173 173 repo = pullop.repo
174 174 if requirements.NARROW_REQUIREMENT not in repo.requirements:
175 175 return orig(pullop, kwargs)
176 176
177 177 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
178 178 raise error.Abort(_(b"server does not support narrow clones"))
179 179 orig(pullop, kwargs)
180 180 kwargs[b'narrow'] = True
181 181 include, exclude = repo.narrowpats
182 182 kwargs[b'oldincludepats'] = include
183 183 kwargs[b'oldexcludepats'] = exclude
184 184 if include:
185 185 kwargs[b'includepats'] = include
186 186 if exclude:
187 187 kwargs[b'excludepats'] = exclude
188 188 # calculate known nodes only in ellipses cases because in non-ellipses cases
189 189 # we have all the nodes
190 190 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
191 191 kwargs[b'known'] = [
192 192 hex(ctx.node())
193 193 for ctx in repo.set(b'::%ln', pullop.common)
194 194 if ctx.node() != repo.nullid
195 195 ]
196 196 if not kwargs[b'known']:
197 197 # Mercurial serializes an empty list as '' and deserializes it as
198 198 # [''], so delete it instead to avoid handling the empty string on
199 199 # the server.
200 200 del kwargs[b'known']
201 201
202 202
203 203 extensions.wrapfunction(
204 204 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
205 205 )
206 206
207 207
208 208 def _narrow(
209 209 ui,
210 210 repo,
211 211 remote,
212 212 commoninc,
213 213 oldincludes,
214 214 oldexcludes,
215 215 newincludes,
216 216 newexcludes,
217 217 force,
218 218 backup,
219 219 ):
220 220 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
221 221 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
222 222
223 223 # This is essentially doing "hg outgoing" to find all local-only
224 224 # commits. We will then check that the local-only commits don't
225 225 # have any changes to files that will be untracked.
226 226 unfi = repo.unfiltered()
227 227 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
228 228 ui.status(_(b'looking for local changes to affected paths\n'))
229 229 progress = ui.makeprogress(
230 230 topic=_(b'changesets'),
231 231 unit=_(b'changesets'),
232 232 total=len(outgoing.missing) + len(outgoing.excluded),
233 233 )
234 234 localnodes = []
235 235 with progress:
236 236 for n in itertools.chain(outgoing.missing, outgoing.excluded):
237 237 progress.increment()
238 238 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
239 239 localnodes.append(n)
240 240 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
241 241 hiddenrevs = repoview.filterrevs(repo, b'visible')
242 242 visibletostrip = list(
243 243 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
244 244 )
245 245 if visibletostrip:
246 246 ui.status(
247 247 _(
248 248 b'The following changeset(s) or their ancestors have '
249 249 b'local changes not on the remote:\n'
250 250 )
251 251 )
252 252 maxnodes = 10
253 253 if ui.verbose or len(visibletostrip) <= maxnodes:
254 254 for n in visibletostrip:
255 255 ui.status(b'%s\n' % short(n))
256 256 else:
257 257 for n in visibletostrip[:maxnodes]:
258 258 ui.status(b'%s\n' % short(n))
259 259 ui.status(
260 260 _(b'...and %d more, use --verbose to list all\n')
261 261 % (len(visibletostrip) - maxnodes)
262 262 )
263 263 if not force:
264 264 raise error.StateError(
265 265 _(b'local changes found'),
266 266 hint=_(b'use --force-delete-local-changes to ignore'),
267 267 )
268 268
269 269 with ui.uninterruptible():
270 270 if revstostrip:
271 271 tostrip = [unfi.changelog.node(r) for r in revstostrip]
272 272 if repo[b'.'].node() in tostrip:
273 273 # stripping working copy, so move to a different commit first
274 274 urev = max(
275 275 repo.revs(
276 276 b'(::%n) - %ln + null',
277 277 repo[b'.'].node(),
278 278 visibletostrip,
279 279 )
280 280 )
281 281 hg.clean(repo, urev)
282 282 overrides = {(b'devel', b'strip-obsmarkers'): False}
283 283 if backup:
284 284 ui.status(_(b'moving unwanted changesets to backup\n'))
285 285 else:
286 286 ui.status(_(b'deleting unwanted changesets\n'))
287 287 with ui.configoverride(overrides, b'narrow'):
288 288 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
289 289
290 290 todelete = []
291 291 for entry in repo.store.datafiles():
292 292 f = entry.unencoded_path
293 293 if f.startswith(b'data/'):
294 294 file = f[5:-2]
295 295 if not newmatch(file):
296 todelete.append(f)
296 for file_ in entry.files():
297 todelete.append(file_.unencoded_path)
297 298 elif f.startswith(b'meta/'):
298 299 dir = f[5:-13]
299 300 dirs = sorted(pathutil.dirs({dir})) + [dir]
300 301 include = True
301 302 for d in dirs:
302 303 visit = newmatch.visitdir(d)
303 304 if not visit:
304 305 include = False
305 306 break
306 307 if visit == b'all':
307 308 break
308 309 if not include:
309 todelete.append(f)
310 for file_ in entry.files():
311 todelete.append(file_.unencoded_path)
310 312
311 313 repo.destroying()
312 314
313 315 with repo.transaction(b'narrowing'):
314 316 # Update narrowspec before removing revlogs, so repo won't be
315 317 # corrupt in case of crash
316 318 repo.setnarrowpats(newincludes, newexcludes)
317 319
318 320 for f in todelete:
319 321 ui.status(_(b'deleting %s\n') % f)
320 322 util.unlinkpath(repo.svfs.join(f))
321 323 repo.store.markremoved(f)
322 324
323 325 ui.status(_(b'deleting unwanted files from working copy\n'))
324 326 with repo.dirstate.changing_parents(repo):
325 327 narrowspec.updateworkingcopy(repo, assumeclean=True)
326 328 narrowspec.copytoworkingcopy(repo)
327 329
328 330 repo.destroyed()
329 331
330 332
331 333 def _widen(
332 334 ui,
333 335 repo,
334 336 remote,
335 337 commoninc,
336 338 oldincludes,
337 339 oldexcludes,
338 340 newincludes,
339 341 newexcludes,
340 342 ):
341 343 # for now we assume that if a server has ellipses enabled, we will be
342 344 # exchanging ellipses nodes. In future we should add ellipses as a client
343 345 # side requirement (maybe) to distinguish a client is shallow or not and
344 346 # then send that information to server whether we want ellipses or not.
345 347 # Theoretically a non-ellipses repo should be able to use narrow
346 348 # functionality from an ellipses enabled server
347 349 remotecap = remote.capabilities()
348 350 ellipsesremote = any(
349 351 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
350 352 )
351 353
352 354 # check whether we are talking to a server which supports old version of
353 355 # ellipses capabilities
354 356 isoldellipses = (
355 357 ellipsesremote
356 358 and wireprototypes.ELLIPSESCAP1 in remotecap
357 359 and wireprototypes.ELLIPSESCAP not in remotecap
358 360 )
359 361
360 362 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
361 363 orig(pullop, kwargs)
362 364 # The old{in,ex}cludepats have already been set by orig()
363 365 kwargs[b'includepats'] = newincludes
364 366 kwargs[b'excludepats'] = newexcludes
365 367
366 368 wrappedextraprepare = extensions.wrappedfunction(
367 369 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
368 370 )
369 371
370 372 # define a function that narrowbundle2 can call after creating the
371 373 # backup bundle, but before applying the bundle from the server
372 374 def setnewnarrowpats():
373 375 repo.setnarrowpats(newincludes, newexcludes)
374 376
375 377 repo.setnewnarrowpats = setnewnarrowpats
376 378 # silence the devel-warning of applying an empty changegroup
377 379 overrides = {(b'devel', b'all-warnings'): False}
378 380
379 381 common = commoninc[0]
380 382 with ui.uninterruptible():
381 383 if ellipsesremote:
382 384 ds = repo.dirstate
383 385 p1, p2 = ds.p1(), ds.p2()
384 386 with ds.changing_parents(repo):
385 387 ds.setparents(repo.nullid, repo.nullid)
386 388 if isoldellipses:
387 389 with wrappedextraprepare:
388 390 exchange.pull(repo, remote, heads=common)
389 391 else:
390 392 known = []
391 393 if ellipsesremote:
392 394 known = [
393 395 ctx.node()
394 396 for ctx in repo.set(b'::%ln', common)
395 397 if ctx.node() != repo.nullid
396 398 ]
397 399 with remote.commandexecutor() as e:
398 400 bundle = e.callcommand(
399 401 b'narrow_widen',
400 402 {
401 403 b'oldincludes': oldincludes,
402 404 b'oldexcludes': oldexcludes,
403 405 b'newincludes': newincludes,
404 406 b'newexcludes': newexcludes,
405 407 b'cgversion': b'03',
406 408 b'commonheads': common,
407 409 b'known': known,
408 410 b'ellipses': ellipsesremote,
409 411 },
410 412 ).result()
411 413
412 414 trmanager = exchange.transactionmanager(
413 415 repo, b'widen', remote.url()
414 416 )
415 417 with trmanager, repo.ui.configoverride(overrides, b'widen'):
416 418 op = bundle2.bundleoperation(
417 419 repo, trmanager.transaction, source=b'widen'
418 420 )
419 421 # TODO: we should catch error.Abort here
420 422 bundle2.processbundle(repo, bundle, op=op, remote=remote)
421 423
422 424 if ellipsesremote:
423 425 with ds.changing_parents(repo):
424 426 ds.setparents(p1, p2)
425 427
426 428 with repo.transaction(b'widening'), repo.dirstate.changing_parents(
427 429 repo
428 430 ):
429 431 repo.setnewnarrowpats()
430 432 narrowspec.updateworkingcopy(repo)
431 433 narrowspec.copytoworkingcopy(repo)
432 434
433 435
434 436 # TODO(rdamazio): Make new matcher format and update description
435 437 @command(
436 438 b'tracked',
437 439 [
438 440 (b'', b'addinclude', [], _(b'new paths to include')),
439 441 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
440 442 (
441 443 b'',
442 444 b'auto-remove-includes',
443 445 False,
444 446 _(b'automatically choose unused includes to remove'),
445 447 ),
446 448 (b'', b'addexclude', [], _(b'new paths to exclude')),
447 449 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
448 450 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
449 451 (
450 452 b'',
451 453 b'clear',
452 454 False,
453 455 _(b'whether to replace the existing narrowspec'),
454 456 ),
455 457 (
456 458 b'',
457 459 b'force-delete-local-changes',
458 460 False,
459 461 _(b'forces deletion of local changes when narrowing'),
460 462 ),
461 463 (
462 464 b'',
463 465 b'backup',
464 466 True,
465 467 _(b'back up local changes when narrowing'),
466 468 ),
467 469 (
468 470 b'',
469 471 b'update-working-copy',
470 472 False,
471 473 _(b'update working copy when the store has changed'),
472 474 ),
473 475 ]
474 476 + commands.remoteopts,
475 477 _(b'[OPTIONS]... [REMOTE]'),
476 478 inferrepo=True,
477 479 helpcategory=command.CATEGORY_MAINTENANCE,
478 480 )
479 481 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
480 482 """show or change the current narrowspec
481 483
482 484 With no argument, shows the current narrowspec entries, one per line. Each
483 485 line will be prefixed with 'I' or 'X' for included or excluded patterns,
484 486 respectively.
485 487
486 488 The narrowspec is comprised of expressions to match remote files and/or
487 489 directories that should be pulled into your client.
488 490 The narrowspec has *include* and *exclude* expressions, with excludes always
489 491 trumping includes: that is, if a file matches an exclude expression, it will
490 492 be excluded even if it also matches an include expression.
491 493 Excluding files that were never included has no effect.
492 494
493 495 Each included or excluded entry is in the format described by
494 496 'hg help patterns'.
495 497
496 498 The options allow you to add or remove included and excluded expressions.
497 499
498 500 If --clear is specified, then all previous includes and excludes are DROPPED
499 501 and replaced by the new ones specified to --addinclude and --addexclude.
500 502 If --clear is specified without any further options, the narrowspec will be
501 503 empty and will not match any files.
502 504
503 505 If --auto-remove-includes is specified, then those includes that don't match
504 506 any files modified by currently visible local commits (those not shared by
505 507 the remote) will be added to the set of explicitly specified includes to
506 508 remove.
507 509
508 510 --import-rules accepts a path to a file containing rules, allowing you to
509 511 add --addinclude, --addexclude rules in bulk. Like the other include and
510 512 exclude switches, the changes are applied immediately.
511 513 """
512 514 opts = pycompat.byteskwargs(opts)
513 515 if requirements.NARROW_REQUIREMENT not in repo.requirements:
514 516 raise error.InputError(
515 517 _(
516 518 b'the tracked command is only supported on '
517 519 b'repositories cloned with --narrow'
518 520 )
519 521 )
520 522
521 523 # Before supporting, decide whether it "hg tracked --clear" should mean
522 524 # tracking no paths or all paths.
523 525 if opts[b'clear']:
524 526 raise error.InputError(_(b'the --clear option is not yet supported'))
525 527
526 528 # import rules from a file
527 529 newrules = opts.get(b'import_rules')
528 530 if newrules:
529 531 try:
530 532 filepath = os.path.join(encoding.getcwd(), newrules)
531 533 fdata = util.readfile(filepath)
532 534 except IOError as inst:
533 535 raise error.StorageError(
534 536 _(b"cannot read narrowspecs from '%s': %s")
535 537 % (filepath, encoding.strtolocal(inst.strerror))
536 538 )
537 539 includepats, excludepats, profiles = sparse.parseconfig(
538 540 ui, fdata, b'narrow'
539 541 )
540 542 if profiles:
541 543 raise error.InputError(
542 544 _(
543 545 b"including other spec files using '%include' "
544 546 b"is not supported in narrowspec"
545 547 )
546 548 )
547 549 opts[b'addinclude'].extend(includepats)
548 550 opts[b'addexclude'].extend(excludepats)
549 551
550 552 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
551 553 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
552 554 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
553 555 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
554 556 autoremoveincludes = opts[b'auto_remove_includes']
555 557
556 558 update_working_copy = opts[b'update_working_copy']
557 559 only_show = not (
558 560 addedincludes
559 561 or removedincludes
560 562 or addedexcludes
561 563 or removedexcludes
562 564 or newrules
563 565 or autoremoveincludes
564 566 or update_working_copy
565 567 )
566 568
567 569 # Only print the current narrowspec.
568 570 if only_show:
569 571 oldincludes, oldexcludes = repo.narrowpats
570 572 ui.pager(b'tracked')
571 573 fm = ui.formatter(b'narrow', opts)
572 574 for i in sorted(oldincludes):
573 575 fm.startitem()
574 576 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
575 577 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
576 578 for i in sorted(oldexcludes):
577 579 fm.startitem()
578 580 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
579 581 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
580 582 fm.end()
581 583 return 0
582 584
583 585 with repo.wlock(), repo.lock():
584 586 oldincludes, oldexcludes = repo.narrowpats
585 587
586 588 # filter the user passed additions and deletions into actual additions and
587 589 # deletions of excludes and includes
588 590 addedincludes -= oldincludes
589 591 removedincludes &= oldincludes
590 592 addedexcludes -= oldexcludes
591 593 removedexcludes &= oldexcludes
592 594
593 595 widening = addedincludes or removedexcludes
594 596 narrowing = removedincludes or addedexcludes
595 597
596 598 if update_working_copy:
597 599 with repo.transaction(b'narrow-wc'), repo.dirstate.changing_parents(
598 600 repo
599 601 ):
600 602 narrowspec.updateworkingcopy(repo)
601 603 narrowspec.copytoworkingcopy(repo)
602 604 return 0
603 605
604 606 if not (widening or narrowing or autoremoveincludes):
605 607 ui.status(_(b"nothing to widen or narrow\n"))
606 608 return 0
607 609
608 610 cmdutil.bailifchanged(repo)
609 611
610 612 # Find the revisions we have in common with the remote. These will
611 613 # be used for finding local-only changes for narrowing. They will
612 614 # also define the set of revisions to update for widening.
613 615 path = urlutil.get_unique_pull_path_obj(b'tracked', ui, remotepath)
614 616 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(path.loc))
615 617 remote = hg.peer(repo, opts, path)
616 618
617 619 try:
618 620 # check narrow support before doing anything if widening needs to be
619 621 # performed. In future we should also abort if client is ellipses and
620 622 # server does not support ellipses
621 623 if (
622 624 widening
623 625 and wireprototypes.NARROWCAP not in remote.capabilities()
624 626 ):
625 627 raise error.Abort(_(b"server does not support narrow clones"))
626 628
627 629 commoninc = discovery.findcommonincoming(repo, remote)
628 630
629 631 if autoremoveincludes:
630 632 outgoing = discovery.findcommonoutgoing(
631 633 repo, remote, commoninc=commoninc
632 634 )
633 635 ui.status(_(b'looking for unused includes to remove\n'))
634 636 localfiles = set()
635 637 for n in itertools.chain(outgoing.missing, outgoing.excluded):
636 638 localfiles.update(repo[n].files())
637 639 suggestedremovals = []
638 640 for include in sorted(oldincludes):
639 641 match = narrowspec.match(repo.root, [include], oldexcludes)
640 642 if not any(match(f) for f in localfiles):
641 643 suggestedremovals.append(include)
642 644 if suggestedremovals:
643 645 for s in suggestedremovals:
644 646 ui.status(b'%s\n' % s)
645 647 if (
646 648 ui.promptchoice(
647 649 _(
648 650 b'remove these unused includes (yn)?'
649 651 b'$$ &Yes $$ &No'
650 652 )
651 653 )
652 654 == 0
653 655 ):
654 656 removedincludes.update(suggestedremovals)
655 657 narrowing = True
656 658 else:
657 659 ui.status(_(b'found no unused includes\n'))
658 660
659 661 if narrowing:
660 662 newincludes = oldincludes - removedincludes
661 663 newexcludes = oldexcludes | addedexcludes
662 664 _narrow(
663 665 ui,
664 666 repo,
665 667 remote,
666 668 commoninc,
667 669 oldincludes,
668 670 oldexcludes,
669 671 newincludes,
670 672 newexcludes,
671 673 opts[b'force_delete_local_changes'],
672 674 opts[b'backup'],
673 675 )
674 676 # _narrow() updated the narrowspec and _widen() below needs to
675 677 # use the updated values as its base (otherwise removed includes
676 678 # and addedexcludes will be lost in the resulting narrowspec)
677 679 oldincludes = newincludes
678 680 oldexcludes = newexcludes
679 681
680 682 if widening:
681 683 newincludes = oldincludes | addedincludes
682 684 newexcludes = oldexcludes - removedexcludes
683 685 _widen(
684 686 ui,
685 687 repo,
686 688 remote,
687 689 commoninc,
688 690 oldincludes,
689 691 oldexcludes,
690 692 newincludes,
691 693 newexcludes,
692 694 )
693 695 finally:
694 696 remote.close()
695 697
696 698 return 0
@@ -1,901 +1,919
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how much bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
73 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
76 76 def decodedir(path):
77 77 """
78 78 >>> decodedir(b'data/foo.i')
79 79 'data/foo.i'
80 80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 81 'data/foo.i/bla.i'
82 82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 83 'data/foo.i.hg/bla.i'
84 84 """
85 85 if b".hg/" not in path:
86 86 return path
87 87 return (
88 88 path.replace(b".d.hg/", b".d/")
89 89 .replace(b".i.hg/", b".i/")
90 90 .replace(b".hg.hg/", b".hg/")
91 91 )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
112 112 def _buildencodefun():
113 113 """
114 114 >>> enc, dec = _buildencodefun()
115 115
116 116 >>> enc(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118 >>> dec(b'nothing/special.txt')
119 119 'nothing/special.txt'
120 120
121 121 >>> enc(b'HELLO')
122 122 '_h_e_l_l_o'
123 123 >>> dec(b'_h_e_l_l_o')
124 124 'HELLO'
125 125
126 126 >>> enc(b'hello:world?')
127 127 'hello~3aworld~3f'
128 128 >>> dec(b'hello~3aworld~3f')
129 129 'hello:world?'
130 130
131 131 >>> enc(b'the\\x07quick\\xADshot')
132 132 'the~07quick~adshot'
133 133 >>> dec(b'the~07quick~adshot')
134 134 'the\\x07quick\\xadshot'
135 135 """
136 136 e = b'_'
137 137 xchr = pycompat.bytechr
138 138 asciistr = list(map(xchr, range(127)))
139 139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140 140
141 141 cmap = {x: x for x in asciistr}
142 142 for x in _reserved():
143 143 cmap[xchr(x)] = b"~%02x" % x
144 144 for x in capitals + [ord(e)]:
145 145 cmap[xchr(x)] = e + xchr(x).lower()
146 146
147 147 dmap = {}
148 148 for k, v in cmap.items():
149 149 dmap[v] = k
150 150
151 151 def decode(s):
152 152 i = 0
153 153 while i < len(s):
154 154 for l in range(1, 4):
155 155 try:
156 156 yield dmap[s[i : i + l]]
157 157 i += l
158 158 break
159 159 except KeyError:
160 160 pass
161 161 else:
162 162 raise KeyError
163 163
164 164 return (
165 165 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
166 166 lambda s: b''.join(list(decode(s))),
167 167 )
168 168
169 169
170 170 _encodefname, _decodefname = _buildencodefun()
171 171
172 172
173 173 def encodefilename(s):
174 174 """
175 175 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
176 176 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
177 177 """
178 178 return _encodefname(encodedir(s))
179 179
180 180
181 181 def decodefilename(s):
182 182 """
183 183 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
184 184 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
185 185 """
186 186 return decodedir(_decodefname(s))
187 187
188 188
189 189 def _buildlowerencodefun():
190 190 """
191 191 >>> f = _buildlowerencodefun()
192 192 >>> f(b'nothing/special.txt')
193 193 'nothing/special.txt'
194 194 >>> f(b'HELLO')
195 195 'hello'
196 196 >>> f(b'hello:world?')
197 197 'hello~3aworld~3f'
198 198 >>> f(b'the\\x07quick\\xADshot')
199 199 'the~07quick~adshot'
200 200 """
201 201 xchr = pycompat.bytechr
202 202 cmap = {xchr(x): xchr(x) for x in range(127)}
203 203 for x in _reserved():
204 204 cmap[xchr(x)] = b"~%02x" % x
205 205 for x in range(ord(b"A"), ord(b"Z") + 1):
206 206 cmap[xchr(x)] = xchr(x).lower()
207 207
208 208 def lowerencode(s):
209 209 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
210 210
211 211 return lowerencode
212 212
213 213
214 214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 215
216 216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 219
220 220
221 221 def _auxencode(path, dotencode):
222 222 """
223 223 Encodes filenames containing names reserved by Windows or which end in
224 224 period or space. Does not touch other single reserved characters c.
225 225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 226 Additionally encodes space or period at the beginning, if dotencode is
227 227 True. Parameter path is assumed to be all lowercase.
228 228 A segment only needs encoding if a reserved name appears as a
229 229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 230 doesn't need encoding.
231 231
232 232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 233 >>> _auxencode(s.split(b'/'), True)
234 234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 236 >>> _auxencode(s.split(b'/'), False)
237 237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 238 >>> _auxencode([b'foo. '], True)
239 239 ['foo.~20']
240 240 >>> _auxencode([b' .foo'], True)
241 241 ['~20.foo']
242 242 """
243 243 for i, n in enumerate(path):
244 244 if not n:
245 245 continue
246 246 if dotencode and n[0] in b'. ':
247 247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 248 path[i] = n
249 249 else:
250 250 l = n.find(b'.')
251 251 if l == -1:
252 252 l = len(n)
253 253 if (l == 3 and n[:3] in _winres3) or (
254 254 l == 4
255 255 and n[3:4] <= b'9'
256 256 and n[3:4] >= b'1'
257 257 and n[:3] in _winres4
258 258 ):
259 259 # encode third letter ('aux' -> 'au~78')
260 260 ec = b"~%02x" % ord(n[2:3])
261 261 n = n[0:2] + ec + n[3:]
262 262 path[i] = n
263 263 if n[-1] in b'. ':
264 264 # encode last period or space ('foo...' -> 'foo..~2e')
265 265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 266 return path
267 267
268 268
269 269 _maxstorepathlen = 120
270 270 _dirprefixlen = 8
271 271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 272
273 273
274 274 def _hashencode(path, dotencode):
275 275 digest = hex(hashutil.sha1(path).digest())
276 276 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
277 277 parts = _auxencode(le, dotencode)
278 278 basename = parts[-1]
279 279 _root, ext = os.path.splitext(basename)
280 280 sdirs = []
281 281 sdirslen = 0
282 282 for p in parts[:-1]:
283 283 d = p[:_dirprefixlen]
284 284 if d[-1] in b'. ':
285 285 # Windows can't access dirs ending in period or space
286 286 d = d[:-1] + b'_'
287 287 if sdirslen == 0:
288 288 t = len(d)
289 289 else:
290 290 t = sdirslen + 1 + len(d)
291 291 if t > _maxshortdirslen:
292 292 break
293 293 sdirs.append(d)
294 294 sdirslen = t
295 295 dirs = b'/'.join(sdirs)
296 296 if len(dirs) > 0:
297 297 dirs += b'/'
298 298 res = b'dh/' + dirs + digest + ext
299 299 spaceleft = _maxstorepathlen - len(res)
300 300 if spaceleft > 0:
301 301 filler = basename[:spaceleft]
302 302 res = b'dh/' + dirs + filler + digest + ext
303 303 return res
304 304
305 305
306 306 def _hybridencode(path, dotencode):
307 307 """encodes path with a length limit
308 308
309 309 Encodes all paths that begin with 'data/', according to the following.
310 310
311 311 Default encoding (reversible):
312 312
313 313 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
314 314 characters are encoded as '~xx', where xx is the two digit hex code
315 315 of the character (see encodefilename).
316 316 Relevant path components consisting of Windows reserved filenames are
317 317 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
318 318
319 319 Hashed encoding (not reversible):
320 320
321 321 If the default-encoded path is longer than _maxstorepathlen, a
322 322 non-reversible hybrid hashing of the path is done instead.
323 323 This encoding uses up to _dirprefixlen characters of all directory
324 324 levels of the lowerencoded path, but not more levels than can fit into
325 325 _maxshortdirslen.
326 326 Then follows the filler followed by the sha digest of the full path.
327 327 The filler is the beginning of the basename of the lowerencoded path
328 328 (the basename is everything after the last path separator). The filler
329 329 is as long as possible, filling in characters from the basename until
330 330 the encoded path has _maxstorepathlen characters (or all chars of the
331 331 basename have been taken).
332 332 The extension (e.g. '.i' or '.d') is preserved.
333 333
334 334 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
335 335 encoding was used.
336 336 """
337 337 path = encodedir(path)
338 338 ef = _encodefname(path).split(b'/')
339 339 res = b'/'.join(_auxencode(ef, dotencode))
340 340 if len(res) > _maxstorepathlen:
341 341 res = _hashencode(path, dotencode)
342 342 return res
343 343
344 344
345 345 def _pathencode(path):
346 346 de = encodedir(path)
347 347 if len(path) > _maxstorepathlen:
348 348 return _hashencode(de, True)
349 349 ef = _encodefname(de).split(b'/')
350 350 res = b'/'.join(_auxencode(ef, True))
351 351 if len(res) > _maxstorepathlen:
352 352 return _hashencode(de, True)
353 353 return res
354 354
355 355
356 356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357 357
358 358
359 359 def _plainhybridencode(f):
360 360 return _hybridencode(f, False)
361 361
362 362
363 363 def _calcmode(vfs):
364 364 try:
365 365 # files in .hg/ will be created using this mode
366 366 mode = vfs.stat().st_mode
367 367 # avoid some useless chmods
368 368 if (0o777 & ~util.umask) == (0o777 & mode):
369 369 mode = None
370 370 except OSError:
371 371 mode = None
372 372 return mode
373 373
374 374
375 375 _data = [
376 376 b'bookmarks',
377 377 b'narrowspec',
378 378 b'data',
379 379 b'meta',
380 380 b'00manifest.d',
381 381 b'00manifest.i',
382 382 b'00changelog.d',
383 383 b'00changelog.i',
384 384 b'phaseroots',
385 385 b'obsstore',
386 386 b'requires',
387 387 ]
388 388
389 389 REVLOG_FILES_MAIN_EXT = (b'.i',)
390 390 REVLOG_FILES_OTHER_EXT = (
391 391 b'.idx',
392 392 b'.d',
393 393 b'.dat',
394 394 b'.n',
395 395 b'.nd',
396 396 b'.sda',
397 397 )
398 398 # files that are "volatile" and might change between listing and streaming
399 399 #
400 400 # note: the ".nd" file are nodemap data and won't "change" but they might be
401 401 # deleted.
402 402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
403 403
404 404 # some exception to the above matching
405 405 #
406 406 # XXX This is currently not in use because of issue6542
407 407 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
408 408
409 409
410 410 def is_revlog(f, kind, st):
411 411 if kind != stat.S_IFREG:
412 412 return None
413 413 return revlog_type(f)
414 414
415 415
416 416 def revlog_type(f):
417 417 # XXX we need to filter `undo.` created by the transaction here, however
418 418 # being naive about it also filter revlog for `undo.*` files, leading to
419 419 # issue6542. So we no longer use EXCLUDED.
420 420 if f.endswith(REVLOG_FILES_MAIN_EXT):
421 421 return FILEFLAGS_REVLOG_MAIN
422 422 elif f.endswith(REVLOG_FILES_OTHER_EXT):
423 423 t = FILETYPE_FILELOG_OTHER
424 424 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
425 425 t |= FILEFLAGS_VOLATILE
426 426 return t
427 427 return None
428 428
429 429
430 430 # the file is part of changelog data
431 431 FILEFLAGS_CHANGELOG = 1 << 13
432 432 # the file is part of manifest data
433 433 FILEFLAGS_MANIFESTLOG = 1 << 12
434 434 # the file is part of filelog data
435 435 FILEFLAGS_FILELOG = 1 << 11
436 436 # file that are not directly part of a revlog
437 437 FILEFLAGS_OTHER = 1 << 10
438 438
439 439 # the main entry point for a revlog
440 440 FILEFLAGS_REVLOG_MAIN = 1 << 1
441 441 # a secondary file for a revlog
442 442 FILEFLAGS_REVLOG_OTHER = 1 << 0
443 443
444 444 # files that are "volatile" and might change between listing and streaming
445 445 FILEFLAGS_VOLATILE = 1 << 20
446 446
447 447 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
448 448 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
449 449 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
450 450 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
451 451 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
452 452 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
453 453 FILETYPE_OTHER = FILEFLAGS_OTHER
454 454
455 455
456 456 @attr.s(slots=True)
457 457 class StoreEntry:
458 458 """An entry in the store
459 459
460 460 This is returned by `store.walk` and represent some data in the store."""
461 461
462 462 unencoded_path = attr.ib()
463 463 is_revlog = attr.ib(default=False)
464 464 revlog_type = attr.ib(default=None)
465 465 is_revlog_main = attr.ib(default=None)
466 466 is_volatile = attr.ib(default=False)
467 467 file_size = attr.ib(default=None)
468 468
469 def files(self):
470 return [
471 StoreFile(
472 unencoded_path=self.unencoded_path,
473 file_size=self.file_size,
474 is_volatile=self.is_volatile,
475 )
476 ]
477
478
479 @attr.s(slots=True)
480 class StoreFile:
481 """a file matching an entry"""
482
483 unencoded_path = attr.ib()
484 file_size = attr.ib()
485 is_volatile = attr.ib(default=False)
486
469 487
470 488 class basicstore:
471 489 '''base class for local repository stores'''
472 490
473 491 def __init__(self, path, vfstype):
474 492 vfs = vfstype(path)
475 493 self.path = vfs.base
476 494 self.createmode = _calcmode(vfs)
477 495 vfs.createmode = self.createmode
478 496 self.rawvfs = vfs
479 497 self.vfs = vfsmod.filtervfs(vfs, encodedir)
480 498 self.opener = self.vfs
481 499
482 500 def join(self, f):
483 501 return self.path + b'/' + encodedir(f)
484 502
485 503 def _walk(self, relpath, recurse):
486 504 '''yields (revlog_type, unencoded, size)'''
487 505 path = self.path
488 506 if relpath:
489 507 path += b'/' + relpath
490 508 striplen = len(self.path) + 1
491 509 l = []
492 510 if self.rawvfs.isdir(path):
493 511 visit = [path]
494 512 readdir = self.rawvfs.readdir
495 513 while visit:
496 514 p = visit.pop()
497 515 for f, kind, st in readdir(p, stat=True):
498 516 fp = p + b'/' + f
499 517 rl_type = is_revlog(f, kind, st)
500 518 if rl_type is not None:
501 519 n = util.pconvert(fp[striplen:])
502 520 l.append((rl_type, decodedir(n), st.st_size))
503 521 elif kind == stat.S_IFDIR and recurse:
504 522 visit.append(fp)
505 523 l.sort()
506 524 return l
507 525
508 526 def changelog(self, trypending, concurrencychecker=None):
509 527 return changelog.changelog(
510 528 self.vfs,
511 529 trypending=trypending,
512 530 concurrencychecker=concurrencychecker,
513 531 )
514 532
515 533 def manifestlog(self, repo, storenarrowmatch):
516 534 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
517 535 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
518 536
519 537 def datafiles(
520 538 self, matcher=None, undecodable=None
521 539 ) -> Generator[StoreEntry, None, None]:
522 540 """Like walk, but excluding the changelog and root manifest.
523 541
524 542 When [undecodable] is None, revlogs names that can't be
525 543 decoded cause an exception. When it is provided, it should
526 544 be a list and the filenames that can't be decoded are added
527 545 to it instead. This is very rarely needed."""
528 546 files = self._walk(b'data', True) + self._walk(b'meta', True)
529 547 for (t, u, s) in files:
530 548 if t is not None:
531 549 yield StoreEntry(
532 550 unencoded_path=u,
533 551 is_revlog=True,
534 552 revlog_type=FILEFLAGS_FILELOG,
535 553 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
536 554 is_volatile=bool(t & FILEFLAGS_VOLATILE),
537 555 file_size=s,
538 556 )
539 557
540 558 def topfiles(self) -> Generator[StoreEntry, None, None]:
541 559 # yield manifest before changelog
542 560 files = reversed(self._walk(b'', False))
543 561 for (t, u, s) in files:
544 562 if u.startswith(b'00changelog'):
545 563 revlog_type = FILEFLAGS_CHANGELOG
546 564 elif u.startswith(b'00manifest'):
547 565 revlog_type = FILEFLAGS_MANIFESTLOG
548 566 else:
549 567 revlog_type = None
550 568 yield StoreEntry(
551 569 unencoded_path=u,
552 570 is_revlog=revlog_type is not None,
553 571 revlog_type=revlog_type,
554 572 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
555 573 is_volatile=bool(t & FILEFLAGS_VOLATILE),
556 574 file_size=s,
557 575 )
558 576
559 577 def walk(self, matcher=None) -> Generator[StoreEntry, None, None]:
560 578 """return files related to data storage (ie: revlogs)
561 579
562 580 yields (file_type, unencoded, size)
563 581
564 582 if a matcher is passed, storage files of only those tracked paths
565 583 are passed with matches the matcher
566 584 """
567 585 # yield data files first
568 586 for x in self.datafiles(matcher):
569 587 yield x
570 588 for x in self.topfiles():
571 589 yield x
572 590
573 591 def copylist(self):
574 592 return _data
575 593
576 594 def write(self, tr):
577 595 pass
578 596
579 597 def invalidatecaches(self):
580 598 pass
581 599
582 600 def markremoved(self, fn):
583 601 pass
584 602
585 603 def __contains__(self, path):
586 604 '''Checks if the store contains path'''
587 605 path = b"/".join((b"data", path))
588 606 # file?
589 607 if self.vfs.exists(path + b".i"):
590 608 return True
591 609 # dir?
592 610 if not path.endswith(b"/"):
593 611 path = path + b"/"
594 612 return self.vfs.exists(path)
595 613
596 614
597 615 class encodedstore(basicstore):
598 616 def __init__(self, path, vfstype):
599 617 vfs = vfstype(path + b'/store')
600 618 self.path = vfs.base
601 619 self.createmode = _calcmode(vfs)
602 620 vfs.createmode = self.createmode
603 621 self.rawvfs = vfs
604 622 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
605 623 self.opener = self.vfs
606 624
607 625 # note: topfiles would also need a decode phase. It is just that in
608 626 # practice we do not have any file outside of `data/` that needs encoding.
609 627 # However that might change so we should probably add a test and encoding
610 628 # decoding for it too. see issue6548
611 629
612 630 def datafiles(
613 631 self, matcher=None, undecodable=None
614 632 ) -> Generator[StoreEntry, None, None]:
615 633 for entry in super(encodedstore, self).datafiles():
616 634 try:
617 635 f1 = entry.unencoded_path
618 636 f2 = decodefilename(f1)
619 637 except KeyError:
620 638 if undecodable is None:
621 639 msg = _(b'undecodable revlog name %s') % f1
622 640 raise error.StorageError(msg)
623 641 else:
624 642 undecodable.append(f1)
625 643 continue
626 644 if not _matchtrackedpath(f2, matcher):
627 645 continue
628 646 entry.unencoded_path = f2
629 647 yield entry
630 648
631 649 def join(self, f):
632 650 return self.path + b'/' + encodefilename(f)
633 651
634 652 def copylist(self):
635 653 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
636 654
637 655
638 656 class fncache:
639 657 # the filename used to be partially encoded
640 658 # hence the encodedir/decodedir dance
641 659 def __init__(self, vfs):
642 660 self.vfs = vfs
643 661 self._ignores = set()
644 662 self.entries = None
645 663 self._dirty = False
646 664 # set of new additions to fncache
647 665 self.addls = set()
648 666
649 667 def ensureloaded(self, warn=None):
650 668 """read the fncache file if not already read.
651 669
652 670 If the file on disk is corrupted, raise. If warn is provided,
653 671 warn and keep going instead."""
654 672 if self.entries is None:
655 673 self._load(warn)
656 674
657 675 def _load(self, warn=None):
658 676 '''fill the entries from the fncache file'''
659 677 self._dirty = False
660 678 try:
661 679 fp = self.vfs(b'fncache', mode=b'rb')
662 680 except IOError:
663 681 # skip nonexistent file
664 682 self.entries = set()
665 683 return
666 684
667 685 self.entries = set()
668 686 chunk = b''
669 687 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
670 688 chunk += c
671 689 try:
672 690 p = chunk.rindex(b'\n')
673 691 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
674 692 chunk = chunk[p + 1 :]
675 693 except ValueError:
676 694 # substring '\n' not found, maybe the entry is bigger than the
677 695 # chunksize, so let's keep iterating
678 696 pass
679 697
680 698 if chunk:
681 699 msg = _(b"fncache does not ends with a newline")
682 700 if warn:
683 701 warn(msg + b'\n')
684 702 else:
685 703 raise error.Abort(
686 704 msg,
687 705 hint=_(
688 706 b"use 'hg debugrebuildfncache' to "
689 707 b"rebuild the fncache"
690 708 ),
691 709 )
692 710 self._checkentries(fp, warn)
693 711 fp.close()
694 712
695 713 def _checkentries(self, fp, warn):
696 714 """make sure there is no empty string in entries"""
697 715 if b'' in self.entries:
698 716 fp.seek(0)
699 717 for n, line in enumerate(fp):
700 718 if not line.rstrip(b'\n'):
701 719 t = _(b'invalid entry in fncache, line %d') % (n + 1)
702 720 if warn:
703 721 warn(t + b'\n')
704 722 else:
705 723 raise error.Abort(t)
706 724
707 725 def write(self, tr):
708 726 if self._dirty:
709 727 assert self.entries is not None
710 728 self.entries = self.entries | self.addls
711 729 self.addls = set()
712 730 tr.addbackup(b'fncache')
713 731 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
714 732 if self.entries:
715 733 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
716 734 fp.close()
717 735 self._dirty = False
718 736 if self.addls:
719 737 # if we have just new entries, let's append them to the fncache
720 738 tr.addbackup(b'fncache')
721 739 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
722 740 if self.addls:
723 741 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
724 742 fp.close()
725 743 self.entries = None
726 744 self.addls = set()
727 745
728 746 def addignore(self, fn):
729 747 self._ignores.add(fn)
730 748
731 749 def add(self, fn):
732 750 if fn in self._ignores:
733 751 return
734 752 if self.entries is None:
735 753 self._load()
736 754 if fn not in self.entries:
737 755 self.addls.add(fn)
738 756
739 757 def remove(self, fn):
740 758 if self.entries is None:
741 759 self._load()
742 760 if fn in self.addls:
743 761 self.addls.remove(fn)
744 762 return
745 763 try:
746 764 self.entries.remove(fn)
747 765 self._dirty = True
748 766 except KeyError:
749 767 pass
750 768
751 769 def __contains__(self, fn):
752 770 if fn in self.addls:
753 771 return True
754 772 if self.entries is None:
755 773 self._load()
756 774 return fn in self.entries
757 775
758 776 def __iter__(self):
759 777 if self.entries is None:
760 778 self._load()
761 779 return iter(self.entries | self.addls)
762 780
763 781
764 782 class _fncachevfs(vfsmod.proxyvfs):
765 783 def __init__(self, vfs, fnc, encode):
766 784 vfsmod.proxyvfs.__init__(self, vfs)
767 785 self.fncache = fnc
768 786 self.encode = encode
769 787
770 788 def __call__(self, path, mode=b'r', *args, **kw):
771 789 encoded = self.encode(path)
772 790 if (
773 791 mode not in (b'r', b'rb')
774 792 and (path.startswith(b'data/') or path.startswith(b'meta/'))
775 793 and revlog_type(path) is not None
776 794 ):
777 795 # do not trigger a fncache load when adding a file that already is
778 796 # known to exist.
779 797 notload = self.fncache.entries is None and self.vfs.exists(encoded)
780 798 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
781 799 # when appending to an existing file, if the file has size zero,
782 800 # it should be considered as missing. Such zero-size files are
783 801 # the result of truncation when a transaction is aborted.
784 802 notload = False
785 803 if not notload:
786 804 self.fncache.add(path)
787 805 return self.vfs(encoded, mode, *args, **kw)
788 806
789 807 def join(self, path):
790 808 if path:
791 809 return self.vfs.join(self.encode(path))
792 810 else:
793 811 return self.vfs.join(path)
794 812
795 813 def register_file(self, path):
796 814 """generic hook point to lets fncache steer its stew"""
797 815 if path.startswith(b'data/') or path.startswith(b'meta/'):
798 816 self.fncache.add(path)
799 817
800 818
801 819 class fncachestore(basicstore):
802 820 def __init__(self, path, vfstype, dotencode):
803 821 if dotencode:
804 822 encode = _pathencode
805 823 else:
806 824 encode = _plainhybridencode
807 825 self.encode = encode
808 826 vfs = vfstype(path + b'/store')
809 827 self.path = vfs.base
810 828 self.pathsep = self.path + b'/'
811 829 self.createmode = _calcmode(vfs)
812 830 vfs.createmode = self.createmode
813 831 self.rawvfs = vfs
814 832 fnc = fncache(vfs)
815 833 self.fncache = fnc
816 834 self.vfs = _fncachevfs(vfs, fnc, encode)
817 835 self.opener = self.vfs
818 836
819 837 def join(self, f):
820 838 return self.pathsep + self.encode(f)
821 839
822 840 def getsize(self, path):
823 841 return self.rawvfs.stat(path).st_size
824 842
825 843 def datafiles(
826 844 self, matcher=None, undecodable=None
827 845 ) -> Generator[StoreEntry, None, None]:
828 846 for f in sorted(self.fncache):
829 847 if not _matchtrackedpath(f, matcher):
830 848 continue
831 849 ef = self.encode(f)
832 850 t = revlog_type(f)
833 851 if t is None:
834 852 # Note: this should not be in the fncache then…
835 853 #
836 854 # However the fncache might contains such file added by
837 855 # previous version of Mercurial.
838 856 continue
839 857 t |= FILEFLAGS_FILELOG
840 858 try:
841 859 yield StoreEntry(
842 860 unencoded_path=f,
843 861 is_revlog=True,
844 862 revlog_type=FILEFLAGS_FILELOG,
845 863 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
846 864 is_volatile=bool(t & FILEFLAGS_VOLATILE),
847 865 file_size=self.getsize(ef),
848 866 )
849 867 except FileNotFoundError:
850 868 pass
851 869
852 870 def copylist(self):
853 871 d = (
854 872 b'bookmarks',
855 873 b'narrowspec',
856 874 b'data',
857 875 b'meta',
858 876 b'dh',
859 877 b'fncache',
860 878 b'phaseroots',
861 879 b'obsstore',
862 880 b'00manifest.d',
863 881 b'00manifest.i',
864 882 b'00changelog.d',
865 883 b'00changelog.i',
866 884 b'requires',
867 885 )
868 886 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
869 887
870 888 def write(self, tr):
871 889 self.fncache.write(tr)
872 890
873 891 def invalidatecaches(self):
874 892 self.fncache.entries = None
875 893 self.fncache.addls = set()
876 894
877 895 def markremoved(self, fn):
878 896 self.fncache.remove(fn)
879 897
880 898 def _exists(self, f):
881 899 ef = self.encode(f)
882 900 try:
883 901 self.getsize(ef)
884 902 return True
885 903 except FileNotFoundError:
886 904 return False
887 905
888 906 def __contains__(self, path):
889 907 '''Checks if the store contains path'''
890 908 path = b"/".join((b"data", path))
891 909 # check for files (exact match)
892 910 e = path + b'.i'
893 911 if e in self.fncache and self._exists(e):
894 912 return True
895 913 # now check for directories (prefix match)
896 914 if not path.endswith(b'/'):
897 915 path += b'/'
898 916 for e in self.fncache:
899 917 if e.startswith(path) and self._exists(e):
900 918 return True
901 919 return False
@@ -1,937 +1,937
1 1 # streamclone.py - producing and consuming streaming repository data
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import contextlib
10 10 import os
11 11 import struct
12 12
13 13 from .i18n import _
14 14 from .pycompat import open
15 15 from .interfaces import repository
16 16 from . import (
17 17 bookmarks,
18 18 cacheutil,
19 19 error,
20 20 narrowspec,
21 21 phases,
22 22 pycompat,
23 23 requirements as requirementsmod,
24 24 scmutil,
25 25 store,
26 26 transaction,
27 27 util,
28 28 )
29 29 from .revlogutils import (
30 30 nodemap,
31 31 )
32 32
33 33
34 34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
35 35 """determine the final set of requirement for a new stream clone
36 36
37 37 this method combine the "default" requirements that a new repository would
38 38 use with the constaint we get from the stream clone content. We keep local
39 39 configuration choice when possible.
40 40 """
41 41 requirements = set(default_requirements)
42 42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
43 43 requirements.update(streamed_requirements)
44 44 return requirements
45 45
46 46
47 47 def streamed_requirements(repo):
48 48 """the set of requirement the new clone will have to support
49 49
50 50 This is used for advertising the stream options and to generate the actual
51 51 stream content."""
52 52 requiredformats = (
53 53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
54 54 )
55 55 return requiredformats
56 56
57 57
58 58 def canperformstreamclone(pullop, bundle2=False):
59 59 """Whether it is possible to perform a streaming clone as part of pull.
60 60
61 61 ``bundle2`` will cause the function to consider stream clone through
62 62 bundle2 and only through bundle2.
63 63
64 64 Returns a tuple of (supported, requirements). ``supported`` is True if
65 65 streaming clone is supported and False otherwise. ``requirements`` is
66 66 a set of repo requirements from the remote, or ``None`` if stream clone
67 67 isn't supported.
68 68 """
69 69 repo = pullop.repo
70 70 remote = pullop.remote
71 71
72 72 bundle2supported = False
73 73 if pullop.canusebundle2:
74 74 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
75 75 bundle2supported = True
76 76 # else
77 77 # Server doesn't support bundle2 stream clone or doesn't support
78 78 # the versions we support. Fall back and possibly allow legacy.
79 79
80 80 # Ensures legacy code path uses available bundle2.
81 81 if bundle2supported and not bundle2:
82 82 return False, None
83 83 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
84 84 elif bundle2 and not bundle2supported:
85 85 return False, None
86 86
87 87 # Streaming clone only works on empty repositories.
88 88 if len(repo):
89 89 return False, None
90 90
91 91 # Streaming clone only works if all data is being requested.
92 92 if pullop.heads:
93 93 return False, None
94 94
95 95 streamrequested = pullop.streamclonerequested
96 96
97 97 # If we don't have a preference, let the server decide for us. This
98 98 # likely only comes into play in LANs.
99 99 if streamrequested is None:
100 100 # The server can advertise whether to prefer streaming clone.
101 101 streamrequested = remote.capable(b'stream-preferred')
102 102
103 103 if not streamrequested:
104 104 return False, None
105 105
106 106 # In order for stream clone to work, the client has to support all the
107 107 # requirements advertised by the server.
108 108 #
109 109 # The server advertises its requirements via the "stream" and "streamreqs"
110 110 # capability. "stream" (a value-less capability) is advertised if and only
111 111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
112 112 # is advertised and contains a comma-delimited list of requirements.
113 113 requirements = set()
114 114 if remote.capable(b'stream'):
115 115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
116 116 else:
117 117 streamreqs = remote.capable(b'streamreqs')
118 118 # This is weird and shouldn't happen with modern servers.
119 119 if not streamreqs:
120 120 pullop.repo.ui.warn(
121 121 _(
122 122 b'warning: stream clone requested but server has them '
123 123 b'disabled\n'
124 124 )
125 125 )
126 126 return False, None
127 127
128 128 streamreqs = set(streamreqs.split(b','))
129 129 # Server requires something we don't support. Bail.
130 130 missingreqs = streamreqs - repo.supported
131 131 if missingreqs:
132 132 pullop.repo.ui.warn(
133 133 _(
134 134 b'warning: stream clone requested but client is missing '
135 135 b'requirements: %s\n'
136 136 )
137 137 % b', '.join(sorted(missingreqs))
138 138 )
139 139 pullop.repo.ui.warn(
140 140 _(
141 141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
142 142 b'for more information)\n'
143 143 )
144 144 )
145 145 return False, None
146 146 requirements = streamreqs
147 147
148 148 return True, requirements
149 149
150 150
151 151 def maybeperformlegacystreamclone(pullop):
152 152 """Possibly perform a legacy stream clone operation.
153 153
154 154 Legacy stream clones are performed as part of pull but before all other
155 155 operations.
156 156
157 157 A legacy stream clone will not be performed if a bundle2 stream clone is
158 158 supported.
159 159 """
160 160 from . import localrepo
161 161
162 162 supported, requirements = canperformstreamclone(pullop)
163 163
164 164 if not supported:
165 165 return
166 166
167 167 repo = pullop.repo
168 168 remote = pullop.remote
169 169
170 170 # Save remote branchmap. We will use it later to speed up branchcache
171 171 # creation.
172 172 rbranchmap = None
173 173 if remote.capable(b'branchmap'):
174 174 with remote.commandexecutor() as e:
175 175 rbranchmap = e.callcommand(b'branchmap', {}).result()
176 176
177 177 repo.ui.status(_(b'streaming all changes\n'))
178 178
179 179 with remote.commandexecutor() as e:
180 180 fp = e.callcommand(b'stream_out', {}).result()
181 181
182 182 # TODO strictly speaking, this code should all be inside the context
183 183 # manager because the context manager is supposed to ensure all wire state
184 184 # is flushed when exiting. But the legacy peers don't do this, so it
185 185 # doesn't matter.
186 186 l = fp.readline()
187 187 try:
188 188 resp = int(l)
189 189 except ValueError:
190 190 raise error.ResponseError(
191 191 _(b'unexpected response from remote server:'), l
192 192 )
193 193 if resp == 1:
194 194 raise error.Abort(_(b'operation forbidden by server'))
195 195 elif resp == 2:
196 196 raise error.Abort(_(b'locking the remote repository failed'))
197 197 elif resp != 0:
198 198 raise error.Abort(_(b'the server sent an unknown error code'))
199 199
200 200 l = fp.readline()
201 201 try:
202 202 filecount, bytecount = map(int, l.split(b' ', 1))
203 203 except (ValueError, TypeError):
204 204 raise error.ResponseError(
205 205 _(b'unexpected response from remote server:'), l
206 206 )
207 207
208 208 with repo.lock():
209 209 consumev1(repo, fp, filecount, bytecount)
210 210 repo.requirements = new_stream_clone_requirements(
211 211 repo.requirements,
212 212 requirements,
213 213 )
214 214 repo.svfs.options = localrepo.resolvestorevfsoptions(
215 215 repo.ui, repo.requirements, repo.features
216 216 )
217 217 scmutil.writereporequirements(repo)
218 218 nodemap.post_stream_cleanup(repo)
219 219
220 220 if rbranchmap:
221 221 repo._branchcaches.replace(repo, rbranchmap)
222 222
223 223 repo.invalidate()
224 224
225 225
226 226 def allowservergeneration(repo):
227 227 """Whether streaming clones are allowed from the server."""
228 228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
229 229 return False
230 230
231 231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
232 232 return False
233 233
234 234 # The way stream clone works makes it impossible to hide secret changesets.
235 235 # So don't allow this by default.
236 236 secret = phases.hassecret(repo)
237 237 if secret:
238 238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
239 239
240 240 return True
241 241
242 242
243 243 # This is it's own function so extensions can override it.
244 244 def _walkstreamfiles(repo, matcher=None):
245 245 return repo.store.walk(matcher)
246 246
247 247
248 248 def generatev1(repo):
249 249 """Emit content for version 1 of a streaming clone.
250 250
251 251 This returns a 3-tuple of (file count, byte size, data iterator).
252 252
253 253 The data iterator consists of N entries for each file being transferred.
254 254 Each file entry starts as a line with the file name and integer size
255 255 delimited by a null byte.
256 256
257 257 The raw file data follows. Following the raw file data is the next file
258 258 entry, or EOF.
259 259
260 260 When used on the wire protocol, an additional line indicating protocol
261 261 success will be prepended to the stream. This function is not responsible
262 262 for adding it.
263 263
264 264 This function will obtain a repository lock to ensure a consistent view of
265 265 the store is captured. It therefore may raise LockError.
266 266 """
267 267 entries = []
268 268 total_bytes = 0
269 269 # Get consistent snapshot of repo, lock during scan.
270 270 with repo.lock():
271 271 repo.ui.debug(b'scanning\n')
272 272 for entry in _walkstreamfiles(repo):
273 if entry.file_size:
274 entries.append((entry.unencoded_path, entry.file_size))
275 total_bytes += entry.file_size
273 for f in entry.files():
274 if f.file_size:
275 entries.append((f.unencoded_path, f.file_size))
276 total_bytes += f.file_size
276 277 _test_sync_point_walk_1(repo)
277 278 _test_sync_point_walk_2(repo)
278 279
279 280 repo.ui.debug(
280 281 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
281 282 )
282 283
283 284 svfs = repo.svfs
284 285 debugflag = repo.ui.debugflag
285 286
286 287 def emitrevlogdata():
287 288 for name, size in entries:
288 289 if debugflag:
289 290 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
290 291 # partially encode name over the wire for backwards compat
291 292 yield b'%s\0%d\n' % (store.encodedir(name), size)
292 293 # auditing at this stage is both pointless (paths are already
293 294 # trusted by the local repo) and expensive
294 295 with svfs(name, b'rb', auditpath=False) as fp:
295 296 if size <= 65536:
296 297 yield fp.read(size)
297 298 else:
298 299 for chunk in util.filechunkiter(fp, limit=size):
299 300 yield chunk
300 301
301 302 return len(entries), total_bytes, emitrevlogdata()
302 303
303 304
304 305 def generatev1wireproto(repo):
305 306 """Emit content for version 1 of streaming clone suitable for the wire.
306 307
307 308 This is the data output from ``generatev1()`` with 2 header lines. The
308 309 first line indicates overall success. The 2nd contains the file count and
309 310 byte size of payload.
310 311
311 312 The success line contains "0" for success, "1" for stream generation not
312 313 allowed, and "2" for error locking the repository (possibly indicating
313 314 a permissions error for the server process).
314 315 """
315 316 if not allowservergeneration(repo):
316 317 yield b'1\n'
317 318 return
318 319
319 320 try:
320 321 filecount, bytecount, it = generatev1(repo)
321 322 except error.LockError:
322 323 yield b'2\n'
323 324 return
324 325
325 326 # Indicates successful response.
326 327 yield b'0\n'
327 328 yield b'%d %d\n' % (filecount, bytecount)
328 329 for chunk in it:
329 330 yield chunk
330 331
331 332
332 333 def generatebundlev1(repo, compression=b'UN'):
333 334 """Emit content for version 1 of a stream clone bundle.
334 335
335 336 The first 4 bytes of the output ("HGS1") denote this as stream clone
336 337 bundle version 1.
337 338
338 339 The next 2 bytes indicate the compression type. Only "UN" is currently
339 340 supported.
340 341
341 342 The next 16 bytes are two 64-bit big endian unsigned integers indicating
342 343 file count and byte count, respectively.
343 344
344 345 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
345 346 of the requirements string, including a trailing \0. The following N bytes
346 347 are the requirements string, which is ASCII containing a comma-delimited
347 348 list of repo requirements that are needed to support the data.
348 349
349 350 The remaining content is the output of ``generatev1()`` (which may be
350 351 compressed in the future).
351 352
352 353 Returns a tuple of (requirements, data generator).
353 354 """
354 355 if compression != b'UN':
355 356 raise ValueError(b'we do not support the compression argument yet')
356 357
357 358 requirements = streamed_requirements(repo)
358 359 requires = b','.join(sorted(requirements))
359 360
360 361 def gen():
361 362 yield b'HGS1'
362 363 yield compression
363 364
364 365 filecount, bytecount, it = generatev1(repo)
365 366 repo.ui.status(
366 367 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
367 368 )
368 369
369 370 yield struct.pack(b'>QQ', filecount, bytecount)
370 371 yield struct.pack(b'>H', len(requires) + 1)
371 372 yield requires + b'\0'
372 373
373 374 # This is where we'll add compression in the future.
374 375 assert compression == b'UN'
375 376
376 377 progress = repo.ui.makeprogress(
377 378 _(b'bundle'), total=bytecount, unit=_(b'bytes')
378 379 )
379 380 progress.update(0)
380 381
381 382 for chunk in it:
382 383 progress.increment(step=len(chunk))
383 384 yield chunk
384 385
385 386 progress.complete()
386 387
387 388 return requirements, gen()
388 389
389 390
390 391 def consumev1(repo, fp, filecount, bytecount):
391 392 """Apply the contents from version 1 of a streaming clone file handle.
392 393
393 394 This takes the output from "stream_out" and applies it to the specified
394 395 repository.
395 396
396 397 Like "stream_out," the status line added by the wire protocol is not
397 398 handled by this function.
398 399 """
399 400 with repo.lock():
400 401 repo.ui.status(
401 402 _(b'%d files to transfer, %s of data\n')
402 403 % (filecount, util.bytecount(bytecount))
403 404 )
404 405 progress = repo.ui.makeprogress(
405 406 _(b'clone'), total=bytecount, unit=_(b'bytes')
406 407 )
407 408 progress.update(0)
408 409 start = util.timer()
409 410
410 411 # TODO: get rid of (potential) inconsistency
411 412 #
412 413 # If transaction is started and any @filecache property is
413 414 # changed at this point, it causes inconsistency between
414 415 # in-memory cached property and streamclone-ed file on the
415 416 # disk. Nested transaction prevents transaction scope "clone"
416 417 # below from writing in-memory changes out at the end of it,
417 418 # even though in-memory changes are discarded at the end of it
418 419 # regardless of transaction nesting.
419 420 #
420 421 # But transaction nesting can't be simply prohibited, because
421 422 # nesting occurs also in ordinary case (e.g. enabling
422 423 # clonebundles).
423 424
424 425 with repo.transaction(b'clone'):
425 426 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
426 427 for i in range(filecount):
427 428 # XXX doesn't support '\n' or '\r' in filenames
428 429 l = fp.readline()
429 430 try:
430 431 name, size = l.split(b'\0', 1)
431 432 size = int(size)
432 433 except (ValueError, TypeError):
433 434 raise error.ResponseError(
434 435 _(b'unexpected response from remote server:'), l
435 436 )
436 437 if repo.ui.debugflag:
437 438 repo.ui.debug(
438 439 b'adding %s (%s)\n' % (name, util.bytecount(size))
439 440 )
440 441 # for backwards compat, name was partially encoded
441 442 path = store.decodedir(name)
442 443 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
443 444 for chunk in util.filechunkiter(fp, limit=size):
444 445 progress.increment(step=len(chunk))
445 446 ofp.write(chunk)
446 447
447 448 # force @filecache properties to be reloaded from
448 449 # streamclone-ed file at next access
449 450 repo.invalidate(clearfilecache=True)
450 451
451 452 elapsed = util.timer() - start
452 453 if elapsed <= 0:
453 454 elapsed = 0.001
454 455 progress.complete()
455 456 repo.ui.status(
456 457 _(b'transferred %s in %.1f seconds (%s/sec)\n')
457 458 % (
458 459 util.bytecount(bytecount),
459 460 elapsed,
460 461 util.bytecount(bytecount / elapsed),
461 462 )
462 463 )
463 464
464 465
465 466 def readbundle1header(fp):
466 467 compression = fp.read(2)
467 468 if compression != b'UN':
468 469 raise error.Abort(
469 470 _(
470 471 b'only uncompressed stream clone bundles are '
471 472 b'supported; got %s'
472 473 )
473 474 % compression
474 475 )
475 476
476 477 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
477 478 requireslen = struct.unpack(b'>H', fp.read(2))[0]
478 479 requires = fp.read(requireslen)
479 480
480 481 if not requires.endswith(b'\0'):
481 482 raise error.Abort(
482 483 _(
483 484 b'malformed stream clone bundle: '
484 485 b'requirements not properly encoded'
485 486 )
486 487 )
487 488
488 489 requirements = set(requires.rstrip(b'\0').split(b','))
489 490
490 491 return filecount, bytecount, requirements
491 492
492 493
493 494 def applybundlev1(repo, fp):
494 495 """Apply the content from a stream clone bundle version 1.
495 496
496 497 We assume the 4 byte header has been read and validated and the file handle
497 498 is at the 2 byte compression identifier.
498 499 """
499 500 if len(repo):
500 501 raise error.Abort(
501 502 _(b'cannot apply stream clone bundle on non-empty repo')
502 503 )
503 504
504 505 filecount, bytecount, requirements = readbundle1header(fp)
505 506 missingreqs = requirements - repo.supported
506 507 if missingreqs:
507 508 raise error.Abort(
508 509 _(b'unable to apply stream clone: unsupported format: %s')
509 510 % b', '.join(sorted(missingreqs))
510 511 )
511 512
512 513 consumev1(repo, fp, filecount, bytecount)
513 514 nodemap.post_stream_cleanup(repo)
514 515
515 516
516 517 class streamcloneapplier:
517 518 """Class to manage applying streaming clone bundles.
518 519
519 520 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
520 521 readers to perform bundle type-specific functionality.
521 522 """
522 523
523 524 def __init__(self, fh):
524 525 self._fh = fh
525 526
526 527 def apply(self, repo):
527 528 return applybundlev1(repo, self._fh)
528 529
529 530
530 531 # type of file to stream
531 532 _fileappend = 0 # append only file
532 533 _filefull = 1 # full snapshot file
533 534
534 535 # Source of the file
535 536 _srcstore = b's' # store (svfs)
536 537 _srccache = b'c' # cache (cache)
537 538
538 539 # This is it's own function so extensions can override it.
539 540 def _walkstreamfullstorefiles(repo):
540 541 """list snapshot file from the store"""
541 542 fnames = []
542 543 if not repo.publishing():
543 544 fnames.append(b'phaseroots')
544 545 return fnames
545 546
546 547
547 548 def _filterfull(entry, copy, vfsmap):
548 549 """actually copy the snapshot files"""
549 550 src, name, ftype, data = entry
550 551 if ftype != _filefull:
551 552 return entry
552 553 return (src, name, ftype, copy(vfsmap[src].join(name)))
553 554
554 555
555 556 @contextlib.contextmanager
556 557 def maketempcopies():
557 558 """return a function to temporary copy file"""
558 559
559 560 files = []
560 561 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
561 562 try:
562 563
563 564 def copy(src):
564 565 fd, dst = pycompat.mkstemp(
565 566 prefix=os.path.basename(src), dir=dst_dir
566 567 )
567 568 os.close(fd)
568 569 files.append(dst)
569 570 util.copyfiles(src, dst, hardlink=True)
570 571 return dst
571 572
572 573 yield copy
573 574 finally:
574 575 for tmp in files:
575 576 util.tryunlink(tmp)
576 577 util.tryrmdir(dst_dir)
577 578
578 579
579 580 def _makemap(repo):
580 581 """make a (src -> vfs) map for the repo"""
581 582 vfsmap = {
582 583 _srcstore: repo.svfs,
583 584 _srccache: repo.cachevfs,
584 585 }
585 586 # we keep repo.vfs out of the on purpose, ther are too many danger there
586 587 # (eg: .hg/hgrc)
587 588 assert repo.vfs not in vfsmap.values()
588 589
589 590 return vfsmap
590 591
591 592
592 593 def _emit2(repo, entries, totalfilesize):
593 594 """actually emit the stream bundle"""
594 595 vfsmap = _makemap(repo)
595 596 # we keep repo.vfs out of the on purpose, ther are too many danger there
596 597 # (eg: .hg/hgrc),
597 598 #
598 599 # this assert is duplicated (from _makemap) as author might think this is
599 600 # fine, while this is really not fine.
600 601 if repo.vfs in vfsmap.values():
601 602 raise error.ProgrammingError(
602 603 b'repo.vfs must not be added to vfsmap for security reasons'
603 604 )
604 605
605 606 progress = repo.ui.makeprogress(
606 607 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
607 608 )
608 609 progress.update(0)
609 610 with maketempcopies() as copy, progress:
610 611 # copy is delayed until we are in the try
611 612 entries = [_filterfull(e, copy, vfsmap) for e in entries]
612 613 yield None # this release the lock on the repository
613 614 totalbytecount = 0
614 615
615 616 for src, name, ftype, data in entries:
616 617 vfs = vfsmap[src]
617 618 yield src
618 619 yield util.uvarintencode(len(name))
619 620 if ftype == _fileappend:
620 621 fp = vfs(name)
621 622 size = data
622 623 elif ftype == _filefull:
623 624 fp = open(data, b'rb')
624 625 size = util.fstat(fp).st_size
625 626 bytecount = 0
626 627 try:
627 628 yield util.uvarintencode(size)
628 629 yield name
629 630 if size <= 65536:
630 631 chunks = (fp.read(size),)
631 632 else:
632 633 chunks = util.filechunkiter(fp, limit=size)
633 634 for chunk in chunks:
634 635 bytecount += len(chunk)
635 636 totalbytecount += len(chunk)
636 637 progress.update(totalbytecount)
637 638 yield chunk
638 639 if bytecount != size:
639 640 # Would most likely be caused by a race due to `hg strip` or
640 641 # a revlog split
641 642 raise error.Abort(
642 643 _(
643 644 b'clone could only read %d bytes from %s, but '
644 645 b'expected %d bytes'
645 646 )
646 647 % (bytecount, name, size)
647 648 )
648 649 finally:
649 650 fp.close()
650 651
651 652
652 653 def _test_sync_point_walk_1(repo):
653 654 """a function for synchronisation during tests"""
654 655
655 656
656 657 def _test_sync_point_walk_2(repo):
657 658 """a function for synchronisation during tests"""
658 659
659 660
660 661 def _v2_walk(repo, includes, excludes, includeobsmarkers):
661 662 """emit a seris of files information useful to clone a repo
662 663
663 664 return (entries, totalfilesize)
664 665
665 666 entries is a list of tuple (vfs-key, file-path, file-type, size)
666 667
667 668 - `vfs-key`: is a key to the right vfs to write the file (see _makemap)
668 669 - `name`: file path of the file to copy (to be feed to the vfss)
669 670 - `file-type`: do this file need to be copied with the source lock ?
670 671 - `size`: the size of the file (or None)
671 672 """
672 673 assert repo._currentlock(repo._lockref) is not None
673 674 entries = []
674 675 totalfilesize = 0
675 676
676 677 matcher = None
677 678 if includes or excludes:
678 679 matcher = narrowspec.match(repo.root, includes, excludes)
679 680
680 681 for entry in _walkstreamfiles(repo, matcher):
681 if entry.file_size:
682 ft = _fileappend
683 if entry.is_volatile:
684 ft = _filefull
685 entries.append(
686 (_srcstore, entry.unencoded_path, ft, entry.file_size)
687 )
688 totalfilesize += entry.file_size
682 for f in entry.files():
683 if f.file_size:
684 ft = _fileappend
685 if f.is_volatile:
686 ft = _filefull
687 entries.append((_srcstore, f.unencoded_path, ft, f.file_size))
688 totalfilesize += f.file_size
689 689 for name in _walkstreamfullstorefiles(repo):
690 690 if repo.svfs.exists(name):
691 691 totalfilesize += repo.svfs.lstat(name).st_size
692 692 entries.append((_srcstore, name, _filefull, None))
693 693 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
694 694 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
695 695 entries.append((_srcstore, b'obsstore', _filefull, None))
696 696 for name in cacheutil.cachetocopy(repo):
697 697 if repo.cachevfs.exists(name):
698 698 totalfilesize += repo.cachevfs.lstat(name).st_size
699 699 entries.append((_srccache, name, _filefull, None))
700 700 return entries, totalfilesize
701 701
702 702
703 703 def generatev2(repo, includes, excludes, includeobsmarkers):
704 704 """Emit content for version 2 of a streaming clone.
705 705
706 706 the data stream consists the following entries:
707 707 1) A char representing the file destination (eg: store or cache)
708 708 2) A varint containing the length of the filename
709 709 3) A varint containing the length of file data
710 710 4) N bytes containing the filename (the internal, store-agnostic form)
711 711 5) N bytes containing the file data
712 712
713 713 Returns a 3-tuple of (file count, file size, data iterator).
714 714 """
715 715
716 716 with repo.lock():
717 717
718 718 repo.ui.debug(b'scanning\n')
719 719
720 720 entries, totalfilesize = _v2_walk(
721 721 repo,
722 722 includes=includes,
723 723 excludes=excludes,
724 724 includeobsmarkers=includeobsmarkers,
725 725 )
726 726
727 727 chunks = _emit2(repo, entries, totalfilesize)
728 728 first = next(chunks)
729 729 assert first is None
730 730 _test_sync_point_walk_1(repo)
731 731 _test_sync_point_walk_2(repo)
732 732
733 733 return len(entries), totalfilesize, chunks
734 734
735 735
736 736 @contextlib.contextmanager
737 737 def nested(*ctxs):
738 738 this = ctxs[0]
739 739 rest = ctxs[1:]
740 740 with this:
741 741 if rest:
742 742 with nested(*rest):
743 743 yield
744 744 else:
745 745 yield
746 746
747 747
748 748 def consumev2(repo, fp, filecount, filesize):
749 749 """Apply the contents from a version 2 streaming clone.
750 750
751 751 Data is read from an object that only needs to provide a ``read(size)``
752 752 method.
753 753 """
754 754 with repo.lock():
755 755 repo.ui.status(
756 756 _(b'%d files to transfer, %s of data\n')
757 757 % (filecount, util.bytecount(filesize))
758 758 )
759 759
760 760 start = util.timer()
761 761 progress = repo.ui.makeprogress(
762 762 _(b'clone'), total=filesize, unit=_(b'bytes')
763 763 )
764 764 progress.update(0)
765 765
766 766 vfsmap = _makemap(repo)
767 767 # we keep repo.vfs out of the on purpose, ther are too many danger
768 768 # there (eg: .hg/hgrc),
769 769 #
770 770 # this assert is duplicated (from _makemap) as author might think this
771 771 # is fine, while this is really not fine.
772 772 if repo.vfs in vfsmap.values():
773 773 raise error.ProgrammingError(
774 774 b'repo.vfs must not be added to vfsmap for security reasons'
775 775 )
776 776
777 777 with repo.transaction(b'clone'):
778 778 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
779 779 with nested(*ctxs):
780 780 for i in range(filecount):
781 781 src = util.readexactly(fp, 1)
782 782 vfs = vfsmap[src]
783 783 namelen = util.uvarintdecodestream(fp)
784 784 datalen = util.uvarintdecodestream(fp)
785 785
786 786 name = util.readexactly(fp, namelen)
787 787
788 788 if repo.ui.debugflag:
789 789 repo.ui.debug(
790 790 b'adding [%s] %s (%s)\n'
791 791 % (src, name, util.bytecount(datalen))
792 792 )
793 793
794 794 with vfs(name, b'w') as ofp:
795 795 for chunk in util.filechunkiter(fp, limit=datalen):
796 796 progress.increment(step=len(chunk))
797 797 ofp.write(chunk)
798 798
799 799 # force @filecache properties to be reloaded from
800 800 # streamclone-ed file at next access
801 801 repo.invalidate(clearfilecache=True)
802 802
803 803 elapsed = util.timer() - start
804 804 if elapsed <= 0:
805 805 elapsed = 0.001
806 806 repo.ui.status(
807 807 _(b'transferred %s in %.1f seconds (%s/sec)\n')
808 808 % (
809 809 util.bytecount(progress.pos),
810 810 elapsed,
811 811 util.bytecount(progress.pos / elapsed),
812 812 )
813 813 )
814 814 progress.complete()
815 815
816 816
817 817 def applybundlev2(repo, fp, filecount, filesize, requirements):
818 818 from . import localrepo
819 819
820 820 missingreqs = [r for r in requirements if r not in repo.supported]
821 821 if missingreqs:
822 822 raise error.Abort(
823 823 _(b'unable to apply stream clone: unsupported format: %s')
824 824 % b', '.join(sorted(missingreqs))
825 825 )
826 826
827 827 consumev2(repo, fp, filecount, filesize)
828 828
829 829 repo.requirements = new_stream_clone_requirements(
830 830 repo.requirements,
831 831 requirements,
832 832 )
833 833 repo.svfs.options = localrepo.resolvestorevfsoptions(
834 834 repo.ui, repo.requirements, repo.features
835 835 )
836 836 scmutil.writereporequirements(repo)
837 837 nodemap.post_stream_cleanup(repo)
838 838
839 839
840 840 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
841 841 hardlink = [True]
842 842
843 843 def copy_used():
844 844 hardlink[0] = False
845 845 progress.topic = _(b'copying')
846 846
847 847 for k, path, size in entries:
848 848 src_vfs = src_vfs_map[k]
849 849 dst_vfs = dst_vfs_map[k]
850 850 src_path = src_vfs.join(path)
851 851 dst_path = dst_vfs.join(path)
852 852 # We cannot use dirname and makedirs of dst_vfs here because the store
853 853 # encoding confuses them. See issue 6581 for details.
854 854 dirname = os.path.dirname(dst_path)
855 855 if not os.path.exists(dirname):
856 856 util.makedirs(dirname)
857 857 dst_vfs.register_file(path)
858 858 # XXX we could use the #nb_bytes argument.
859 859 util.copyfile(
860 860 src_path,
861 861 dst_path,
862 862 hardlink=hardlink[0],
863 863 no_hardlink_cb=copy_used,
864 864 check_fs_hardlink=False,
865 865 )
866 866 progress.increment()
867 867 return hardlink[0]
868 868
869 869
870 870 def local_copy(src_repo, dest_repo):
871 871 """copy all content from one local repository to another
872 872
873 873 This is useful for local clone"""
874 874 src_store_requirements = {
875 875 r
876 876 for r in src_repo.requirements
877 877 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
878 878 }
879 879 dest_store_requirements = {
880 880 r
881 881 for r in dest_repo.requirements
882 882 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
883 883 }
884 884 assert src_store_requirements == dest_store_requirements
885 885
886 886 with dest_repo.lock():
887 887 with src_repo.lock():
888 888
889 889 # bookmark is not integrated to the streaming as it might use the
890 890 # `repo.vfs` and they are too many sentitive data accessible
891 891 # through `repo.vfs` to expose it to streaming clone.
892 892 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
893 893 srcbookmarks = src_book_vfs.join(b'bookmarks')
894 894 bm_count = 0
895 895 if os.path.exists(srcbookmarks):
896 896 bm_count = 1
897 897
898 898 entries, totalfilesize = _v2_walk(
899 899 src_repo,
900 900 includes=None,
901 901 excludes=None,
902 902 includeobsmarkers=True,
903 903 )
904 904 src_vfs_map = _makemap(src_repo)
905 905 dest_vfs_map = _makemap(dest_repo)
906 906 progress = src_repo.ui.makeprogress(
907 907 topic=_(b'linking'),
908 908 total=len(entries) + bm_count,
909 909 unit=_(b'files'),
910 910 )
911 911 # copy files
912 912 #
913 913 # We could copy the full file while the source repository is locked
914 914 # and the other one without the lock. However, in the linking case,
915 915 # this would also requires checks that nobody is appending any data
916 916 # to the files while we do the clone, so this is not done yet. We
917 917 # could do this blindly when copying files.
918 918 files = ((k, path, size) for k, path, ftype, size in entries)
919 919 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
920 920
921 921 # copy bookmarks over
922 922 if bm_count:
923 923 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
924 924 dstbookmarks = dst_book_vfs.join(b'bookmarks')
925 925 util.copyfile(srcbookmarks, dstbookmarks)
926 926 progress.complete()
927 927 if hardlink:
928 928 msg = b'linked %d files\n'
929 929 else:
930 930 msg = b'copied %d files\n'
931 931 src_repo.ui.debug(msg % (len(entries) + bm_count))
932 932
933 933 with dest_repo.transaction(b"localclone") as tr:
934 934 dest_repo.store.write(tr)
935 935
936 936 # clean up transaction file as they do not make sense
937 937 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
@@ -1,625 +1,627
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import os
10 10
11 11 from .i18n import _
12 12 from .node import short
13 13 from .utils import stringutil
14 14
15 15 from . import (
16 16 error,
17 17 pycompat,
18 18 requirements,
19 19 revlog,
20 20 util,
21 21 )
22 22
23 23 VERIFY_DEFAULT = 0
24 24 VERIFY_FULL = 1
25 25
26 26
27 27 def verify(repo, level=None):
28 28 with repo.lock():
29 29 v = verifier(repo, level)
30 30 return v.verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
41 41 HINT_FNCACHE = _(
42 42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 43 )
44 44
45 45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 46 b"parent-directory manifest refers to unknown revision %s"
47 47 )
48 48
49 49 WARN_UNKNOWN_COPY_SOURCE = _(
50 50 b"warning: copy source of '%s' not in parents of %s"
51 51 )
52 52
53 53 WARN_NULLID_COPY_SOURCE = _(
54 54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 55 )
56 56
57 57
58 58 class verifier:
59 59 def __init__(self, repo, level=None):
60 60 self.repo = repo.unfiltered()
61 61 self.ui = repo.ui
62 62 self.match = repo.narrowmatch()
63 63 if level is None:
64 64 level = VERIFY_DEFAULT
65 65 self._level = level
66 66 self.badrevs = set()
67 67 self.errors = 0
68 68 self.warnings = 0
69 69 self.havecl = len(repo.changelog) > 0
70 70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
71 71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
72 72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
73 73 self.refersmf = False
74 74 self.fncachewarned = False
75 75 # developer config: verify.skipflags
76 76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
77 77 self.warnorphanstorefiles = True
78 78
79 79 def _warn(self, msg):
80 80 """record a "warning" level issue"""
81 81 self.ui.warn(msg + b"\n")
82 82 self.warnings += 1
83 83
84 84 def _err(self, linkrev, msg, filename=None):
85 85 """record a "error" level issue"""
86 86 if linkrev is not None:
87 87 self.badrevs.add(linkrev)
88 88 linkrev = b"%d" % linkrev
89 89 else:
90 90 linkrev = b'?'
91 91 msg = b"%s: %s" % (linkrev, msg)
92 92 if filename:
93 93 msg = b"%s@%s" % (filename, msg)
94 94 self.ui.warn(b" " + msg + b"\n")
95 95 self.errors += 1
96 96
97 97 def _exc(self, linkrev, msg, inst, filename=None):
98 98 """record exception raised during the verify process"""
99 99 fmsg = stringutil.forcebytestr(inst)
100 100 if not fmsg:
101 101 fmsg = pycompat.byterepr(inst)
102 102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
103 103
104 104 def _checkrevlog(self, obj, name, linkrev):
105 105 """verify high level property of a revlog
106 106
107 107 - revlog is present,
108 108 - revlog is non-empty,
109 109 - sizes (index and data) are correct,
110 110 - revlog's format version is correct.
111 111 """
112 112 if not len(obj) and (self.havecl or self.havemf):
113 113 self._err(linkrev, _(b"empty or missing %s") % name)
114 114 return
115 115
116 116 d = obj.checksize()
117 117 if d[0]:
118 118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
119 119 if d[1]:
120 120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
121 121
122 122 if obj._format_version != revlog.REVLOGV0:
123 123 if not self.revlogv1:
124 124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
125 125 elif self.revlogv1:
126 126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
127 127
128 128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
129 129 """verify a single revlog entry
130 130
131 131 arguments are:
132 132 - obj: the source revlog
133 133 - i: the revision number
134 134 - node: the revision node id
135 135 - seen: nodes previously seen for this revlog
136 136 - linkrevs: [changelog-revisions] introducing "node"
137 137 - f: string label ("changelog", "manifest", or filename)
138 138
139 139 Performs the following checks:
140 140 - linkrev points to an existing changelog revision,
141 141 - linkrev points to a changelog revision that introduces this revision,
142 142 - linkrev points to the lowest of these changesets,
143 143 - both parents exist in the revlog,
144 144 - the revision is not duplicated.
145 145
146 146 Return the linkrev of the revision (or None for changelog's revisions).
147 147 """
148 148 lr = obj.linkrev(obj.rev(node))
149 149 if lr < 0 or (self.havecl and lr not in linkrevs):
150 150 if lr < 0 or lr >= len(self.repo.changelog):
151 151 msg = _(b"rev %d points to nonexistent changeset %d")
152 152 else:
153 153 msg = _(b"rev %d points to unexpected changeset %d")
154 154 self._err(None, msg % (i, lr), f)
155 155 if linkrevs:
156 156 if f and len(linkrevs) > 1:
157 157 try:
158 158 # attempt to filter down to real linkrevs
159 159 linkrevs = []
160 160 for lr in linkrevs:
161 161 if self.lrugetctx(lr)[f].filenode() == node:
162 162 linkrevs.append(lr)
163 163 except Exception:
164 164 pass
165 165 msg = _(b" (expected %s)")
166 166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
167 167 self._warn(msg)
168 168 lr = None # can't be trusted
169 169
170 170 try:
171 171 p1, p2 = obj.parents(node)
172 172 if p1 not in seen and p1 != self.repo.nullid:
173 173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
174 174 self._err(lr, msg, f)
175 175 if p2 not in seen and p2 != self.repo.nullid:
176 176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
177 177 self._err(lr, msg, f)
178 178 except Exception as inst:
179 179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
180 180
181 181 if node in seen:
182 182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
183 183 seen[node] = i
184 184 return lr
185 185
186 186 def verify(self):
187 187 """verify the content of the Mercurial repository
188 188
189 189 This method run all verifications, displaying issues as they are found.
190 190
191 191 return 1 if any error have been encountered, 0 otherwise."""
192 192 # initial validation and generic report
193 193 repo = self.repo
194 194 ui = repo.ui
195 195 if not repo.url().startswith(b'file:'):
196 196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
197 197
198 198 if os.path.exists(repo.sjoin(b"journal")):
199 199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
200 200
201 201 if ui.verbose or not self.revlogv1:
202 202 ui.status(
203 203 _(b"repository uses revlog format %d\n")
204 204 % (self.revlogv1 and 1 or 0)
205 205 )
206 206
207 207 # data verification
208 208 mflinkrevs, filelinkrevs = self._verifychangelog()
209 209 filenodes = self._verifymanifest(mflinkrevs)
210 210 del mflinkrevs
211 211 self._crosscheckfiles(filelinkrevs, filenodes)
212 212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
213 213
214 214 if self.errors:
215 215 ui.warn(_(b"not checking dirstate because of previous errors\n"))
216 216 dirstate_errors = 0
217 217 else:
218 218 dirstate_errors = self._verify_dirstate()
219 219
220 220 # final report
221 221 ui.status(
222 222 _(b"checked %d changesets with %d changes to %d files\n")
223 223 % (len(repo.changelog), filerevisions, totalfiles)
224 224 )
225 225 if self.warnings:
226 226 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
227 227 if self.fncachewarned:
228 228 ui.warn(HINT_FNCACHE)
229 229 if self.errors:
230 230 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
231 231 if self.badrevs:
232 232 msg = _(b"(first damaged changeset appears to be %d)\n")
233 233 msg %= min(self.badrevs)
234 234 ui.warn(msg)
235 235 if dirstate_errors:
236 236 ui.warn(
237 237 _(b"dirstate inconsistent with current parent's manifest\n")
238 238 )
239 239 ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
240 240 return 1
241 241 return 0
242 242
243 243 def _verifychangelog(self):
244 244 """verify the changelog of a repository
245 245
246 246 The following checks are performed:
247 247 - all of `_checkrevlog` checks,
248 248 - all of `_checkentry` checks (for each revisions),
249 249 - each revision can be read.
250 250
251 251 The function returns some of the data observed in the changesets as a
252 252 (mflinkrevs, filelinkrevs) tuples:
253 253 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
254 254 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
255 255
256 256 If a matcher was specified, filelinkrevs will only contains matched
257 257 files.
258 258 """
259 259 ui = self.ui
260 260 repo = self.repo
261 261 match = self.match
262 262 cl = repo.changelog
263 263
264 264 ui.status(_(b"checking changesets\n"))
265 265 mflinkrevs = {}
266 266 filelinkrevs = {}
267 267 seen = {}
268 268 self._checkrevlog(cl, b"changelog", 0)
269 269 progress = ui.makeprogress(
270 270 _(b'checking'), unit=_(b'changesets'), total=len(repo)
271 271 )
272 272 for i in repo:
273 273 progress.update(i)
274 274 n = cl.node(i)
275 275 self._checkentry(cl, i, n, seen, [i], b"changelog")
276 276
277 277 try:
278 278 changes = cl.read(n)
279 279 if changes[0] != self.repo.nullid:
280 280 mflinkrevs.setdefault(changes[0], []).append(i)
281 281 self.refersmf = True
282 282 for f in changes[3]:
283 283 if match(f):
284 284 filelinkrevs.setdefault(_normpath(f), []).append(i)
285 285 except Exception as inst:
286 286 self.refersmf = True
287 287 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
288 288 progress.complete()
289 289 return mflinkrevs, filelinkrevs
290 290
291 291 def _verifymanifest(
292 292 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
293 293 ):
294 294 """verify the manifestlog content
295 295
296 296 Inputs:
297 297 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
298 298 - dir: a subdirectory to check (for tree manifest repo)
299 299 - storefiles: set of currently "orphan" files.
300 300 - subdirprogress: a progress object
301 301
302 302 This function checks:
303 303 * all of `_checkrevlog` checks (for all manifest related revlogs)
304 304 * all of `_checkentry` checks (for all manifest related revisions)
305 305 * nodes for subdirectory exists in the sub-directory manifest
306 306 * each manifest entries have a file path
307 307 * each manifest node refered in mflinkrevs exist in the manifest log
308 308
309 309 If tree manifest is in use and a matchers is specified, only the
310 310 sub-directories matching it will be verified.
311 311
312 312 return a two level mapping:
313 313 {"path" -> { filenode -> changelog-revision}}
314 314
315 315 This mapping primarily contains entries for every files in the
316 316 repository. In addition, when tree-manifest is used, it also contains
317 317 sub-directory entries.
318 318
319 319 If a matcher is provided, only matching paths will be included.
320 320 """
321 321 repo = self.repo
322 322 ui = self.ui
323 323 match = self.match
324 324 mfl = self.repo.manifestlog
325 325 mf = mfl.getstorage(dir)
326 326
327 327 if not dir:
328 328 self.ui.status(_(b"checking manifests\n"))
329 329
330 330 filenodes = {}
331 331 subdirnodes = {}
332 332 seen = {}
333 333 label = b"manifest"
334 334 if dir:
335 335 label = dir
336 336 revlogfiles = mf.files()
337 337 storefiles.difference_update(revlogfiles)
338 338 if subdirprogress: # should be true since we're in a subdirectory
339 339 subdirprogress.increment()
340 340 if self.refersmf:
341 341 # Do not check manifest if there are only changelog entries with
342 342 # null manifests.
343 343 self._checkrevlog(mf._revlog, label, 0)
344 344 progress = ui.makeprogress(
345 345 _(b'checking'), unit=_(b'manifests'), total=len(mf)
346 346 )
347 347 for i in mf:
348 348 if not dir:
349 349 progress.update(i)
350 350 n = mf.node(i)
351 351 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
352 352 if n in mflinkrevs:
353 353 del mflinkrevs[n]
354 354 elif dir:
355 355 msg = _(b"%s not in parent-directory manifest") % short(n)
356 356 self._err(lr, msg, label)
357 357 else:
358 358 self._err(lr, _(b"%s not in changesets") % short(n), label)
359 359
360 360 try:
361 361 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
362 362 for f, fn, fl in mfdelta.iterentries():
363 363 if not f:
364 364 self._err(lr, _(b"entry without name in manifest"))
365 365 elif f == b"/dev/null": # ignore this in very old repos
366 366 continue
367 367 fullpath = dir + _normpath(f)
368 368 if fl == b't':
369 369 if not match.visitdir(fullpath):
370 370 continue
371 371 sdn = subdirnodes.setdefault(fullpath + b'/', {})
372 372 sdn.setdefault(fn, []).append(lr)
373 373 else:
374 374 if not match(fullpath):
375 375 continue
376 376 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
377 377 except Exception as inst:
378 378 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
379 379 if self._level >= VERIFY_FULL:
380 380 try:
381 381 # Various issues can affect manifest. So we read each full
382 382 # text from storage. This triggers the checks from the core
383 383 # code (eg: hash verification, filename are ordered, etc.)
384 384 mfdelta = mfl.get(dir, n).read()
385 385 except Exception as inst:
386 386 msg = _(b"reading full manifest %s") % short(n)
387 387 self._exc(lr, msg, inst, label)
388 388
389 389 if not dir:
390 390 progress.complete()
391 391
392 392 if self.havemf:
393 393 # since we delete entry in `mflinkrevs` during iteration, any
394 394 # remaining entries are "missing". We need to issue errors for them.
395 395 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
396 396 for c, m in sorted(changesetpairs):
397 397 if dir:
398 398 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
399 399 else:
400 400 msg = _(b"changeset refers to unknown revision %s")
401 401 msg %= short(m)
402 402 self._err(c, msg, label)
403 403
404 404 if not dir and subdirnodes:
405 405 self.ui.status(_(b"checking directory manifests\n"))
406 406 storefiles = set()
407 407 subdirs = set()
408 408 revlogv1 = self.revlogv1
409 409 undecodable = []
410 410 for entry in repo.store.datafiles(undecodable=undecodable):
411 f = entry.unencoded_path
412 size = entry.file_size
413 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
414 storefiles.add(_normpath(f))
415 subdirs.add(os.path.dirname(f))
411 for file_ in entry.files():
412 f = file_.unencoded_path
413 size = file_.file_size
414 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
415 storefiles.add(_normpath(f))
416 subdirs.add(os.path.dirname(f))
416 417 for f in undecodable:
417 418 self._err(None, _(b"cannot decode filename '%s'") % f)
418 419 subdirprogress = ui.makeprogress(
419 420 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
420 421 )
421 422
422 423 for subdir, linkrevs in subdirnodes.items():
423 424 subdirfilenodes = self._verifymanifest(
424 425 linkrevs, subdir, storefiles, subdirprogress
425 426 )
426 427 for f, onefilenodes in subdirfilenodes.items():
427 428 filenodes.setdefault(f, {}).update(onefilenodes)
428 429
429 430 if not dir and subdirnodes:
430 431 assert subdirprogress is not None # help pytype
431 432 subdirprogress.complete()
432 433 if self.warnorphanstorefiles:
433 434 for f in sorted(storefiles):
434 435 self._warn(_(b"warning: orphan data file '%s'") % f)
435 436
436 437 return filenodes
437 438
438 439 def _crosscheckfiles(self, filelinkrevs, filenodes):
439 440 repo = self.repo
440 441 ui = self.ui
441 442 ui.status(_(b"crosschecking files in changesets and manifests\n"))
442 443
443 444 total = len(filelinkrevs) + len(filenodes)
444 445 progress = ui.makeprogress(
445 446 _(b'crosschecking'), unit=_(b'files'), total=total
446 447 )
447 448 if self.havemf:
448 449 for f in sorted(filelinkrevs):
449 450 progress.increment()
450 451 if f not in filenodes:
451 452 lr = filelinkrevs[f][0]
452 453 self._err(lr, _(b"in changeset but not in manifest"), f)
453 454
454 455 if self.havecl:
455 456 for f in sorted(filenodes):
456 457 progress.increment()
457 458 if f not in filelinkrevs:
458 459 try:
459 460 fl = repo.file(f)
460 461 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
461 462 except Exception:
462 463 lr = None
463 464 self._err(lr, _(b"in manifest but not in changeset"), f)
464 465
465 466 progress.complete()
466 467
467 468 def _verifyfiles(self, filenodes, filelinkrevs):
468 469 repo = self.repo
469 470 ui = self.ui
470 471 lrugetctx = self.lrugetctx
471 472 revlogv1 = self.revlogv1
472 473 havemf = self.havemf
473 474 ui.status(_(b"checking files\n"))
474 475
475 476 storefiles = set()
476 477 undecodable = []
477 478 for entry in repo.store.datafiles(undecodable=undecodable):
478 size = entry.file_size
479 f = entry.unencoded_path
480 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
481 storefiles.add(_normpath(f))
479 for file_ in entry.files():
480 size = file_.file_size
481 f = file_.unencoded_path
482 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
483 storefiles.add(_normpath(f))
482 484 for f in undecodable:
483 485 self._err(None, _(b"cannot decode filename '%s'") % f)
484 486
485 487 state = {
486 488 # TODO this assumes revlog storage for changelog.
487 489 b'expectedversion': self.repo.changelog._format_version,
488 490 b'skipflags': self.skipflags,
489 491 # experimental config: censor.policy
490 492 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
491 493 }
492 494
493 495 files = sorted(set(filenodes) | set(filelinkrevs))
494 496 revisions = 0
495 497 progress = ui.makeprogress(
496 498 _(b'checking'), unit=_(b'files'), total=len(files)
497 499 )
498 500 for i, f in enumerate(files):
499 501 progress.update(i, item=f)
500 502 try:
501 503 linkrevs = filelinkrevs[f]
502 504 except KeyError:
503 505 # in manifest but not in changelog
504 506 linkrevs = []
505 507
506 508 if linkrevs:
507 509 lr = linkrevs[0]
508 510 else:
509 511 lr = None
510 512
511 513 try:
512 514 fl = repo.file(f)
513 515 except error.StorageError as e:
514 516 self._err(lr, _(b"broken revlog! (%s)") % e, f)
515 517 continue
516 518
517 519 for ff in fl.files():
518 520 try:
519 521 storefiles.remove(ff)
520 522 except KeyError:
521 523 if self.warnorphanstorefiles:
522 524 msg = _(b" warning: revlog '%s' not in fncache!")
523 525 self._warn(msg % ff)
524 526 self.fncachewarned = True
525 527
526 528 if not len(fl) and (self.havecl or self.havemf):
527 529 self._err(lr, _(b"empty or missing %s") % f)
528 530 else:
529 531 # Guard against implementations not setting this.
530 532 state[b'skipread'] = set()
531 533 state[b'safe_renamed'] = set()
532 534
533 535 for problem in fl.verifyintegrity(state):
534 536 if problem.node is not None:
535 537 linkrev = fl.linkrev(fl.rev(problem.node))
536 538 else:
537 539 linkrev = None
538 540
539 541 if problem.warning:
540 542 self._warn(problem.warning)
541 543 elif problem.error:
542 544 linkrev_msg = linkrev if linkrev is not None else lr
543 545 self._err(linkrev_msg, problem.error, f)
544 546 else:
545 547 raise error.ProgrammingError(
546 548 b'problem instance does not set warning or error '
547 549 b'attribute: %s' % problem.msg
548 550 )
549 551
550 552 seen = {}
551 553 for i in fl:
552 554 revisions += 1
553 555 n = fl.node(i)
554 556 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
555 557 if f in filenodes:
556 558 if havemf and n not in filenodes[f]:
557 559 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
558 560 else:
559 561 del filenodes[f][n]
560 562
561 563 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
562 564 continue
563 565
564 566 # check renames
565 567 try:
566 568 # This requires resolving fulltext (at least on revlogs,
567 569 # though not with LFS revisions). We may want
568 570 # ``verifyintegrity()`` to pass a set of nodes with
569 571 # rename metadata as an optimization.
570 572 rp = fl.renamed(n)
571 573 if rp:
572 574 if lr is not None and ui.verbose:
573 575 ctx = lrugetctx(lr)
574 576 if not any(rp[0] in pctx for pctx in ctx.parents()):
575 577 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
576 578 fl2 = repo.file(rp[0])
577 579 if not len(fl2):
578 580 m = _(b"empty or missing copy source revlog %s:%s")
579 581 self._err(lr, m % (rp[0], short(rp[1])), f)
580 582 elif rp[1] == self.repo.nullid:
581 583 msg = WARN_NULLID_COPY_SOURCE
582 584 msg %= (f, lr, rp[0], short(rp[1]))
583 585 ui.note(msg)
584 586 else:
585 587 fl2.rev(rp[1])
586 588 except Exception as inst:
587 589 self._exc(
588 590 lr, _(b"checking rename of %s") % short(n), inst, f
589 591 )
590 592
591 593 # cross-check
592 594 if f in filenodes:
593 595 fns = [(v, k) for k, v in filenodes[f].items()]
594 596 for lr, node in sorted(fns):
595 597 msg = _(b"manifest refers to unknown revision %s")
596 598 self._err(lr, msg % short(node), f)
597 599 progress.complete()
598 600
599 601 if self.warnorphanstorefiles:
600 602 for f in sorted(storefiles):
601 603 self._warn(_(b"warning: orphan data file '%s'") % f)
602 604
603 605 return len(files), revisions
604 606
605 607 def _verify_dirstate(self):
606 608 """Check that the dirstate is consistent with the parent's manifest"""
607 609 repo = self.repo
608 610 ui = self.ui
609 611 ui.status(_(b"checking dirstate\n"))
610 612
611 613 parent1, parent2 = repo.dirstate.parents()
612 614 m1 = repo[parent1].manifest()
613 615 m2 = repo[parent2].manifest()
614 616 dirstate_errors = 0
615 617
616 618 is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
617 619 narrow_matcher = repo.narrowmatch() if is_narrow else None
618 620
619 621 for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
620 622 ui.error(err)
621 623 dirstate_errors += 1
622 624
623 625 if dirstate_errors:
624 626 self.errors += dirstate_errors
625 627 return dirstate_errors
General Comments 0
You need to be logged in to leave comments. Login now