largefiles: track if a matcher was tampered with...
Arseniy Alekseyev
r52494:ea334310 default
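
The hunks below all apply one pattern: any override that copies a matcher and then mutates its internals now flags the copy first, so consumers can tell the matcher no longer reflects its original patterns. A minimal sketch of the pattern, with a hypothetical `keep` predicate standing in for the real lfile/notlfile checks:

    m = copy.copy(match)
    m._was_tampered_with = True  # mutated below; pattern-derived state is stale
    m._files = [f for f in m._files if keep(f)]
    m._fileset = set(m._files)
    m.matchfn = lambda f: keep(f) and match.matchfn(f)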
@@ -1,1924 +1,1932 @@
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''Overridden Mercurial commands and functions for the largefiles extension'''
10 10
11 11 import contextlib
12 12 import copy
13 13 import os
14 14
15 15 from mercurial.i18n import _
16 16
17 17 from mercurial.pycompat import open
18 18
19 19 from mercurial.hgweb import webcommands
20 20
21 21 from mercurial import (
22 22 archival,
23 23 cmdutil,
24 24 copies as copiesmod,
25 25 dirstate,
26 26 error,
27 27 exchange,
28 28 extensions,
29 29 exthelper,
30 30 filemerge,
31 31 hg,
32 32 logcmdutil,
33 33 match as matchmod,
34 34 merge,
35 35 mergestate as mergestatemod,
36 36 pathutil,
37 37 pycompat,
38 38 scmutil,
39 39 smartset,
40 40 subrepo,
41 41 url as urlmod,
42 42 util,
43 43 )
44 44
45 45 from mercurial.upgrade_utils import (
46 46 actions as upgrade_actions,
47 47 )
48 48
49 49 from . import (
50 50 lfcommands,
51 51 lfutil,
52 52 storefactory,
53 53 )
54 54
55 55 ACTION_ADD = mergestatemod.ACTION_ADD
56 56 ACTION_DELETED_CHANGED = mergestatemod.ACTION_DELETED_CHANGED
57 57 ACTION_GET = mergestatemod.ACTION_GET
58 58 ACTION_KEEP = mergestatemod.ACTION_KEEP
59 59 ACTION_REMOVE = mergestatemod.ACTION_REMOVE
60 60
61 61 eh = exthelper.exthelper()
62 62
63 63 lfstatus = lfutil.lfstatus
64 64
65 65 MERGE_ACTION_LARGEFILE_MARK_REMOVED = mergestatemod.MergeAction('lfmr')
66 66
67 67 # -- Utility functions: commonly/repeatedly needed functionality ---------------
68 68
69 69
70 70 def composelargefilematcher(match, manifest):
71 71 """create a matcher that matches only the largefiles in the original
72 72 matcher"""
73 73 m = copy.copy(match)
74 m._was_tampered_with = True
74 75 lfile = lambda f: lfutil.standin(f) in manifest
75 76 m._files = [lf for lf in m._files if lfile(lf)]
76 77 m._fileset = set(m._files)
77 78 m.always = lambda: False
78 79 origmatchfn = m.matchfn
79 80 m.matchfn = lambda f: lfile(f) and origmatchfn(f)
80 81 return m
81 82
82 83
83 84 def composenormalfilematcher(match, manifest, exclude=None):
84 85 excluded = set()
85 86 if exclude is not None:
86 87 excluded.update(exclude)
87 88
88 89 m = copy.copy(match)
90 m._was_tampered_with = True
89 91 notlfile = lambda f: not (
90 92 lfutil.isstandin(f) or lfutil.standin(f) in manifest or f in excluded
91 93 )
92 94 m._files = [lf for lf in m._files if notlfile(lf)]
93 95 m._fileset = set(m._files)
94 96 m.always = lambda: False
95 97 origmatchfn = m.matchfn
96 98 m.matchfn = lambda f: notlfile(f) and origmatchfn(f)
97 99 return m
98 100
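# Illustration (not part of this change): the two helpers above partition
# one incoming matcher. Given a manifest containing '.hglf/big.bin' and
# 'small.txt', a sketch of how they would be used:
#
#   mf = repo[None].manifest()
#   lfm = composelargefilematcher(m, mf)   # matches 'big.bin' only
#   nfm = composenormalfilematcher(m, mf)  # matches 'small.txt' only
#
# Every file accepted by the original matcher is accepted by exactly one
# of the two, so callers can run the largefile and normal code paths
# independently.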
99 101
100 102 def addlargefiles(ui, repo, isaddremove, matcher, uipathfn, **opts):
101 103 large = opts.get('large')
102 104 lfsize = lfutil.getminsize(
103 105 ui, lfutil.islfilesrepo(repo), opts.get('lfsize')
104 106 )
105 107
106 108 lfmatcher = None
107 109 if lfutil.islfilesrepo(repo):
108 110 lfpats = ui.configlist(lfutil.longname, b'patterns')
109 111 if lfpats:
110 112 lfmatcher = matchmod.match(repo.root, b'', list(lfpats))
111 113
112 114 lfnames = []
113 115 m = matcher
114 116
115 117 wctx = repo[None]
116 118 for f in wctx.walk(matchmod.badmatch(m, lambda x, y: None)):
117 119 exact = m.exact(f)
118 120 lfile = lfutil.standin(f) in wctx
119 121 nfile = f in wctx
120 122 exists = lfile or nfile
121 123
122 124 # Don't warn the user when they attempt to add a normal tracked file.
123 125 # The normal add code will do that for us.
124 126 if exact and exists:
125 127 if lfile:
126 128 ui.warn(_(b'%s already a largefile\n') % uipathfn(f))
127 129 continue
128 130
129 131 if (exact or not exists) and not lfutil.isstandin(f):
130 132 # In case the file was removed previously, but not committed
131 133 # (issue3507)
132 134 if not repo.wvfs.exists(f):
133 135 continue
134 136
135 137 abovemin = (
136 138 lfsize and repo.wvfs.lstat(f).st_size >= lfsize * 1024 * 1024
137 139 )
138 140 if large or abovemin or (lfmatcher and lfmatcher(f)):
139 141 lfnames.append(f)
140 142 if ui.verbose or not exact:
141 143 ui.status(_(b'adding %s as a largefile\n') % uipathfn(f))
142 144
143 145 bad = []
144 146
145 147 # Need to lock, otherwise there could be a race condition between
146 148 # when standins are created and added to the repo.
147 149 with repo.wlock():
148 150 if not opts.get('dry_run'):
149 151 standins = []
150 152 lfdirstate = lfutil.openlfdirstate(ui, repo)
151 153 for f in lfnames:
152 154 standinname = lfutil.standin(f)
153 155 lfutil.writestandin(
154 156 repo,
155 157 standinname,
156 158 hash=b'',
157 159 executable=lfutil.getexecutable(repo.wjoin(f)),
158 160 )
159 161 standins.append(standinname)
160 162 lfdirstate.set_tracked(f)
161 163 lfdirstate.write(repo.currenttransaction())
162 164 bad += [
163 165 lfutil.splitstandin(f)
164 166 for f in repo[None].add(standins)
165 167 if f in m.files()
166 168 ]
167 169
168 170 added = [f for f in lfnames if f not in bad]
169 171 return added, bad
170 172
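# Example (illustrative only): with the default lfsize of 10, a file is
# auto-added as a largefile once its size reaches 10 * 1024 * 1024 bytes:
#
#   lfsize = lfutil.getminsize(ui, lfutil.islfilesrepo(repo), None)
#   big = repo.wvfs.lstat(f).st_size >= lfsize * 1024 * 1024
#
# An explicit --large, the size gate, or a largefiles.patterns match each
# independently routes f through the standin machinery above.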
171 173
172 174 def removelargefiles(ui, repo, isaddremove, matcher, uipathfn, dryrun, **opts):
173 175 after = opts.get('after')
174 176 m = composelargefilematcher(matcher, repo[None].manifest())
175 177 with lfstatus(repo):
176 178 s = repo.status(match=m, clean=not isaddremove)
177 179 manifest = repo[None].manifest()
178 180 modified, added, deleted, clean = [
179 181 [f for f in list if lfutil.standin(f) in manifest]
180 182 for list in (s.modified, s.added, s.deleted, s.clean)
181 183 ]
182 184
183 185 def warn(files, msg):
184 186 for f in files:
185 187 ui.warn(msg % uipathfn(f))
186 188 return int(len(files) > 0)
187 189
188 190 if after:
189 191 remove = deleted
190 192 result = warn(
191 193 modified + added + clean, _(b'not removing %s: file still exists\n')
192 194 )
193 195 else:
194 196 remove = deleted + clean
195 197 result = warn(
196 198 modified,
197 199 _(
198 200 b'not removing %s: file is modified (use -f'
199 201 b' to force removal)\n'
200 202 ),
201 203 )
202 204 result = (
203 205 warn(
204 206 added,
205 207 _(
206 208 b'not removing %s: file has been marked for add'
207 209 b' (use forget to undo)\n'
208 210 ),
209 211 )
210 212 or result
211 213 )
212 214
213 215 # Need to lock because standin files are deleted then removed from the
214 216 # repository and we could race in-between.
215 217 with repo.wlock():
216 218 lfdirstate = lfutil.openlfdirstate(ui, repo)
217 219 for f in sorted(remove):
218 220 if ui.verbose or not m.exact(f):
219 221 ui.status(_(b'removing %s\n') % uipathfn(f))
220 222
221 223 if not dryrun:
222 224 if not after:
223 225 repo.wvfs.unlinkpath(f, ignoremissing=True)
224 226
225 227 if dryrun:
226 228 return result
227 229
228 230 remove = [lfutil.standin(f) for f in remove]
229 231 # If this is being called by addremove, let the original addremove
230 232 # function handle this.
231 233 if not isaddremove:
232 234 for f in remove:
233 235 repo.wvfs.unlinkpath(f, ignoremissing=True)
234 236 repo[None].forget(remove)
235 237
236 238 for f in remove:
237 239 lfdirstate.set_untracked(lfutil.splitstandin(f))
238 240
239 241 lfdirstate.write(repo.currenttransaction())
240 242
241 243 return result
242 244
243 245
244 246 # For overriding mercurial.hgweb.webcommands so that largefiles will
245 247 # appear at their right place in the manifests.
246 248 @eh.wrapfunction(webcommands, 'decodepath')
247 249 def decodepath(orig, path):
248 250 return lfutil.splitstandin(path) or path
249 251
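# Sketch of the effect (values illustrative): standin paths are shown at
# their largefile location in hgweb manifests, other paths pass through:
#
#   decodepath(orig, b'.hglf/data/big.bin')  ->  b'data/big.bin'
#   decodepath(orig, b'src/small.c')         ->  b'src/small.c'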
250 252
251 253 # -- Wrappers: modify existing commands --------------------------------
252 254
253 255
254 256 @eh.wrapcommand(
255 257 b'add',
256 258 opts=[
257 259 (b'', b'large', None, _(b'add as largefile')),
258 260 (b'', b'normal', None, _(b'add as normal file')),
259 261 (
260 262 b'',
261 263 b'lfsize',
262 264 b'',
263 265 _(
264 266 b'add all files above this size (in megabytes) '
265 267 b'as largefiles (default: 10)'
266 268 ),
267 269 ),
268 270 ],
269 271 )
270 272 def overrideadd(orig, ui, repo, *pats, **opts):
271 273 if opts.get('normal') and opts.get('large'):
272 274 raise error.Abort(_(b'--normal cannot be used with --large'))
273 275 return orig(ui, repo, *pats, **opts)
274 276
275 277
276 278 @eh.wrapfunction(cmdutil, 'add')
277 279 def cmdutiladd(orig, ui, repo, matcher, prefix, uipathfn, explicitonly, **opts):
278 280 # The --normal flag short circuits this override
279 281 if opts.get('normal'):
280 282 return orig(ui, repo, matcher, prefix, uipathfn, explicitonly, **opts)
281 283
282 284 ladded, lbad = addlargefiles(ui, repo, False, matcher, uipathfn, **opts)
283 285 normalmatcher = composenormalfilematcher(
284 286 matcher, repo[None].manifest(), ladded
285 287 )
286 288 bad = orig(ui, repo, normalmatcher, prefix, uipathfn, explicitonly, **opts)
287 289
288 290 bad.extend(f for f in lbad)
289 291 return bad
290 292
291 293
292 294 @eh.wrapfunction(cmdutil, 'remove')
293 295 def cmdutilremove(
294 296 orig, ui, repo, matcher, prefix, uipathfn, after, force, subrepos, dryrun
295 297 ):
296 298 normalmatcher = composenormalfilematcher(matcher, repo[None].manifest())
297 299 result = orig(
298 300 ui,
299 301 repo,
300 302 normalmatcher,
301 303 prefix,
302 304 uipathfn,
303 305 after,
304 306 force,
305 307 subrepos,
306 308 dryrun,
307 309 )
308 310 return (
309 311 removelargefiles(
310 312 ui, repo, False, matcher, uipathfn, dryrun, after=after, force=force
311 313 )
312 314 or result
313 315 )
314 316
315 317
316 318 @eh.wrapfunction(dirstate.dirstate, '_changing')
317 319 @contextlib.contextmanager
318 320 def _changing(orig, self, repo, change_type):
319 321 pre = sub_dirstate = getattr(self, '_sub_dirstate', None)
320 322 try:
321 323 lfd = getattr(self, '_large_file_dirstate', False)
322 324 if sub_dirstate is None and not lfd:
323 325 sub_dirstate = lfutil.openlfdirstate(repo.ui, repo)
324 326 self._sub_dirstate = sub_dirstate
325 327 if not lfd:
326 328 assert self._sub_dirstate is not None
327 329 with orig(self, repo, change_type):
328 330 if sub_dirstate is None:
329 331 yield
330 332 else:
331 333 with sub_dirstate._changing(repo, change_type):
332 334 yield
333 335 finally:
334 336 self._sub_dirstate = pre
335 337
336 338
337 339 @eh.wrapfunction(dirstate.dirstate, 'running_status')
338 340 @contextlib.contextmanager
339 341 def running_status(orig, self, repo):
340 342 pre = sub_dirstate = getattr(self, '_sub_dirstate', None)
341 343 try:
342 344 lfd = getattr(self, '_large_file_dirstate', False)
343 345 if sub_dirstate is None and not lfd:
344 346 sub_dirstate = lfutil.openlfdirstate(repo.ui, repo)
345 347 self._sub_dirstate = sub_dirstate
346 348 if not lfd:
347 349 assert self._sub_dirstate is not None
348 350 with orig(self, repo):
349 351 if sub_dirstate is None:
350 352 yield
351 353 else:
352 354 with sub_dirstate.running_status(repo):
353 355 yield
354 356 finally:
355 357 self._sub_dirstate = pre
356 358
357 359
358 360 @eh.wrapfunction(subrepo.hgsubrepo, 'status')
359 361 def overridestatusfn(orig, repo, rev2, **opts):
360 362 with lfstatus(repo._repo):
361 363 return orig(repo, rev2, **opts)
362 364
363 365
364 366 @eh.wrapcommand(b'status')
365 367 def overridestatus(orig, ui, repo, *pats, **opts):
366 368 with lfstatus(repo):
367 369 return orig(ui, repo, *pats, **opts)
368 370
369 371
370 372 @eh.wrapfunction(subrepo.hgsubrepo, 'dirty')
371 373 def overridedirty(orig, repo, ignoreupdate=False, missing=False):
372 374 with lfstatus(repo._repo):
373 375 return orig(repo, ignoreupdate=ignoreupdate, missing=missing)
374 376
375 377
376 378 @eh.wrapcommand(b'log')
377 379 def overridelog(orig, ui, repo, *pats, **opts):
378 380 def overridematchandpats(
379 381 orig,
380 382 ctx,
381 383 pats=(),
382 384 opts=None,
383 385 globbed=False,
384 386 default=b'relpath',
385 387 badfn=None,
386 388 ):
387 389 """Matcher that merges root directory with .hglf, suitable for log.
388 390 It is still possible to match .hglf directly.
389 391 For any listed files run log on the standin too.
390 392 matchfn tries both the given filename and with .hglf stripped.
391 393 """
392 394 if opts is None:
393 395 opts = {}
394 396 matchandpats = orig(ctx, pats, opts, globbed, default, badfn=badfn)
395 397 m, p = copy.copy(matchandpats)
396 398
397 399 if m.always():
398 400 # We want to match everything anyway, so there's no benefit trying
399 401 # to add standins.
400 402 return matchandpats
401 403
402 404 pats = set(p)
403 405
404 406 def fixpats(pat, tostandin=lfutil.standin):
405 407 if pat.startswith(b'set:'):
406 408 return pat
407 409
408 410 kindpat = matchmod._patsplit(pat, None)
409 411
410 412 if kindpat[0] is not None:
411 413 return kindpat[0] + b':' + tostandin(kindpat[1])
412 414 return tostandin(kindpat[1])
413 415
414 416 cwd = repo.getcwd()
415 417 if cwd:
416 418 hglf = lfutil.shortname
417 419 back = util.pconvert(repo.pathto(hglf)[: -len(hglf)])
418 420
419 421 def tostandin(f):
420 422 # The file may already be a standin, so truncate the back
421 423 # prefix and test before mangling it. This avoids turning
422 424 # 'glob:../.hglf/foo*' into 'glob:../.hglf/../.hglf/foo*'.
423 425 if f.startswith(back) and lfutil.splitstandin(f[len(back) :]):
424 426 return f
425 427
426 428 # An absolute path is from outside the repo, so truncate the
427 429 # path to the root before building the standin. Otherwise cwd
428 430 # is somewhere in the repo, relative to root, and needs to be
429 431 # prepended before building the standin.
430 432 if os.path.isabs(cwd):
431 433 f = f[len(back) :]
432 434 else:
433 435 f = cwd + b'/' + f
434 436 return back + lfutil.standin(f)
435 437
436 438 else:
437 439
438 440 def tostandin(f):
439 441 if lfutil.isstandin(f):
440 442 return f
441 443 return lfutil.standin(f)
442 444
443 445 pats.update(fixpats(f, tostandin) for f in p)
444 446
447 m._was_tampered_with = True
448
445 449 for i in range(0, len(m._files)):
446 450 # Don't add '.hglf' to m.files, since that is already covered by '.'
447 451 if m._files[i] == b'.':
448 452 continue
449 453 standin = lfutil.standin(m._files[i])
450 454 # If the "standin" is a directory, append instead of replace to
451 455 # support naming a directory on the command line with only
452 456 # largefiles. The original directory is kept to support normal
453 457 # files.
454 458 if standin in ctx:
455 459 m._files[i] = standin
456 460 elif m._files[i] not in ctx and repo.wvfs.isdir(standin):
457 461 m._files.append(standin)
458 462
459 463 m._fileset = set(m._files)
460 464 m.always = lambda: False
461 465 origmatchfn = m.matchfn
462 466
463 467 def lfmatchfn(f):
464 468 lf = lfutil.splitstandin(f)
465 469 if lf is not None and origmatchfn(lf):
466 470 return True
467 471 r = origmatchfn(f)
468 472 return r
469 473
470 474 m.matchfn = lfmatchfn
471 475
472 476 ui.debug(b'updated patterns: %s\n' % b', '.join(sorted(pats)))
473 477 return m, pats
474 478
475 479 # For hg log --patch, the match object is used in two different senses:
476 480 # (1) to determine what revisions should be printed out, and
477 481 # (2) to determine what files to print out diffs for.
478 482 # The magic matchandpats override should be used for case (1) but not for
479 483 # case (2).
480 484 oldmatchandpats = scmutil.matchandpats
481 485
482 486 def overridemakefilematcher(orig, repo, pats, opts, badfn=None):
483 487 wctx = repo[None]
484 488 match, pats = oldmatchandpats(wctx, pats, opts, badfn=badfn)
485 489 return lambda ctx: match
486 490
487 491 wrappedmatchandpats = extensions.wrappedfunction(
488 492 scmutil, 'matchandpats', overridematchandpats
489 493 )
490 494 wrappedmakefilematcher = extensions.wrappedfunction(
491 495 logcmdutil, '_makenofollowfilematcher', overridemakefilematcher
492 496 )
493 497 with wrappedmatchandpats, wrappedmakefilematcher:
494 498 return orig(ui, repo, *pats, **opts)
495 499
496 500
497 501 @eh.wrapcommand(
498 502 b'verify',
499 503 opts=[
500 504 (
501 505 b'',
502 506 b'large',
503 507 None,
504 508 _(b'verify that all largefiles in current revision exist'),
505 509 ),
506 510 (
507 511 b'',
508 512 b'lfa',
509 513 None,
510 514 _(b'verify largefiles in all revisions, not just current'),
511 515 ),
512 516 (
513 517 b'',
514 518 b'lfc',
515 519 None,
516 520 _(b'verify local largefile contents, not just existence'),
517 521 ),
518 522 ],
519 523 )
520 524 def overrideverify(orig, ui, repo, *pats, **opts):
521 525 large = opts.pop('large', False)
522 526 all = opts.pop('lfa', False)
523 527 contents = opts.pop('lfc', False)
524 528
525 529 result = orig(ui, repo, *pats, **opts)
526 530 if large or all or contents:
527 531 result = result or lfcommands.verifylfiles(ui, repo, all, contents)
528 532 return result
529 533
530 534
531 535 @eh.wrapcommand(
532 536 b'debugstate',
533 537 opts=[(b'', b'large', None, _(b'display largefiles dirstate'))],
534 538 )
535 539 def overridedebugstate(orig, ui, repo, *pats, **opts):
536 540 large = opts.pop('large', False)
537 541 if large:
538 542
539 543 class fakerepo:
540 544 dirstate = lfutil.openlfdirstate(ui, repo)
541 545
542 546 orig(ui, fakerepo, *pats, **opts)
543 547 else:
544 548 orig(ui, repo, *pats, **opts)
545 549
546 550
547 551 # Before starting the manifest merge, merge.updates will call
548 552 # _checkunknownfile to check if there are any files in the merged-in
549 553 # changeset that collide with unknown files in the working copy.
550 554 #
551 555 # The largefiles are seen as unknown, so this prevents us from merging
552 556 # in a file 'foo' if we already have a largefile with the same name.
553 557 #
554 558 # The overridden function filters the unknown files by removing any
555 559 # largefiles. This makes the merge proceed and we can then handle this
556 560 # case further in the overridden calculateupdates function below.
557 561 @eh.wrapfunction(merge, '_checkunknownfile')
558 562 def overridecheckunknownfile(
559 563 origfn, dirstate, wvfs, dircache, wctx, mctx, f, f2=None
560 564 ):
561 565 if lfutil.standin(dirstate.normalize(f)) in wctx:
562 566 return False
563 567 return origfn(dirstate, wvfs, dircache, wctx, mctx, f, f2)
564 568
565 569
566 570 # The manifest merge handles conflicts on the manifest level. We want
567 571 # to handle changes in largefile-ness of files at this level too.
568 572 #
569 573 # The strategy is to run the original calculateupdates and then process
570 574 # the action list it outputs. There are two cases we need to deal with:
571 575 #
572 576 # 1. Normal file in p1, largefile in p2. Here the largefile is
573 577 # detected via its standin file, which will enter the working copy
574 578 # with a "get" action. It is not "merge" since the standin is all
575 579 # Mercurial is concerned with at this level -- the link to the
576 580 # existing normal file is not relevant here.
577 581 #
578 582 # 2. Largefile in p1, normal file in p2. Here we get a "merge" action
579 583 # since the largefile will be present in the working copy and
580 584 # different from the normal file in p2. Mercurial therefore
581 585 # triggers a merge action.
582 586 #
583 587 # In both cases, we prompt the user and emit new actions to either
584 588 # remove the standin (if the normal file was kept) or to remove the
585 589 # normal file and get the standin (if the largefile was kept). The
586 590 # default prompt answer is to use the largefile version since it was
587 591 # presumably changed on purpose.
588 592 #
589 593 # Finally, the merge.applyupdates function will then take care of
590 594 # writing the files into the working copy and lfcommands.updatelfiles
591 595 # will update the largefiles.
592 596 @eh.wrapfunction(merge, 'calculateupdates')
593 597 def overridecalculateupdates(
594 598 origfn, repo, p1, p2, pas, branchmerge, force, acceptremote, *args, **kwargs
595 599 ):
596 600 overwrite = force and not branchmerge
597 601 mresult = origfn(
598 602 repo, p1, p2, pas, branchmerge, force, acceptremote, *args, **kwargs
599 603 )
600 604
601 605 if overwrite:
602 606 return mresult
603 607
604 608 # Convert to dictionary with filename as key and action as value.
605 609 lfiles = set()
606 610 for f in mresult.files():
607 611 splitstandin = lfutil.splitstandin(f)
608 612 if splitstandin is not None and splitstandin in p1:
609 613 lfiles.add(splitstandin)
610 614 elif lfutil.standin(f) in p1:
611 615 lfiles.add(f)
612 616
613 617 for lfile in sorted(lfiles):
614 618 standin = lfutil.standin(lfile)
615 619 (lm, largs, lmsg) = mresult.getfile(lfile, (None, None, None))
616 620 (sm, sargs, smsg) = mresult.getfile(standin, (None, None, None))
617 621
618 622 if sm in (ACTION_GET, ACTION_DELETED_CHANGED) and lm != ACTION_REMOVE:
619 623 if sm == ACTION_DELETED_CHANGED:
620 624 f1, f2, fa, move, anc = sargs
621 625 sargs = (p2[f2].flags(), False)
622 626 # Case 1: normal file in the working copy, largefile in
623 627 # the second parent
624 628 usermsg = (
625 629 _(
626 630 b'remote turned local normal file %s into a largefile\n'
627 631 b'use (l)argefile or keep (n)ormal file?'
628 632 b'$$ &Largefile $$ &Normal file'
629 633 )
630 634 % lfile
631 635 )
632 636 if repo.ui.promptchoice(usermsg, 0) == 0: # pick remote largefile
633 637 mresult.addfile(
634 638 lfile, ACTION_REMOVE, None, b'replaced by standin'
635 639 )
636 640 mresult.addfile(standin, ACTION_GET, sargs, b'replaces standin')
637 641 else: # keep local normal file
638 642 mresult.addfile(lfile, ACTION_KEEP, None, b'replaces standin')
639 643 if branchmerge:
640 644 mresult.addfile(
641 645 standin,
642 646 ACTION_KEEP,
643 647 None,
644 648 b'replaced by non-standin',
645 649 )
646 650 else:
647 651 mresult.addfile(
648 652 standin,
649 653 ACTION_REMOVE,
650 654 None,
651 655 b'replaced by non-standin',
652 656 )
653 657 if lm in (ACTION_GET, ACTION_DELETED_CHANGED) and sm != ACTION_REMOVE:
654 658 if lm == ACTION_DELETED_CHANGED:
655 659 f1, f2, fa, move, anc = largs
656 660 largs = (p2[f2].flags(), False)
657 661 # Case 2: largefile in the working copy, normal file in
658 662 # the second parent
659 663 usermsg = (
660 664 _(
661 665 b'remote turned local largefile %s into a normal file\n'
662 666 b'keep (l)argefile or use (n)ormal file?'
663 667 b'$$ &Largefile $$ &Normal file'
664 668 )
665 669 % lfile
666 670 )
667 671 if repo.ui.promptchoice(usermsg, 0) == 0: # keep local largefile
668 672 if branchmerge:
669 673 # largefile can be restored from standin safely
670 674 mresult.addfile(
671 675 lfile,
672 676 ACTION_KEEP,
673 677 None,
674 678 b'replaced by standin',
675 679 )
676 680 mresult.addfile(
677 681 standin, ACTION_KEEP, None, b'replaces standin'
678 682 )
679 683 else:
680 684 # "lfile" should be marked as "removed" without
681 685 # actually removing the file itself
682 686 mresult.addfile(
683 687 lfile,
684 688 MERGE_ACTION_LARGEFILE_MARK_REMOVED,
685 689 None,
686 690 b'forget non-standin largefile',
687 691 )
688 692
689 693 # linear-merge should treat this largefile as 're-added'
690 694 mresult.addfile(standin, ACTION_ADD, None, b'keep standin')
691 695 else: # pick remote normal file
692 696 mresult.addfile(lfile, ACTION_GET, largs, b'replaces standin')
693 697 mresult.addfile(
694 698 standin,
695 699 ACTION_REMOVE,
696 700 None,
697 701 b'replaced by non-standin',
698 702 )
699 703
700 704 return mresult
701 705
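# Orientation only (informal summary of the rewrites above):
#
#   case 1, user picks the largefile:  lfile -> REMOVE, standin -> GET
#   case 1, user keeps the normal:     lfile -> KEEP,   standin -> KEEP or REMOVE
#   case 2, user keeps the largefile:  lfile -> KEEP or 'lfmr', standin -> KEEP or ADD
#   case 2, user picks the normal:     lfile -> GET,    standin -> REMOVE
#
# The KEEP/REMOVE and KEEP/ADD splits depend on whether this is a branch
# merge or a linear update, as the branches above spell out.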
702 706
703 707 @eh.wrapfunction(mergestatemod, 'recordupdates')
704 708 def mergerecordupdates(orig, repo, actions, branchmerge, getfiledata):
705 709 if MERGE_ACTION_LARGEFILE_MARK_REMOVED in actions:
706 710 lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
707 711 for lfile, args, msg in actions[MERGE_ACTION_LARGEFILE_MARK_REMOVED]:
708 712 # this should be executed before 'orig', to execute 'remove'
709 713 # before all other actions
710 714 repo.dirstate.update_file(lfile, p1_tracked=True, wc_tracked=False)
711 715 # make sure lfile doesn't get synclfdirstate'd as normal
712 716 lfdirstate.update_file(lfile, p1_tracked=False, wc_tracked=True)
713 717
714 718 return orig(repo, actions, branchmerge, getfiledata)
715 719
716 720
717 721 # Override filemerge to prompt the user about how they wish to merge
718 722 # largefiles. This will handle identical edits without prompting the user.
719 723 @eh.wrapfunction(filemerge, 'filemerge')
720 724 def overridefilemerge(
721 725 origfn, repo, wctx, mynode, orig, fcd, fco, fca, labels=None
722 726 ):
723 727 if not lfutil.isstandin(orig) or fcd.isabsent() or fco.isabsent():
724 728 return origfn(repo, wctx, mynode, orig, fcd, fco, fca, labels=labels)
725 729
726 730 ahash = lfutil.readasstandin(fca).lower()
727 731 dhash = lfutil.readasstandin(fcd).lower()
728 732 ohash = lfutil.readasstandin(fco).lower()
729 733 if (
730 734 ohash != ahash
731 735 and ohash != dhash
732 736 and (
733 737 dhash == ahash
734 738 or repo.ui.promptchoice(
735 739 _(
736 740 b'largefile %s has a merge conflict\nancestor was %s\n'
737 741 b'you can keep (l)ocal %s or take (o)ther %s.\n'
738 742 b'what do you want to do?'
739 743 b'$$ &Local $$ &Other'
740 744 )
741 745 % (lfutil.splitstandin(orig), ahash, dhash, ohash),
742 746 0,
743 747 )
744 748 == 1
745 749 )
746 750 ):
747 751 repo.wwrite(fcd.path(), fco.data(), fco.flags())
748 752 return 0, False
749 753
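# Decision table implied above (a, d, o = ancestor, local, other standin
# hashes; informal):
#
#   o == a or o == d : keep local silently (other side unchanged or identical)
#   d == a           : take other silently (only the other side changed)
#   otherwise        : prompt, defaulting to keep (l)ocal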
750 754
751 755 @eh.wrapfunction(copiesmod, 'pathcopies')
752 756 def copiespathcopies(orig, ctx1, ctx2, match=None):
753 757 copies = orig(ctx1, ctx2, match=match)
754 758 updated = {}
755 759
756 760 for k, v in copies.items():
757 761 updated[lfutil.splitstandin(k) or k] = lfutil.splitstandin(v) or v
758 762
759 763 return updated
760 764
761 765
762 766 # Copy first changes the matchers to match standins instead of
763 767 # largefiles. Then it overrides util.copyfile in that function it
764 768 # checks if the destination largefile already exists. It also keeps a
765 769 # list of copied files so that the largefiles can be copied and the
766 770 # dirstate updated.
767 771 @eh.wrapfunction(cmdutil, 'copy')
768 772 def overridecopy(orig, ui, repo, pats, opts, rename=False):
769 773 # doesn't remove largefile on rename
770 774 if len(pats) < 2:
771 775 # this isn't legal, let the original function deal with it
772 776 return orig(ui, repo, pats, opts, rename)
773 777
774 778 # This could copy both lfiles and normal files in one command,
775 779 # but we don't want to do that. First replace their matcher to
776 780 # only match normal files and run it, then replace it to just
777 781 # match largefiles and run it again.
778 782 nonormalfiles = False
779 783 nolfiles = False
780 784 manifest = repo[None].manifest()
781 785
782 786 def normalfilesmatchfn(
783 787 orig,
784 788 ctx,
785 789 pats=(),
786 790 opts=None,
787 791 globbed=False,
788 792 default=b'relpath',
789 793 badfn=None,
790 794 ):
791 795 if opts is None:
792 796 opts = {}
793 797 match = orig(ctx, pats, opts, globbed, default, badfn=badfn)
794 798 return composenormalfilematcher(match, manifest)
795 799
796 800 with extensions.wrappedfunction(scmutil, 'match', normalfilesmatchfn):
797 801 try:
798 802 result = orig(ui, repo, pats, opts, rename)
799 803 except error.Abort as e:
800 804 if e.message != _(b'no files to copy'):
801 805 raise e
802 806 else:
803 807 nonormalfiles = True
804 808 result = 0
805 809
806 810 # The first rename can cause our current working directory to be removed.
807 811 # In that case there is nothing left to copy/rename so just quit.
808 812 try:
809 813 repo.getcwd()
810 814 except OSError:
811 815 return result
812 816
813 817 def makestandin(relpath):
814 818 path = pathutil.canonpath(repo.root, repo.getcwd(), relpath)
815 819 return repo.wvfs.join(lfutil.standin(path))
816 820
817 821 fullpats = scmutil.expandpats(pats)
818 822 dest = fullpats[-1]
819 823
820 824 if os.path.isdir(dest):
821 825 if not os.path.isdir(makestandin(dest)):
822 826 os.makedirs(makestandin(dest))
823 827
824 828 try:
825 829 # When we call orig below it creates the standins but we don't add
826 830 # them to the dir state until later so lock during that time.
827 831 wlock = repo.wlock()
828 832
829 833 manifest = repo[None].manifest()
830 834
831 835 def overridematch(
832 836 orig,
833 837 ctx,
834 838 pats=(),
835 839 opts=None,
836 840 globbed=False,
837 841 default=b'relpath',
838 842 badfn=None,
839 843 ):
840 844 if opts is None:
841 845 opts = {}
842 846 newpats = []
843 847 # The patterns were previously mangled to add the standin
844 848 # directory; we need to remove that now
845 849 for pat in pats:
846 850 if matchmod.patkind(pat) is None and lfutil.shortname in pat:
847 851 newpats.append(pat.replace(lfutil.shortname, b''))
848 852 else:
849 853 newpats.append(pat)
850 854 match = orig(ctx, newpats, opts, globbed, default, badfn=badfn)
851 855 m = copy.copy(match)
856 m._was_tampered_with = True
852 857 lfile = lambda f: lfutil.standin(f) in manifest
853 858 m._files = [lfutil.standin(f) for f in m._files if lfile(f)]
854 859 m._fileset = set(m._files)
855 860 origmatchfn = m.matchfn
856 861
857 862 def matchfn(f):
858 863 lfile = lfutil.splitstandin(f)
859 864 return (
860 865 lfile is not None
861 866 and (f in manifest)
862 867 and origmatchfn(lfile)
863 868 or None
864 869 )
865 870
866 871 m.matchfn = matchfn
867 872 return m
868 873
869 874 listpats = []
870 875 for pat in pats:
871 876 if matchmod.patkind(pat) is not None:
872 877 listpats.append(pat)
873 878 else:
874 879 listpats.append(makestandin(pat))
875 880
876 881 copiedfiles = []
877 882
878 883 def overridecopyfile(orig, src, dest, *args, **kwargs):
879 884 if lfutil.shortname in src and dest.startswith(
880 885 repo.wjoin(lfutil.shortname)
881 886 ):
882 887 destlfile = dest.replace(lfutil.shortname, b'')
883 888 if not opts[b'force'] and os.path.exists(destlfile):
884 889 raise IOError(
885 890 b'', _(b'destination largefile already exists')
886 891 )
887 892 copiedfiles.append((src, dest))
888 893 orig(src, dest, *args, **kwargs)
889 894
890 895 with extensions.wrappedfunction(util, 'copyfile', overridecopyfile):
891 896 with extensions.wrappedfunction(scmutil, 'match', overridematch):
892 897 result += orig(ui, repo, listpats, opts, rename)
893 898
894 899 lfdirstate = lfutil.openlfdirstate(ui, repo)
895 900 for (src, dest) in copiedfiles:
896 901 if lfutil.shortname in src and dest.startswith(
897 902 repo.wjoin(lfutil.shortname)
898 903 ):
899 904 srclfile = src.replace(repo.wjoin(lfutil.standin(b'')), b'')
900 905 destlfile = dest.replace(repo.wjoin(lfutil.standin(b'')), b'')
901 906 destlfiledir = repo.wvfs.dirname(repo.wjoin(destlfile)) or b'.'
902 907 if not os.path.isdir(destlfiledir):
903 908 os.makedirs(destlfiledir)
904 909 if rename:
905 910 os.rename(repo.wjoin(srclfile), repo.wjoin(destlfile))
906 911
907 912 # The file is gone, but this deletes any empty parent
908 913 # directories as a side-effect.
909 914 repo.wvfs.unlinkpath(srclfile, ignoremissing=True)
910 915 lfdirstate.set_untracked(srclfile)
911 916 else:
912 917 util.copyfile(repo.wjoin(srclfile), repo.wjoin(destlfile))
913 918
914 919 lfdirstate.set_tracked(destlfile)
915 920 lfdirstate.write(repo.currenttransaction())
916 921 except error.Abort as e:
917 922 if e.message != _(b'no files to copy'):
918 923 raise e
919 924 else:
920 925 nolfiles = True
921 926 finally:
922 927 wlock.release()
923 928
924 929 if nolfiles and nonormalfiles:
925 930 raise error.Abort(_(b'no files to copy'))
926 931
927 932 return result
928 933
929 934
930 935 # When the user calls revert, we have to be careful to not revert any
931 936 # changes to other largefiles accidentally. This means we have to keep
932 937 # track of the largefiles that are being reverted so we only pull down
933 938 # the necessary largefiles.
934 939 #
935 940 # Standins are only updated (to match the hash of largefiles) before
936 941 # commits. Update the standins then run the original revert, changing
937 942 # the matcher to hit standins instead of largefiles. Based on the
938 943 # resulting standins update the largefiles.
939 944 @eh.wrapfunction(cmdutil, 'revert')
940 945 def overriderevert(orig, ui, repo, ctx, *pats, **opts):
941 946 # Because we put the standins in a bad state (by updating them)
942 947 # and then return them to a correct state we need to lock to
943 948 # prevent others from changing them in their incorrect state.
944 949 with repo.wlock(), repo.dirstate.running_status(repo):
945 950 lfdirstate = lfutil.openlfdirstate(ui, repo)
946 951 s = lfutil.lfdirstatestatus(lfdirstate, repo)
947 952 lfdirstate.write(repo.currenttransaction())
948 953 for lfile in s.modified:
949 954 lfutil.updatestandin(repo, lfile, lfutil.standin(lfile))
950 955 for lfile in s.deleted:
951 956 fstandin = lfutil.standin(lfile)
952 957 if repo.wvfs.exists(fstandin):
953 958 repo.wvfs.unlink(fstandin)
954 959
955 960 oldstandins = lfutil.getstandinsstate(repo)
956 961
957 962 def overridematch(
958 963 orig,
959 964 mctx,
960 965 pats=(),
961 966 opts=None,
962 967 globbed=False,
963 968 default=b'relpath',
964 969 badfn=None,
965 970 ):
966 971 if opts is None:
967 972 opts = {}
968 973 match = orig(mctx, pats, opts, globbed, default, badfn=badfn)
969 974 m = copy.copy(match)
975 m._was_tampered_with = True
970 976
971 977 # revert supports recursing into subrepos, and though largefiles
972 978 # currently doesn't work correctly in that case, this match is
973 979 # called, so the lfdirstate above may not be the correct one for
974 980 # this invocation of match.
975 981 lfdirstate = lfutil.openlfdirstate(
976 982 mctx.repo().ui, mctx.repo(), False
977 983 )
978 984
979 985 wctx = repo[None]
980 986 matchfiles = []
981 987 for f in m._files:
982 988 standin = lfutil.standin(f)
983 989 if standin in ctx or standin in mctx:
984 990 matchfiles.append(standin)
985 991 elif standin in wctx or lfdirstate.get_entry(f).removed:
986 992 continue
987 993 else:
988 994 matchfiles.append(f)
989 995 m._files = matchfiles
990 996 m._fileset = set(m._files)
991 997 origmatchfn = m.matchfn
992 998
993 999 def matchfn(f):
994 1000 lfile = lfutil.splitstandin(f)
995 1001 if lfile is not None:
996 1002 return origmatchfn(lfile) and (f in ctx or f in mctx)
997 1003 return origmatchfn(f)
998 1004
999 1005 m.matchfn = matchfn
1000 1006 return m
1001 1007
1002 1008 with extensions.wrappedfunction(scmutil, 'match', overridematch):
1003 1009 orig(ui, repo, ctx, *pats, **opts)
1004 1010
1005 1011 newstandins = lfutil.getstandinsstate(repo)
1006 1012 filelist = lfutil.getlfilestoupdate(oldstandins, newstandins)
1007 1013 # lfdirstate should be 'normallookup'-ed for updated files,
1008 1014 # because reverting doesn't touch dirstate for 'normal' files
1009 1015 # when target revision is explicitly specified: in such case,
1010 1016 # 'n' and valid timestamp in dirstate doesn't ensure 'clean'
1011 1017 # of target (standin) file.
1012 1018 lfcommands.updatelfiles(
1013 1019 ui, repo, filelist, printmessage=False, normallookup=True
1014 1020 )
1015 1021
1016 1022
1017 1023 # after pulling changesets, we need to take some extra care to get
1018 1024 # largefiles updated remotely
1019 1025 @eh.wrapcommand(
1020 1026 b'pull',
1021 1027 opts=[
1022 1028 (
1023 1029 b'',
1024 1030 b'all-largefiles',
1025 1031 None,
1026 1032 _(b'download all pulled versions of largefiles (DEPRECATED)'),
1027 1033 ),
1028 1034 (
1029 1035 b'',
1030 1036 b'lfrev',
1031 1037 [],
1032 1038 _(b'download largefiles for these revisions'),
1033 1039 _(b'REV'),
1034 1040 ),
1035 1041 ],
1036 1042 )
1037 1043 def overridepull(orig, ui, repo, source=None, **opts):
1038 1044 revsprepull = len(repo)
1039 1045 if not source:
1040 1046 source = b'default'
1041 1047 repo.lfpullsource = source
1042 1048 result = orig(ui, repo, source, **opts)
1043 1049 revspostpull = len(repo)
1044 1050 lfrevs = opts.get('lfrev', [])
1045 1051 if opts.get('all_largefiles'):
1046 1052 lfrevs.append(b'pulled()')
1047 1053 if lfrevs and revspostpull > revsprepull:
1048 1054 numcached = 0
1049 1055 repo.firstpulled = revsprepull # for pulled() revset expression
1050 1056 try:
1051 1057 for rev in logcmdutil.revrange(repo, lfrevs):
1052 1058 ui.note(_(b'pulling largefiles for revision %d\n') % rev)
1053 1059 (cached, missing) = lfcommands.cachelfiles(ui, repo, rev)
1054 1060 numcached += len(cached)
1055 1061 finally:
1056 1062 del repo.firstpulled
1057 1063 ui.status(_(b"%d largefiles cached\n") % numcached)
1058 1064 return result
1059 1065
1060 1066
1061 1067 @eh.wrapcommand(
1062 1068 b'push',
1063 1069 opts=[
1064 1070 (
1065 1071 b'',
1066 1072 b'lfrev',
1067 1073 [],
1068 1074 _(b'upload largefiles for these revisions'),
1069 1075 _(b'REV'),
1070 1076 )
1071 1077 ],
1072 1078 )
1073 1079 def overridepush(orig, ui, repo, *args, **kwargs):
1074 1080 """Override push command and store --lfrev parameters in opargs"""
1075 1081 lfrevs = kwargs.pop('lfrev', None)
1076 1082 if lfrevs:
1077 1083 opargs = kwargs.setdefault('opargs', {})
1078 1084 opargs[b'lfrevs'] = logcmdutil.revrange(repo, lfrevs)
1079 1085 return orig(ui, repo, *args, **kwargs)
1080 1086
1081 1087
1082 1088 @eh.wrapfunction(exchange, 'pushoperation')
1083 1089 def exchangepushoperation(orig, *args, **kwargs):
1084 1090 """Override pushoperation constructor and store lfrevs parameter"""
1085 1091 lfrevs = kwargs.pop('lfrevs', None)
1086 1092 pushop = orig(*args, **kwargs)
1087 1093 pushop.lfrevs = lfrevs
1088 1094 return pushop
1089 1095
1090 1096
1091 1097 @eh.revsetpredicate(b'pulled()')
1092 1098 def pulledrevsetsymbol(repo, subset, x):
1093 1099 """Changesets that have just been pulled.
1094 1100
1095 1101 Only available with largefiles from pull --lfrev expressions.
1096 1102
1097 1103 .. container:: verbose
1098 1104
1099 1105 Some examples:
1100 1106
1101 1107 - pull largefiles for all new changesets::
1102 1108
1103 1109 hg pull --lfrev "pulled()"
1104 1110
1105 1111 - pull largefiles for all new branch heads::
1106 1112
1107 1113 hg pull --lfrev "head(pulled()) and not closed()"
1108 1114
1109 1115 """
1110 1116
1111 1117 try:
1112 1118 firstpulled = repo.firstpulled
1113 1119 except AttributeError:
1114 1120 raise error.Abort(_(b"pulled() only available in --lfrev"))
1115 1121 return smartset.baseset([r for r in subset if r >= firstpulled])
1116 1122
1117 1123
1118 1124 @eh.wrapcommand(
1119 1125 b'clone',
1120 1126 opts=[
1121 1127 (
1122 1128 b'',
1123 1129 b'all-largefiles',
1124 1130 None,
1125 1131 _(b'download all versions of all largefiles'),
1126 1132 )
1127 1133 ],
1128 1134 )
1129 1135 def overrideclone(orig, ui, source, dest=None, **opts):
1130 1136 d = dest
1131 1137 if d is None:
1132 1138 d = hg.defaultdest(source)
1133 1139 if opts.get('all_largefiles') and not hg.islocal(d):
1134 1140 raise error.Abort(
1135 1141 _(b'--all-largefiles is incompatible with non-local destination %s')
1136 1142 % d
1137 1143 )
1138 1144
1139 1145 return orig(ui, source, dest, **opts)
1140 1146
1141 1147
1142 1148 @eh.wrapfunction(hg, 'clone')
1143 1149 def hgclone(orig, ui, opts, *args, **kwargs):
1144 1150 result = orig(ui, opts, *args, **kwargs)
1145 1151
1146 1152 if result is not None:
1147 1153 sourcerepo, destrepo = result
1148 1154 repo = destrepo.local()
1149 1155
1150 1156 # When cloning to a remote repo (like through SSH), no repo is available
1151 1157 # from the peer. Therefore the largefiles can't be downloaded and the
1152 1158 # hgrc can't be updated.
1153 1159 if not repo:
1154 1160 return result
1155 1161
1156 1162 # Caching is implicitly limited to 'rev' option, since the dest repo was
1157 1163 # truncated at that point. The user may expect a download count with
1158 1164 # this option, so attempt it whether or not this is a largefile repo.
1159 1165 if opts.get(b'all_largefiles'):
1160 1166 success, missing = lfcommands.downloadlfiles(ui, repo)
1161 1167
1162 1168 if missing != 0:
1163 1169 return None
1164 1170
1165 1171 return result
1166 1172
1167 1173
1168 1174 @eh.wrapcommand(b'rebase', extension=b'rebase')
1169 1175 def overriderebasecmd(orig, ui, repo, **opts):
1170 1176 if not hasattr(repo, '_largefilesenabled'):
1171 1177 return orig(ui, repo, **opts)
1172 1178
1173 1179 resuming = opts.get('continue')
1174 1180 repo._lfcommithooks.append(lfutil.automatedcommithook(resuming))
1175 1181 repo._lfstatuswriters.append(lambda *msg, **opts: None)
1176 1182 try:
1177 1183 with ui.configoverride(
1178 1184 {(b'rebase', b'experimental.inmemory'): False}, b"largefiles"
1179 1185 ):
1180 1186 return orig(ui, repo, **opts)
1181 1187 finally:
1182 1188 repo._lfstatuswriters.pop()
1183 1189 repo._lfcommithooks.pop()
1184 1190
1185 1191
1186 1192 @eh.extsetup
1187 1193 def overriderebase(ui):
1188 1194 try:
1189 1195 rebase = extensions.find(b'rebase')
1190 1196 except KeyError:
1191 1197 pass
1192 1198 else:
1193 1199
1194 1200 def _dorebase(orig, *args, **kwargs):
1195 1201 kwargs['inmemory'] = False
1196 1202 return orig(*args, **kwargs)
1197 1203
1198 1204 extensions.wrapfunction(rebase, '_dorebase', _dorebase)
1199 1205
1200 1206
1201 1207 @eh.wrapcommand(b'archive')
1202 1208 def overridearchivecmd(orig, ui, repo, dest, **opts):
1203 1209 with lfstatus(repo.unfiltered()):
1204 1210 return orig(ui, repo.unfiltered(), dest, **opts)
1205 1211
1206 1212
1207 1213 @eh.wrapfunction(webcommands, 'archive')
1208 1214 def hgwebarchive(orig, web):
1209 1215 with lfstatus(web.repo):
1210 1216 return orig(web)
1211 1217
1212 1218
1213 1219 @eh.wrapfunction(archival, 'archive')
1214 1220 def overridearchive(
1215 1221 orig,
1216 1222 repo,
1217 1223 dest,
1218 1224 node,
1219 1225 kind,
1220 1226 decode=True,
1221 1227 match=None,
1222 1228 prefix=b'',
1223 1229 mtime=None,
1224 1230 subrepos=None,
1225 1231 ):
1226 1232 # For some reason setting repo.lfstatus in hgwebarchive only changes the
1227 1233 # unfiltered repo's attr, so check that as well.
1228 1234 if not repo.lfstatus and not repo.unfiltered().lfstatus:
1229 1235 return orig(
1230 1236 repo, dest, node, kind, decode, match, prefix, mtime, subrepos
1231 1237 )
1232 1238
1233 1239 # No need to lock because we are only reading history and
1234 1240 # largefile caches, neither of which are modified.
1235 1241 if node is not None:
1236 1242 lfcommands.cachelfiles(repo.ui, repo, node)
1237 1243
1238 1244 if kind not in archival.archivers:
1239 1245 raise error.Abort(_(b"unknown archive type '%s'") % kind)
1240 1246
1241 1247 ctx = repo[node]
1242 1248
1243 1249 if kind == b'files':
1244 1250 if prefix:
1245 1251 raise error.Abort(_(b'cannot give prefix when archiving to files'))
1246 1252 else:
1247 1253 prefix = archival.tidyprefix(dest, kind, prefix)
1248 1254
1249 1255 def write(name, mode, islink, getdata):
1250 1256 if match and not match(name):
1251 1257 return
1252 1258 data = getdata()
1253 1259 if decode:
1254 1260 data = repo.wwritedata(name, data)
1255 1261 archiver.addfile(prefix + name, mode, islink, data)
1256 1262
1257 1263 archiver = archival.archivers[kind](dest, mtime or ctx.date()[0])
1258 1264
1259 1265 if repo.ui.configbool(b"ui", b"archivemeta"):
1260 1266 write(
1261 1267 b'.hg_archival.txt',
1262 1268 0o644,
1263 1269 False,
1264 1270 lambda: archival.buildmetadata(ctx),
1265 1271 )
1266 1272
1267 1273 for f in ctx:
1268 1274 ff = ctx.flags(f)
1269 1275 getdata = ctx[f].data
1270 1276 lfile = lfutil.splitstandin(f)
1271 1277 if lfile is not None:
1272 1278 if node is not None:
1273 1279 path = lfutil.findfile(repo, getdata().strip())
1274 1280
1275 1281 if path is None:
1276 1282 raise error.Abort(
1277 1283 _(
1278 1284 b'largefile %s not found in repo store or system cache'
1279 1285 )
1280 1286 % lfile
1281 1287 )
1282 1288 else:
1283 1289 path = lfile
1284 1290
1285 1291 f = lfile
1286 1292
1287 1293 getdata = lambda: util.readfile(path)
1288 1294 write(f, b'x' in ff and 0o755 or 0o644, b'l' in ff, getdata)
1289 1295
1290 1296 if subrepos:
1291 1297 for subpath in sorted(ctx.substate):
1292 1298 sub = ctx.workingsub(subpath)
1293 1299 submatch = matchmod.subdirmatcher(subpath, match)
1294 1300 subprefix = prefix + subpath + b'/'
1295 1301
1296 1302 # TODO: Only hgsubrepo instances have `_repo`, so figure out how to
1297 1303 # infer and possibly set lfstatus in hgsubrepoarchive. That would
1298 1304 # allow only hgsubrepos to set this, instead of the current scheme
1299 1305 # where the parent sets this for the child.
1300 1306 with (
1301 1307 hasattr(sub, '_repo')
1302 1308 and lfstatus(sub._repo)
1303 1309 or util.nullcontextmanager()
1304 1310 ):
1305 1311 sub.archive(archiver, subprefix, submatch)
1306 1312
1307 1313 archiver.done()
1308 1314
1309 1315
1310 1316 @eh.wrapfunction(subrepo.hgsubrepo, 'archive')
1311 1317 def hgsubrepoarchive(orig, repo, archiver, prefix, match=None, decode=True):
1312 1318 lfenabled = hasattr(repo._repo, '_largefilesenabled')
1313 1319 if not lfenabled or not repo._repo.lfstatus:
1314 1320 return orig(repo, archiver, prefix, match, decode)
1315 1321
1316 1322 repo._get(repo._state + (b'hg',))
1317 1323 rev = repo._state[1]
1318 1324 ctx = repo._repo[rev]
1319 1325
1320 1326 if ctx.node() is not None:
1321 1327 lfcommands.cachelfiles(repo.ui, repo._repo, ctx.node())
1322 1328
1323 1329 def write(name, mode, islink, getdata):
1324 1330 # At this point, the standin has been replaced with the largefile name,
1325 1331 # so the normal matcher works here without the lfutil variants.
1326 1332 if match and not match(f):
1327 1333 return
1328 1334 data = getdata()
1329 1335 if decode:
1330 1336 data = repo._repo.wwritedata(name, data)
1331 1337
1332 1338 archiver.addfile(prefix + name, mode, islink, data)
1333 1339
1334 1340 for f in ctx:
1335 1341 ff = ctx.flags(f)
1336 1342 getdata = ctx[f].data
1337 1343 lfile = lfutil.splitstandin(f)
1338 1344 if lfile is not None:
1339 1345 if ctx.node() is not None:
1340 1346 path = lfutil.findfile(repo._repo, getdata().strip())
1341 1347
1342 1348 if path is None:
1343 1349 raise error.Abort(
1344 1350 _(
1345 1351 b'largefile %s not found in repo store or system cache'
1346 1352 )
1347 1353 % lfile
1348 1354 )
1349 1355 else:
1350 1356 path = lfile
1351 1357
1352 1358 f = lfile
1353 1359
1354 1360 getdata = lambda: util.readfile(os.path.join(prefix, path))
1355 1361
1356 1362 write(f, b'x' in ff and 0o755 or 0o644, b'l' in ff, getdata)
1357 1363
1358 1364 for subpath in sorted(ctx.substate):
1359 1365 sub = ctx.workingsub(subpath)
1360 1366 submatch = matchmod.subdirmatcher(subpath, match)
1361 1367 subprefix = prefix + subpath + b'/'
1362 1368 # TODO: Only hgsubrepo instances have `_repo`, so figure out how to
1363 1369 # infer and possibly set lfstatus at the top of this function. That
1364 1370 # would allow only hgsubrepos to set this, instead of the current scheme
1365 1371 # where the parent sets this for the child.
1366 1372 with (
1367 1373 hasattr(sub, '_repo')
1368 1374 and lfstatus(sub._repo)
1369 1375 or util.nullcontextmanager()
1370 1376 ):
1371 1377 sub.archive(archiver, subprefix, submatch, decode)
1372 1378
1373 1379
1374 1380 # If a largefile is modified, the change is not reflected in its
1375 1381 # standin until a commit. cmdutil.bailifchanged() raises an exception
1376 1382 # if the repo has uncommitted changes. Wrap it to also check if
1377 1383 # largefiles were changed. This is used by bisect, backout and fetch.
1378 1384 @eh.wrapfunction(cmdutil, 'bailifchanged')
1379 1385 def overridebailifchanged(orig, repo, *args, **kwargs):
1380 1386 orig(repo, *args, **kwargs)
1381 1387 with lfstatus(repo):
1382 1388 s = repo.status()
1383 1389 if s.modified or s.added or s.removed or s.deleted:
1384 1390 raise error.Abort(_(b'uncommitted changes'))
1385 1391
1386 1392
1387 1393 @eh.wrapfunction(cmdutil, 'postcommitstatus')
1388 1394 def postcommitstatus(orig, repo, *args, **kwargs):
1389 1395 with lfstatus(repo):
1390 1396 return orig(repo, *args, **kwargs)
1391 1397
1392 1398
1393 1399 @eh.wrapfunction(cmdutil, 'forget')
1394 1400 def cmdutilforget(
1395 1401 orig, ui, repo, match, prefix, uipathfn, explicitonly, dryrun, interactive
1396 1402 ):
1397 1403 normalmatcher = composenormalfilematcher(match, repo[None].manifest())
1398 1404 bad, forgot = orig(
1399 1405 ui,
1400 1406 repo,
1401 1407 normalmatcher,
1402 1408 prefix,
1403 1409 uipathfn,
1404 1410 explicitonly,
1405 1411 dryrun,
1406 1412 interactive,
1407 1413 )
1408 1414 m = composelargefilematcher(match, repo[None].manifest())
1409 1415
1410 1416 with lfstatus(repo):
1411 1417 s = repo.status(match=m, clean=True)
1412 1418 manifest = repo[None].manifest()
1413 1419 forget = sorted(s.modified + s.added + s.deleted + s.clean)
1414 1420 forget = [f for f in forget if lfutil.standin(f) in manifest]
1415 1421
1416 1422 for f in forget:
1417 1423 fstandin = lfutil.standin(f)
1418 1424 if fstandin not in repo.dirstate and not repo.wvfs.isdir(fstandin):
1419 1425 ui.warn(
1420 1426 _(b'not removing %s: file is already untracked\n') % uipathfn(f)
1421 1427 )
1422 1428 bad.append(f)
1423 1429
1424 1430 for f in forget:
1425 1431 if ui.verbose or not m.exact(f):
1426 1432 ui.status(_(b'removing %s\n') % uipathfn(f))
1427 1433
1428 1434 # Need to lock because standin files are deleted then removed from the
1429 1435 # repository and we could race in-between.
1430 1436 with repo.wlock():
1431 1437 lfdirstate = lfutil.openlfdirstate(ui, repo)
1432 1438 for f in forget:
1433 1439 lfdirstate.set_untracked(f)
1434 1440 lfdirstate.write(repo.currenttransaction())
1435 1441 standins = [lfutil.standin(f) for f in forget]
1436 1442 for f in standins:
1437 1443 repo.wvfs.unlinkpath(f, ignoremissing=True)
1438 1444 rejected = repo[None].forget(standins)
1439 1445
1440 1446 bad.extend(f for f in rejected if f in m.files())
1441 1447 forgot.extend(f for f in forget if f not in rejected)
1442 1448 return bad, forgot
1443 1449
1444 1450
1445 1451 def _getoutgoings(repo, other, missing, addfunc):
1446 1452 """get pairs of filename and largefile hash in outgoing revisions
1447 1453 in 'missing'.
1448 1454
1449 1455 largefiles already existing on 'other' repository are ignored.
1450 1456
1451 1457 'addfunc' is invoked with each unique pair of filename and
1452 1458 largefile hash value.
1453 1459 """
1454 1460 knowns = set()
1455 1461 lfhashes = set()
1456 1462
1457 1463 def dedup(fn, lfhash):
1458 1464 k = (fn, lfhash)
1459 1465 if k not in knowns:
1460 1466 knowns.add(k)
1461 1467 lfhashes.add(lfhash)
1462 1468
1463 1469 lfutil.getlfilestoupload(repo, missing, dedup)
1464 1470 if lfhashes:
1465 1471 lfexists = storefactory.openstore(repo, other).exists(lfhashes)
1466 1472 for fn, lfhash in knowns:
1467 1473 if not lfexists[lfhash]: # lfhash doesn't exist on "other"
1468 1474 addfunc(fn, lfhash)
1469 1475
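# Usage sketch (assumption; mirrors the callers below):
#
#   pairs = []
#   _getoutgoings(repo, other, outgoing.missing,
#                 lambda fn, lfhash: pairs.append((fn, lfhash)))
#
# Only (fn, hash) pairs whose hash is absent from 'other' reach the
# callback, so 'pairs' is exactly what still needs uploading.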
1470 1476
1471 1477 def outgoinghook(ui, repo, other, opts, missing):
1472 1478 if opts.pop(b'large', None):
1473 1479 lfhashes = set()
1474 1480 if ui.debugflag:
1475 1481 toupload = {}
1476 1482
1477 1483 def addfunc(fn, lfhash):
1478 1484 if fn not in toupload:
1479 1485 toupload[fn] = [] # pytype: disable=unsupported-operands
1480 1486 toupload[fn].append(lfhash)
1481 1487 lfhashes.add(lfhash)
1482 1488
1483 1489 def showhashes(fn):
1484 1490 for lfhash in sorted(toupload[fn]):
1485 1491 ui.debug(b' %s\n' % lfhash)
1486 1492
1487 1493 else:
1488 1494 toupload = set()
1489 1495
1490 1496 def addfunc(fn, lfhash):
1491 1497 toupload.add(fn)
1492 1498 lfhashes.add(lfhash)
1493 1499
1494 1500 def showhashes(fn):
1495 1501 pass
1496 1502
1497 1503 _getoutgoings(repo, other, missing, addfunc)
1498 1504
1499 1505 if not toupload:
1500 1506 ui.status(_(b'largefiles: no files to upload\n'))
1501 1507 else:
1502 1508 ui.status(
1503 1509 _(b'largefiles to upload (%d entities):\n') % (len(lfhashes))
1504 1510 )
1505 1511 for file in sorted(toupload):
1506 1512 ui.status(lfutil.splitstandin(file) + b'\n')
1507 1513 showhashes(file)
1508 1514 ui.status(b'\n')
1509 1515
1510 1516
1511 1517 @eh.wrapcommand(
1512 1518 b'outgoing', opts=[(b'', b'large', None, _(b'display outgoing largefiles'))]
1513 1519 )
1514 1520 def _outgoingcmd(orig, *args, **kwargs):
1515 1521 # Nothing to do here other than add the extra help option - the hook above
1516 1522 # processes it.
1517 1523 return orig(*args, **kwargs)
1518 1524
1519 1525
1520 1526 def summaryremotehook(ui, repo, opts, changes):
1521 1527 largeopt = opts.get(b'large', False)
1522 1528 if changes is None:
1523 1529 if largeopt:
1524 1530 return (False, True) # only outgoing check is needed
1525 1531 else:
1526 1532 return (False, False)
1527 1533 elif largeopt:
1528 1534 url, branch, peer, outgoing = changes[1]
1529 1535 if peer is None:
1530 1536 # i18n: column positioning for "hg summary"
1531 1537 ui.status(_(b'largefiles: (no remote repo)\n'))
1532 1538 return
1533 1539
1534 1540 toupload = set()
1535 1541 lfhashes = set()
1536 1542
1537 1543 def addfunc(fn, lfhash):
1538 1544 toupload.add(fn)
1539 1545 lfhashes.add(lfhash)
1540 1546
1541 1547 _getoutgoings(repo, peer, outgoing.missing, addfunc)
1542 1548
1543 1549 if not toupload:
1544 1550 # i18n: column positioning for "hg summary"
1545 1551 ui.status(_(b'largefiles: (no files to upload)\n'))
1546 1552 else:
1547 1553 # i18n: column positioning for "hg summary"
1548 1554 ui.status(
1549 1555 _(b'largefiles: %d entities for %d files to upload\n')
1550 1556 % (len(lfhashes), len(toupload))
1551 1557 )
1552 1558
1553 1559
1554 1560 @eh.wrapcommand(
1555 1561 b'summary', opts=[(b'', b'large', None, _(b'display outgoing largefiles'))]
1556 1562 )
1557 1563 def overridesummary(orig, ui, repo, *pats, **opts):
1558 1564 with lfstatus(repo):
1559 1565 orig(ui, repo, *pats, **opts)
1560 1566
1561 1567
1562 1568 @eh.wrapfunction(scmutil, 'addremove')
1563 1569 def scmutiladdremove(
1564 1570 orig,
1565 1571 repo,
1566 1572 matcher,
1567 1573 prefix,
1568 1574 uipathfn,
1569 1575 opts=None,
1570 1576 open_tr=None,
1571 1577 ):
1572 1578 if opts is None:
1573 1579 opts = {}
1574 1580 if not lfutil.islfilesrepo(repo):
1575 1581 return orig(repo, matcher, prefix, uipathfn, opts, open_tr=open_tr)
1576 1582
1577 1583 # open the transaction and changing_files context
1578 1584 if open_tr is not None:
1579 1585 open_tr()
1580 1586
1581 1587 # Get the list of missing largefiles so we can remove them
1582 1588 with repo.dirstate.running_status(repo):
1583 1589 lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
1584 1590 unsure, s, mtime_boundary = lfdirstate.status(
1585 1591 matchmod.always(),
1586 1592 subrepos=[],
1587 1593 ignored=False,
1588 1594 clean=False,
1589 1595 unknown=False,
1590 1596 )
1591 1597
1592 1598 # Call into the normal remove code, but let the original addremove handle
1593 1599 # removing the standin. Monkey patching here makes sure we don't remove
1594 1600 # the standin in the largefiles code, preventing a very confused state
1595 1601 # later.
1596 1602 if s.deleted:
1597 1603 m = copy.copy(matcher)
1604 m._was_tampered_with = True
1598 1605
1599 1606 # The m._files and m._map attributes are not changed to the deleted list
1600 1607 # because that affects the m.exact() test, which in turn governs whether
1601 1608 # or not the file name is printed, and how. Simply limit the original
1602 1609 # matches to those in the deleted status list.
1603 1610 matchfn = m.matchfn
1604 1611 m.matchfn = lambda f: f in s.deleted and matchfn(f)
1605 1612
1606 1613 removelargefiles(
1607 1614 repo.ui,
1608 1615 repo,
1609 1616 True,
1610 1617 m,
1611 1618 uipathfn,
1612 1619 opts.get(b'dry_run'),
1613 1620 **pycompat.strkwargs(opts)
1614 1621 )
1615 1622 # Call into the normal add code, and any files that *should* be added as
1616 1623 # largefiles will be
1617 1624 added, bad = addlargefiles(
1618 1625 repo.ui, repo, True, matcher, uipathfn, **pycompat.strkwargs(opts)
1619 1626 )
1620 1627 # Now that we've handled largefiles, hand off to the original addremove
1621 1628 # function to take care of the rest. Make sure it doesn't do anything with
1622 1629 # largefiles by passing a matcher that will ignore them.
1623 1630 matcher = composenormalfilematcher(matcher, repo[None].manifest(), added)
1624 1631
1625 1632 return orig(repo, matcher, prefix, uipathfn, opts, open_tr=open_tr)
1626 1633
1627 1634
1628 1635 # Calling purge with --all will cause the largefiles to be deleted.
1629 1636 # Override repo.status to prevent this from happening.
1630 1637 @eh.wrapcommand(b'purge')
1631 1638 def overridepurge(orig, ui, repo, *dirs, **opts):
1632 1639 # XXX Monkey patching a repoview will not work. The assigned attribute will
1633 1640 # be set on the unfiltered repo, but we only look up attributes in the
1634 1641 # unfiltered repo if the lookup in the repoview object itself fails. As the
1635 1642 # method being monkey patched exists on the repoview class, that lookup will
1636 1643 # not fail. As a result, the original version will shadow the monkey patched
1637 1644 # one, defeating the monkey patch.
1638 1645 #
1639 1646 # As a workaround we use an unfiltered repo here. We should do something
1640 1647 # cleaner instead.
1641 1648 repo = repo.unfiltered()
1642 1649 oldstatus = repo.status
1643 1650
1644 1651 def overridestatus(
1645 1652 node1=b'.',
1646 1653 node2=None,
1647 1654 match=None,
1648 1655 ignored=False,
1649 1656 clean=False,
1650 1657 unknown=False,
1651 1658 listsubrepos=False,
1652 1659 ):
1653 1660 r = oldstatus(
1654 1661 node1, node2, match, ignored, clean, unknown, listsubrepos
1655 1662 )
1656 1663 lfdirstate = lfutil.openlfdirstate(ui, repo)
1657 1664 unknown = [
1658 1665 f for f in r.unknown if not lfdirstate.get_entry(f).any_tracked
1659 1666 ]
1660 1667 ignored = [
1661 1668 f for f in r.ignored if not lfdirstate.get_entry(f).any_tracked
1662 1669 ]
1663 1670 return scmutil.status(
1664 1671 r.modified, r.added, r.removed, r.deleted, unknown, ignored, r.clean
1665 1672 )
1666 1673
1667 1674 repo.status = overridestatus
1668 1675 orig(ui, repo, *dirs, **opts)
1669 1676 repo.status = oldstatus
1670 1677
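The XXX comment above describes a plain Python attribute-lookup pitfall: instance `__getattr__` is only consulted after the class lookup fails, so a method defined on the proxy class shadows anything patched onto the proxied object. A toy reproduction (these classes are hypothetical, not Mercurial's):

    class Unfiltered:
        def status(self):
            return 'original'

    class RepoView:
        def __init__(self, unfiltered):
            self._unfiltered = unfiltered

        def status(self):  # found on the class first...
            return 'view'

        def __getattr__(self, name):  # ...so this fallback never runs for status()
            return getattr(self._unfiltered, name)

    view = RepoView(Unfiltered())
    view._unfiltered.status = lambda: 'patched'  # the attempted monkey patch
    assert view.status() == 'view'  # shadowed: the patch is never reached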
1671 1678
1672 1679 @eh.wrapcommand(b'rollback')
1673 1680 def overriderollback(orig, ui, repo, **opts):
1674 1681 with repo.wlock():
1675 1682 before = repo.dirstate.parents()
1676 1683 orphans = {
1677 1684 f
1678 1685 for f in repo.dirstate
1679 1686 if lfutil.isstandin(f) and not repo.dirstate.get_entry(f).removed
1680 1687 }
1681 1688 result = orig(ui, repo, **opts)
1682 1689 after = repo.dirstate.parents()
1683 1690 if before == after:
1684 1691 return result # no need to restore standins
1685 1692
1686 1693 pctx = repo[b'.']
1687 1694 for f in repo.dirstate:
1688 1695 if lfutil.isstandin(f):
1689 1696 orphans.discard(f)
1690 1697 if repo.dirstate.get_entry(f).removed:
1691 1698 repo.wvfs.unlinkpath(f, ignoremissing=True)
1692 1699 elif f in pctx:
1693 1700 fctx = pctx[f]
1694 1701 repo.wwrite(f, fctx.data(), fctx.flags())
1695 1702 else:
1696 1703 # content of standin is not so important in 'a',
1697 1704 # 'm' or 'n' (coming from the 2nd parent) cases
1698 1705 lfutil.writestandin(repo, f, b'', False)
1699 1706 for standin in orphans:
1700 1707 repo.wvfs.unlinkpath(standin, ignoremissing=True)
1701 1708
1702 1709 return result
1703 1710
1704 1711
1705 1712 @eh.wrapcommand(b'transplant', extension=b'transplant')
1706 1713 def overridetransplant(orig, ui, repo, *revs, **opts):
1707 1714 resuming = opts.get('continue')
1708 1715 repo._lfcommithooks.append(lfutil.automatedcommithook(resuming))
1709 1716 repo._lfstatuswriters.append(lambda *msg, **opts: None)
1710 1717 try:
1711 1718 result = orig(ui, repo, *revs, **opts)
1712 1719 finally:
1713 1720 repo._lfstatuswriters.pop()
1714 1721 repo._lfcommithooks.pop()
1715 1722 return result
1716 1723
1717 1724
1718 1725 @eh.wrapcommand(b'cat')
1719 1726 def overridecat(orig, ui, repo, file1, *pats, **opts):
1720 1727 ctx = logcmdutil.revsingle(repo, opts.get('rev'))
1721 1728 err = 1
1722 1729 notbad = set()
1723 1730 m = scmutil.match(ctx, (file1,) + pats, pycompat.byteskwargs(opts))
1731 m._was_tampered_with = True
1724 1732 origmatchfn = m.matchfn
1725 1733
1726 1734 def lfmatchfn(f):
1727 1735 if origmatchfn(f):
1728 1736 return True
1729 1737 lf = lfutil.splitstandin(f)
1730 1738 if lf is None:
1731 1739 return False
1732 1740 notbad.add(lf)
1733 1741 return origmatchfn(lf)
1734 1742
1735 1743 m.matchfn = lfmatchfn
1736 1744 origbadfn = m.bad
1737 1745
1738 1746 def lfbadfn(f, msg):
1739 1747 if f not in notbad:
1740 1748 origbadfn(f, msg)
1741 1749
1742 1750 m.bad = lfbadfn
1743 1751
1744 1752 origvisitdirfn = m.visitdir
1745 1753
1746 1754 def lfvisitdirfn(dir):
1747 1755 if dir == lfutil.shortname:
1748 1756 return True
1749 1757 ret = origvisitdirfn(dir)
1750 1758 if ret:
1751 1759 return ret
1752 1760 lf = lfutil.splitstandin(dir)
1753 1761 if lf is None:
1754 1762 return False
1755 1763 return origvisitdirfn(lf)
1756 1764
1757 1765 m.visitdir = lfvisitdirfn
1758 1766
1759 1767 for f in ctx.walk(m):
1760 1768 with cmdutil.makefileobj(ctx, opts.get('output'), pathname=f) as fp:
1761 1769 lf = lfutil.splitstandin(f)
1762 1770 if lf is None or origmatchfn(f):
1763 1771 # duplicating unreachable code from commands.cat
1764 1772 data = ctx[f].data()
1765 1773 if opts.get('decode'):
1766 1774 data = repo.wwritedata(f, data)
1767 1775 fp.write(data)
1768 1776 else:
1769 1777 hash = lfutil.readasstandin(ctx[f])
1770 1778 if not lfutil.inusercache(repo.ui, hash):
1771 1779 store = storefactory.openstore(repo)
1772 1780 success, missing = store.get([(lf, hash)])
1773 1781 if len(success) != 1:
1774 1782 raise error.Abort(
1775 1783 _(
1776 1784 b'largefile %s is not in cache and could not be '
1777 1785 b'downloaded'
1778 1786 )
1779 1787 % lf
1780 1788 )
1781 1789 path = lfutil.usercachepath(repo.ui, hash)
1782 1790 with open(path, b"rb") as fpin:
1783 1791 for chunk in util.filechunkiter(fpin):
1784 1792 fp.write(chunk)
1785 1793 err = 0
1786 1794 return err
1787 1795
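Most of these wrappers pivot on the standin mapping: every largefile f is shadowed by a small standin under the .hglf/ directory (lfutil.shortname), and lfutil.splitstandin inverts lfutil.standin. A rough model of the two helpers, not the real lfutil code:

    def standin(f):
        # largefile name -> standin name, e.g. b'big.bin' -> b'.hglf/big.bin'
        return b'.hglf/' + f

    def splitstandin(f):
        # standin name -> largefile name, or None if f is not a standin
        if f.startswith(b'.hglf/'):
            return f[len(b'.hglf/'):]
        return None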
1788 1796
1789 1797 @eh.wrapfunction(merge, '_update')
1790 1798 def mergeupdate(orig, repo, node, branchmerge, force, *args, **kwargs):
1791 1799 matcher = kwargs.get('matcher', None)
1792 1800 # note if this is a partial update
1793 1801 partial = matcher and not matcher.always()
1794 1802 with repo.wlock(), repo.dirstate.changing_parents(repo):
1795 1803 # branch | | |
1796 1804 # merge | force | partial | action
1797 1805 # -------+-------+---------+--------------
1798 1806 # x | x | x | linear-merge
1799 1807 # o | x | x | branch-merge
1800 1808 # x | o | x | overwrite (as clean update)
1801 1809 # o | o | x | force-branch-merge (*1)
1802 1810 # x | x | o | (*)
1803 1811 # o | x | o | (*)
1804 1812 # x | o | o | overwrite (as revert)
1805 1813 # o | o | o | (*)
1806 1814 #
1807 1815 # (*) don't care
1808 1816 # (*1) deprecated, but used internally (e.g.: "rebase --collapse")
1809 1817 with repo.dirstate.running_status(repo):
1810 1818 lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
1811 1819 unsure, s, mtime_boundary = lfdirstate.status(
1812 1820 matchmod.always(),
1813 1821 subrepos=[],
1814 1822 ignored=False,
1815 1823 clean=True,
1816 1824 unknown=False,
1817 1825 )
1818 1826 oldclean = set(s.clean)
1819 1827 pctx = repo[b'.']
1820 1828 dctx = repo[node]
1821 1829 for lfile in unsure + s.modified:
1822 1830 lfileabs = repo.wvfs.join(lfile)
1823 1831 if not repo.wvfs.exists(lfileabs):
1824 1832 continue
1825 1833 lfhash = lfutil.hashfile(lfileabs)
1826 1834 standin = lfutil.standin(lfile)
1827 1835 lfutil.writestandin(
1828 1836 repo, standin, lfhash, lfutil.getexecutable(lfileabs)
1829 1837 )
1830 1838 if standin in pctx and lfhash == lfutil.readasstandin(
1831 1839 pctx[standin]
1832 1840 ):
1833 1841 oldclean.add(lfile)
1834 1842 for lfile in s.added:
1835 1843 fstandin = lfutil.standin(lfile)
1836 1844 if fstandin not in dctx:
1837 1845 # in this case, content of standin file is meaningless
1838 1846 # (in dctx, lfile is unknown, or normal file)
1839 1847 continue
1840 1848 lfutil.updatestandin(repo, lfile, fstandin)
1841 1849 # mark all clean largefiles as dirty, just in case the update gets
1842 1850 # interrupted before largefiles and lfdirstate are synchronized
1843 1851 for lfile in oldclean:
1844 1852 entry = lfdirstate.get_entry(lfile)
1845 1853 lfdirstate.hacky_extension_update_file(
1846 1854 lfile,
1847 1855 wc_tracked=entry.tracked,
1848 1856 p1_tracked=entry.p1_tracked,
1849 1857 p2_info=entry.p2_info,
1850 1858 possibly_dirty=True,
1851 1859 )
1852 1860 lfdirstate.write(repo.currenttransaction())
1853 1861
1854 1862 oldstandins = lfutil.getstandinsstate(repo)
1855 1863 wc = kwargs.get('wc')
1856 1864 if wc and wc.isinmemory():
1857 1865 # largefiles is not a good candidate for in-memory merge (large
1858 1866 # files, custom dirstate, matcher usage).
1859 1867 raise error.ProgrammingError(
1860 1868 b'largefiles is not compatible with in-memory merge'
1861 1869 )
1862 1870 result = orig(repo, node, branchmerge, force, *args, **kwargs)
1863 1871
1864 1872 newstandins = lfutil.getstandinsstate(repo)
1865 1873 filelist = lfutil.getlfilestoupdate(oldstandins, newstandins)
1866 1874
1867 1875 # to avoid leaving all largefiles dirty (and thus rehashing them), mark
1868 1876 # all the ones that didn't change as clean
1869 1877 for lfile in oldclean.difference(filelist):
1870 1878 lfdirstate.update_file(lfile, p1_tracked=True, wc_tracked=True)
1871 1879
1872 1880 if branchmerge or force or partial:
1873 1881 filelist.extend(s.deleted + s.removed)
1874 1882
1875 1883 lfcommands.updatelfiles(
1876 1884 repo.ui, repo, filelist=filelist, normallookup=partial
1877 1885 )
1878 1886
1879 1887 return result
1880 1888
1881 1889
1882 1890 @eh.wrapfunction(scmutil, 'marktouched')
1883 1891 def scmutilmarktouched(orig, repo, files, *args, **kwargs):
1884 1892 result = orig(repo, files, *args, **kwargs)
1885 1893
1886 1894 filelist = []
1887 1895 for f in files:
1888 1896 lf = lfutil.splitstandin(f)
1889 1897 if lf is not None:
1890 1898 filelist.append(lf)
1891 1899 if filelist:
1892 1900 lfcommands.updatelfiles(
1893 1901 repo.ui,
1894 1902 repo,
1895 1903 filelist=filelist,
1896 1904 printmessage=False,
1897 1905 normallookup=True,
1898 1906 )
1899 1907
1900 1908 return result
1901 1909
1902 1910
1903 1911 @eh.wrapfunction(upgrade_actions, 'preservedrequirements')
1904 1912 @eh.wrapfunction(upgrade_actions, 'supporteddestrequirements')
1905 1913 def upgraderequirements(orig, repo):
1906 1914 reqs = orig(repo)
1907 1915 if b'largefiles' in repo.requirements:
1908 1916 reqs.add(b'largefiles')
1909 1917 return reqs
1910 1918
1911 1919
1912 1920 _lfscheme = b'largefile://'
1913 1921
1914 1922
1915 1923 @eh.wrapfunction(urlmod, 'open')
1916 1924 def openlargefile(orig, ui, url_, data=None, **kwargs):
1917 1925 if url_.startswith(_lfscheme):
1918 1926 if data:
1919 1927 msg = b"cannot use data on a 'largefile://' url"
1920 1928 raise error.ProgrammingError(msg)
1921 1929 lfid = url_[len(_lfscheme) :]
1922 1930 return storefactory.getlfile(ui, lfid)
1923 1931 else:
1924 1932 return orig(ui, url_, data=data, **kwargs)
@@ -1,1706 +1,1712 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import bisect
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
27 27 rustmod = policy.importrust('dirstate')
28 28
29 29 allpatternkinds = (
30 30 b're',
31 31 b'glob',
32 32 b'path',
33 33 b'filepath',
34 34 b'relglob',
35 35 b'relpath',
36 36 b'relre',
37 37 b'rootglob',
38 38 b'listfile',
39 39 b'listfile0',
40 40 b'set',
41 41 b'include',
42 42 b'subinclude',
43 43 b'rootfilesin',
44 44 )
45 45 cwdrelativepatternkinds = (b'relpath', b'glob')
46 46
47 47 propertycache = util.propertycache
48 48
49 49
50 50 def _rematcher(regex):
51 51 """compile the regexp with the best available regexp engine and return a
52 52 matcher function"""
53 53 m = util.re.compile(regex)
54 54 try:
55 55 # slightly faster, provided by facebook's re2 bindings
56 56 return m.test_match
57 57 except AttributeError:
58 58 return m.match
59 59
60 60
61 61 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
62 62 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
63 63 matchers = []
64 64 other = []
65 65
66 66 for kind, pat, source in kindpats:
67 67 if kind == b'set':
68 68 if ctx is None:
69 69 raise error.ProgrammingError(
70 70 b"fileset expression with no context"
71 71 )
72 72 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
73 73
74 74 if listsubrepos:
75 75 for subpath in ctx.substate:
76 76 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
77 77 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
78 78 matchers.append(pm)
79 79
80 80 continue
81 81 other.append((kind, pat, source))
82 82 return matchers, other
83 83
84 84
85 85 def _expandsubinclude(kindpats, root):
86 86 """Returns the list of subinclude matcher args and the kindpats without the
87 87 subincludes in it."""
88 88 relmatchers = []
89 89 other = []
90 90
91 91 for kind, pat, source in kindpats:
92 92 if kind == b'subinclude':
93 93 sourceroot = pathutil.dirname(util.normpath(source))
94 94 pat = util.pconvert(pat)
95 95 path = pathutil.join(sourceroot, pat)
96 96
97 97 newroot = pathutil.dirname(path)
98 98 matcherargs = (newroot, b'', [], [b'include:%s' % path])
99 99
100 100 prefix = pathutil.canonpath(root, root, newroot)
101 101 if prefix:
102 102 prefix += b'/'
103 103 relmatchers.append((prefix, matcherargs))
104 104 else:
105 105 other.append((kind, pat, source))
106 106
107 107 return relmatchers, other
108 108
109 109
110 110 def _kindpatsalwaysmatch(kindpats):
111 111 """Checks whether the kindspats match everything, as e.g.
112 112 'relpath:.' does.
113 113 """
114 114 for kind, pat, source in kindpats:
115 115 if pat != b'' or kind not in [b'relpath', b'glob']:
116 116 return False
117 117 return True
118 118
119 119
120 120 def _buildkindpatsmatcher(
121 121 matchercls,
122 122 root,
123 123 cwd,
124 124 kindpats,
125 125 ctx=None,
126 126 listsubrepos=False,
127 127 badfn=None,
128 128 ):
129 129 matchers = []
130 130 fms, kindpats = _expandsets(
131 131 cwd,
132 132 kindpats,
133 133 ctx=ctx,
134 134 listsubrepos=listsubrepos,
135 135 badfn=badfn,
136 136 )
137 137 if kindpats:
138 138 m = matchercls(root, kindpats, badfn=badfn)
139 139 matchers.append(m)
140 140 if fms:
141 141 matchers.extend(fms)
142 142 if not matchers:
143 143 return nevermatcher(badfn=badfn)
144 144 if len(matchers) == 1:
145 145 return matchers[0]
146 146 return unionmatcher(matchers)
147 147
148 148
149 149 def match(
150 150 root,
151 151 cwd,
152 152 patterns=None,
153 153 include=None,
154 154 exclude=None,
155 155 default=b'glob',
156 156 auditor=None,
157 157 ctx=None,
158 158 listsubrepos=False,
159 159 warn=None,
160 160 badfn=None,
161 161 icasefs=False,
162 162 ):
163 163 r"""build an object to match a set of file patterns
164 164
165 165 arguments:
166 166 root - the canonical root of the tree you're matching against
167 167 cwd - the current working directory, if relevant
168 168 patterns - patterns to find
169 169 include - patterns to include (unless they are excluded)
170 170 exclude - patterns to exclude (even if they are included)
171 171 default - if a pattern in patterns has no explicit type, assume this one
172 172 auditor - optional path auditor
173 173 ctx - optional changecontext
174 174 listsubrepos - if True, recurse into subrepositories
175 175 warn - optional function used for printing warnings
176 176 badfn - optional bad() callback for this matcher instead of the default
177 177 icasefs - make a matcher for wdir on case insensitive filesystems, which
178 178 normalizes the given patterns to the case in the filesystem
179 179
180 180 a pattern is one of:
181 181 'glob:<glob>' - a glob relative to cwd
182 182 're:<regexp>' - a regular expression
183 183 'path:<path>' - a path relative to repository root, which is matched
184 184 recursively
185 185 'filepath:<path>' - an exact path to a single file, relative to the
186 186 repository root
187 187 'rootfilesin:<path>' - a path relative to repository root, which is
188 188 matched non-recursively (will not match subdirectories)
189 189 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
190 190 'relpath:<path>' - a path relative to cwd
191 191 'relre:<regexp>' - a regexp that needn't match the start of a name
192 192 'set:<fileset>' - a fileset expression
193 193 'include:<path>' - a file of patterns to read and include
194 194 'subinclude:<path>' - a file of patterns to match against files under
195 195 the same directory
196 196 '<something>' - a pattern of the specified default type
197 197
198 198 >>> def _match(root, *args, **kwargs):
199 199 ... return match(util.localpath(root), *args, **kwargs)
200 200
201 201 Usually a patternmatcher is returned:
202 202 >>> _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
203 203 <patternmatcher patterns='[^/]*\\.py$|foo/a(?:/|$)|.*\\.c$'>
204 204
205 205 Combining 'patterns' with 'include' (resp. 'exclude') gives an
206 206 intersectionmatcher (resp. a differencematcher):
207 207 >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib']))
208 208 <class 'mercurial.match.intersectionmatcher'>
209 209 >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build']))
210 210 <class 'mercurial.match.differencematcher'>
211 211
212 212 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
213 213 >>> _match(b'/foo', b'.', [])
214 214 <alwaysmatcher>
215 215
216 216 The 'default' argument determines which kind of pattern is assumed if a
217 217 pattern has no prefix:
218 218 >>> _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
219 219 <patternmatcher patterns='.*\\.c$'>
220 220 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
221 221 <patternmatcher patterns='main\\.py(?:/|$)'>
222 222 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
223 223 <patternmatcher patterns='main.py'>
224 224
225 225 The primary use of matchers is to check whether a value (usually a file
226 226 name) matches against one of the patterns given at initialization. There
227 227 are two ways of doing this check.
228 228
229 229 >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])
230 230
231 231 1. Calling the matcher with a file name returns True if any pattern
232 232 matches that file name:
233 233 >>> m(b'a')
234 234 True
235 235 >>> m(b'main.c')
236 236 True
237 237 >>> m(b'test.py')
238 238 False
239 239
240 240 2. Using the exact() method only returns True if the file name matches one
241 241 of the exact patterns (i.e. not re: or glob: patterns):
242 242 >>> m.exact(b'a')
243 243 True
244 244 >>> m.exact(b'main.c')
245 245 False
246 246 """
247 247 assert os.path.isabs(root)
248 248 cwd = os.path.join(root, util.localpath(cwd))
249 249 normalize = _donormalize
250 250 if icasefs:
251 251 dirstate = ctx.repo().dirstate
252 252 dsnormalize = dirstate.normalize
253 253
254 254 def normalize(patterns, default, root, cwd, auditor, warn):
255 255 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
256 256 kindpats = []
257 257 for kind, pats, source in kp:
258 258 if kind not in (b're', b'relre'): # regex can't be normalized
259 259 p = pats
260 260 pats = dsnormalize(pats)
261 261
262 262 # Preserve the original to handle a case-only rename.
263 263 if p != pats and p in dirstate:
264 264 kindpats.append((kind, p, source))
265 265
266 266 kindpats.append((kind, pats, source))
267 267 return kindpats
268 268
269 269 if patterns:
270 270 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
271 271 if _kindpatsalwaysmatch(kindpats):
272 272 m = alwaysmatcher(badfn)
273 273 else:
274 274 m = _buildkindpatsmatcher(
275 275 patternmatcher,
276 276 root,
277 277 cwd,
278 278 kindpats,
279 279 ctx=ctx,
280 280 listsubrepos=listsubrepos,
281 281 badfn=badfn,
282 282 )
283 283 else:
284 284 # It's a little strange that no patterns means to match everything.
285 285 # Consider changing this to match nothing (probably using nevermatcher).
286 286 m = alwaysmatcher(badfn)
287 287
288 288 if include:
289 289 kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
290 290 im = _buildkindpatsmatcher(
291 291 includematcher,
292 292 root,
293 293 cwd,
294 294 kindpats,
295 295 ctx=ctx,
296 296 listsubrepos=listsubrepos,
297 297 badfn=None,
298 298 )
299 299 m = intersectmatchers(m, im)
300 300 if exclude:
301 301 kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
302 302 em = _buildkindpatsmatcher(
303 303 includematcher,
304 304 root,
305 305 cwd,
306 306 kindpats,
307 307 ctx=ctx,
308 308 listsubrepos=listsubrepos,
309 309 badfn=None,
310 310 )
311 311 m = differencematcher(m, em)
312 312 return m
313 313
314 314
315 315 def exact(files, badfn=None):
316 316 return exactmatcher(files, badfn=badfn)
317 317
318 318
319 319 def always(badfn=None):
320 320 return alwaysmatcher(badfn)
321 321
322 322
323 323 def never(badfn=None):
324 324 return nevermatcher(badfn)
325 325
326 326
327 327 def badmatch(match, badfn):
328 328 """Make a copy of the given matcher, replacing its bad method with the given
329 329 one.
330 330 """
331 331 m = copy.copy(match)
332 332 m.bad = badfn
333 333 return m
334 334
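A hedged usage sketch: badmatch is useful when a caller wants to collect, rather than report, the names a walk could not resolve. m0 stands for any matcher built by this module:

    missing = []
    m = badmatch(m0, lambda f, msg: missing.append(f))
    # walking with m now records unresolvable names in `missing`
    # instead of invoking m0's original bad() callback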
335 335
336 336 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
337 337 """Convert 'kind:pat' from the patterns list to tuples with kind and
338 338 normalized and rooted patterns and with listfiles expanded."""
339 339 kindpats = []
340 340 kinds_to_normalize = (
341 341 b'relglob',
342 342 b'path',
343 343 b'filepath',
344 344 b'rootfilesin',
345 345 b'rootglob',
346 346 )
347 347
348 348 for kind, pat in [_patsplit(p, default) for p in patterns]:
349 349 if kind in cwdrelativepatternkinds:
350 350 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
351 351 elif kind in kinds_to_normalize:
352 352 pat = util.normpath(pat)
353 353 elif kind in (b'listfile', b'listfile0'):
354 354 try:
355 355 files = util.readfile(pat)
356 356 if kind == b'listfile0':
357 357 files = files.split(b'\0')
358 358 else:
359 359 files = files.splitlines()
360 360 files = [f for f in files if f]
361 361 except EnvironmentError:
362 362 raise error.Abort(_(b"unable to read file list (%s)") % pat)
363 363 for k, p, source in _donormalize(
364 364 files, default, root, cwd, auditor, warn
365 365 ):
366 366 kindpats.append((k, p, pat))
367 367 continue
368 368 elif kind == b'include':
369 369 try:
370 370 fullpath = os.path.join(root, util.localpath(pat))
371 371 includepats = readpatternfile(fullpath, warn)
372 372 for k, p, source in _donormalize(
373 373 includepats, default, root, cwd, auditor, warn
374 374 ):
375 375 kindpats.append((k, p, source or pat))
376 376 except error.Abort as inst:
377 377 raise error.Abort(
378 378 b'%s: %s'
379 379 % (
380 380 pat,
381 381 inst.message,
382 382 )
383 383 )
384 384 except IOError as inst:
385 385 if warn:
386 386 warn(
387 387 _(b"skipping unreadable pattern file '%s': %s\n")
388 388 % (pat, stringutil.forcebytestr(inst.strerror))
389 389 )
390 390 continue
391 391 # else: re or relre - which cannot be normalized
392 392 kindpats.append((kind, pat, b''))
393 393 return kindpats
394 394
395 395
396 396 class basematcher:
397 397 def __init__(self, badfn=None):
398 self._was_tampered_with = False
398 399 if badfn is not None:
399 400 self.bad = badfn
400 401
402 def was_tampered_with(self):
403 # [_was_tampered_with] is used to track whether extensions changed the
404 # matcher behavior (crazy stuff!), so that we can disable the rust fast path.
405 return self._was_tampered_with
406
401 407 def __call__(self, fn):
402 408 return self.matchfn(fn)
403 409
404 410 # Callbacks related to how the matcher is used by dirstate.walk.
405 411 # Subscribers to these events must monkeypatch the matcher object.
406 412 def bad(self, f, msg):
407 413 """Callback from dirstate.walk for each explicit file that can't be
408 414 found/accessed, with an error message."""
409 415
410 416 # If a traversedir is set, it will be called when a directory discovered
411 417 # by recursive traversal is visited.
412 418 traversedir = None
413 419
414 420 @propertycache
415 421 def _files(self):
416 422 return []
417 423
418 424 def files(self):
419 425 """Explicitly listed files or patterns or roots:
420 426 if no patterns or .always(): empty list,
421 427 if exact: list exact files,
422 428 if not .anypats(): list all files and dirs,
423 429 else: optimal roots"""
424 430 return self._files
425 431
426 432 @propertycache
427 433 def _fileset(self):
428 434 return set(self._files)
429 435
430 436 def exact(self, f):
431 437 '''Returns True if f is in .files().'''
432 438 return f in self._fileset
433 439
434 440 def matchfn(self, f):
435 441 return False
436 442
437 443 def visitdir(self, dir):
438 444 """Decides whether a directory should be visited based on whether it
439 445 has potential matches in it or one of its subdirectories. This is
440 446 based on the match's primary, included, and excluded patterns.
441 447
442 448 Returns the string 'all' if the given directory and all subdirectories
443 449 should be visited. Otherwise returns True or False indicating whether
444 450 the given directory should be visited.
445 451 """
446 452 return True
447 453
448 454 def visitchildrenset(self, dir):
449 455 """Decides whether a directory should be visited based on whether it
450 456 has potential matches in it or one of its subdirectories, and
451 457 potentially lists which subdirectories of that directory should be
452 458 visited. This is based on the match's primary, included, and excluded
453 459 patterns.
454 460
455 461 This function is very similar to 'visitdir', and the following mapping
456 462 can be applied:
457 463
458 464 visitdir | visitchildrenset
459 465 ----------+-------------------
460 466 False | set()
461 467 'all' | 'all'
462 468 True | 'this' OR non-empty set of subdirs -or files- to visit
463 469
464 470 Example:
465 471 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
466 472 the following values (assuming the implementation of visitchildrenset
467 473 is capable of recognizing this; some implementations are not).
468 474
469 475 '' -> {'foo', 'qux'}
470 476 'baz' -> set()
471 477 'foo' -> {'bar'}
472 478 # Ideally this would be 'all', but since the prefix nature of matchers
473 479 # is applied to the entire matcher, we have to downgrade this to
474 480 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
475 481 # in.
476 482 'foo/bar' -> 'this'
477 483 'qux' -> 'this'
478 484
479 485 Important:
480 486 Most matchers do not know if they're representing files or
481 487 directories. They see ['path:dir/f'] and don't know whether 'f' is a
482 488 file or a directory, so visitchildrenset('dir') for most matchers will
483 489 return {'f'}, but if the matcher knows it's a file (like exactmatcher
484 490 does), it may return 'this'. Do not rely on the return being a set
485 491 indicating that there are no files in this dir to investigate (or
486 492 equivalently, that it will always return 'this' if there are files
487 493 to investigate in 'dir').
488 494 """
489 495 return b'this'
490 496
491 497 def always(self):
492 498 """Matcher will match everything and .files() will be empty --
493 499 optimization might be possible."""
494 500 return False
495 501
496 502 def isexact(self):
497 503 """Matcher will match exactly the list of files in .files() --
498 504 optimization might be possible."""
499 505 return False
500 506
501 507 def prefix(self):
502 508 """Matcher will match the paths in .files() recursively --
503 509 optimization might be possible."""
504 510 return False
505 511
506 512 def anypats(self):
507 513 """None of .always(), .isexact(), and .prefix() is true --
508 514 optimizations will be difficult."""
509 515 return not self.always() and not self.isexact() and not self.prefix()
510 516
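To make the visitdir/visitchildrenset contract above concrete, here is a hypothetical basematcher subclass that matches everything under foo/ and implements both callbacks consistently with the mapping table:

    class onlyfoomatcher(basematcher):
        '''Hypothetical: matches every file under foo/.'''

        def matchfn(self, f):
            return f.startswith(b'foo/')

        def visitdir(self, dir):
            if dir == b'':
                return True  # foo/ lives below the root
            if dir == b'foo' or dir.startswith(b'foo/'):
                return b'all'  # everything below here matches
            return False

        def visitchildrenset(self, dir):
            if dir == b'':
                return {b'foo'}  # only 'foo' can contain matches
            v = self.visitdir(dir)
            return v if v == b'all' else set()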
511 517
512 518 class alwaysmatcher(basematcher):
513 519 '''Matches everything.'''
514 520
515 521 def __init__(self, badfn=None):
516 522 super(alwaysmatcher, self).__init__(badfn)
517 523
518 524 def always(self):
519 525 return True
520 526
521 527 def matchfn(self, f):
522 528 return True
523 529
524 530 def visitdir(self, dir):
525 531 return b'all'
526 532
527 533 def visitchildrenset(self, dir):
528 534 return b'all'
529 535
530 536 def __repr__(self):
531 537 return r'<alwaysmatcher>'
532 538
533 539
534 540 class nevermatcher(basematcher):
535 541 '''Matches nothing.'''
536 542
537 543 def __init__(self, badfn=None):
538 544 super(nevermatcher, self).__init__(badfn)
539 545
540 546 # It's a little weird to say that the nevermatcher is an exact matcher
541 547 # or a prefix matcher, but it seems to make sense to let callers take
542 548 # fast paths based on either. There will be no exact matches, nor any
543 549 # prefixes (files() returns []), so fast paths iterating over them should
544 550 # be efficient (and correct).
545 551 def isexact(self):
546 552 return True
547 553
548 554 def prefix(self):
549 555 return True
550 556
551 557 def visitdir(self, dir):
552 558 return False
553 559
554 560 def visitchildrenset(self, dir):
555 561 return set()
556 562
557 563 def __repr__(self):
558 564 return r'<nevermatcher>'
559 565
560 566
561 567 class predicatematcher(basematcher):
562 568 """A matcher adapter for a simple boolean function"""
563 569
564 570 def __init__(self, predfn, predrepr=None, badfn=None):
565 571 super(predicatematcher, self).__init__(badfn)
566 572 self.matchfn = predfn
567 573 self._predrepr = predrepr
568 574
569 575 @encoding.strmethod
570 576 def __repr__(self):
571 577 s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
572 578 self.matchfn
573 579 )
574 580 return b'<predicatematcher pred=%s>' % s
575 581
576 582
577 583 def path_or_parents_in_set(path, prefix_set):
578 584 """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
579 585 l = len(prefix_set)
580 586 if l == 0:
581 587 return False
582 588 if path in prefix_set:
583 589 return True
584 590 # If there are more than 5 paths in prefix_set, it's *probably* quicker to
585 591 # "walk up" the directory hierarchy instead, with the assumption that most
586 592 # directory hierarchies are relatively shallow and hash lookup is cheap.
587 593 if l > 5:
588 594 return any(
589 595 parentdir in prefix_set for parentdir in pathutil.finddirs(path)
590 596 )
591 597
592 598 # FIXME: Ideally we'd never get to this point if this is the case - we'd
593 599 # recognize ourselves as an 'always' matcher and skip this.
594 600 if b'' in prefix_set:
595 601 return True
596 602
597 603 sl = ord(b'/')
598 604
599 605 # We already checked that path isn't in prefix_set exactly, so
600 606 # `path[len(pf)]` should never raise IndexError.
601 607 return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set)
602 608
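Expected behaviour of the helper above, as assertions (paths chosen for illustration):

    assert path_or_parents_in_set(b'a/b/c', {b'a'})       # parent directory hit
    assert path_or_parents_in_set(b'a/b/c', {b'a/b/c'})   # exact hit
    assert not path_or_parents_in_set(b'a/bc', {b'a/b'})  # no '/' boundary
    assert not path_or_parents_in_set(b'a/b/c', set())    # empty prefix set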
603 609
604 610 class patternmatcher(basematcher):
605 611 r"""Matches a set of (kind, pat, source) against a 'root' directory.
606 612
607 613 >>> kindpats = [
608 614 ... (b're', br'.*\.c$', b''),
609 615 ... (b'path', b'foo/a', b''),
610 616 ... (b'relpath', b'b', b''),
611 617 ... (b'glob', b'*.h', b''),
612 618 ... ]
613 619 >>> m = patternmatcher(b'foo', kindpats)
614 620 >>> m(b'main.c') # matches re:.*\.c$
615 621 True
616 622 >>> m(b'b.txt')
617 623 False
618 624 >>> m(b'foo/a') # matches path:foo/a
619 625 True
620 626 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
621 627 False
622 628 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
623 629 True
624 630 >>> m(b'lib.h') # matches glob:*.h
625 631 True
626 632
627 633 >>> m.files()
628 634 [b'', b'foo/a', b'', b'b']
629 635 >>> m.exact(b'foo/a')
630 636 True
631 637 >>> m.exact(b'b')
632 638 True
633 639 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
634 640 False
635 641 """
636 642
637 643 def __init__(self, root, kindpats, badfn=None):
638 644 super(patternmatcher, self).__init__(badfn)
639 645 kindpats.sort()
640 646
641 647 roots, dirs, parents = _rootsdirsandparents(kindpats)
642 648 self._files = _explicitfiles(kindpats)
643 649 self._dirs_explicit = set(dirs)
644 650 self._dirs = parents
645 651 self._prefix = _prefix(kindpats)
646 652 self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)
647 653
648 654 def matchfn(self, fn):
649 655 if fn in self._fileset:
650 656 return True
651 657 return self._matchfn(fn)
652 658
653 659 def visitdir(self, dir):
654 660 if self._prefix and dir in self._fileset:
655 661 return b'all'
656 662 return (
657 663 dir in self._dirs
658 664 or path_or_parents_in_set(dir, self._fileset)
659 665 or path_or_parents_in_set(dir, self._dirs_explicit)
660 666 )
661 667
662 668 def visitchildrenset(self, dir):
663 669 ret = self.visitdir(dir)
664 670 if ret is True:
665 671 return b'this'
666 672 elif not ret:
667 673 return set()
668 674 assert ret == b'all'
669 675 return b'all'
670 676
671 677 def prefix(self):
672 678 return self._prefix
673 679
674 680 @encoding.strmethod
675 681 def __repr__(self):
676 682 return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
677 683
678 684
679 685 # This is basically a reimplementation of pathutil.dirs that stores the
680 686 # children instead of just a count of them, plus a small optional optimization
681 687 # to avoid some directories we don't need.
682 688 class _dirchildren:
683 689 def __init__(self, paths, onlyinclude=None):
684 690 self._dirs = {}
685 691 self._onlyinclude = onlyinclude or []
686 692 addpath = self.addpath
687 693 for f in paths:
688 694 addpath(f)
689 695
690 696 def addpath(self, path):
691 697 if path == b'':
692 698 return
693 699 dirs = self._dirs
694 700 findsplitdirs = _dirchildren._findsplitdirs
695 701 for d, b in findsplitdirs(path):
696 702 if d not in self._onlyinclude:
697 703 continue
698 704 dirs.setdefault(d, set()).add(b)
699 705
700 706 @staticmethod
701 707 def _findsplitdirs(path):
702 708 # yields (dirname, basename) tuples, walking back to the root. This is
703 709 # very similar to pathutil.finddirs, except:
704 710 # - produces a (dirname, basename) tuple, not just 'dirname'
705 711 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
706 712 # slash.
707 713 oldpos = len(path)
708 714 pos = path.rfind(b'/')
709 715 while pos != -1:
710 716 yield path[:pos], path[pos + 1 : oldpos]
711 717 oldpos = pos
712 718 pos = path.rfind(b'/', 0, pos)
713 719 yield b'', path[:oldpos]
714 720
715 721 def get(self, path):
716 722 return self._dirs.get(path, set())
717 723
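A short sketch of how _dirchildren behaves, including the onlyinclude pruning (values illustrative):

    dc = _dirchildren([b'foo/bar/baz', b'foo/qux'], onlyinclude=[b'foo'])
    assert dc.get(b'foo') == {b'bar', b'qux'}
    assert dc.get(b'foo/bar') == set()  # pruned: 'foo/bar' not in onlyinclude
    assert list(_dirchildren._findsplitdirs(b'a/b/c')) == [
        (b'a/b', b'c'),
        (b'a', b'b'),
        (b'', b'a'),
    ]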
718 724
719 725 class includematcher(basematcher):
720 726 def __init__(self, root, kindpats, badfn=None):
721 727 super(includematcher, self).__init__(badfn)
722 728 if rustmod is not None:
723 729 # We need to pass the patterns to Rust because they can contain
724 730 # patterns from the user interface
725 731 self._kindpats = kindpats
726 732 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
727 733 self._prefix = _prefix(kindpats)
728 734 roots, dirs, parents = _rootsdirsandparents(kindpats)
729 735 # roots are directories which are recursively included.
730 736 self._roots = set(roots)
731 737 # dirs are directories which are non-recursively included.
732 738 self._dirs = set(dirs)
733 739 # parents are directories which are non-recursively included because
734 740 # they are needed to get to items in _dirs or _roots.
735 741 self._parents = parents
736 742
737 743 def visitdir(self, dir):
738 744 if self._prefix and dir in self._roots:
739 745 return b'all'
740 746 return (
741 747 dir in self._dirs
742 748 or dir in self._parents
743 749 or path_or_parents_in_set(dir, self._roots)
744 750 )
745 751
746 752 @propertycache
747 753 def _allparentschildren(self):
748 754 # It may seem odd that we add dirs, roots, and parents, and then
749 755 # restrict to only parents. This is to catch the case of:
750 756 # dirs = ['foo/bar']
751 757 # parents = ['foo']
752 758 # if we asked for the children of 'foo', but had only added
753 759 # self._parents, we wouldn't be able to respond ['bar'].
754 760 return _dirchildren(
755 761 itertools.chain(self._dirs, self._roots, self._parents),
756 762 onlyinclude=self._parents,
757 763 )
758 764
759 765 def visitchildrenset(self, dir):
760 766 if self._prefix and dir in self._roots:
761 767 return b'all'
762 768 # Note: this does *not* include the 'dir in self._parents' case from
763 769 # visitdir; that's handled below.
764 770 if (
765 771 b'' in self._roots
766 772 or dir in self._dirs
767 773 or path_or_parents_in_set(dir, self._roots)
768 774 ):
769 775 return b'this'
770 776
771 777 if dir in self._parents:
772 778 return self._allparentschildren.get(dir) or set()
773 779 return set()
774 780
775 781 @encoding.strmethod
776 782 def __repr__(self):
777 783 return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
778 784
779 785
780 786 class exactmatcher(basematcher):
781 787 r"""Matches the input files exactly. They are interpreted as paths, not
782 788 patterns (so no kind-prefixes).
783 789
784 790 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
785 791 >>> m(b'a.txt')
786 792 True
787 793 >>> m(b'b.txt')
788 794 False
789 795
790 796 Input files that would be matched are exactly those returned by .files()
791 797 >>> m.files()
792 798 ['a.txt', 're:.*\\.c$']
793 799
794 800 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
795 801 >>> m(b'main.c')
796 802 False
797 803 >>> m(br're:.*\.c$')
798 804 True
799 805 """
800 806
801 807 def __init__(self, files, badfn=None):
802 808 super(exactmatcher, self).__init__(badfn)
803 809
804 810 if isinstance(files, list):
805 811 self._files = files
806 812 else:
807 813 self._files = list(files)
808 814
809 815 matchfn = basematcher.exact
810 816
811 817 @propertycache
812 818 def _dirs(self):
813 819 return set(pathutil.dirs(self._fileset))
814 820
815 821 def visitdir(self, dir):
816 822 return dir in self._dirs
817 823
818 824 @propertycache
819 825 def _visitchildrenset_candidates(self):
820 826 """A memoized set of candidates for visitchildrenset."""
821 827 return self._fileset | self._dirs - {b''}
822 828
823 829 @propertycache
824 830 def _sorted_visitchildrenset_candidates(self):
825 831 """A memoized sorted list of candidates for visitchildrenset."""
826 832 return sorted(self._visitchildrenset_candidates)
827 833
828 834 def visitchildrenset(self, dir):
829 835 if not self._fileset or dir not in self._dirs:
830 836 return set()
831 837
832 838 if dir == b'':
833 839 candidates = self._visitchildrenset_candidates
834 840 else:
835 841 candidates = self._sorted_visitchildrenset_candidates
836 842 d = dir + b'/'
837 843 # Use bisect to find the first element potentially starting with d
838 844 # (i.e. >= d). This should always find at least one element (we'll
839 845 # assert later if this is not the case).
840 846 first = bisect.bisect_left(candidates, d)
841 847 # We need a representation of the first element that is > d that
842 848 # does not start with d, so since we added a `/` on the end of dir,
843 849 # we'll add whatever comes after slash (we could probably assume
844 850 # that `0` is after `/`, but let's not) to the end of dir instead.
845 851 dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
846 852 # Use bisect to find the first element >= d_next
847 853 last = bisect.bisect_left(candidates, dnext, lo=first)
848 854 dlen = len(d)
849 855 candidates = {c[dlen:] for c in candidates[first:last]}
850 856 # self._dirs includes all of the directories, recursively, so if
851 857 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
852 858 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
853 859 # '/' in it, indicating it's for a subdir-of-a-subdir; the
854 860 # immediate subdir will be in there without a slash.
855 861 ret = {c for c in candidates if b'/' not in c}
856 862 # We really do not expect ret to be empty, since that would imply that
857 863 # there's something in _dirs that didn't have a file in _fileset.
858 864 assert ret
859 865 return ret
860 866
861 867 def isexact(self):
862 868 return True
863 869
864 870 @encoding.strmethod
865 871 def __repr__(self):
866 872 return b'<exactmatcher files=%r>' % self._files
867 873
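The bisect logic in visitchildrenset is easiest to follow with a worked example (a sketch; the file names are made up):

    m = exactmatcher([b'a/b/c.txt', b'a/d.txt', b'e.txt'])
    assert m.visitchildrenset(b'') == {b'a', b'e.txt'}
    assert m.visitchildrenset(b'a') == {b'b', b'd.txt'}
    assert m.visitchildrenset(b'a/b') == {b'c.txt'}
    assert m.visitchildrenset(b'x') == set()  # 'x' is not in self._dirs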
868 874
869 875 class differencematcher(basematcher):
870 876 """Composes two matchers by matching if the first matches and the second
871 877 does not.
872 878
873 879 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
874 880 """
875 881
876 882 def __init__(self, m1, m2):
877 883 super(differencematcher, self).__init__()
878 884 self._m1 = m1
879 885 self._m2 = m2
880 886 self.bad = m1.bad
881 887 self.traversedir = m1.traversedir
882 888
883 889 def matchfn(self, f):
884 890 return self._m1(f) and not self._m2(f)
885 891
886 892 @propertycache
887 893 def _files(self):
888 894 if self.isexact():
889 895 return [f for f in self._m1.files() if self(f)]
890 896 # If m1 is not an exact matcher, we can't easily figure out the set of
891 897 # files, because its files() are not always files. For example, if
892 898 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
893 899 # want to remove "dir" from the set even though it would match m2,
894 900 # because the "dir" in m1 may not be a file.
895 901 return self._m1.files()
896 902
897 903 def visitdir(self, dir):
898 904 if self._m2.visitdir(dir) == b'all':
899 905 return False
900 906 elif not self._m2.visitdir(dir):
901 907 # m2 does not match dir, we can return 'all' here if possible
902 908 return self._m1.visitdir(dir)
903 909 return bool(self._m1.visitdir(dir))
904 910
905 911 def visitchildrenset(self, dir):
906 912 m2_set = self._m2.visitchildrenset(dir)
907 913 if m2_set == b'all':
908 914 return set()
909 915 m1_set = self._m1.visitchildrenset(dir)
910 916 # Possible values for m1: 'all', 'this', set(...), set()
911 917 # Possible values for m2: 'this', set(...), set()
912 918 # If m2 has nothing under here that we care about, return m1, even if
913 919 # it's 'all'. This is a change in behavior from visitdir, which would
914 920 # return True, not 'all', for some reason.
915 921 if not m2_set:
916 922 return m1_set
917 923 if m1_set in [b'all', b'this']:
918 924 # Never return 'all' here if m2_set is any kind of non-empty (either
919 925 # 'this' or set(foo)), since m2 might return set() for a
920 926 # subdirectory.
921 927 return b'this'
922 928 # Possible values for m1: set(...), set()
923 929 # Possible values for m2: 'this', set(...)
924 930 # We ignore m2's set results. They're possibly incorrect:
925 931 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
926 932 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
927 933 # return set(), which is *not* correct, we still need to visit 'dir'!
928 934 return m1_set
929 935
930 936 def isexact(self):
931 937 return self._m1.isexact()
932 938
933 939 @encoding.strmethod
934 940 def __repr__(self):
935 941 return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
936 942
937 943
938 944 def intersectmatchers(m1, m2):
939 945 """Composes two matchers by matching if both of them match.
940 946
941 947 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
942 948 """
943 949 if m1 is None or m2 is None:
944 950 return m1 or m2
945 951 if m1.always():
946 952 m = copy.copy(m2)
947 953 # TODO: Consider encapsulating these things in a class so there's only
948 954 # one thing to copy from m1.
949 955 m.bad = m1.bad
950 956 m.traversedir = m1.traversedir
951 957 return m
952 958 if m2.always():
953 959 m = copy.copy(m1)
954 960 return m
955 961 return intersectionmatcher(m1, m2)
956 962
957 963
958 964 class intersectionmatcher(basematcher):
959 965 def __init__(self, m1, m2):
960 966 super(intersectionmatcher, self).__init__()
961 967 self._m1 = m1
962 968 self._m2 = m2
963 969 self.bad = m1.bad
964 970 self.traversedir = m1.traversedir
965 971
966 972 @propertycache
967 973 def _files(self):
968 974 if self.isexact():
969 975 m1, m2 = self._m1, self._m2
970 976 if not m1.isexact():
971 977 m1, m2 = m2, m1
972 978 return [f for f in m1.files() if m2(f)]
973 979 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
974 980 # the set of files, because their files() are not always files. For
975 981 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
976 982 # "path:dir2", we don't want to remove "dir2" from the set.
977 983 return self._m1.files() + self._m2.files()
978 984
979 985 def matchfn(self, f):
980 986 return self._m1(f) and self._m2(f)
981 987
982 988 def visitdir(self, dir):
983 989 visit1 = self._m1.visitdir(dir)
984 990 if visit1 == b'all':
985 991 return self._m2.visitdir(dir)
986 992 # bool() because visit1=True + visit2='all' should not be 'all'
987 993 return bool(visit1 and self._m2.visitdir(dir))
988 994
989 995 def visitchildrenset(self, dir):
990 996 m1_set = self._m1.visitchildrenset(dir)
991 997 if not m1_set:
992 998 return set()
993 999 m2_set = self._m2.visitchildrenset(dir)
994 1000 if not m2_set:
995 1001 return set()
996 1002
997 1003 if m1_set == b'all':
998 1004 return m2_set
999 1005 elif m2_set == b'all':
1000 1006 return m1_set
1001 1007
1002 1008 if m1_set == b'this' or m2_set == b'this':
1003 1009 return b'this'
1004 1010
1005 1011 assert isinstance(m1_set, set) and isinstance(m2_set, set)
1006 1012 return m1_set.intersection(m2_set)
1007 1013
1008 1014 def always(self):
1009 1015 return self._m1.always() and self._m2.always()
1010 1016
1011 1017 def isexact(self):
1012 1018 return self._m1.isexact() or self._m2.isexact()
1013 1019
1014 1020 @encoding.strmethod
1015 1021 def __repr__(self):
1016 1022 return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
1017 1023
1018 1024
1019 1025 class subdirmatcher(basematcher):
1020 1026 """Adapt a matcher to work on a subdirectory only.
1021 1027
1022 1028 The paths are remapped to remove/insert the path as needed:
1023 1029
1024 1030 >>> from . import pycompat
1025 1031 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
1026 1032 >>> m2 = subdirmatcher(b'sub', m1)
1027 1033 >>> m2(b'a.txt')
1028 1034 False
1029 1035 >>> m2(b'b.txt')
1030 1036 True
1031 1037 >>> m2.matchfn(b'a.txt')
1032 1038 False
1033 1039 >>> m2.matchfn(b'b.txt')
1034 1040 True
1035 1041 >>> m2.files()
1036 1042 ['b.txt']
1037 1043 >>> m2.exact(b'b.txt')
1038 1044 True
1039 1045 >>> def bad(f, msg):
1040 1046 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1041 1047 >>> m1.bad = bad
1042 1048 >>> m2.bad(b'x.txt', b'No such file')
1043 1049 sub/x.txt: No such file
1044 1050 """
1045 1051
1046 1052 def __init__(self, path, matcher):
1047 1053 super(subdirmatcher, self).__init__()
1048 1054 self._path = path
1049 1055 self._matcher = matcher
1050 1056 self._always = matcher.always()
1051 1057
1052 1058 self._files = [
1053 1059 f[len(path) + 1 :]
1054 1060 for f in matcher._files
1055 1061 if f.startswith(path + b"/")
1056 1062 ]
1057 1063
1058 1064 # If the parent repo had a path to this subrepo and the matcher is
1059 1065 # a prefix matcher, this submatcher always matches.
1060 1066 if matcher.prefix():
1061 1067 self._always = any(f == path for f in matcher._files)
1062 1068
1063 1069 def bad(self, f, msg):
1064 1070 self._matcher.bad(self._path + b"/" + f, msg)
1065 1071
1066 1072 def matchfn(self, f):
1067 1073 # Some information is lost in the superclass's constructor, so we
1068 1074 # cannot accurately create the matching function for the subdirectory
1069 1075 # from the inputs. Instead, we override matchfn() and visitdir() to
1070 1076 # call the original matcher with the subdirectory path prepended.
1071 1077 return self._matcher.matchfn(self._path + b"/" + f)
1072 1078
1073 1079 def visitdir(self, dir):
1074 1080 if dir == b'':
1075 1081 dir = self._path
1076 1082 else:
1077 1083 dir = self._path + b"/" + dir
1078 1084 return self._matcher.visitdir(dir)
1079 1085
1080 1086 def visitchildrenset(self, dir):
1081 1087 if dir == b'':
1082 1088 dir = self._path
1083 1089 else:
1084 1090 dir = self._path + b"/" + dir
1085 1091 return self._matcher.visitchildrenset(dir)
1086 1092
1087 1093 def always(self):
1088 1094 return self._always
1089 1095
1090 1096 def prefix(self):
1091 1097 return self._matcher.prefix() and not self._always
1092 1098
1093 1099 @encoding.strmethod
1094 1100 def __repr__(self):
1095 1101 return b'<subdirmatcher path=%r, matcher=%r>' % (
1096 1102 self._path,
1097 1103 self._matcher,
1098 1104 )
1099 1105
1100 1106
1101 1107 class prefixdirmatcher(basematcher):
1102 1108 """Adapt a matcher to work on a parent directory.
1103 1109
1104 1110 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1105 1111
1106 1112 The prefix path should usually be the relative path from the root of
1107 1113 this matcher to the root of the wrapped matcher.
1108 1114
1109 1115 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1110 1116 >>> m2 = prefixdirmatcher(b'd/e', m1)
1111 1117 >>> m2(b'a.txt')
1112 1118 False
1113 1119 >>> m2(b'd/e/a.txt')
1114 1120 True
1115 1121 >>> m2(b'd/e/b.txt')
1116 1122 False
1117 1123 >>> m2.files()
1118 1124 ['d/e/a.txt', 'd/e/f/b.txt']
1119 1125 >>> m2.exact(b'd/e/a.txt')
1120 1126 True
1121 1127 >>> m2.visitdir(b'd')
1122 1128 True
1123 1129 >>> m2.visitdir(b'd/e')
1124 1130 True
1125 1131 >>> m2.visitdir(b'd/e/f')
1126 1132 True
1127 1133 >>> m2.visitdir(b'd/e/g')
1128 1134 False
1129 1135 >>> m2.visitdir(b'd/ef')
1130 1136 False
1131 1137 """
1132 1138
1133 1139 def __init__(self, path, matcher, badfn=None):
1134 1140 super(prefixdirmatcher, self).__init__(badfn)
1135 1141 if not path:
1136 1142 raise error.ProgrammingError(b'prefix path must not be empty')
1137 1143 self._path = path
1138 1144 self._pathprefix = path + b'/'
1139 1145 self._matcher = matcher
1140 1146
1141 1147 @propertycache
1142 1148 def _files(self):
1143 1149 return [self._pathprefix + f for f in self._matcher._files]
1144 1150
1145 1151 def matchfn(self, f):
1146 1152 if not f.startswith(self._pathprefix):
1147 1153 return False
1148 1154 return self._matcher.matchfn(f[len(self._pathprefix) :])
1149 1155
1150 1156 @propertycache
1151 1157 def _pathdirs(self):
1152 1158 return set(pathutil.finddirs(self._path))
1153 1159
1154 1160 def visitdir(self, dir):
1155 1161 if dir == self._path:
1156 1162 return self._matcher.visitdir(b'')
1157 1163 if dir.startswith(self._pathprefix):
1158 1164 return self._matcher.visitdir(dir[len(self._pathprefix) :])
1159 1165 return dir in self._pathdirs
1160 1166
1161 1167 def visitchildrenset(self, dir):
1162 1168 if dir == self._path:
1163 1169 return self._matcher.visitchildrenset(b'')
1164 1170 if dir.startswith(self._pathprefix):
1165 1171 return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
1166 1172 if dir in self._pathdirs:
1167 1173 return b'this'
1168 1174 return set()
1169 1175
1170 1176 def isexact(self):
1171 1177 return self._matcher.isexact()
1172 1178
1173 1179 def prefix(self):
1174 1180 return self._matcher.prefix()
1175 1181
1176 1182 @encoding.strmethod
1177 1183 def __repr__(self):
1178 1184 return b'<prefixdirmatcher path=%r, matcher=%r>' % (
1179 1185 pycompat.bytestr(self._path),
1180 1186 self._matcher,
1181 1187 )
1182 1188
1183 1189
1184 1190 class unionmatcher(basematcher):
1185 1191 """A matcher that is the union of several matchers.
1186 1192
1187 1193 The non-matching-attributes (bad, traversedir) are taken from the first
1188 1194 matcher.
1189 1195 """
1190 1196
1191 1197 def __init__(self, matchers):
1192 1198 m1 = matchers[0]
1193 1199 super(unionmatcher, self).__init__()
1194 1200 self.traversedir = m1.traversedir
1195 1201 self._matchers = matchers
1196 1202
1197 1203 def matchfn(self, f):
1198 1204 for match in self._matchers:
1199 1205 if match(f):
1200 1206 return True
1201 1207 return False
1202 1208
1203 1209 def visitdir(self, dir):
1204 1210 r = False
1205 1211 for m in self._matchers:
1206 1212 v = m.visitdir(dir)
1207 1213 if v == b'all':
1208 1214 return v
1209 1215 r |= v
1210 1216 return r
1211 1217
1212 1218 def visitchildrenset(self, dir):
1213 1219 r = set()
1214 1220 this = False
1215 1221 for m in self._matchers:
1216 1222 v = m.visitchildrenset(dir)
1217 1223 if not v:
1218 1224 continue
1219 1225 if v == b'all':
1220 1226 return v
1221 1227 if this or v == b'this':
1222 1228 this = True
1223 1229 # don't break, we might have an 'all' in here.
1224 1230 continue
1225 1231 assert isinstance(v, set)
1226 1232 r = r.union(v)
1227 1233 if this:
1228 1234 return b'this'
1229 1235 return r
1230 1236
1231 1237 @encoding.strmethod
1232 1238 def __repr__(self):
1233 1239 return b'<unionmatcher matchers=%r>' % self._matchers
1234 1240
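A small sketch of the union semantics, using the exact() factory defined earlier in this file:

    u = unionmatcher([exact([b'a/b']), exact([b'c'])])
    assert u(b'a/b') and u(b'c') and not u(b'd')
    assert u.visitchildrenset(b'') == {b'a', b'c'}  # child sets are unioned
    assert u.visitchildrenset(b'a') == {b'b'}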
1235 1241
1236 1242 def patkind(pattern, default=None):
1237 1243 r"""If pattern is 'kind:pat' with a known kind, return kind.
1238 1244
1239 1245 >>> patkind(br're:.*\.c$')
1240 1246 're'
1241 1247 >>> patkind(b'glob:*.c')
1242 1248 'glob'
1243 1249 >>> patkind(b'relpath:test.py')
1244 1250 'relpath'
1245 1251 >>> patkind(b'main.py')
1246 1252 >>> patkind(b'main.py', default=b're')
1247 1253 're'
1248 1254 """
1249 1255 return _patsplit(pattern, default)[0]
1250 1256
1251 1257
1252 1258 def _patsplit(pattern, default):
1253 1259 """Split a string into the optional pattern kind prefix and the actual
1254 1260 pattern."""
1255 1261 if b':' in pattern:
1256 1262 kind, pat = pattern.split(b':', 1)
1257 1263 if kind in allpatternkinds:
1258 1264 return kind, pat
1259 1265 return default, pattern
1260 1266
1261 1267
1262 1268 def _globre(pat):
1263 1269 r"""Convert an extended glob string to a regexp string.
1264 1270
1265 1271 >>> from . import pycompat
1266 1272 >>> def bprint(s):
1267 1273 ... print(pycompat.sysstr(s))
1268 1274 >>> bprint(_globre(br'?'))
1269 1275 .
1270 1276 >>> bprint(_globre(br'*'))
1271 1277 [^/]*
1272 1278 >>> bprint(_globre(br'**'))
1273 1279 .*
1274 1280 >>> bprint(_globre(br'**/a'))
1275 1281 (?:.*/)?a
1276 1282 >>> bprint(_globre(br'a/**/b'))
1277 1283 a/(?:.*/)?b
1278 1284 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1279 1285 [a*?!^][\^b][^c]
1280 1286 >>> bprint(_globre(br'{a,b}'))
1281 1287 (?:a|b)
1282 1288 >>> bprint(_globre(br'.\*\?'))
1283 1289 \.\*\?
1284 1290 """
1285 1291 i, n = 0, len(pat)
1286 1292 res = b''
1287 1293 group = 0
1288 1294 escape = util.stringutil.regexbytesescapemap.get
1289 1295
1290 1296 def peek():
1291 1297 return i < n and pat[i : i + 1]
1292 1298
1293 1299 while i < n:
1294 1300 c = pat[i : i + 1]
1295 1301 i += 1
1296 1302 if c not in b'*?[{},\\':
1297 1303 res += escape(c, c)
1298 1304 elif c == b'*':
1299 1305 if peek() == b'*':
1300 1306 i += 1
1301 1307 if peek() == b'/':
1302 1308 i += 1
1303 1309 res += b'(?:.*/)?'
1304 1310 else:
1305 1311 res += b'.*'
1306 1312 else:
1307 1313 res += b'[^/]*'
1308 1314 elif c == b'?':
1309 1315 res += b'.'
1310 1316 elif c == b'[':
1311 1317 j = i
1312 1318 if j < n and pat[j : j + 1] in b'!]':
1313 1319 j += 1
1314 1320 while j < n and pat[j : j + 1] != b']':
1315 1321 j += 1
1316 1322 if j >= n:
1317 1323 res += b'\\['
1318 1324 else:
1319 1325 stuff = pat[i:j].replace(b'\\', b'\\\\')
1320 1326 i = j + 1
1321 1327 if stuff[0:1] == b'!':
1322 1328 stuff = b'^' + stuff[1:]
1323 1329 elif stuff[0:1] == b'^':
1324 1330 stuff = b'\\' + stuff
1325 1331 res = b'%s[%s]' % (res, stuff)
1326 1332 elif c == b'{':
1327 1333 group += 1
1328 1334 res += b'(?:'
1329 1335 elif c == b'}' and group:
1330 1336 res += b')'
1331 1337 group -= 1
1332 1338 elif c == b',' and group:
1333 1339 res += b'|'
1334 1340 elif c == b'\\':
1335 1341 p = peek()
1336 1342 if p:
1337 1343 i += 1
1338 1344 res += escape(p, p)
1339 1345 else:
1340 1346 res += escape(c, c)
1341 1347 else:
1342 1348 res += escape(c, c)
1343 1349 return res
1344 1350
1345 1351
1346 1352 FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')
1347 1353
1348 1354
1349 1355 def _regex(kind, pat, globsuffix):
1350 1356 """Convert a (normalized) pattern of any kind into a
1351 1357 regular expression.
1352 1358 globsuffix is appended to the regexp of globs."""
1353 1359 if not pat and kind in (b'glob', b'relpath'):
1354 1360 return b''
1355 1361 if kind == b're':
1356 1362 return pat
1357 1363 if kind == b'filepath':
1358 1364 raise error.ProgrammingError(
1359 1365 "'filepath:' patterns should not be converted to a regex"
1360 1366 )
1361 1367 if kind in (b'path', b'relpath'):
1362 1368 if pat == b'.':
1363 1369 return b''
1364 1370 return util.stringutil.reescape(pat) + b'(?:/|$)'
1365 1371 if kind == b'rootfilesin':
1366 1372 if pat == b'.':
1367 1373 escaped = b''
1368 1374 else:
1369 1375 # Pattern is a directory name.
1370 1376 escaped = util.stringutil.reescape(pat) + b'/'
1371 1377 # Anything after the pattern must be a non-directory.
1372 1378 return escaped + b'[^/]+$'
1373 1379 if kind == b'relglob':
1374 1380 globre = _globre(pat)
1375 1381 if globre.startswith(b'[^/]*'):
1376 1382 # When pat has the form *XYZ (common), make the returned regex more
1377 1383 # legible by returning the regex for **XYZ instead of **/*XYZ.
1378 1384 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1379 1385 return b'(?:|.*/)' + globre + globsuffix
1380 1386 if kind == b'relre':
1381 1387 flag = None
1382 1388 m = FLAG_RE.match(pat)
1383 1389 if m:
1384 1390 flag, pat = m.groups()
1385 1391 if not pat.startswith(b'^'):
1386 1392 pat = b'.*' + pat
1387 1393 if flag is not None:
1388 1394 pat = br'(?%s:%s)' % (flag, pat)
1389 1395 return pat
1390 1396 if kind in (b'glob', b'rootglob'):
1391 1397 return _globre(pat) + globsuffix
1392 1398 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1393 1399
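# Illustrative sketch (not part of the module): how _regex expands a few
# common pattern kinds. The globsuffix shown is an assumption; the real
# value comes from the caller (see _buildmatch below).
#
#   _regex(b'glob', b'*.py', b'$')       -> [^/]*\.py$
#   _regex(b'path', b'foo', b'')         -> foo(?:/|$)
#   _regex(b'rootfilesin', b'foo', b'')  -> foo/[^/]+$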
1394 1400
1395 1401 def _buildmatch(kindpats, globsuffix, root):
1396 1402 """Return regexp string and a matcher function for kindpats.
1397 1403 globsuffix is appended to the regexp of globs."""
1398 1404 matchfuncs = []
1399 1405
1400 1406 subincludes, kindpats = _expandsubinclude(kindpats, root)
1401 1407 if subincludes:
1402 1408 submatchers = {}
1403 1409
1404 1410 def matchsubinclude(f):
1405 1411 for prefix, matcherargs in subincludes:
1406 1412 if f.startswith(prefix):
1407 1413 mf = submatchers.get(prefix)
1408 1414 if mf is None:
1409 1415 mf = match(*matcherargs)
1410 1416 submatchers[prefix] = mf
1411 1417
1412 1418 if mf(f[len(prefix) :]):
1413 1419 return True
1414 1420 return False
1415 1421
1416 1422 matchfuncs.append(matchsubinclude)
1417 1423
1418 1424 regex = b''
1419 1425 if kindpats:
1420 1426 if all(k == b'rootfilesin' for k, p, s in kindpats):
1421 1427 dirs = {p for k, p, s in kindpats}
1422 1428
1423 1429 def mf(f):
1424 1430 i = f.rfind(b'/')
1425 1431 if i >= 0:
1426 1432 dir = f[:i]
1427 1433 else:
1428 1434 dir = b'.'
1429 1435 return dir in dirs
1430 1436
1431 1437 regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
1432 1438 matchfuncs.append(mf)
1433 1439 else:
1434 1440 regex, mf = _buildregexmatch(kindpats, globsuffix)
1435 1441 matchfuncs.append(mf)
1436 1442
1437 1443 if len(matchfuncs) == 1:
1438 1444 return regex, matchfuncs[0]
1439 1445 else:
1440 1446 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1441 1447
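# Hedged usage sketch (assumed inputs, not a doctest from the module):
#
#   regex, mf = _buildmatch([(b'glob', b'src/*.py', b'')], b'(?:/|$)', b'/repo')
#   mf(b'src/a.py')   -> True
#   mf(b'docs/a.py')  -> False
#
# With no subinclude patterns, the returned function is just the single
# regex matcher built by _buildregexmatch.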
1442 1448
1443 1449 MAX_RE_SIZE = 20000
1444 1450
1445 1451
1446 1452 def _joinregexes(regexps):
1447 1453 """gather multiple regular expressions into a single one"""
1448 1454 return b'|'.join(regexps)
1449 1455
1450 1456
1451 1457 def _buildregexmatch(kindpats, globsuffix):
1452 1458 """Build a match function from a list of kinds and kindpats,
1453 1459 return regexp string and a matcher function.
1454 1460
1455 1461 Test too large input
1456 1462 >>> _buildregexmatch([
1457 1463 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1458 1464 ... ], b'$')
1459 1465 Traceback (most recent call last):
1460 1466 ...
1461 1467 Abort: matcher pattern is too long (20009 bytes)
1462 1468 """
1463 1469 try:
1464 1470 allgroups = []
1465 1471 regexps = []
1466 1472 exact = set()
1467 1473 for kind, pattern, _source in kindpats:
1468 1474 if kind == b'filepath':
1469 1475 exact.add(pattern)
1470 1476 continue
1471 1477 regexps.append(_regex(kind, pattern, globsuffix))
1472 1478
1473 1479 fullregexp = _joinregexes(regexps)
1474 1480
1475 1481 startidx = 0
1476 1482 groupsize = 0
1477 1483 for idx, r in enumerate(regexps):
1478 1484 piecesize = len(r)
1479 1485 if piecesize > MAX_RE_SIZE:
1480 1486 msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
1481 1487 raise error.Abort(msg)
1482 1488 elif (groupsize + piecesize) > MAX_RE_SIZE:
1483 1489 group = regexps[startidx:idx]
1484 1490 allgroups.append(_joinregexes(group))
1485 1491 startidx = idx
1486 1492 groupsize = 0
1487 1493 groupsize += piecesize + 1
1488 1494
1489 1495 if startidx == 0:
1490 1496 matcher = _rematcher(fullregexp)
1491 1497 func = lambda s: bool(matcher(s))
1492 1498 else:
1493 1499 group = regexps[startidx:]
1494 1500 allgroups.append(_joinregexes(group))
1495 1501 allmatchers = [_rematcher(g) for g in allgroups]
1496 1502 func = lambda s: any(m(s) for m in allmatchers)
1497 1503
1498 1504 actualfunc = func
1499 1505 if exact:
1500 1506 # An empty regex will always match, so only call the regex if
1501 1507 # there were any actual patterns to match.
1502 1508 if not regexps:
1503 1509 actualfunc = lambda s: s in exact
1504 1510 else:
1505 1511 actualfunc = lambda s: s in exact or func(s)
1506 1512 return fullregexp, actualfunc
1507 1513 except re.error:
1508 1514 for k, p, s in kindpats:
1509 1515 if k == b'filepath':
1510 1516 continue
1511 1517 try:
1512 1518 _rematcher(_regex(k, p, globsuffix))
1513 1519 except re.error:
1514 1520 if s:
1515 1521 raise error.Abort(
1516 1522 _(b"%s: invalid pattern (%s): %s") % (s, k, p)
1517 1523 )
1518 1524 else:
1519 1525 raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
1520 1526 raise error.Abort(_(b"invalid pattern"))
1521 1527
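# Illustrative sketch (not part of the module): b'filepath' patterns skip
# regex construction entirely and are answered from the exact set above.
#
#   regex, mf = _buildregexmatch([(b'filepath', b'exact/name', b'')], b'$')
#   mf(b'exact/name')  -> True   (plain set lookup, no regex consulted)
#   mf(b'other/name')  -> False
#
# When the joined regexps would exceed MAX_RE_SIZE, they are compiled as
# several smaller groups instead and tried in turn via any().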
1522 1528
1523 1529 def _patternrootsanddirs(kindpats):
1524 1530 """Returns roots and directories corresponding to each pattern.
1525 1531
1526 1532 This calculates the roots and directories exactly matching the patterns and
1527 1533 returns a (roots, dirs) tuple, with one entry per pattern in each list. It
1528 1534 does not return other directories which may also need to be considered,
1529 1535 like the parent directories.
1530 1536 """
1531 1537 r = []
1532 1538 d = []
1533 1539 for kind, pat, source in kindpats:
1534 1540 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1535 1541 root = []
1536 1542 for p in pat.split(b'/'):
1537 1543 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1538 1544 break
1539 1545 root.append(p)
1540 1546 r.append(b'/'.join(root))
1541 1547 elif kind in (b'relpath', b'path', b'filepath'):
1542 1548 if pat == b'.':
1543 1549 pat = b''
1544 1550 r.append(pat)
1545 1551 elif kind in (b'rootfilesin',):
1546 1552 if pat == b'.':
1547 1553 pat = b''
1548 1554 d.append(pat)
1549 1555 else: # relglob, re, relre
1550 1556 r.append(b'')
1551 1557 return r, d
1552 1558
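# Illustrative sketch (not part of the module): the non-glob prefix of a
# glob becomes a recursive root, while rootfilesin patterns become
# non-recursive directories.
#
#   _patternrootsanddirs([(b'glob', b'g/h/*', b''), (b'rootfilesin', b'd', b'')])
#   -> ([b'g/h'], [b'd'])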
1553 1559
1554 1560 def _roots(kindpats):
1555 1561 '''Returns root directories to match recursively from the given patterns.'''
1556 1562 roots, dirs = _patternrootsanddirs(kindpats)
1557 1563 return roots
1558 1564
1559 1565
1560 1566 def _rootsdirsandparents(kindpats):
1561 1567 """Returns roots and exact directories from patterns.
1562 1568
1563 1569 `roots` are directories to match recursively, `dirs` should
1564 1570 be matched non-recursively, and `parents` are the implicitly required
1565 1571 directories to walk to items in either roots or dirs.
1566 1572
1567 1573 Returns a tuple of (roots, dirs, parents).
1568 1574
1569 1575 >>> r = _rootsdirsandparents(
1570 1576 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1571 1577 ... (b'glob', b'g*', b'')])
1572 1578 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1573 1579 (['g/h', 'g/h', ''], []) ['', 'g']
1574 1580 >>> r = _rootsdirsandparents(
1575 1581 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1576 1582 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1577 1583 ([], ['g/h', '']) ['', 'g']
1578 1584 >>> r = _rootsdirsandparents(
1579 1585 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1580 1586 ... (b'path', b'', b'')])
1581 1587 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1582 1588 (['r', 'p/p', ''], []) ['', 'p']
1583 1589 >>> r = _rootsdirsandparents(
1584 1590 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1585 1591 ... (b'relre', b'rr', b'')])
1586 1592 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1587 1593 (['', '', ''], []) ['']
1588 1594 """
1589 1595 r, d = _patternrootsanddirs(kindpats)
1590 1596
1591 1597 p = set()
1592 1598 # Add the parents as non-recursive/exact directories, since they must be
1593 1599 # scanned to get to either the roots or the other exact directories.
1594 1600 p.update(pathutil.dirs(d))
1595 1601 p.update(pathutil.dirs(r))
1596 1602
1597 1603 # FIXME: all callers of this function convert the results to sets; do so
1598 1604 # before returning.
1599 1605 # FIXME: no caller needs anything in 'roots' and 'dirs' to also be in
1600 1606 # 'parents'; consider removing them before returning.
1601 1607 return r, d, p
1602 1608
1603 1609
1604 1610 def _explicitfiles(kindpats):
1605 1611 """Returns the potential explicit filenames from the patterns.
1606 1612
1607 1613 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1608 1614 ['foo/bar']
1609 1615 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1610 1616 []
1611 1617 """
1612 1618 # Keep only the pattern kinds where one can specify filenames (vs only
1613 1619 # directory names).
1614 1620 filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
1615 1621 return _roots(filable)
1616 1622
1617 1623
1618 1624 def _prefix(kindpats):
1619 1625 '''Whether all the patterns match a prefix (i.e. recursively)'''
1620 1626 for kind, pat, source in kindpats:
1621 1627 if kind not in (b'path', b'relpath'):
1622 1628 return False
1623 1629 return True
1624 1630
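# Illustrative sketch (not part of the module):
#
#   _prefix([(b'path', b'foo', b'')])                         -> True
#   _prefix([(b'path', b'foo', b''), (b'glob', b'*.c', b'')]) -> False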
1625 1631
1626 1632 _commentre = None
1627 1633
1628 1634
1629 1635 def readpatternfile(filepath, warn, sourceinfo=False):
1630 1636 """parse a pattern file, returning a list of
1631 1637 patterns. These patterns should be given to compile()
1632 1638 to be validated and converted into a match function.
1633 1639
1634 1640 trailing white space is dropped.
1635 1641 the escape character is backslash.
1636 1642 comments start with #.
1637 1643 empty lines are skipped.
1638 1644
1639 1645 lines can be of the following formats:
1640 1646
1641 1647 syntax: regexp # defaults following lines to non-rooted regexps
1642 1648 syntax: glob # defaults following lines to non-rooted globs
1643 1649 re:pattern # non-rooted regular expression
1644 1650 glob:pattern # non-rooted glob
1645 1651 rootglob:pat # rooted glob (same root as ^ in regexps)
1646 1652 pattern # pattern of the current default type
1647 1653
1648 1654 If sourceinfo is set, returns a list of tuples:
1649 1655 (pattern, lineno, originalline).
1650 1656 This is useful for debugging ignore patterns.
1651 1657 """
1652 1658
1653 1659 syntaxes = {
1654 1660 b're': b'relre:',
1655 1661 b'regexp': b'relre:',
1656 1662 b'glob': b'relglob:',
1657 1663 b'rootglob': b'rootglob:',
1658 1664 b'include': b'include',
1659 1665 b'subinclude': b'subinclude',
1660 1666 }
1661 1667 syntax = b'relre:'
1662 1668 patterns = []
1663 1669
1664 1670 fp = open(filepath, b'rb')
1665 1671 for lineno, line in enumerate(fp, start=1):
1666 1672 if b"#" in line:
1667 1673 global _commentre
1668 1674 if not _commentre:
1669 1675 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1670 1676 # remove comments prefixed by an even number of escapes
1671 1677 m = _commentre.search(line)
1672 1678 if m:
1673 1679 line = line[: m.end(1)]
1674 1680 # fixup properly escaped comments that survived the above
1675 1681 line = line.replace(b"\\#", b"#")
1676 1682 line = line.rstrip()
1677 1683 if not line:
1678 1684 continue
1679 1685
1680 1686 if line.startswith(b'syntax:'):
1681 1687 s = line[7:].strip()
1682 1688 try:
1683 1689 syntax = syntaxes[s]
1684 1690 except KeyError:
1685 1691 if warn:
1686 1692 warn(
1687 1693 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1688 1694 )
1689 1695 continue
1690 1696
1691 1697 linesyntax = syntax
1692 1698 for s, rels in syntaxes.items():
1693 1699 if line.startswith(rels):
1694 1700 linesyntax = rels
1695 1701 line = line[len(rels) :]
1696 1702 break
1697 1703 elif line.startswith(s + b':'):
1698 1704 linesyntax = rels
1699 1705 line = line[len(s) + 1 :]
1700 1706 break
1701 1707 if sourceinfo:
1702 1708 patterns.append((linesyntax + line, lineno, line))
1703 1709 else:
1704 1710 patterns.append(linesyntax + line)
1705 1711 fp.close()
1706 1712 return patterns
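# Hedged usage sketch (hypothetical file contents, not from this repo): an
# ignore file containing
#
#     syntax: glob
#     *.pyc
#     re:^build/
#
# yields [b'relglob:*.pyc', b'relre:^build/']. With sourceinfo=True, each
# entry is instead a (pattern, lineno, originalline) tuple, e.g.
# (b'relglob:*.pyc', 2, b'*.pyc').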