dirstate: distinct transaction callback from largefile...
marmoute
r51018:4e95341c default
hgext/largefiles/lfutil.py
@@ -1,819 +1,820 @@
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import contextlib
12 12 import copy
13 13 import os
14 14 import stat
15 15
16 16 from mercurial.i18n import _
17 17 from mercurial.node import hex
18 18 from mercurial.pycompat import open
19 19
20 20 from mercurial import (
21 21 dirstate,
22 22 encoding,
23 23 error,
24 24 httpconnection,
25 25 match as matchmod,
26 26 pycompat,
27 27 requirements,
28 28 scmutil,
29 29 sparse,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33 from mercurial.utils import hashutil
34 34 from mercurial.dirstateutils import timestamp
35 35
36 36 shortname = b'.hglf'
37 37 shortnameslash = shortname + b'/'
38 38 longname = b'largefiles'
39 39
40 40 # -- Private worker functions ------------------------------------------
41 41
42 42
43 43 @contextlib.contextmanager
44 44 def lfstatus(repo, value=True):
45 45 oldvalue = getattr(repo, 'lfstatus', False)
46 46 repo.lfstatus = value
47 47 try:
48 48 yield
49 49 finally:
50 50 repo.lfstatus = oldvalue
51 51
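# A minimal usage sketch, assuming an already-loaded localrepository
# object; the previous flag value is restored on exit, even on error:
def _example_lfstatus(repo):
    with lfstatus(repo):
        # status machinery is largefiles-aware while the flag is set
        return repo.status()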
52 52
53 53 def getminsize(ui, assumelfiles, opt, default=10):
54 54 lfsize = opt
55 55 if not lfsize and assumelfiles:
56 56 lfsize = ui.config(longname, b'minsize', default=default)
57 57 if lfsize:
58 58 try:
59 59 lfsize = float(lfsize)
60 60 except ValueError:
61 61 raise error.Abort(
62 62 _(b'largefiles: size must be number (not %s)\n') % lfsize
63 63 )
64 64 if lfsize is None:
65 65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 66 return lfsize
67 67
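# A hedged sketch of the resolution order (an explicit option beats the
# config, which beats the default); sizes are floats, nominally megabytes.
# Assumes no [largefiles] minsize is set on `ui`:
def _example_getminsize(ui):
    assert getminsize(ui, True, b'2') == 2.0  # explicit size wins
    assert getminsize(ui, True, None) == 10.0  # falls back to the default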
68 68
69 69 def link(src, dest):
70 70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 71 util.makedirs(os.path.dirname(dest))
72 72 try:
73 73 util.oslink(src, dest)
74 74 except OSError:
75 75 # if hardlinks fail, fallback on atomic copy
76 76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 77 for chunk in util.filechunkiter(srcf):
78 78 dstf.write(chunk)
79 79 os.chmod(dest, os.stat(src).st_mode)
80 80
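# A small sketch, mirroring how the cache/store copies below use link():
# hardlink when the filesystem allows it, silently degrading to a chunked
# atomic copy that preserves the source file mode.
def _example_link_into_store(ui, repo, hash):
    link(usercachepath(ui, hash), storepath(repo, hash))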
81 81
82 82 def usercachepath(ui, hash):
83 83 """Return the correct location in the "global" largefiles cache for a file
84 84 with the given hash.
85 85 This cache is used for sharing of largefiles across repositories - both
86 86 to preserve download bandwidth and storage space."""
87 87 return os.path.join(_usercachedir(ui), hash)
88 88
89 89
90 90 def _usercachedir(ui, name=longname):
91 91 '''Return the location of the "global" largefiles cache.'''
92 92 path = ui.configpath(name, b'usercache')
93 93 if path:
94 94 return path
95 95
96 96 hint = None
97 97
98 98 if pycompat.iswindows:
99 99 appdata = encoding.environ.get(
100 100 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
101 101 )
102 102 if appdata:
103 103 return os.path.join(appdata, name)
104 104
105 105 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
106 106 b"LOCALAPPDATA",
107 107 b"APPDATA",
108 108 name,
109 109 )
110 110 elif pycompat.isdarwin:
111 111 home = encoding.environ.get(b'HOME')
112 112 if home:
113 113 return os.path.join(home, b'Library', b'Caches', name)
114 114
115 115 hint = _(b"define %s in the environment, or set %s.usercache") % (
116 116 b"HOME",
117 117 name,
118 118 )
119 119 elif pycompat.isposix:
120 120 path = encoding.environ.get(b'XDG_CACHE_HOME')
121 121 if path:
122 122 return os.path.join(path, name)
123 123 home = encoding.environ.get(b'HOME')
124 124 if home:
125 125 return os.path.join(home, b'.cache', name)
126 126
127 127 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
128 128 b"XDG_CACHE_HOME",
129 129 b"HOME",
130 130 name,
131 131 )
132 132 else:
133 133 raise error.Abort(
134 134 _(b'unknown operating system: %s\n') % pycompat.osname
135 135 )
136 136
137 137 raise error.Abort(_(b'unknown %s usercache location') % name, hint=hint)
138 138
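# Illustrative results, assuming the relevant environment variables are
# set and no [largefiles] usercache override is configured:
#   Windows: %LOCALAPPDATA%\largefiles (else %APPDATA%\largefiles)
#   macOS:   $HOME/Library/Caches/largefiles
#   POSIX:   $XDG_CACHE_HOME/largefiles, else $HOME/.cache/largefiles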
139 139
140 140 def inusercache(ui, hash):
141 141 path = usercachepath(ui, hash)
142 142 return os.path.exists(path)
143 143
144 144
145 145 def findfile(repo, hash):
146 146 """Return store path of the largefile with the specified hash.
147 147 As a side effect, the file might be linked from the user cache.
148 148 Return None if the file can't be found locally."""
149 149 path, exists = findstorepath(repo, hash)
150 150 if exists:
151 151 repo.ui.note(_(b'found %s in store\n') % hash)
152 152 return path
153 153 elif inusercache(repo.ui, hash):
154 154 repo.ui.note(_(b'found %s in system cache\n') % hash)
155 155 path = storepath(repo, hash)
156 156 link(usercachepath(repo.ui, hash), path)
157 157 return path
158 158 return None
159 159
160 160
161 161 class largefilesdirstate(dirstate.dirstate):
162 162 _large_file_dirstate = True
163 _tr_key_suffix = b'-large-files'
163 164
164 165 def __getitem__(self, key):
165 166 return super(largefilesdirstate, self).__getitem__(unixpath(key))
166 167
167 168 def set_tracked(self, f):
168 169 return super(largefilesdirstate, self).set_tracked(unixpath(f))
169 170
170 171 def set_untracked(self, f):
171 172 return super(largefilesdirstate, self).set_untracked(unixpath(f))
172 173
173 174 def normal(self, f, parentfiledata=None):
174 175 # not sure if we should pass the `parentfiledata` down or throw it
175 176 # away. So we throw it away to stay on the safe side.
176 177 return super(largefilesdirstate, self).normal(unixpath(f))
177 178
178 179 def remove(self, f):
179 180 return super(largefilesdirstate, self).remove(unixpath(f))
180 181
181 182 def add(self, f):
182 183 return super(largefilesdirstate, self).add(unixpath(f))
183 184
184 185 def drop(self, f):
185 186 return super(largefilesdirstate, self).drop(unixpath(f))
186 187
187 188 def forget(self, f):
188 189 return super(largefilesdirstate, self).forget(unixpath(f))
189 190
190 191 def normallookup(self, f):
191 192 return super(largefilesdirstate, self).normallookup(unixpath(f))
192 193
193 194 def _ignore(self, f):
194 195 return False
195 196
196 197 def write(self, tr):
197 198 # (1) disable PENDING mode always
198 199 # (lfdirstate isn't yet managed as a part of the transaction)
199 200 # (2) avoid develwarn 'use dirstate.write with ....'
200 201 if tr:
201 202 tr.addbackup(b'largefiles/dirstate', location=b'plain')
202 203 super(largefilesdirstate, self).write(None)
203 204
204 205
205 206 def openlfdirstate(ui, repo, create=True):
206 207 """
207 208 Return a dirstate object that tracks largefiles: i.e. its root is
208 209 the repo root, but it is saved in .hg/largefiles/dirstate.
209 210
210 211 If a dirstate object already exists and is being used for a 'changing_*'
211 212 context, it will be returned.
212 213 """
213 214 sub_dirstate = getattr(repo.dirstate, '_sub_dirstate', None)
214 215 if sub_dirstate is not None:
215 216 return sub_dirstate
216 217 vfs = repo.vfs
217 218 lfstoredir = longname
218 219 opener = vfsmod.vfs(vfs.join(lfstoredir))
219 220 use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
220 221 lfdirstate = largefilesdirstate(
221 222 opener,
222 223 ui,
223 224 repo.root,
224 225 repo.dirstate._validate,
225 226 lambda: sparse.matcher(repo),
226 227 repo.nodeconstants,
227 228 use_dirstate_v2,
228 229 )
229 230
230 231 # If the largefiles dirstate does not exist, populate and create
231 232 # it. This ensures that we create it on the first meaningful
232 233 # largefiles operation in a new clone.
233 234 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
234 235 try:
235 236 with repo.wlock(wait=False):
236 237 matcher = getstandinmatcher(repo)
237 238 standins = repo.dirstate.walk(
238 239 matcher, subrepos=[], unknown=False, ignored=False
239 240 )
240 241
241 242 if len(standins) > 0:
242 243 vfs.makedirs(lfstoredir)
243 244
244 245 for standin in standins:
245 246 lfile = splitstandin(standin)
246 247 lfdirstate.hacky_extension_update_file(
247 248 lfile,
248 249 p1_tracked=True,
249 250 wc_tracked=True,
250 251 possibly_dirty=True,
251 252 )
252 253 # avoid getting dirty dirstate before other operations
253 254 lfdirstate.write(repo.currenttransaction())
254 255 except error.LockError:
255 256 # Assume that whatever was holding the lock was important.
256 257 # If we were doing something important, we would already have
257 258 # either the lock or a largefile dirstate.
258 259 pass
259 260 return lfdirstate
260 261
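# A minimal sketch: entries are keyed by repo-root-relative,
# slash-separated largefile names (not .hglf/ standin names).
def _example_list_tracked_largefiles(ui, repo):
    lfdirstate = openlfdirstate(ui, repo)
    return [f for f in lfdirstate if lfdirstate.get_entry(f).tracked]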
261 262
262 263 def lfdirstatestatus(lfdirstate, repo):
263 264 pctx = repo[b'.']
264 265 match = matchmod.always()
265 266 unsure, s, mtime_boundary = lfdirstate.status(
266 267 match, subrepos=[], ignored=False, clean=False, unknown=False
267 268 )
268 269 modified, clean = s.modified, s.clean
269 270 wctx = repo[None]
270 271 for lfile in unsure:
271 272 try:
272 273 fctx = pctx[standin(lfile)]
273 274 except LookupError:
274 275 fctx = None
275 276 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
276 277 modified.append(lfile)
277 278 else:
278 279 clean.append(lfile)
279 280 st = wctx[lfile].lstat()
280 281 mode = st.st_mode
281 282 size = st.st_size
282 283 mtime = timestamp.reliable_mtime_of(st, mtime_boundary)
283 284 if mtime is not None:
284 285 cache_data = (mode, size, mtime)
285 286 lfdirstate.set_clean(lfile, cache_data)
286 287 return s
287 288
288 289
289 290 def listlfiles(repo, rev=None, matcher=None):
290 291 """return a list of largefiles in the working copy or the
291 292 specified changeset"""
292 293
293 294 if matcher is None:
294 295 matcher = getstandinmatcher(repo)
295 296
296 297 # ignore unknown files in working directory
297 298 return [
298 299 splitstandin(f)
299 300 for f in repo[rev].walk(matcher)
300 301 if rev is not None or repo.dirstate.get_entry(f).any_tracked
301 302 ]
302 303
303 304
304 305 def instore(repo, hash, forcelocal=False):
305 306 '''Return true if a largefile with the given hash exists in the store'''
306 307 return os.path.exists(storepath(repo, hash, forcelocal))
307 308
308 309
309 310 def storepath(repo, hash, forcelocal=False):
310 311 """Return the correct location in the repository largefiles store for a
311 312 file with the given hash."""
312 313 if not forcelocal and repo.shared():
313 314 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
314 315 return repo.vfs.join(longname, hash)
315 316
316 317
317 318 def findstorepath(repo, hash):
318 319 """Search through the local store path(s) to find the file for the given
319 320 hash. If the file is not found, its path in the primary store is returned.
320 321 The return value is a tuple of (path, exists(path)).
321 322 """
322 323 # For shared repos, the primary store is in the share source. But for
323 324 # backward compatibility, force a lookup in the local store if it wasn't
324 325 # found in the share source.
325 326 path = storepath(repo, hash, False)
326 327
327 328 if instore(repo, hash):
328 329 return (path, True)
329 330 elif repo.shared() and instore(repo, hash, True):
330 331 return storepath(repo, hash, True), True
331 332
332 333 return (path, False)
333 334
334 335
335 336 def copyfromcache(repo, hash, filename):
336 337 """Copy the specified largefile from the repo or system cache to
337 338 filename in the repository. Return true on success or false if the
338 339 file was not found in either cache (which should not happen:
339 340 this is meant to be called only after ensuring that the needed
340 341 largefile exists in the cache)."""
341 342 wvfs = repo.wvfs
342 343 path = findfile(repo, hash)
343 344 if path is None:
344 345 return False
345 346 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
346 347 # The write may fail before the file is fully written, but we
347 348 # don't use atomic writes in the working copy.
348 349 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
349 350 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
350 351 if gothash != hash:
351 352 repo.ui.warn(
352 353 _(b'%s: data corruption in %s with hash %s\n')
353 354 % (filename, path, gothash)
354 355 )
355 356 wvfs.unlink(filename)
356 357 return False
357 358 return True
358 359
359 360
360 361 def copytostore(repo, ctx, file, fstandin):
361 362 wvfs = repo.wvfs
362 363 hash = readasstandin(ctx[fstandin])
363 364 if instore(repo, hash):
364 365 return
365 366 if wvfs.exists(file):
366 367 copytostoreabsolute(repo, wvfs.join(file), hash)
367 368 else:
368 369 repo.ui.warn(
369 370 _(b"%s: largefile %s not available from local store\n")
370 371 % (file, hash)
371 372 )
372 373
373 374
374 375 def copyalltostore(repo, node):
375 376 '''Copy all largefiles in a given revision to the store'''
376 377
377 378 ctx = repo[node]
378 379 for filename in ctx.files():
379 380 realfile = splitstandin(filename)
380 381 if realfile is not None and filename in ctx.manifest():
381 382 copytostore(repo, ctx, realfile, filename)
382 383
383 384
384 385 def copytostoreabsolute(repo, file, hash):
385 386 if inusercache(repo.ui, hash):
386 387 link(usercachepath(repo.ui, hash), storepath(repo, hash))
387 388 else:
388 389 util.makedirs(os.path.dirname(storepath(repo, hash)))
389 390 with open(file, b'rb') as srcf:
390 391 with util.atomictempfile(
391 392 storepath(repo, hash), createmode=repo.store.createmode
392 393 ) as dstf:
393 394 for chunk in util.filechunkiter(srcf):
394 395 dstf.write(chunk)
395 396 linktousercache(repo, hash)
396 397
397 398
398 399 def linktousercache(repo, hash):
399 400 """Link / copy the largefile with the specified hash from the store
400 401 to the cache."""
401 402 path = usercachepath(repo.ui, hash)
402 403 link(storepath(repo, hash), path)
403 404
404 405
405 406 def getstandinmatcher(repo, rmatcher=None):
406 407 '''Return a match object that applies rmatcher to the standin directory'''
407 408 wvfs = repo.wvfs
408 409 standindir = shortname
409 410
410 411 # no warnings about missing files or directories
411 412 badfn = lambda f, msg: None
412 413
413 414 if rmatcher and not rmatcher.always():
414 415 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
415 416 if not pats:
416 417 pats = [wvfs.join(standindir)]
417 418 match = scmutil.match(repo[None], pats, badfn=badfn)
418 419 else:
419 420 # no patterns: relative to repo root
420 421 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
421 422 return match
422 423
423 424
424 425 def composestandinmatcher(repo, rmatcher):
425 426 """Return a matcher that accepts standins corresponding to the
426 427 files accepted by rmatcher. Pass the list of files in the matcher
427 428 as the paths specified by the user."""
428 429 smatcher = getstandinmatcher(repo, rmatcher)
429 430 isstandin = smatcher.matchfn
430 431
431 432 def composedmatchfn(f):
432 433 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
433 434
434 435 smatcher.matchfn = composedmatchfn
435 436
436 437 return smatcher
437 438
438 439
439 440 def standin(filename):
440 441 """Return the repo-relative path to the standin for the specified big
441 442 file."""
442 443 # Notes:
443 444 # 1) Some callers want an absolute path, but for instance addlargefiles
444 445 # needs it repo-relative so it can be passed to repo[None].add(). So
445 446 # leave it up to the caller to use repo.wjoin() to get an absolute path.
446 447 # 2) Join with '/' because that's what dirstate always uses, even on
447 448 # Windows. Change existing separator to '/' first in case we are
448 449 # passed filenames from an external source (like the command line).
449 450 return shortnameslash + util.pconvert(filename)
450 451
451 452
452 453 def isstandin(filename):
453 454 """Return true if filename is a big file standin. filename must be
454 455 in Mercurial's internal form (slash-separated)."""
455 456 return filename.startswith(shortnameslash)
456 457
457 458
458 459 def splitstandin(filename):
459 460 # Split on / because that's what dirstate always uses, even on Windows.
460 461 # Change local separator to / first just in case we are passed filenames
461 462 # from an external source (like the command line).
462 463 bits = util.pconvert(filename).split(b'/', 1)
463 464 if len(bits) == 2 and bits[0] == shortname:
464 465 return bits[1]
465 466 else:
466 467 return None
467 468
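# A round-trip sketch: standin() and splitstandin() are inverses for
# slash-separated names, and non-standin paths split to None.
def _example_standin_roundtrip():
    assert standin(b'data/big.bin') == b'.hglf/data/big.bin'
    assert splitstandin(standin(b'data/big.bin')) == b'data/big.bin'
    assert splitstandin(b'data/big.bin') is None  # not under .hglf/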
468 469
469 470 def updatestandin(repo, lfile, standin):
470 471 """Re-calculate hash value of lfile and write it into standin
471 472
472 473 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
473 474 """
474 475 file = repo.wjoin(lfile)
475 476 if repo.wvfs.exists(lfile):
476 477 hash = hashfile(file)
477 478 executable = getexecutable(file)
478 479 writestandin(repo, standin, hash, executable)
479 480 else:
480 481 raise error.Abort(_(b'%s: file not found!') % lfile)
481 482
482 483
483 484 def readasstandin(fctx):
484 485 """read hex hash from given filectx of standin file
485 486
486 487 This encapsulates how "standin" data is stored into storage layer."""
487 488 return fctx.data().strip()
488 489
489 490
490 491 def writestandin(repo, standin, hash, executable):
491 492 '''write hash to <repo.root>/<standin>'''
492 493 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
493 494
494 495
495 496 def copyandhash(instream, outfile):
496 497 """Read bytes from instream (iterable) and write them to outfile,
497 498 computing the SHA-1 hash of the data along the way. Return the hash."""
498 499 hasher = hashutil.sha1(b'')
499 500 for data in instream:
500 501 hasher.update(data)
501 502 outfile.write(data)
502 503 return hex(hasher.digest())
503 504
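# A self-contained sketch: hash while copying, avoiding a second read
# pass over the data (in-memory buffers stand in for real files):
def _example_copyandhash():
    import io

    src, dst = io.BytesIO(b'payload'), io.BytesIO()
    digest = copyandhash(util.filechunkiter(src), dst)
    assert dst.getvalue() == b'payload'
    return digest  # 40-character hex SHA-1 of the copied data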
504 505
505 506 def hashfile(file):
506 507 if not os.path.exists(file):
507 508 return b''
508 509 with open(file, b'rb') as fd:
509 510 return hexsha1(fd)
510 511
511 512
512 513 def getexecutable(filename):
513 514 mode = os.stat(filename).st_mode
514 515 return (
515 516 (mode & stat.S_IXUSR)
516 517 and (mode & stat.S_IXGRP)
517 518 and (mode & stat.S_IXOTH)
518 519 )
519 520
520 521
521 522 def urljoin(first, second, *arg):
522 523 def join(left, right):
523 524 if not left.endswith(b'/'):
524 525 left += b'/'
525 526 if right.startswith(b'/'):
526 527 right = right[1:]
527 528 return left + right
528 529
529 530 url = join(first, second)
530 531 for a in arg:
531 532 url = join(url, a)
532 533 return url
533 534
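# A joining sketch: exactly one slash is kept at each boundary,
# regardless of how the pieces are delimited.
def _example_urljoin():
    url = urljoin(b'http://example.com', b'/largefiles/', b'/abc123')
    assert url == b'http://example.com/largefiles/abc123'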
534 535
535 536 def hexsha1(fileobj):
536 537 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
537 538 object"""
538 539 h = hashutil.sha1()
539 540 for chunk in util.filechunkiter(fileobj):
540 541 h.update(chunk)
541 542 return hex(h.digest())
542 543
543 544
544 545 def httpsendfile(ui, filename):
545 546 return httpconnection.httpsendfile(ui, filename, b'rb')
546 547
547 548
548 549 def unixpath(path):
549 550 '''Return a version of path normalized for use with the lfdirstate.'''
550 551 return util.pconvert(os.path.normpath(path))
551 552
552 553
553 554 def islfilesrepo(repo):
554 555 '''Return true if the repo is a largefile repo.'''
555 556 if b'largefiles' in repo.requirements and any(
556 557 shortnameslash in f[1] for f in repo.store.datafiles()
557 558 ):
558 559 return True
559 560
560 561 return any(openlfdirstate(repo.ui, repo, False))
561 562
562 563
563 564 class storeprotonotcapable(Exception):
564 565 def __init__(self, storetypes):
565 566 self.storetypes = storetypes
566 567
567 568
568 569 def getstandinsstate(repo):
569 570 standins = []
570 571 matcher = getstandinmatcher(repo)
571 572 wctx = repo[None]
572 573 for standin in repo.dirstate.walk(
573 574 matcher, subrepos=[], unknown=False, ignored=False
574 575 ):
575 576 lfile = splitstandin(standin)
576 577 try:
577 578 hash = readasstandin(wctx[standin])
578 579 except IOError:
579 580 hash = None
580 581 standins.append((lfile, hash))
581 582 return standins
582 583
583 584
584 585 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
585 586 lfstandin = standin(lfile)
586 587 if lfstandin not in repo.dirstate:
587 588 lfdirstate.hacky_extension_update_file(
588 589 lfile,
589 590 p1_tracked=False,
590 591 wc_tracked=False,
591 592 )
592 593 else:
593 594 entry = repo.dirstate.get_entry(lfstandin)
594 595 lfdirstate.hacky_extension_update_file(
595 596 lfile,
596 597 wc_tracked=entry.tracked,
597 598 p1_tracked=entry.p1_tracked,
598 599 p2_info=entry.p2_info,
599 600 possibly_dirty=True,
600 601 )
601 602
602 603
603 604 def markcommitted(orig, ctx, node):
604 605 repo = ctx.repo()
605 606
606 607 with repo.dirstate.changing_parents(repo):
607 608 orig(node)
608 609
609 610 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
610 611 # because files coming from the 2nd parent are omitted in the latter.
611 612 #
612 613 # The former should be used to get targets of "synclfdirstate",
613 614 # because such files:
614 615 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
615 616 # - have to be marked as "n" after commit, but
616 617 # - aren't listed in "repo[node].files()"
617 618
618 619 lfdirstate = openlfdirstate(repo.ui, repo)
619 620 for f in ctx.files():
620 621 lfile = splitstandin(f)
621 622 if lfile is not None:
622 623 synclfdirstate(repo, lfdirstate, lfile, False)
623 624
624 625 # As part of committing, copy all of the largefiles into the cache.
625 626 #
626 627 # Using "node" instead of "ctx" implies an additional "repo[node]"
627 628 # lookup during copyalltostore(), but it lets us omit a redundant
628 629 # check for files coming from the 2nd parent, which should already
629 630 # exist in the store after a merge.
630 631 copyalltostore(repo, node)
631 632
632 633
633 634 def getlfilestoupdate(oldstandins, newstandins):
634 635 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
635 636 filelist = []
636 637 for f in changedstandins:
637 638 if f[0] not in filelist:
638 639 filelist.append(f[0])
639 640 return filelist
640 641
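# A sketch of the symmetric-difference logic: any (lfile, hash) pair
# present on only one side marks that largefile as needing an update.
def _example_getlfilestoupdate():
    old = [(b'a.bin', b'11'), (b'b.bin', b'22')]
    new = [(b'a.bin', b'11'), (b'b.bin', b'33'), (b'c.bin', b'44')]
    assert sorted(getlfilestoupdate(old, new)) == [b'b.bin', b'c.bin']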
641 642
642 643 def getlfilestoupload(repo, missing, addfunc):
643 644 makeprogress = repo.ui.makeprogress
644 645 with makeprogress(
645 646 _(b'finding outgoing largefiles'),
646 647 unit=_(b'revisions'),
647 648 total=len(missing),
648 649 ) as progress:
649 650 for i, n in enumerate(missing):
650 651 progress.update(i)
651 652 parents = [p for p in repo[n].parents() if p != repo.nullid]
652 653
653 654 with lfstatus(repo, value=False):
654 655 ctx = repo[n]
655 656
656 657 files = set(ctx.files())
657 658 if len(parents) == 2:
658 659 mc = ctx.manifest()
659 660 mp1 = ctx.p1().manifest()
660 661 mp2 = ctx.p2().manifest()
661 662 for f in mp1:
662 663 if f not in mc:
663 664 files.add(f)
664 665 for f in mp2:
665 666 if f not in mc:
666 667 files.add(f)
667 668 for f in mc:
668 669 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
669 670 files.add(f)
670 671 for fn in files:
671 672 if isstandin(fn) and fn in ctx:
672 673 addfunc(fn, readasstandin(ctx[fn]))
673 674
674 675
675 676 def updatestandinsbymatch(repo, match):
676 677 """Update standins in the working directory according to the specified match
677 678
678 679 This returns a (possibly modified) ``match`` object to be used for
679 680 the subsequent commit process.
680 681 """
681 682
682 683 ui = repo.ui
683 684
684 685 # Case 1: user calls commit with no specific files or
685 686 # include/exclude patterns: refresh and commit all files that
686 687 # are "dirty".
687 688 if match is None or match.always():
688 689 # Spend a bit of time here to get a list of files we know
689 690 # are modified so we can compare only against those.
690 691 # It can cost a lot of time (several seconds)
691 692 # otherwise to update all standins if the largefiles are
692 693 # large.
693 694 lfdirstate = openlfdirstate(ui, repo)
694 695 dirtymatch = matchmod.always()
695 696 unsure, s, mtime_boundary = lfdirstate.status(
696 697 dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False
697 698 )
698 699 modifiedfiles = unsure + s.modified + s.added + s.removed
699 700 lfiles = listlfiles(repo)
700 701 # this only loops through largefiles that exist (not
701 702 # removed/renamed)
702 703 for lfile in lfiles:
703 704 if lfile in modifiedfiles:
704 705 fstandin = standin(lfile)
705 706 if repo.wvfs.exists(fstandin):
706 707 # this handles the case where a rebase is being
707 708 # performed and the working copy is not updated
708 709 # yet.
709 710 if repo.wvfs.exists(lfile):
710 711 updatestandin(repo, lfile, fstandin)
711 712
712 713 return match
713 714
714 715 lfiles = listlfiles(repo)
715 716 match._files = repo._subdirlfs(match.files(), lfiles)
716 717
717 718 # Case 2: user calls commit with specified patterns: refresh
718 719 # any matching big files.
719 720 smatcher = composestandinmatcher(repo, match)
720 721 standins = repo.dirstate.walk(
721 722 smatcher, subrepos=[], unknown=False, ignored=False
722 723 )
723 724
724 725 # No matching big files: get out of the way and pass control to
725 726 # the usual commit() method.
726 727 if not standins:
727 728 return match
728 729
729 730 # Refresh all matching big files. It's possible that the
730 731 # commit will end up failing, in which case the big files will
731 732 # stay refreshed. No harm done: the user modified them and
732 733 # asked to commit them, so sooner or later we're going to
733 734 # refresh the standins. Might as well leave them refreshed.
734 735 lfdirstate = openlfdirstate(ui, repo)
735 736 for fstandin in standins:
736 737 lfile = splitstandin(fstandin)
737 738 if lfdirstate.get_entry(lfile).tracked:
738 739 updatestandin(repo, lfile, fstandin)
739 740
740 741 # Cook up a new matcher that only matches regular files or
741 742 # standins corresponding to the big files requested by the
742 743 # user. Have to modify _files to prevent commit() from
743 744 # complaining "not tracked" for big files.
744 745 match = copy.copy(match)
745 746 origmatchfn = match.matchfn
746 747
747 748 # Check both the list of largefiles and the list of
748 749 # standins because if a largefile was removed, it
749 750 # won't be in the list of largefiles at this point
750 751 match._files += sorted(standins)
751 752
752 753 actualfiles = []
753 754 for f in match._files:
754 755 fstandin = standin(f)
755 756
756 757 # For largefiles, only one of the normal and standin should be
757 758 # committed (except if one of them is a remove). In the case of a
758 759 # standin removal, drop the normal file if it is unknown to dirstate.
759 760 # Thus, skip plain largefile names but keep the standin.
760 761 if f in lfiles or fstandin in standins:
761 762 if not repo.dirstate.get_entry(fstandin).removed:
762 763 if not repo.dirstate.get_entry(f).removed:
763 764 continue
764 765 elif not repo.dirstate.get_entry(f).any_tracked:
765 766 continue
766 767
767 768 actualfiles.append(f)
768 769 match._files = actualfiles
769 770
770 771 def matchfn(f):
771 772 if origmatchfn(f):
772 773 return f not in lfiles
773 774 else:
774 775 return f in standins
775 776
776 777 match.matchfn = matchfn
777 778
778 779 return match
779 780
780 781
781 782 class automatedcommithook:
782 783 """Stateful hook to update standins at the first commit after resuming
783 784
784 785 For efficiency, updating standins in the working directory should
785 786 be avoided during automated committing (like rebase, transplant and
786 787 so on), because they should already be updated before committing.
787 788
788 789 But the first commit after resuming automated committing (e.g.
789 790 ``rebase --continue``) should update them, because largefiles
790 791 may have been modified manually.
791 792 """
792 793
793 794 def __init__(self, resuming):
794 795 self.resuming = resuming
795 796
796 797 def __call__(self, repo, match):
797 798 if self.resuming:
798 799 self.resuming = False # avoids updating at subsequent commits
799 800 return updatestandinsbymatch(repo, match)
800 801 else:
801 802 return match
802 803
803 804
804 805 def getstatuswriter(ui, repo, forcibly=None):
805 806 """Return the function to write largefiles-specific status out
806 807
807 808 If ``forcibly`` is ``None``, this returns the last element of
808 809 ``repo._lfstatuswriters`` as "default" writer function.
809 810
810 811 Otherwise, this returns the function to always write out (or
811 812 ignore if ``not forcibly``) status.
812 813 """
813 814 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
814 815 return repo._lfstatuswriters[-1]
815 816 else:
816 817 if forcibly:
817 818 return ui.status # forcibly WRITE OUT
818 819 else:
819 820 return lambda *msg, **opts: None # forcibly IGNORE
mercurial/dirstate.py
@@ -1,1694 +1,1701 @@
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import collections
10 10 import contextlib
11 11 import os
12 12 import stat
13 13 import uuid
14 14
15 15 from .i18n import _
16 16 from .pycompat import delattr
17 17
18 18 from hgdemandimport import tracing
19 19
20 20 from . import (
21 21 dirstatemap,
22 22 encoding,
23 23 error,
24 24 match as matchmod,
25 25 node,
26 26 pathutil,
27 27 policy,
28 28 pycompat,
29 29 scmutil,
30 30 util,
31 31 )
32 32
33 33 from .dirstateutils import (
34 34 timestamp,
35 35 )
36 36
37 37 from .interfaces import (
38 38 dirstate as intdirstate,
39 39 util as interfaceutil,
40 40 )
41 41
42 42 parsers = policy.importmod('parsers')
43 43 rustmod = policy.importrust('dirstate')
44 44
45 45 HAS_FAST_DIRSTATE_V2 = rustmod is not None
46 46
47 47 propertycache = util.propertycache
48 48 filecache = scmutil.filecache
49 49 _rangemask = dirstatemap.rangemask
50 50
51 51 DirstateItem = dirstatemap.DirstateItem
52 52
53 53
54 54 class repocache(filecache):
55 55 """filecache for files in .hg/"""
56 56
57 57 def join(self, obj, fname):
58 58 return obj._opener.join(fname)
59 59
60 60
61 61 class rootcache(filecache):
62 62 """filecache for files in the repository root"""
63 63
64 64 def join(self, obj, fname):
65 65 return obj._join(fname)
66 66
67 67
68 68 def check_invalidated(func):
69 69 """check that the wrapped func is called on a non-invalidated dirstate
70 70
71 71 The dirstate is in an "invalidated state" after an error occurred during
72 72 its modification, and remains so until we exit the top-level scope that
73 73 framed such change.
74 74 """
75 75
76 76 def wrap(self, *args, **kwargs):
77 77 if self._invalidated_context:
78 78 msg = 'calling `%s` after the dirstate was invalidated'
79 79 msg %= func.__name__
80 80 raise error.ProgrammingError(msg)
81 81 return func(self, *args, **kwargs)
82 82
83 83 return wrap
84 84
85 85
86 86 def requires_changing_parents(func):
87 87 def wrap(self, *args, **kwargs):
88 88 if not self.is_changing_parents:
89 89 msg = 'calling `%s` outside of a changing_parents context'
90 90 msg %= func.__name__
91 91 raise error.ProgrammingError(msg)
92 92 return func(self, *args, **kwargs)
93 93
94 94 return check_invalidated(wrap)
95 95
96 96
97 97 def requires_changing_files(func):
98 98 def wrap(self, *args, **kwargs):
99 99 if not self.is_changing_files:
100 100 msg = 'calling `%s` outside of a `changing_files`'
101 101 msg %= func.__name__
102 102 raise error.ProgrammingError(msg)
103 103 return func(self, *args, **kwargs)
104 104
105 105 return check_invalidated(wrap)
106 106
107 107
108 108 def requires_changing_any(func):
109 109 def wrap(self, *args, **kwargs):
110 110 if not self.is_changing_any:
111 111 msg = 'calling `%s` outside of a changing context'
112 112 msg %= func.__name__
113 113 raise error.ProgrammingError(msg)
114 114 return func(self, *args, **kwargs)
115 115
116 116 return check_invalidated(wrap)
117 117
118 118
119 119 def requires_not_changing_parents(func):
120 120 def wrap(self, *args, **kwargs):
121 121 if self.is_changing_parents:
122 122 msg = 'calling `%s` inside of a changing_parents context'
123 123 msg %= func.__name__
124 124 raise error.ProgrammingError(msg)
125 125 return func(self, *args, **kwargs)
126 126
127 127 return check_invalidated(wrap)
128 128
129 129
130 130 CHANGE_TYPE_PARENTS = "parents"
131 131 CHANGE_TYPE_FILES = "files"
132 132
133 133
134 134 @interfaceutil.implementer(intdirstate.idirstate)
135 135 class dirstate:
136
137 # used by largefiles to avoid overwriting the transaction callback
138 _tr_key_suffix = b''
139
136 140 def __init__(
137 141 self,
138 142 opener,
139 143 ui,
140 144 root,
141 145 validate,
142 146 sparsematchfn,
143 147 nodeconstants,
144 148 use_dirstate_v2,
145 149 use_tracked_hint=False,
146 150 ):
147 151 """Create a new dirstate object.
148 152
149 153 opener is an open()-like callable that can be used to open the
150 154 dirstate file; root is the root of the directory tracked by
151 155 the dirstate.
152 156 """
153 157 self._use_dirstate_v2 = use_dirstate_v2
154 158 self._use_tracked_hint = use_tracked_hint
155 159 self._nodeconstants = nodeconstants
156 160 self._opener = opener
157 161 self._validate = validate
158 162 self._root = root
159 163 # Either build a sparse-matcher or None if sparse is disabled
160 164 self._sparsematchfn = sparsematchfn
161 165 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
162 166 # a UNC path pointing to a root share (issue4557)
163 167 self._rootdir = pathutil.normasprefix(root)
164 168 # True if any internal state may be different
165 169 self._dirty = False
166 170 # True if the set of tracked files may be different
167 171 self._dirty_tracked_set = False
168 172 self._ui = ui
169 173 self._filecache = {}
170 174 # nesting level of `changing_parents` context
171 175 self._changing_level = 0
172 176 # the change currently underway
173 177 self._change_type = None
174 178 # True if the current dirstate-changing operations have been
175 179 # invalidated (used to make sure all nested contexts have been exited)
176 180 self._invalidated_context = False
177 181 self._attached_to_a_transaction = False
178 182 self._filename = b'dirstate'
179 183 self._filename_th = b'dirstate-tracked-hint'
180 184 self._pendingfilename = b'%s.pending' % self._filename
181 185 self._plchangecallbacks = {}
182 186 self._origpl = None
183 187 self._mapcls = dirstatemap.dirstatemap
184 188 # Access and cache cwd early, so we don't access it for the first time
185 189 # after a working-copy update caused it to not exist (accessing it then
186 190 # raises an exception).
187 191 self._cwd
188 192
189 193 def prefetch_parents(self):
190 194 """make sure the parents are loaded
191 195
192 196 Used to avoid a race condition.
193 197 """
194 198 self._pl
195 199
196 200 @contextlib.contextmanager
197 201 @check_invalidated
198 202 def _changing(self, repo, change_type):
199 203 if repo.currentwlock() is None:
200 204 msg = b"trying to change the dirstate without holding the wlock"
201 205 raise error.ProgrammingError(msg)
202 206
203 207 has_tr = repo.currenttransaction() is not None
204 208 if not has_tr and self._changing_level == 0 and self._dirty:
205 209 msg = "entering a changing context, but dirstate is already dirty"
206 210 raise error.ProgrammingError(msg)
207 211
208 212 assert self._changing_level >= 0
209 213 # different type of change are mutually exclusive
210 214 if self._change_type is None:
211 215 assert self._changing_level == 0
212 216 self._change_type = change_type
213 217 elif self._change_type != change_type:
214 218 msg = (
215 219 'trying to open "%s" dirstate-changing context while a "%s" is'
216 220 ' already open'
217 221 )
218 222 msg %= (change_type, self._change_type)
219 223 raise error.ProgrammingError(msg)
220 224 should_write = False
221 225 self._changing_level += 1
222 226 try:
223 227 yield
224 228 except: # re-raises
225 229 self.invalidate() # this will set `_invalidated_context`
226 230 raise
227 231 finally:
228 232 assert self._changing_level > 0
229 233 self._changing_level -= 1
230 234 # If the dirstate is being invalidated, call invalidate again.
231 235 # This will throw away anything added by an upper context and
232 236 # reset the `_invalidated_context` flag when relevant
233 237 if self._changing_level <= 0:
234 238 self._change_type = None
235 239 assert self._changing_level == 0
236 240 if self._invalidated_context:
237 241 # make sure we invalidate anything an upper context might
238 242 # have changed.
239 243 self.invalidate()
240 244 else:
241 245 should_write = self._changing_level <= 0
242 246 tr = repo.currenttransaction()
243 247 if has_tr != (tr is not None):
244 248 if has_tr:
245 249 m = "transaction vanished while changing dirstate"
246 250 else:
247 251 m = "transaction appeared while changing dirstate"
248 252 raise error.ProgrammingError(m)
249 253 if should_write:
250 254 self.write(tr)
251 255
252 256 @contextlib.contextmanager
253 257 def changing_parents(self, repo):
254 258 with self._changing(repo, CHANGE_TYPE_PARENTS) as c:
255 259 yield c
256 260
257 261 @contextlib.contextmanager
258 262 def changing_files(self, repo):
259 263 with self._changing(repo, CHANGE_TYPE_FILES) as c:
260 264 yield c
261 265
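# A call-shape sketch: the wlock must already be held, and parent
# mutations go through the context manager so the dirstate is written
# (or invalidated) exactly once when the outermost scope exits.
def _example_changing_parents(repo, p1, p2):
    with repo.wlock():
        with repo.dirstate.changing_parents(repo):
            repo.dirstate.setparents(p1, p2)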
262 266 # here to help migration to the new code
263 267 def parentchange(self):
264 268 msg = (
265 269 "Mercurial 6.4 and later requires call to "
266 270 "`dirstate.changing_parents(repo)`"
267 271 )
268 272 raise error.ProgrammingError(msg)
269 273
270 274 @property
271 275 def is_changing_any(self):
272 276 """Returns true if the dirstate is in the middle of a set of changes.
273 277
274 278 This returns True for any kind of change.
275 279 """
276 280 return self._changing_level > 0
277 281
278 282 def pendingparentchange(self):
279 283 return self.is_changing_parent()
280 284
281 285 def is_changing_parent(self):
282 286 """Returns true if the dirstate is in the middle of a set of changes
283 287 that modify the dirstate parent.
284 288 """
285 289 self._ui.deprecwarn(b"dirstate.is_changing_parents", b"6.5")
286 290 return self.is_changing_parents
287 291
288 292 @property
289 293 def is_changing_parents(self):
290 294 """Returns true if the dirstate is in the middle of a set of changes
291 295 that modify the dirstate parent.
292 296 """
293 297 if self._changing_level <= 0:
294 298 return False
295 299 return self._change_type == CHANGE_TYPE_PARENTS
296 300
297 301 @property
298 302 def is_changing_files(self):
299 303 """Returns true if the dirstate is in the middle of a set of changes
300 304 that modify the files tracked or their sources.
301 305 """
302 306 if self._changing_level <= 0:
303 307 return False
304 308 return self._change_type == CHANGE_TYPE_FILES
305 309
306 310 @propertycache
307 311 def _map(self):
308 312 """Return the dirstate contents (see documentation for dirstatemap)."""
309 313 self._map = self._mapcls(
310 314 self._ui,
311 315 self._opener,
312 316 self._root,
313 317 self._nodeconstants,
314 318 self._use_dirstate_v2,
315 319 )
316 320 return self._map
317 321
318 322 @property
319 323 def _sparsematcher(self):
320 324 """The matcher for the sparse checkout.
321 325
322 326 The working directory may not include every file from a manifest. The
323 327 matcher obtained by this property will match a path if it is to be
324 328 included in the working directory.
325 329
326 330 When sparse is disabled, return None.
327 331 """
328 332 if self._sparsematchfn is None:
329 333 return None
330 334 # TODO there is potential to cache this property. For now, the matcher
331 335 # is resolved on every access. (But the called function does use a
332 336 # cache to keep the lookup fast.)
333 337 return self._sparsematchfn()
334 338
335 339 @repocache(b'branch')
336 340 def _branch(self):
337 341 try:
338 342 return self._opener.read(b"branch").strip() or b"default"
339 343 except FileNotFoundError:
340 344 return b"default"
341 345
342 346 @property
343 347 def _pl(self):
344 348 return self._map.parents()
345 349
346 350 def hasdir(self, d):
347 351 return self._map.hastrackeddir(d)
348 352
349 353 @rootcache(b'.hgignore')
350 354 def _ignore(self):
351 355 files = self._ignorefiles()
352 356 if not files:
353 357 return matchmod.never()
354 358
355 359 pats = [b'include:%s' % f for f in files]
356 360 return matchmod.match(self._root, b'', [], pats, warn=self._ui.warn)
357 361
358 362 @propertycache
359 363 def _slash(self):
360 364 return self._ui.configbool(b'ui', b'slash') and pycompat.ossep != b'/'
361 365
362 366 @propertycache
363 367 def _checklink(self):
364 368 return util.checklink(self._root)
365 369
366 370 @propertycache
367 371 def _checkexec(self):
368 372 return bool(util.checkexec(self._root))
369 373
370 374 @propertycache
371 375 def _checkcase(self):
372 376 return not util.fscasesensitive(self._join(b'.hg'))
373 377
374 378 def _join(self, f):
375 379 # much faster than os.path.join()
376 380 # it's safe because f is always a relative path
377 381 return self._rootdir + f
378 382
379 383 def flagfunc(self, buildfallback):
380 384 """build a callable that returns flags associated with a filename
381 385
382 386 The information is extracted from three possible layers:
383 387 1. the file system if it supports the information
384 388 2. the "fallback" information stored in the dirstate if any
385 389 3. a more expensive mechanism inferring the flags from the parents.
386 390 """
387 391
388 392 # small hack to cache the result of buildfallback()
389 393 fallback_func = []
390 394
391 395 def get_flags(x):
392 396 entry = None
393 397 fallback_value = None
394 398 try:
395 399 st = os.lstat(self._join(x))
396 400 except OSError:
397 401 return b''
398 402
399 403 if self._checklink:
400 404 if util.statislink(st):
401 405 return b'l'
402 406 else:
403 407 entry = self.get_entry(x)
404 408 if entry.has_fallback_symlink:
405 409 if entry.fallback_symlink:
406 410 return b'l'
407 411 else:
408 412 if not fallback_func:
409 413 fallback_func.append(buildfallback())
410 414 fallback_value = fallback_func[0](x)
411 415 if b'l' in fallback_value:
412 416 return b'l'
413 417
414 418 if self._checkexec:
415 419 if util.statisexec(st):
416 420 return b'x'
417 421 else:
418 422 if entry is None:
419 423 entry = self.get_entry(x)
420 424 if entry.has_fallback_exec:
421 425 if entry.fallback_exec:
422 426 return b'x'
423 427 else:
424 428 if fallback_value is None:
425 429 if not fallback_func:
426 430 fallback_func.append(buildfallback())
427 431 fallback_value = fallback_func[0](x)
428 432 if b'x' in fallback_value:
429 433 return b'x'
430 434 return b''
431 435
432 436 return get_flags
433 437
434 438 @propertycache
435 439 def _cwd(self):
436 440 # internal config: ui.forcecwd
437 441 forcecwd = self._ui.config(b'ui', b'forcecwd')
438 442 if forcecwd:
439 443 return forcecwd
440 444 return encoding.getcwd()
441 445
442 446 def getcwd(self):
443 447 """Return the path from which a canonical path is calculated.
444 448
445 449 This path should be used to resolve file patterns or to convert
446 450 canonical paths back to file paths for display. It shouldn't be
447 451 used to get real file paths. Use vfs functions instead.
448 452 """
449 453 cwd = self._cwd
450 454 if cwd == self._root:
451 455 return b''
452 456 # self._root ends with a path separator if self._root is '/' or 'C:\'
453 457 rootsep = self._root
454 458 if not util.endswithsep(rootsep):
455 459 rootsep += pycompat.ossep
456 460 if cwd.startswith(rootsep):
457 461 return cwd[len(rootsep) :]
458 462 else:
459 463 # we're outside the repo. return an absolute path.
460 464 return cwd
461 465
462 466 def pathto(self, f, cwd=None):
463 467 if cwd is None:
464 468 cwd = self.getcwd()
465 469 path = util.pathto(self._root, cwd, f)
466 470 if self._slash:
467 471 return util.pconvert(path)
468 472 return path
469 473
470 474 def get_entry(self, path):
471 475 """return a DirstateItem for the associated path"""
472 476 entry = self._map.get(path)
473 477 if entry is None:
474 478 return DirstateItem()
475 479 return entry
476 480
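# A lookup sketch: get_entry never raises for unknown paths; it returns
# an empty DirstateItem whose tracked-ness checks are all False.
def _example_get_entry(ds):
    entry = ds.get_entry(b'no/such/file')
    assert not entry.any_tracked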
477 481 def __contains__(self, key):
478 482 return key in self._map
479 483
480 484 def __iter__(self):
481 485 return iter(sorted(self._map))
482 486
483 487 def items(self):
484 488 return self._map.items()
485 489
486 490 iteritems = items
487 491
488 492 def parents(self):
489 493 return [self._validate(p) for p in self._pl]
490 494
491 495 def p1(self):
492 496 return self._validate(self._pl[0])
493 497
494 498 def p2(self):
495 499 return self._validate(self._pl[1])
496 500
497 501 @property
498 502 def in_merge(self):
499 503 """True if a merge is in progress"""
500 504 return self._pl[1] != self._nodeconstants.nullid
501 505
502 506 def branch(self):
503 507 return encoding.tolocal(self._branch)
504 508
505 509 @requires_changing_parents
506 510 def setparents(self, p1, p2=None):
507 511 """Set dirstate parents to p1 and p2.
508 512
509 513 When moving from two parents to one, "merged" entries are
510 514 adjusted to normal, and previous copy records are discarded and
511 515 returned by the call.
512 516
513 517 See localrepo.setparents()
514 518 """
515 519 if p2 is None:
516 520 p2 = self._nodeconstants.nullid
517 521 if self._changing_level == 0:
518 522 raise ValueError(
519 523 b"cannot set dirstate parent outside of "
520 524 b"dirstate.changing_parents context manager"
521 525 )
522 526
523 527 self._dirty = True
524 528 oldp2 = self._pl[1]
525 529 if self._origpl is None:
526 530 self._origpl = self._pl
527 531 nullid = self._nodeconstants.nullid
528 532 # True if we need to fold p2 related state back to a linear case
529 533 fold_p2 = oldp2 != nullid and p2 == nullid
530 534 return self._map.setparents(p1, p2, fold_p2=fold_p2)
531 535
532 536 def setbranch(self, branch):
533 537 self.__class__._branch.set(self, encoding.fromlocal(branch))
534 538 f = self._opener(b'branch', b'w', atomictemp=True, checkambig=True)
535 539 try:
536 540 f.write(self._branch + b'\n')
537 541 f.close()
538 542
539 543 # make sure filecache has the correct stat info for _branch after
540 544 # replacing the underlying file
541 545 ce = self._filecache[b'_branch']
542 546 if ce:
543 547 ce.refresh()
544 548 except: # re-raises
545 549 f.discard()
546 550 raise
547 551
548 552 def invalidate(self):
549 553 """Causes the next access to reread the dirstate.
550 554
551 555 This is different from localrepo.invalidatedirstate() because it always
552 556 rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
553 557 check whether the dirstate has changed before rereading it."""
554 558
555 559 for a in ("_map", "_branch", "_ignore"):
556 560 if a in self.__dict__:
557 561 delattr(self, a)
558 562 self._dirty = False
559 563 self._dirty_tracked_set = False
560 564 self._invalidated_context = (
561 565 self._changing_level > 0 or self._attached_to_a_transaction
562 566 )
563 567 self._origpl = None
564 568
565 569 @requires_changing_any
566 570 def copy(self, source, dest):
567 571 """Mark dest as a copy of source. Unmark dest if source is None."""
568 572 if source == dest:
569 573 return
570 574 self._dirty = True
571 575 if source is not None:
572 576 self._check_sparse(source)
573 577 self._map.copymap[dest] = source
574 578 else:
575 579 self._map.copymap.pop(dest, None)
576 580
577 581 def copied(self, file):
578 582 return self._map.copymap.get(file, None)
579 583
580 584 def copies(self):
581 585 return self._map.copymap
582 586
583 587 @requires_changing_files
584 588 def set_tracked(self, filename, reset_copy=False):
585 589 """a "public" method for generic code to mark a file as tracked
586 590
587 591 This function is to be called outside of the "update/merge" case. For
588 592 example by a command like `hg add X`.
589 593
590 594 if reset_copy is set, any existing copy information will be dropped.
591 595
592 596 return True if the file was previously untracked, False otherwise.
593 597 """
594 598 self._dirty = True
595 599 entry = self._map.get(filename)
596 600 if entry is None or not entry.tracked:
597 601 self._check_new_tracked_filename(filename)
598 602 pre_tracked = self._map.set_tracked(filename)
599 603 if reset_copy:
600 604 self._map.copymap.pop(filename, None)
601 605 if pre_tracked:
602 606 self._dirty_tracked_set = True
603 607 return pre_tracked
604 608
605 609 @requires_changing_files
606 610 def set_untracked(self, filename):
607 611 """a "public" method for generic code to mark a file as untracked
608 612
609 613 This function is to be called outside of the "update/merge" case. For
610 614 example by a command like `hg remove X`.
611 615
612 616 return True if the file was previously tracked, False otherwise.
613 617 """
614 618 ret = self._map.set_untracked(filename)
615 619 if ret:
616 620 self._dirty = True
617 621 self._dirty_tracked_set = True
618 622 return ret
619 623
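# A call-shape sketch: both toggles must run inside a changing_files
# context (enforced by the decorators above); the return value reports
# whether the tracked state actually flipped.
def _example_toggle_tracking(repo, filename):
    with repo.wlock(), repo.dirstate.changing_files(repo):
        return repo.dirstate.set_tracked(filename)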
620 624 @requires_not_changing_parents
621 625 def set_clean(self, filename, parentfiledata):
622 626 """record that the current state of the file on disk is known to be clean"""
623 627 self._dirty = True
624 628 if not self._map[filename].tracked:
625 629 self._check_new_tracked_filename(filename)
626 630 (mode, size, mtime) = parentfiledata
627 631 self._map.set_clean(filename, mode, size, mtime)
628 632
629 633 @requires_not_changing_parents
630 634 def set_possibly_dirty(self, filename):
631 635 """record that the current state of the file on disk is unknown"""
632 636 self._dirty = True
633 637 self._map.set_possibly_dirty(filename)
634 638
635 639 @requires_changing_parents
636 640 def update_file_p1(
637 641 self,
638 642 filename,
639 643 p1_tracked,
640 644 ):
641 645 """Set a file as tracked in the parent (or not)
642 646
643 647 This is to be called when adjusting the dirstate to a new parent
644 648 after a history-rewriting operation.
645 649
646 650 It should not be called during a merge (p2 != nullid) and only within
647 651 a `with dirstate.changing_parents(repo):` context.
648 652 """
649 653 if self.in_merge:
650 654 msg = b'update_file_reference should not be called when merging'
651 655 raise error.ProgrammingError(msg)
652 656 entry = self._map.get(filename)
653 657 if entry is None:
654 658 wc_tracked = False
655 659 else:
656 660 wc_tracked = entry.tracked
657 661 if not (p1_tracked or wc_tracked):
658 662 # the file is no longer relevant to anyone
659 663 if self._map.get(filename) is not None:
660 664 self._map.reset_state(filename)
661 665 self._dirty = True
662 666 elif (not p1_tracked) and wc_tracked:
663 667 if entry is not None and entry.added:
664 668 return # avoid dropping copy information (maybe?)
665 669
666 670 self._map.reset_state(
667 671 filename,
668 672 wc_tracked,
669 673 p1_tracked,
670 674 # the underlying reference might have changed, we will have to
671 675 # check it.
672 676 has_meaningful_mtime=False,
673 677 )
674 678
675 679 @requires_changing_parents
676 680 def update_file(
677 681 self,
678 682 filename,
679 683 wc_tracked,
680 684 p1_tracked,
681 685 p2_info=False,
682 686 possibly_dirty=False,
683 687 parentfiledata=None,
684 688 ):
685 689 """update the information about a file in the dirstate
686 690
687 691 This is to be called when the dirstate's parents change, to keep track
688 692 of the file's situation with regard to the working copy and its parent.
689 693
690 694 This function must be called within a `dirstate.changing_parents` context.
691 695
692 696 note: the API is at an early stage and we might need to adjust it
693 697 depending on what information ends up being relevant and useful to
694 698 other processing.
695 699 """
696 700 self._update_file(
697 701 filename=filename,
698 702 wc_tracked=wc_tracked,
699 703 p1_tracked=p1_tracked,
700 704 p2_info=p2_info,
701 705 possibly_dirty=possibly_dirty,
702 706 parentfiledata=parentfiledata,
703 707 )
704 708
705 709 # XXX since this makes the dirstate dirty, we should enforce that it is
706 710 # done within an appropriate change-context that scopes the change and
707 711 # ensures it eventually gets written to disk (or rolled back)
708 712 def hacky_extension_update_file(self, *args, **kwargs):
709 713 """NEVER USE THIS, YOU DO NOT NEED IT
710 714
711 715 This function is a variant of "update_file" to be called by a small set
712 716 of extensions; it also adjusts the internal state of a file, but can be
713 717 called outside a `changing_parents` context.
714 718
715 719 A very small number of extensions meddle with the working copy content
716 720 in a way that requires adjusting the dirstate accordingly. At the time
717 721 this comment was written, they are:
718 722 - keyword,
719 723 - largefile,
720 724 PLEASE DO NOT GROW THIS LIST ANY FURTHER.
721 725
722 726 This function could probably be replaced by a more semantic one (like
723 727 "adjust expected size" or "always revalidate file content", etc.);
724 728 however, at the time this was written, that was too much of a detour
725 729 to be considered.
726 730 """
727 731 self._update_file(
728 732 *args,
729 733 **kwargs,
730 734 )
731 735
732 736 def _update_file(
733 737 self,
734 738 filename,
735 739 wc_tracked,
736 740 p1_tracked,
737 741 p2_info=False,
738 742 possibly_dirty=False,
739 743 parentfiledata=None,
740 744 ):
741 745
742 746 # note: I do not think we need to double check name clash here since we
743 747 # are in an update/merge case that should already have taken care of
744 748 # this. The test agrees
745 749
746 750 self._dirty = True
747 751 old_entry = self._map.get(filename)
748 752 if old_entry is None:
749 753 prev_tracked = False
750 754 else:
751 755 prev_tracked = old_entry.tracked
752 756 if prev_tracked != wc_tracked:
753 757 self._dirty_tracked_set = True
754 758
755 759 self._map.reset_state(
756 760 filename,
757 761 wc_tracked,
758 762 p1_tracked,
759 763 p2_info=p2_info,
760 764 has_meaningful_mtime=not possibly_dirty,
761 765 parentfiledata=parentfiledata,
762 766 )
763 767
764 768 def _check_new_tracked_filename(self, filename):
765 769 scmutil.checkfilename(filename)
766 770 if self._map.hastrackeddir(filename):
767 771 msg = _(b'directory %r already in dirstate')
768 772 msg %= pycompat.bytestr(filename)
769 773 raise error.Abort(msg)
770 774 # shadows
771 775 for d in pathutil.finddirs(filename):
772 776 if self._map.hastrackeddir(d):
773 777 break
774 778 entry = self._map.get(d)
775 779 if entry is not None and not entry.removed:
776 780 msg = _(b'file %r in dirstate clashes with %r')
777 781 msg %= (pycompat.bytestr(d), pycompat.bytestr(filename))
778 782 raise error.Abort(msg)
779 783 self._check_sparse(filename)
780 784
781 785 def _check_sparse(self, filename):
782 786 """Check that a filename is inside the sparse profile"""
783 787 sparsematch = self._sparsematcher
784 788 if sparsematch is not None and not sparsematch.always():
785 789 if not sparsematch(filename):
786 790 msg = _(b"cannot add '%s' - it is outside the sparse checkout")
787 791 hint = _(
788 792 b'include file with `hg debugsparse --include <pattern>` or use '
789 793 b'`hg add -s <file>` to include file directory while adding'
790 794 )
791 795 raise error.Abort(msg % filename, hint=hint)
792 796
793 797 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
794 798 if exists is None:
795 799 exists = os.path.lexists(os.path.join(self._root, path))
796 800 if not exists:
797 801 # Maybe a path component exists
798 802 if not ignoremissing and b'/' in path:
799 803 d, f = path.rsplit(b'/', 1)
800 804 d = self._normalize(d, False, ignoremissing, None)
801 805 folded = d + b"/" + f
802 806 else:
803 807 # No path components, preserve original case
804 808 folded = path
805 809 else:
806 810 # recursively normalize leading directory components
807 811 # against dirstate
808 812 if b'/' in normed:
809 813 d, f = normed.rsplit(b'/', 1)
810 814 d = self._normalize(d, False, ignoremissing, True)
811 815 r = self._root + b"/" + d
812 816 folded = d + b"/" + util.fspath(f, r)
813 817 else:
814 818 folded = util.fspath(normed, self._root)
815 819 storemap[normed] = folded
816 820
817 821 return folded
818 822
819 823 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
820 824 normed = util.normcase(path)
821 825 folded = self._map.filefoldmap.get(normed, None)
822 826 if folded is None:
823 827 if isknown:
824 828 folded = path
825 829 else:
826 830 folded = self._discoverpath(
827 831 path, normed, ignoremissing, exists, self._map.filefoldmap
828 832 )
829 833 return folded
830 834
831 835 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
832 836 normed = util.normcase(path)
833 837 folded = self._map.filefoldmap.get(normed, None)
834 838 if folded is None:
835 839 folded = self._map.dirfoldmap.get(normed, None)
836 840 if folded is None:
837 841 if isknown:
838 842 folded = path
839 843 else:
840 844 # store discovered result in dirfoldmap so that future
841 845 # normalizefile calls don't start matching directories
842 846 folded = self._discoverpath(
843 847 path, normed, ignoremissing, exists, self._map.dirfoldmap
844 848 )
845 849 return folded
846 850
847 851 def normalize(self, path, isknown=False, ignoremissing=False):
848 852 """
849 853 normalize the case of a pathname when on a casefolding filesystem
850 854
851 855 isknown specifies whether the filename came from walking the
852 856 disk, to avoid extra filesystem access.
853 857
854 858 If ignoremissing is True, missing paths are returned
855 859 unchanged. Otherwise, we try harder to normalize possibly
856 860 existing path components.
857 861
858 862 The normalized case is determined based on the following precedence:
859 863
860 864 - version of name already stored in the dirstate
861 865 - version of name stored on disk
862 866 - version provided via command arguments
863 867 """
864 868
865 869 if self._checkcase:
866 870 return self._normalize(path, isknown, ignoremissing)
867 871 return path
868 872
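# A minimal standalone sketch of the fold-map lookup used by the two
# _normalize* helpers above; `normcase` is reduced to naive lower-casing
# (the real util.normcase is platform dependent), and `foldmap` plays the
# role of self._map.filefoldmap. Names are illustrative.
def normalize(foldmap, path, isknown=False):
    normed = path.lower()
    folded = foldmap.get(normed)
    if folded is None:
        # unknown to the dirstate: trust the caller's spelling, and cache
        # the discovery so later lookups stay consistent
        folded = path
        if not isknown:
            foldmap[normed] = folded
    return folded

foldmap = {b'readme.txt': b'README.txt'}
assert normalize(foldmap, b'ReadMe.TXT') == b'README.txt'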
869 873 # XXX this method is barely used; as a result:
870 874 # - its semantics are unclear
871 875 # - do we really need it?
872 876 @requires_changing_parents
873 877 def clear(self):
874 878 self._map.clear()
875 879 self._dirty = True
876 880
877 881 @requires_changing_parents
878 882 def rebuild(self, parent, allfiles, changedfiles=None):
879 883 matcher = self._sparsematcher
880 884 if matcher is not None and not matcher.always():
881 885 # should not add non-matching files
882 886 allfiles = [f for f in allfiles if matcher(f)]
883 887 if changedfiles:
884 888 changedfiles = [f for f in changedfiles if matcher(f)]
885 889
886 890 if changedfiles is not None:
887 891 # these files will be deleted from the dirstate when they are
888 892 # not found to be in allfiles
889 893 dirstatefilestoremove = {f for f in self if not matcher(f)}
890 894 changedfiles = dirstatefilestoremove.union(changedfiles)
891 895
892 896 if changedfiles is None:
893 897 # Rebuild entire dirstate
894 898 to_lookup = allfiles
895 899 to_drop = []
896 900 self.clear()
897 901 elif len(changedfiles) < 10:
898 902 # Avoid turning allfiles into a set, which can be expensive if it's
899 903 # large.
900 904 to_lookup = []
901 905 to_drop = []
902 906 for f in changedfiles:
903 907 if f in allfiles:
904 908 to_lookup.append(f)
905 909 else:
906 910 to_drop.append(f)
907 911 else:
908 912 changedfilesset = set(changedfiles)
909 913 to_lookup = changedfilesset & set(allfiles)
910 914 to_drop = changedfilesset - to_lookup
911 915
912 916 if self._origpl is None:
913 917 self._origpl = self._pl
914 918 self._map.setparents(parent, self._nodeconstants.nullid)
915 919
916 920 for f in to_lookup:
917 921 if self.in_merge:
918 922 self.set_tracked(f)
919 923 else:
920 924 self._map.reset_state(
921 925 f,
922 926 wc_tracked=True,
923 927 p1_tracked=True,
924 928 )
925 929 for f in to_drop:
926 930 self._map.reset_state(f)
927 931
928 932 self._dirty = True
929 933
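# A sketch of the to_lookup/to_drop split used by rebuild() above: for a
# handful of changed files, scan allfiles directly; past that, build sets
# and trade memory for O(1) membership tests. Names are illustrative.
def partition_changes(allfiles, changedfiles):
    if len(changedfiles) < 10:
        to_lookup = [f for f in changedfiles if f in allfiles]
        to_drop = [f for f in changedfiles if f not in allfiles]
    else:
        changed = set(changedfiles)
        to_lookup = changed & set(allfiles)
        to_drop = changed - to_lookup
    return to_lookup, to_drop

lookup, drop = partition_changes([b'a', b'b'], [b'b', b'c'])
assert list(lookup) == [b'b'] and list(drop) == [b'c']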
930 934 def identity(self):
931 935 """Return identity of dirstate itself to detect changing in storage
932 936
933 937 If identity of previous dirstate is equal to this, writing
934 938 changes based on the former dirstate out can keep consistency.
935 939 """
936 940 return self._map.identity
937 941
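# A hedged usage sketch of identity(): remember the identity observed when
# the dirstate was read, and only write if storage is unchanged after a
# fresh read. The surrounding calls are illustrative, not a fixed API.
def write_if_unchanged(dirstate, tr, seen_identity):
    dirstate.invalidate()  # drop cached state to force a fresh read
    if dirstate.identity() == seen_identity:
        dirstate.write(tr)
        return True
    return False  # storage changed underneath us; the caller must retry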
938 942 def write(self, tr):
939 943 if not self._dirty:
940 944 return
941 945 # make sure we don't request a write of invalidated content
942 946 # XXX move before the dirty check once `unlock` stop calling `write`
943 947 assert not self._invalidated_context
944 948
945 949 write_key = self._use_tracked_hint and self._dirty_tracked_set
946 950 if tr:
947 951
948 952 def on_abort(tr):
949 953 self._attached_to_a_transaction = False
950 954 self.invalidate()
951 955
952 956 # make sure we invalidate the current change on abort
953 957 if tr is not None:
954 tr.addabort(b'dirstate-invalidate', on_abort)
958 tr.addabort(
959 b'dirstate-invalidate%s' % self._tr_key_suffix,
960 on_abort,
961 )
955 962
956 963 self._attached_to_a_transaction = True
957 964
958 965 def on_success(f):
959 966 self._attached_to_a_transaction = False
960 967 self._writedirstate(tr, f)
961 968
962 969 # delay writing in-memory changes out
963 970 tr.addfilegenerator(
964 b'dirstate-1-main',
971 b'dirstate-1-main%s' % self._tr_key_suffix,
965 972 (self._filename,),
966 973 on_success,
967 974 location=b'plain',
968 975 post_finalize=True,
969 976 )
970 977 if write_key:
971 978 tr.addfilegenerator(
972 b'dirstate-2-key-post',
979 b'dirstate-2-key-post%s' % self._tr_key_suffix,
973 980 (self._filename_th,),
974 981 lambda f: self._write_tracked_hint(tr, f),
975 982 location=b'plain',
976 983 post_finalize=True,
977 984 )
978 985 return
979 986
980 987 file = lambda f: self._opener(f, b"w", atomictemp=True, checkambig=True)
981 988 with file(self._filename) as f:
982 989 self._writedirstate(tr, f)
983 990 if write_key:
984 991 # we update the key-file after writing to make sure readers have a
985 992 # key that matches the newly written content
986 993 with file(self._filename_th) as f:
987 994 self._write_tracked_hint(tr, f)
988 995
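# A minimal sketch of the delayed-write pattern above, with a toy
# transaction standing in for Mercurial's: file generators are registered
# up front but only materialized when the transaction closes, so an abort
# never leaves a half-written dirstate behind. All names here are
# illustrative, not the real transaction API.
class TinyTransaction:
    def __init__(self):
        self._generators = []
        self._abort_hooks = []

    def addabort(self, category, callback):
        self._abort_hooks.append((category, callback))

    def addfilegenerator(self, genid, filenames, writer, **kwargs):
        self._generators.append((filenames, writer))

    def abort(self):
        for category, callback in self._abort_hooks:
            callback(self)  # e.g. invalidate in-memory state

    def close(self):
        for filenames, writer in self._generators:
            for name in filenames:
                with open(name, 'wb') as f:
                    writer(f)

tr = TinyTransaction()
tr.addfilegenerator(b'dirstate-1-main', ('dirstate-demo',),
                    lambda f: f.write(b'serialized dirstate bytes'))
tr.close()  # the file only hits disk here, not at registration time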
989 996 def delete_tracked_hint(self):
990 997 """remove the tracked_hint file
991 998
992 999 To be used by format downgrade operations"""
993 1000 self._opener.unlink(self._filename_th)
994 1001 self._use_tracked_hint = False
995 1002
996 1003 def addparentchangecallback(self, category, callback):
997 1004 """add a callback to be called when the wd parents are changed
998 1005
999 1006 Callback will be called with the following arguments:
1000 1007 dirstate, (oldp1, oldp2), (newp1, newp2)
1001 1008
1002 1009 Category is a unique identifier to allow overwriting an old callback
1003 1010 with a newer callback.
1004 1011 """
1005 1012 self._plchangecallbacks[category] = callback
1006 1013
1007 1014 def _writedirstate(self, tr, st):
1008 1015 # make sure we don't write invalidated content
1009 1016 assert not self._invalidated_context
1010 1017 # notify callbacks about parents change
1011 1018 if self._origpl is not None and self._origpl != self._pl:
1012 1019 for c, callback in sorted(self._plchangecallbacks.items()):
1013 1020 callback(self, self._origpl, self._pl)
1014 1021 self._origpl = None
1015 1022 self._map.write(tr, st)
1016 1023 self._dirty = False
1017 1024 self._dirty_tracked_set = False
1018 1025
1019 1026 def _write_tracked_hint(self, tr, f):
1020 1027 key = node.hex(uuid.uuid4().bytes)
1021 1028 f.write(b"1\n%s\n" % key) # 1 is the format version
1022 1029
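# The hint file written above holds two lines: a format version and a
# random key; a reader that sees a new key knows the set of tracked files
# may have changed. A standalone rendering of that format:
import uuid

def tracked_hint_bytes():
    key = uuid.uuid4().bytes.hex().encode('ascii')
    return b"1\n%s\n" % key  # 1 is the format version

hint = tracked_hint_bytes()
assert hint.startswith(b"1\n") and len(hint.split(b"\n")) == 3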
1023 1030 def _dirignore(self, f):
1024 1031 if self._ignore(f):
1025 1032 return True
1026 1033 for p in pathutil.finddirs(f):
1027 1034 if self._ignore(p):
1028 1035 return True
1029 1036 return False
1030 1037
1031 1038 def _ignorefiles(self):
1032 1039 files = []
1033 1040 if os.path.exists(self._join(b'.hgignore')):
1034 1041 files.append(self._join(b'.hgignore'))
1035 1042 for name, path in self._ui.configitems(b"ui"):
1036 1043 if name == b'ignore' or name.startswith(b'ignore.'):
1037 1044 # we need to use os.path.join here rather than self._join
1038 1045 # because path is arbitrary and user-specified
1039 1046 files.append(os.path.join(self._rootdir, util.expandpath(path)))
1040 1047 return files
1041 1048
1042 1049 def _ignorefileandline(self, f):
1043 1050 files = collections.deque(self._ignorefiles())
1044 1051 visited = set()
1045 1052 while files:
1046 1053 i = files.popleft()
1047 1054 patterns = matchmod.readpatternfile(
1048 1055 i, self._ui.warn, sourceinfo=True
1049 1056 )
1050 1057 for pattern, lineno, line in patterns:
1051 1058 kind, p = matchmod._patsplit(pattern, b'glob')
1052 1059 if kind == b"subinclude":
1053 1060 if p not in visited:
1054 1061 files.append(p)
1055 1062 continue
1056 1063 m = matchmod.match(
1057 1064 self._root, b'', [], [pattern], warn=self._ui.warn
1058 1065 )
1059 1066 if m(f):
1060 1067 return (i, lineno, line)
1061 1068 visited.add(i)
1062 1069 return (None, -1, b"")
1063 1070
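# A standalone sketch of the breadth-first scan above: walk the ignore
# files, queueing each `subinclude` target once, until some pattern
# matches the file. `read_patterns` and `matches` are stand-ins for the
# matchmod helpers used in the real method.
import collections

def find_ignore_source(start_files, read_patterns, matches, target):
    files = collections.deque(start_files)
    visited = set()
    while files:
        current = files.popleft()
        for pattern, lineno, line in read_patterns(current):
            if pattern.startswith(b'subinclude:'):
                sub = pattern[len(b'subinclude:'):]
                if sub not in visited:
                    files.append(sub)
            elif matches(pattern, target):
                return current, lineno, line
        visited.add(current)
    return None, -1, b""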
1064 1071 def _walkexplicit(self, match, subrepos):
1065 1072 """Get stat data about the files explicitly specified by match.
1066 1073
1067 1074 Return a triple (results, dirsfound, dirsnotfound).
1068 1075 - results is a mapping from filename to stat result. It also contains
1069 1076 listings mapping subrepos and .hg to None.
1070 1077 - dirsfound is a list of files found to be directories.
1071 1078 - dirsnotfound is a list of files that the dirstate thinks are
1072 1079 directories and that were not found."""
1073 1080
1074 1081 def badtype(mode):
1075 1082 kind = _(b'unknown')
1076 1083 if stat.S_ISCHR(mode):
1077 1084 kind = _(b'character device')
1078 1085 elif stat.S_ISBLK(mode):
1079 1086 kind = _(b'block device')
1080 1087 elif stat.S_ISFIFO(mode):
1081 1088 kind = _(b'fifo')
1082 1089 elif stat.S_ISSOCK(mode):
1083 1090 kind = _(b'socket')
1084 1091 elif stat.S_ISDIR(mode):
1085 1092 kind = _(b'directory')
1086 1093 return _(b'unsupported file type (type is %s)') % kind
1087 1094
1088 1095 badfn = match.bad
1089 1096 dmap = self._map
1090 1097 lstat = os.lstat
1091 1098 getkind = stat.S_IFMT
1092 1099 dirkind = stat.S_IFDIR
1093 1100 regkind = stat.S_IFREG
1094 1101 lnkkind = stat.S_IFLNK
1095 1102 join = self._join
1096 1103 dirsfound = []
1097 1104 foundadd = dirsfound.append
1098 1105 dirsnotfound = []
1099 1106 notfoundadd = dirsnotfound.append
1100 1107
1101 1108 if not match.isexact() and self._checkcase:
1102 1109 normalize = self._normalize
1103 1110 else:
1104 1111 normalize = None
1105 1112
1106 1113 files = sorted(match.files())
1107 1114 subrepos.sort()
1108 1115 i, j = 0, 0
1109 1116 while i < len(files) and j < len(subrepos):
1110 1117 subpath = subrepos[j] + b"/"
1111 1118 if files[i] < subpath:
1112 1119 i += 1
1113 1120 continue
1114 1121 while i < len(files) and files[i].startswith(subpath):
1115 1122 del files[i]
1116 1123 j += 1
1117 1124
1118 1125 if not files or b'' in files:
1119 1126 files = [b'']
1120 1127 # constructing the foldmap is expensive, so don't do it for the
1121 1128 # common case where files is ['']
1122 1129 normalize = None
1123 1130 results = dict.fromkeys(subrepos)
1124 1131 results[b'.hg'] = None
1125 1132
1126 1133 for ff in files:
1127 1134 if normalize:
1128 1135 nf = normalize(ff, False, True)
1129 1136 else:
1130 1137 nf = ff
1131 1138 if nf in results:
1132 1139 continue
1133 1140
1134 1141 try:
1135 1142 st = lstat(join(nf))
1136 1143 kind = getkind(st.st_mode)
1137 1144 if kind == dirkind:
1138 1145 if nf in dmap:
1139 1146 # file replaced by dir on disk but still in dirstate
1140 1147 results[nf] = None
1141 1148 foundadd((nf, ff))
1142 1149 elif kind == regkind or kind == lnkkind:
1143 1150 results[nf] = st
1144 1151 else:
1145 1152 badfn(ff, badtype(kind))
1146 1153 if nf in dmap:
1147 1154 results[nf] = None
1148 1155 except OSError as inst:
1149 1156 # nf not found on disk - it is dirstate only
1150 1157 if nf in dmap: # does it exactly match a missing file?
1151 1158 results[nf] = None
1152 1159 else: # does it match a missing directory?
1153 1160 if self._map.hasdir(nf):
1154 1161 notfoundadd(nf)
1155 1162 else:
1156 1163 badfn(ff, encoding.strtolocal(inst.strerror))
1157 1164
1158 1165 # match.files() may contain explicitly-specified paths that shouldn't
1159 1166 # be taken; drop them from the list of files found. dirsfound/notfound
1160 1167 # aren't filtered here because they will be tested later.
1161 1168 if match.anypats():
1162 1169 for f in list(results):
1163 1170 if f == b'.hg' or f in subrepos:
1164 1171 # keep sentinel to disable further out-of-repo walks
1165 1172 continue
1166 1173 if not match(f):
1167 1174 del results[f]
1168 1175
1169 1176 # Case insensitive filesystems cannot rely on lstat() failing to detect
1170 1177 # a case-only rename. Prune the stat object for any file that does not
1171 1178 # match the case in the filesystem, if there are multiple files that
1172 1179 # normalize to the same path.
1173 1180 if match.isexact() and self._checkcase:
1174 1181 normed = {}
1175 1182
1176 1183 for f, st in results.items():
1177 1184 if st is None:
1178 1185 continue
1179 1186
1180 1187 nc = util.normcase(f)
1181 1188 paths = normed.get(nc)
1182 1189
1183 1190 if paths is None:
1184 1191 paths = set()
1185 1192 normed[nc] = paths
1186 1193
1187 1194 paths.add(f)
1188 1195
1189 1196 for norm, paths in normed.items():
1190 1197 if len(paths) > 1:
1191 1198 for path in paths:
1192 1199 folded = self._discoverpath(
1193 1200 path, norm, True, None, self._map.dirfoldmap
1194 1201 )
1195 1202 if path != folded:
1196 1203 results[path] = None
1197 1204
1198 1205 return results, dirsfound, dirsnotfound
1199 1206
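# A sketch of the sorted two-pointer pass above that drops explicitly
# listed files living inside a subrepo; both inputs are sorted lists of
# byte strings, as in _walkexplicit.
def drop_subrepo_files(files, subrepos):
    files = sorted(files)
    subrepos = sorted(subrepos)
    i, j = 0, 0
    while i < len(files) and j < len(subrepos):
        subpath = subrepos[j] + b"/"
        if files[i] < subpath:
            i += 1
            continue
        # everything under the subrepo is the subrepo's business, not ours
        while i < len(files) and files[i].startswith(subpath):
            del files[i]
        j += 1
    return files

assert drop_subrepo_files([b'a', b'sub/x', b'z'], [b'sub']) == [b'a', b'z']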
1200 1207 def walk(self, match, subrepos, unknown, ignored, full=True):
1201 1208 """
1202 1209 Walk recursively through the directory tree, finding all files
1203 1210 matched by match.
1204 1211
1205 1212 If full is False, maybe skip some known-clean files.
1206 1213
1207 1214 Return a dict mapping filename to stat-like object (either
1208 1215 mercurial.osutil.stat instance or return value of os.stat()).
1209 1216
1210 1217 """
1211 1218 # full is a flag that extensions that hook into walk can use -- this
1212 1219 # implementation doesn't use it at all. This satisfies the contract
1213 1220 # because we only guarantee a "maybe".
1214 1221
1215 1222 if ignored:
1216 1223 ignore = util.never
1217 1224 dirignore = util.never
1218 1225 elif unknown:
1219 1226 ignore = self._ignore
1220 1227 dirignore = self._dirignore
1221 1228 else:
1222 1229 # if not unknown and not ignored, drop dir recursion and step 2
1223 1230 ignore = util.always
1224 1231 dirignore = util.always
1225 1232
1226 1233 if self._sparsematchfn is not None:
1227 1234 em = matchmod.exact(match.files())
1228 1235 sm = matchmod.unionmatcher([self._sparsematcher, em])
1229 1236 match = matchmod.intersectmatchers(match, sm)
1230 1237
1231 1238 matchfn = match.matchfn
1232 1239 matchalways = match.always()
1233 1240 matchtdir = match.traversedir
1234 1241 dmap = self._map
1235 1242 listdir = util.listdir
1236 1243 lstat = os.lstat
1237 1244 dirkind = stat.S_IFDIR
1238 1245 regkind = stat.S_IFREG
1239 1246 lnkkind = stat.S_IFLNK
1240 1247 join = self._join
1241 1248
1242 1249 exact = skipstep3 = False
1243 1250 if match.isexact(): # match.exact
1244 1251 exact = True
1245 1252 dirignore = util.always # skip step 2
1246 1253 elif match.prefix(): # match.match, no patterns
1247 1254 skipstep3 = True
1248 1255
1249 1256 if not exact and self._checkcase:
1250 1257 normalize = self._normalize
1251 1258 normalizefile = self._normalizefile
1252 1259 skipstep3 = False
1253 1260 else:
1254 1261 normalize = self._normalize
1255 1262 normalizefile = None
1256 1263
1257 1264 # step 1: find all explicit files
1258 1265 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
1259 1266 if matchtdir:
1260 1267 for d in work:
1261 1268 matchtdir(d[0])
1262 1269 for d in dirsnotfound:
1263 1270 matchtdir(d)
1264 1271
1265 1272 skipstep3 = skipstep3 and not (work or dirsnotfound)
1266 1273 work = [d for d in work if not dirignore(d[0])]
1267 1274
1268 1275 # step 2: visit subdirectories
1269 1276 def traverse(work, alreadynormed):
1270 1277 wadd = work.append
1271 1278 while work:
1272 1279 tracing.counter('dirstate.walk work', len(work))
1273 1280 nd = work.pop()
1274 1281 visitentries = match.visitchildrenset(nd)
1275 1282 if not visitentries:
1276 1283 continue
1277 1284 if visitentries == b'this' or visitentries == b'all':
1278 1285 visitentries = None
1279 1286 skip = None
1280 1287 if nd != b'':
1281 1288 skip = b'.hg'
1282 1289 try:
1283 1290 with tracing.log('dirstate.walk.traverse listdir %s', nd):
1284 1291 entries = listdir(join(nd), stat=True, skip=skip)
1285 1292 except (PermissionError, FileNotFoundError) as inst:
1286 1293 match.bad(
1287 1294 self.pathto(nd), encoding.strtolocal(inst.strerror)
1288 1295 )
1289 1296 continue
1290 1297 for f, kind, st in entries:
1291 1298 # Some matchers may return files in the visitentries set,
1292 1299 # instead of 'this', if the matcher explicitly mentions them
1293 1300 # and is not an exactmatcher. This is acceptable; we do not
1294 1301 # make any hard assumptions about file-or-directory below
1295 1302 # based on the presence of `f` in visitentries. If
1296 1303 # visitchildrenset returned a set, we can always skip the
1297 1304 # entries *not* in the set it provided regardless of whether
1298 1305 # they're actually a file or a directory.
1299 1306 if visitentries and f not in visitentries:
1300 1307 continue
1301 1308 if normalizefile:
1302 1309 # even though f might be a directory, we're only
1303 1310 # interested in comparing it to files currently in the
1304 1311 # dmap -- therefore normalizefile is enough
1305 1312 nf = normalizefile(
1306 1313 nd and (nd + b"/" + f) or f, True, True
1307 1314 )
1308 1315 else:
1309 1316 nf = nd and (nd + b"/" + f) or f
1310 1317 if nf not in results:
1311 1318 if kind == dirkind:
1312 1319 if not ignore(nf):
1313 1320 if matchtdir:
1314 1321 matchtdir(nf)
1315 1322 wadd(nf)
1316 1323 if nf in dmap and (matchalways or matchfn(nf)):
1317 1324 results[nf] = None
1318 1325 elif kind == regkind or kind == lnkkind:
1319 1326 if nf in dmap:
1320 1327 if matchalways or matchfn(nf):
1321 1328 results[nf] = st
1322 1329 elif (matchalways or matchfn(nf)) and not ignore(
1323 1330 nf
1324 1331 ):
1325 1332 # unknown file -- normalize if necessary
1326 1333 if not alreadynormed:
1327 1334 nf = normalize(nf, False, True)
1328 1335 results[nf] = st
1329 1336 elif nf in dmap and (matchalways or matchfn(nf)):
1330 1337 results[nf] = None
1331 1338
1332 1339 for nd, d in work:
1333 1340 # alreadynormed means that traverse() doesn't have to do any
1334 1341 # expensive directory normalization
1335 1342 alreadynormed = not normalize or nd == d
1336 1343 traverse([d], alreadynormed)
1337 1344
1338 1345 for s in subrepos:
1339 1346 del results[s]
1340 1347 del results[b'.hg']
1341 1348
1342 1349 # step 3: visit remaining files from dmap
1343 1350 if not skipstep3 and not exact:
1344 1351 # If a dmap file is not in results yet, it was either
1345 1352 # a) not matching matchfn, b) ignored, c) missing, or d) under a
1346 1353 # symlink directory.
1347 1354 if not results and matchalways:
1348 1355 visit = [f for f in dmap]
1349 1356 else:
1350 1357 visit = [f for f in dmap if f not in results and matchfn(f)]
1351 1358 visit.sort()
1352 1359
1353 1360 if unknown:
1354 1361 # unknown == True means we walked all dirs under the roots
1355 1362 # that weren't ignored, and everything that matched was stat'ed
1356 1363 # and is already in results.
1357 1364 # The rest must thus be ignored or under a symlink.
1358 1365 audit_path = pathutil.pathauditor(self._root, cached=True)
1359 1366
1360 1367 for nf in iter(visit):
1361 1368 # If a stat for the same file was already added with a
1362 1369 # different case, don't add one for this, since that would
1363 1370 # make it appear as if the file exists under both names
1364 1371 # on disk.
1365 1372 if (
1366 1373 normalizefile
1367 1374 and normalizefile(nf, True, True) in results
1368 1375 ):
1369 1376 results[nf] = None
1370 1377 # Report ignored items in the dmap as long as they are not
1371 1378 # under a symlink directory.
1372 1379 elif audit_path.check(nf):
1373 1380 try:
1374 1381 results[nf] = lstat(join(nf))
1375 1382 # file was just ignored, no links, and exists
1376 1383 except OSError:
1377 1384 # file doesn't exist
1378 1385 results[nf] = None
1379 1386 else:
1380 1387 # It's either missing or under a symlink directory
1381 1388 # which we in this case report as missing
1382 1389 results[nf] = None
1383 1390 else:
1384 1391 # We may not have walked the full directory tree above,
1385 1392 # so stat and check everything we missed.
1386 1393 iv = iter(visit)
1387 1394 for st in util.statfiles([join(i) for i in visit]):
1388 1395 results[next(iv)] = st
1389 1396 return results
1390 1397
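# A drastically simplified standalone analogue of walk(): iterative
# traversal returning a path -> stat mapping, skipping '.hg' and any
# directory that contains its own '.hg' (a nested repository), which
# mirrors the skip=b'.hg' argument passed to util.listdir above.
import os

def tiny_walk(root):
    results = {}
    work = ['']
    while work:
        nd = work.pop()
        absdir = os.path.join(root, nd) if nd else root
        if nd and os.path.exists(os.path.join(absdir, '.hg')):
            continue  # nested repository: do not descend into it
        with os.scandir(absdir) as it:
            for entry in it:
                if entry.name == '.hg':
                    continue
                nf = nd + '/' + entry.name if nd else entry.name
                if entry.is_dir(follow_symlinks=False):
                    work.append(nf)
                else:
                    results[nf] = entry.stat(follow_symlinks=False)
    return results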
1391 1398 def _rust_status(self, matcher, list_clean, list_ignored, list_unknown):
1392 1399 if self._sparsematchfn is not None:
1393 1400 em = matchmod.exact(matcher.files())
1394 1401 sm = matchmod.unionmatcher([self._sparsematcher, em])
1395 1402 matcher = matchmod.intersectmatchers(matcher, sm)
1396 1403 # Force Rayon (Rust parallelism library) to respect the number of
1397 1404 # workers. This is a temporary workaround until Rust code knows
1398 1405 # how to read the config file.
1399 1406 numcpus = self._ui.configint(b"worker", b"numcpus")
1400 1407 if numcpus is not None:
1401 1408 encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus)
1402 1409
1403 1410 workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
1404 1411 if not workers_enabled:
1405 1412 encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
1406 1413
1407 1414 (
1408 1415 lookup,
1409 1416 modified,
1410 1417 added,
1411 1418 removed,
1412 1419 deleted,
1413 1420 clean,
1414 1421 ignored,
1415 1422 unknown,
1416 1423 warnings,
1417 1424 bad,
1418 1425 traversed,
1419 1426 dirty,
1420 1427 ) = rustmod.status(
1421 1428 self._map._map,
1422 1429 matcher,
1423 1430 self._rootdir,
1424 1431 self._ignorefiles(),
1425 1432 self._checkexec,
1426 1433 bool(list_clean),
1427 1434 bool(list_ignored),
1428 1435 bool(list_unknown),
1429 1436 bool(matcher.traversedir),
1430 1437 )
1431 1438
1432 1439 self._dirty |= dirty
1433 1440
1434 1441 if matcher.traversedir:
1435 1442 for dir in traversed:
1436 1443 matcher.traversedir(dir)
1437 1444
1438 1445 if self._ui.warn:
1439 1446 for item in warnings:
1440 1447 if isinstance(item, tuple):
1441 1448 file_path, syntax = item
1442 1449 msg = _(b"%s: ignoring invalid syntax '%s'\n") % (
1443 1450 file_path,
1444 1451 syntax,
1445 1452 )
1446 1453 self._ui.warn(msg)
1447 1454 else:
1448 1455 msg = _(b"skipping unreadable pattern file '%s': %s\n")
1449 1456 self._ui.warn(
1450 1457 msg
1451 1458 % (
1452 1459 pathutil.canonpath(
1453 1460 self._rootdir, self._rootdir, item
1454 1461 ),
1455 1462 b"No such file or directory",
1456 1463 )
1457 1464 )
1458 1465
1459 1466 for fn, message in bad:
1460 1467 matcher.bad(fn, encoding.strtolocal(message))
1461 1468
1462 1469 status = scmutil.status(
1463 1470 modified=modified,
1464 1471 added=added,
1465 1472 removed=removed,
1466 1473 deleted=deleted,
1467 1474 unknown=unknown,
1468 1475 ignored=ignored,
1469 1476 clean=clean,
1470 1477 )
1471 1478 return (lookup, status)
1472 1479
1473 1480 # XXX since this can make the dirstate dirty (through rust), we should
1474 1481 # enforce that it is done within an appropriate change-context that scopes
1475 1482 # the change and ensures it eventually gets written to disk (or rolled back)
1476 1483 def status(self, match, subrepos, ignored, clean, unknown):
1477 1484 """Determine the status of the working copy relative to the
1478 1485 dirstate and return a pair of (unsure, status), where status is of type
1479 1486 scmutil.status and:
1480 1487
1481 1488 unsure:
1482 1489 files that might have been modified since the dirstate was
1483 1490 written, but need to be read to be sure (size is the same
1484 1491 but mtime differs)
1485 1492 status.modified:
1486 1493 files that have definitely been modified since the dirstate
1487 1494 was written (different size or mode)
1488 1495 status.clean:
1489 1496 files that have definitely not been modified since the
1490 1497 dirstate was written
1491 1498 """
1492 1499 listignored, listclean, listunknown = ignored, clean, unknown
1493 1500 lookup, modified, added, unknown, ignored = [], [], [], [], []
1494 1501 removed, deleted, clean = [], [], []
1495 1502
1496 1503 dmap = self._map
1497 1504 dmap.preload()
1498 1505
1499 1506 use_rust = True
1500 1507
1501 1508 allowed_matchers = (
1502 1509 matchmod.alwaysmatcher,
1503 1510 matchmod.differencematcher,
1504 1511 matchmod.exactmatcher,
1505 1512 matchmod.includematcher,
1506 1513 matchmod.intersectionmatcher,
1507 1514 matchmod.nevermatcher,
1508 1515 matchmod.unionmatcher,
1509 1516 )
1510 1517
1511 1518 if rustmod is None:
1512 1519 use_rust = False
1513 1520 elif self._checkcase:
1514 1521 # Case-insensitive filesystems are not handled yet
1515 1522 use_rust = False
1516 1523 elif subrepos:
1517 1524 use_rust = False
1518 1525 elif not isinstance(match, allowed_matchers):
1519 1526 # Some matchers have yet to be implemented
1520 1527 use_rust = False
1521 1528
1522 1529 # Get the time from the filesystem so we can disambiguate files that
1523 1530 # appear modified in the present or future.
1524 1531 try:
1525 1532 mtime_boundary = timestamp.get_fs_now(self._opener)
1526 1533 except OSError:
1527 1534 # In largefiles or readonly context
1528 1535 mtime_boundary = None
1529 1536
1530 1537 if use_rust:
1531 1538 try:
1532 1539 res = self._rust_status(
1533 1540 match, listclean, listignored, listunknown
1534 1541 )
1535 1542 return res + (mtime_boundary,)
1536 1543 except rustmod.FallbackError:
1537 1544 pass
1538 1545
1539 1546 def noop(f):
1540 1547 pass
1541 1548
1542 1549 dcontains = dmap.__contains__
1543 1550 dget = dmap.__getitem__
1544 1551 ladd = lookup.append # aka "unsure"
1545 1552 madd = modified.append
1546 1553 aadd = added.append
1547 1554 uadd = unknown.append if listunknown else noop
1548 1555 iadd = ignored.append if listignored else noop
1549 1556 radd = removed.append
1550 1557 dadd = deleted.append
1551 1558 cadd = clean.append if listclean else noop
1552 1559 mexact = match.exact
1553 1560 dirignore = self._dirignore
1554 1561 checkexec = self._checkexec
1555 1562 checklink = self._checklink
1556 1563 copymap = self._map.copymap
1557 1564
1558 1565 # We need to do full walks when either
1559 1566 # - we're listing all clean files, or
1560 1567 # - match.traversedir does something, because match.traversedir should
1561 1568 # be called for every dir in the working dir
1562 1569 full = listclean or match.traversedir is not None
1563 1570 for fn, st in self.walk(
1564 1571 match, subrepos, listunknown, listignored, full=full
1565 1572 ).items():
1566 1573 if not dcontains(fn):
1567 1574 if (listignored or mexact(fn)) and dirignore(fn):
1568 1575 if listignored:
1569 1576 iadd(fn)
1570 1577 else:
1571 1578 uadd(fn)
1572 1579 continue
1573 1580
1574 1581 t = dget(fn)
1575 1582 mode = t.mode
1576 1583 size = t.size
1577 1584
1578 1585 if not st and t.tracked:
1579 1586 dadd(fn)
1580 1587 elif t.p2_info:
1581 1588 madd(fn)
1582 1589 elif t.added:
1583 1590 aadd(fn)
1584 1591 elif t.removed:
1585 1592 radd(fn)
1586 1593 elif t.tracked:
1587 1594 if not checklink and t.has_fallback_symlink:
1588 1595 # If the file system does not support symlink, the mode
1589 1596 # might not be correctly stored in the dirstate, so do not
1590 1597 # trust it.
1591 1598 ladd(fn)
1592 1599 elif not checkexec and t.has_fallback_exec:
1593 1600 # If the file system does not support exec bits, the mode
1594 1601 # might not be correctly stored in the dirstate, so do not
1595 1602 # trust it.
1596 1603 ladd(fn)
1597 1604 elif (
1598 1605 size >= 0
1599 1606 and (
1600 1607 (size != st.st_size and size != st.st_size & _rangemask)
1601 1608 or ((mode ^ st.st_mode) & 0o100 and checkexec)
1602 1609 )
1603 1610 or fn in copymap
1604 1611 ):
1605 1612 if stat.S_ISLNK(st.st_mode) and size != st.st_size:
1606 1613 # issue6456: Size returned may be longer due to
1607 1614 # encryption on EXT-4 fscrypt, undecided.
1608 1615 ladd(fn)
1609 1616 else:
1610 1617 madd(fn)
1611 1618 elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)):
1612 1619 # There might be a change in the future if for example the
1613 1620 # internal clock is off, but this is a case where the issues
1614 1621 # the user would face would be a lot worse and there is
1615 1622 # nothing we can really do.
1616 1623 ladd(fn)
1617 1624 elif listclean:
1618 1625 cadd(fn)
1619 1626 status = scmutil.status(
1620 1627 modified, added, removed, deleted, unknown, ignored, clean
1621 1628 )
1622 1629 return (lookup, status, mtime_boundary)
1623 1630
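# A condensed sketch of the per-file classification in the pure-Python
# path above: compare the recorded size/mode/mtime against a fresh stat.
# `Entry` is an illustrative stand-in for the real dirstate item type.
import collections

Entry = collections.namedtuple('Entry', 'size mode mtime')

def classify(entry, st, checkexec=True):
    if st is None:
        return 'deleted'             # tracked but gone from disk
    if entry.size >= 0 and (
        entry.size != st.st_size
        or ((entry.mode ^ st.st_mode) & 0o100 and checkexec)
    ):
        return 'modified'            # size or exec bit definitely changed
    if entry.mtime != int(st.st_mtime):
        return 'unsure'              # same size, different mtime: re-read
    return 'clean'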
1624 1631 def matches(self, match):
1625 1632 """
1626 1633 return files in the dirstate (in whatever state) filtered by match
1627 1634 """
1628 1635 dmap = self._map
1629 1636 if rustmod is not None:
1630 1637 dmap = self._map._map
1631 1638
1632 1639 if match.always():
1633 1640 return dmap.keys()
1634 1641 files = match.files()
1635 1642 if match.isexact():
1636 1643 # fast path -- filter the other way around, since typically files is
1637 1644 # much smaller than dmap
1638 1645 return [f for f in files if f in dmap]
1639 1646 if match.prefix() and all(fn in dmap for fn in files):
1640 1647 # fast path -- all the values are known to be files, so just return
1641 1648 # that
1642 1649 return list(files)
1643 1650 return [f for f in dmap if match(f)]
1644 1651
1645 1652 def _actualfilename(self, tr):
1646 1653 if tr:
1647 1654 return self._pendingfilename
1648 1655 else:
1649 1656 return self._filename
1650 1657
1651 1658 def all_file_names(self):
1652 1659 """list all filenames currently used by this dirstate
1653 1660
1654 1661 This is only used to create `hg rollback`-related backups in the transaction
1655 1662 """
1656 1663 if not self._opener.exists(self._filename):
1657 1664 # no data ever written to disk yet
1658 1665 return ()
1659 1666 elif self._use_dirstate_v2:
1660 1667 return (
1661 1668 self._filename,
1662 1669 self._map.docket.data_filename(),
1663 1670 )
1664 1671 else:
1665 1672 return (self._filename,)
1666 1673
1667 1674 def verify(self, m1, m2, p1, narrow_matcher=None):
1668 1675 """
1669 1676 check the dirstate contents against the parent manifest and yield errors
1670 1677 """
1671 1678 missing_from_p1 = _(
1672 1679 b"%s marked as tracked in p1 (%s) but not in manifest1\n"
1673 1680 )
1674 1681 unexpected_in_p1 = _(b"%s marked as added, but also in manifest1\n")
1675 1682 missing_from_ps = _(
1676 1683 b"%s marked as modified, but not in either manifest\n"
1677 1684 )
1678 1685 missing_from_ds = _(
1679 1686 b"%s in manifest1, but not marked as tracked in p1 (%s)\n"
1680 1687 )
1681 1688 for f, entry in self.items():
1682 1689 if entry.p1_tracked:
1683 1690 if entry.modified and f not in m1 and f not in m2:
1684 1691 yield missing_from_ps % f
1685 1692 elif f not in m1:
1686 1693 yield missing_from_p1 % (f, node.short(p1))
1687 1694 if entry.added and f in m1:
1688 1695 yield unexpected_in_p1 % f
1689 1696 for f in m1:
1690 1697 if narrow_matcher is not None and not narrow_matcher(f):
1691 1698 continue
1692 1699 entry = self.get_entry(f)
1693 1700 if not entry.p1_tracked:
1694 1701 yield missing_from_ds % (f, node.short(p1))
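# A hedged usage sketch: drain verify() and report what it yields, the way
# a debug command might. m1/m2 are the parent manifests and p1 the first
# parent node; names follow the signature above.
def report_dirstate_errors(ui, dirstate, m1, m2, p1):
    errors = list(dirstate.verify(m1, m2, p1))
    for msg in errors:
        ui.warn(msg)
    return not errors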