store: use StoreEntry API instead of parsing filename in largefile...
marmoute
r51381:b4a9c8f1 default
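This commit switches two largefiles checks from parsing raw store filenames to the StoreEntry API. The sketch below, distilled from the hunks that follow, contrasts the two approaches; the helper name is hypothetical, but repo.store.datafiles(), entry.unencoded_path, entry.is_revlog, and entry.target_id are the names used in the diff itself.

    # Hypothetical helper illustrating the pattern of this commit.
    from hgext.largefiles.lfutil import shortnameslash  # b'.hglf/'

    def has_largefile_revlogs(repo):
        # Before: substring-match the standin marker against each store
        # entry's raw (unencoded) filename:
        #     any(shortnameslash in entry.unencoded_path
        #         for entry in repo.store.datafiles())
        # After: ask the StoreEntry API what the entry is. Only revlog
        # entries carry a target_id (the tracked file's path), so other
        # store files can no longer produce a spurious match.
        for entry in repo.store.datafiles():
            if entry.is_revlog and shortnameslash in entry.target_id:
                return True
        return False

As the added XXX comment in reposetup.py notes, the match is still not rooted, so a path component merely ending in ".hglf" can in principle still match.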
hgext/largefiles/lfutil.py
@@ -1,824 +1,823 @@
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import contextlib
12 12 import copy
13 13 import os
14 14 import stat
15 15
16 16 from mercurial.i18n import _
17 17 from mercurial.node import hex
18 18 from mercurial.pycompat import open
19 19
20 20 from mercurial import (
21 21 dirstate,
22 22 encoding,
23 23 error,
24 24 httpconnection,
25 25 match as matchmod,
26 26 pycompat,
27 27 requirements,
28 28 scmutil,
29 29 sparse,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33 from mercurial.utils import hashutil
34 34 from mercurial.dirstateutils import timestamp
35 35
36 36 shortname = b'.hglf'
37 37 shortnameslash = shortname + b'/'
38 38 longname = b'largefiles'
39 39
40 40 # -- Private worker functions ------------------------------------------
41 41
42 42
43 43 @contextlib.contextmanager
44 44 def lfstatus(repo, value=True):
45 45 oldvalue = getattr(repo, 'lfstatus', False)
46 46 repo.lfstatus = value
47 47 try:
48 48 yield
49 49 finally:
50 50 repo.lfstatus = oldvalue
51 51
52 52
53 53 def getminsize(ui, assumelfiles, opt, default=10):
54 54 lfsize = opt
55 55 if not lfsize and assumelfiles:
56 56 lfsize = ui.config(longname, b'minsize', default=default)
57 57 if lfsize:
58 58 try:
59 59 lfsize = float(lfsize)
60 60 except ValueError:
61 61 raise error.Abort(
62 62 _(b'largefiles: size must be a number (not %s)\n') % lfsize
63 63 )
64 64 if lfsize is None:
65 65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 66 return lfsize
67 67
68 68
69 69 def link(src, dest):
70 70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 71 util.makedirs(os.path.dirname(dest))
72 72 try:
73 73 util.oslink(src, dest)
74 74 except OSError:
75 75 # if hardlinks fail, fallback on atomic copy
76 76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 77 for chunk in util.filechunkiter(srcf):
78 78 dstf.write(chunk)
79 79 os.chmod(dest, os.stat(src).st_mode)
80 80
81 81
82 82 def usercachepath(ui, hash):
83 83 """Return the correct location in the "global" largefiles cache for a file
84 84 with the given hash.
85 85 This cache is used for sharing of largefiles across repositories - both
86 86 to preserve download bandwidth and storage space."""
87 87 return os.path.join(_usercachedir(ui), hash)
88 88
89 89
90 90 def _usercachedir(ui, name=longname):
91 91 '''Return the location of the "global" largefiles cache.'''
92 92 path = ui.configpath(name, b'usercache')
93 93 if path:
94 94 return path
95 95
96 96 hint = None
97 97
98 98 if pycompat.iswindows:
99 99 appdata = encoding.environ.get(
100 100 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
101 101 )
102 102 if appdata:
103 103 return os.path.join(appdata, name)
104 104
105 105 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
106 106 b"LOCALAPPDATA",
107 107 b"APPDATA",
108 108 name,
109 109 )
110 110 elif pycompat.isdarwin:
111 111 home = encoding.environ.get(b'HOME')
112 112 if home:
113 113 return os.path.join(home, b'Library', b'Caches', name)
114 114
115 115 hint = _(b"define %s in the environment, or set %s.usercache") % (
116 116 b"HOME",
117 117 name,
118 118 )
119 119 elif pycompat.isposix:
120 120 path = encoding.environ.get(b'XDG_CACHE_HOME')
121 121 if path:
122 122 return os.path.join(path, name)
123 123 home = encoding.environ.get(b'HOME')
124 124 if home:
125 125 return os.path.join(home, b'.cache', name)
126 126
127 127 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
128 128 b"XDG_CACHE_HOME",
129 129 b"HOME",
130 130 name,
131 131 )
132 132 else:
133 133 raise error.Abort(
134 134 _(b'unknown operating system: %s\n') % pycompat.osname
135 135 )
136 136
137 137 raise error.Abort(_(b'unknown %s usercache location') % name, hint=hint)
138 138
139 139
140 140 def inusercache(ui, hash):
141 141 path = usercachepath(ui, hash)
142 142 return os.path.exists(path)
143 143
144 144
145 145 def findfile(repo, hash):
146 146 """Return store path of the largefile with the specified hash.
147 147 As a side effect, the file might be linked from user cache.
148 148 Return None if the file can't be found locally."""
149 149 path, exists = findstorepath(repo, hash)
150 150 if exists:
151 151 repo.ui.note(_(b'found %s in store\n') % hash)
152 152 return path
153 153 elif inusercache(repo.ui, hash):
154 154 repo.ui.note(_(b'found %s in system cache\n') % hash)
155 155 path = storepath(repo, hash)
156 156 link(usercachepath(repo.ui, hash), path)
157 157 return path
158 158 return None
159 159
160 160
161 161 class largefilesdirstate(dirstate.dirstate):
162 162 _large_file_dirstate = True
163 163 _tr_key_suffix = b'-large-files'
164 164
165 165 def __getitem__(self, key):
166 166 return super(largefilesdirstate, self).__getitem__(unixpath(key))
167 167
168 168 def set_tracked(self, f):
169 169 return super(largefilesdirstate, self).set_tracked(unixpath(f))
170 170
171 171 def set_untracked(self, f):
172 172 return super(largefilesdirstate, self).set_untracked(unixpath(f))
173 173
174 174 def normal(self, f, parentfiledata=None):
175 175 # not sure if we should pass the `parentfiledata` down or throw it
176 176 # away. So throwing it away to stay on the safe side.
177 177 return super(largefilesdirstate, self).normal(unixpath(f))
178 178
179 179 def remove(self, f):
180 180 return super(largefilesdirstate, self).remove(unixpath(f))
181 181
182 182 def add(self, f):
183 183 return super(largefilesdirstate, self).add(unixpath(f))
184 184
185 185 def drop(self, f):
186 186 return super(largefilesdirstate, self).drop(unixpath(f))
187 187
188 188 def forget(self, f):
189 189 return super(largefilesdirstate, self).forget(unixpath(f))
190 190
191 191 def normallookup(self, f):
192 192 return super(largefilesdirstate, self).normallookup(unixpath(f))
193 193
194 194 def _ignore(self, f):
195 195 return False
196 196
197 197 def write(self, tr):
198 198 # (1) disable PENDING mode always
199 199 # (lfdirstate isn't yet managed as a part of the transaction)
200 200 # (2) avoid develwarn 'use dirstate.write with ....'
201 201 if tr:
202 202 tr.addbackup(b'largefiles/dirstate', location=b'plain')
203 203 super(largefilesdirstate, self).write(None)
204 204
205 205
206 206 def openlfdirstate(ui, repo, create=True):
207 207 """
208 208 Return a dirstate object that tracks largefiles: i.e. its root is
209 209 the repo root, but it is saved in .hg/largefiles/dirstate.
210 210
211 211 If a dirstate object already exists and is being used for a 'changing_*'
212 212 context, it will be returned.
213 213 """
214 214 sub_dirstate = getattr(repo.dirstate, '_sub_dirstate', None)
215 215 if sub_dirstate is not None:
216 216 return sub_dirstate
217 217 vfs = repo.vfs
218 218 lfstoredir = longname
219 219 opener = vfsmod.vfs(vfs.join(lfstoredir))
220 220 use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
221 221 lfdirstate = largefilesdirstate(
222 222 opener,
223 223 ui,
224 224 repo.root,
225 225 repo.dirstate._validate,
226 226 lambda: sparse.matcher(repo),
227 227 repo.nodeconstants,
228 228 use_dirstate_v2,
229 229 )
230 230
231 231 # If the largefiles dirstate does not exist, populate and create
232 232 # it. This ensures that we create it on the first meaningful
233 233 # largefiles operation in a new clone.
234 234 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
235 235 try:
236 236 with repo.wlock(wait=False), lfdirstate.changing_files(repo):
237 237 matcher = getstandinmatcher(repo)
238 238 standins = repo.dirstate.walk(
239 239 matcher, subrepos=[], unknown=False, ignored=False
240 240 )
241 241
242 242 if len(standins) > 0:
243 243 vfs.makedirs(lfstoredir)
244 244
245 245 for standin in standins:
246 246 lfile = splitstandin(standin)
247 247 lfdirstate.hacky_extension_update_file(
248 248 lfile,
249 249 p1_tracked=True,
250 250 wc_tracked=True,
251 251 possibly_dirty=True,
252 252 )
253 253 except error.LockError:
254 254 # Assume that whatever was holding the lock was important.
255 255 # If we were doing something important, we would already have
256 256 # either the lock or a largefile dirstate.
257 257 pass
258 258 return lfdirstate
259 259
260 260
261 261 def lfdirstatestatus(lfdirstate, repo):
262 262 pctx = repo[b'.']
263 263 match = matchmod.always()
264 264 unsure, s, mtime_boundary = lfdirstate.status(
265 265 match, subrepos=[], ignored=False, clean=False, unknown=False
266 266 )
267 267 modified, clean = s.modified, s.clean
268 268 wctx = repo[None]
269 269 for lfile in unsure:
270 270 try:
271 271 fctx = pctx[standin(lfile)]
272 272 except LookupError:
273 273 fctx = None
274 274 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
275 275 modified.append(lfile)
276 276 else:
277 277 clean.append(lfile)
278 278 st = wctx[lfile].lstat()
279 279 mode = st.st_mode
280 280 size = st.st_size
281 281 mtime = timestamp.reliable_mtime_of(st, mtime_boundary)
282 282 if mtime is not None:
283 283 cache_data = (mode, size, mtime)
284 284 lfdirstate.set_clean(lfile, cache_data)
285 285 return s
286 286
287 287
288 288 def listlfiles(repo, rev=None, matcher=None):
289 289 """return a list of largefiles in the working copy or the
290 290 specified changeset"""
291 291
292 292 if matcher is None:
293 293 matcher = getstandinmatcher(repo)
294 294
295 295 # ignore unknown files in working directory
296 296 return [
297 297 splitstandin(f)
298 298 for f in repo[rev].walk(matcher)
299 299 if rev is not None or repo.dirstate.get_entry(f).any_tracked
300 300 ]
301 301
302 302
303 303 def instore(repo, hash, forcelocal=False):
304 304 '''Return true if a largefile with the given hash exists in the store'''
305 305 return os.path.exists(storepath(repo, hash, forcelocal))
306 306
307 307
308 308 def storepath(repo, hash, forcelocal=False):
309 309 """Return the correct location in the repository largefiles store for a
310 310 file with the given hash."""
311 311 if not forcelocal and repo.shared():
312 312 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
313 313 return repo.vfs.join(longname, hash)
314 314
315 315
316 316 def findstorepath(repo, hash):
317 317 """Search through the local store path(s) to find the file for the given
318 318 hash. If the file is not found, its path in the primary store is returned.
319 319 The return value is a tuple of (path, exists(path)).
320 320 """
321 321 # For shared repos, the primary store is in the share source. But for
322 322 # backward compatibility, force a lookup in the local store if it wasn't
323 323 # found in the share source.
324 324 path = storepath(repo, hash, False)
325 325
326 326 if instore(repo, hash):
327 327 return (path, True)
328 328 elif repo.shared() and instore(repo, hash, True):
329 329 return storepath(repo, hash, True), True
330 330
331 331 return (path, False)
332 332
333 333
334 334 def copyfromcache(repo, hash, filename):
335 335 """Copy the specified largefile from the repo or system cache to
336 336 filename in the repository. Return true on success or false if the
337 337 file was not found in either cache (which should not happen:
338 338 this is meant to be called only after ensuring that the needed
339 339 largefile exists in the cache)."""
340 340 wvfs = repo.wvfs
341 341 path = findfile(repo, hash)
342 342 if path is None:
343 343 return False
344 344 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
345 345 # The write may fail before the file is fully written, but we
346 346 # don't use atomic writes in the working copy.
347 347 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
348 348 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
349 349 if gothash != hash:
350 350 repo.ui.warn(
351 351 _(b'%s: data corruption in %s with hash %s\n')
352 352 % (filename, path, gothash)
353 353 )
354 354 wvfs.unlink(filename)
355 355 return False
356 356 return True
357 357
358 358
359 359 def copytostore(repo, ctx, file, fstandin):
360 360 wvfs = repo.wvfs
361 361 hash = readasstandin(ctx[fstandin])
362 362 if instore(repo, hash):
363 363 return
364 364 if wvfs.exists(file):
365 365 copytostoreabsolute(repo, wvfs.join(file), hash)
366 366 else:
367 367 repo.ui.warn(
368 368 _(b"%s: largefile %s not available from local store\n")
369 369 % (file, hash)
370 370 )
371 371
372 372
373 373 def copyalltostore(repo, node):
374 374 '''Copy all largefiles in a given revision to the store'''
375 375
376 376 ctx = repo[node]
377 377 for filename in ctx.files():
378 378 realfile = splitstandin(filename)
379 379 if realfile is not None and filename in ctx.manifest():
380 380 copytostore(repo, ctx, realfile, filename)
381 381
382 382
383 383 def copytostoreabsolute(repo, file, hash):
384 384 if inusercache(repo.ui, hash):
385 385 link(usercachepath(repo.ui, hash), storepath(repo, hash))
386 386 else:
387 387 util.makedirs(os.path.dirname(storepath(repo, hash)))
388 388 with open(file, b'rb') as srcf:
389 389 with util.atomictempfile(
390 390 storepath(repo, hash), createmode=repo.store.createmode
391 391 ) as dstf:
392 392 for chunk in util.filechunkiter(srcf):
393 393 dstf.write(chunk)
394 394 linktousercache(repo, hash)
395 395
396 396
397 397 def linktousercache(repo, hash):
398 398 """Link / copy the largefile with the specified hash from the store
399 399 to the cache."""
400 400 path = usercachepath(repo.ui, hash)
401 401 link(storepath(repo, hash), path)
402 402
403 403
404 404 def getstandinmatcher(repo, rmatcher=None):
405 405 '''Return a match object that applies rmatcher to the standin directory'''
406 406 wvfs = repo.wvfs
407 407 standindir = shortname
408 408
409 409 # no warnings about missing files or directories
410 410 badfn = lambda f, msg: None
411 411
412 412 if rmatcher and not rmatcher.always():
413 413 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
414 414 if not pats:
415 415 pats = [wvfs.join(standindir)]
416 416 match = scmutil.match(repo[None], pats, badfn=badfn)
417 417 else:
418 418 # no patterns: relative to repo root
419 419 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
420 420 return match
421 421
422 422
423 423 def composestandinmatcher(repo, rmatcher):
424 424 """Return a matcher that accepts standins corresponding to the
425 425 files accepted by rmatcher. Pass the list of files in the matcher
426 426 as the paths specified by the user."""
427 427 smatcher = getstandinmatcher(repo, rmatcher)
428 428 isstandin = smatcher.matchfn
429 429
430 430 def composedmatchfn(f):
431 431 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
432 432
433 433 smatcher.matchfn = composedmatchfn
434 434
435 435 return smatcher
436 436
437 437
438 438 def standin(filename):
439 439 """Return the repo-relative path to the standin for the specified big
440 440 file."""
441 441 # Notes:
442 442 # 1) Some callers want an absolute path, but for instance addlargefiles
443 443 # needs it repo-relative so it can be passed to repo[None].add(). So
444 444 # leave it up to the caller to use repo.wjoin() to get an absolute path.
445 445 # 2) Join with '/' because that's what dirstate always uses, even on
446 446 # Windows. Change existing separator to '/' first in case we are
447 447 # passed filenames from an external source (like the command line).
448 448 return shortnameslash + util.pconvert(filename)
449 449
450 450
451 451 def isstandin(filename):
452 452 """Return true if filename is a big file standin. filename must be
453 453 in Mercurial's internal form (slash-separated)."""
454 454 return filename.startswith(shortnameslash)
455 455
456 456
457 457 def splitstandin(filename):
458 458 # Split on / because that's what dirstate always uses, even on Windows.
459 459 # Change local separator to / first just in case we are passed filenames
460 460 # from an external source (like the command line).
461 461 bits = util.pconvert(filename).split(b'/', 1)
462 462 if len(bits) == 2 and bits[0] == shortname:
463 463 return bits[1]
464 464 else:
465 465 return None
466 466
467 467
468 468 def updatestandin(repo, lfile, standin):
469 469 """Re-calculate hash value of lfile and write it into standin
470 470
471 471 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
472 472 """
473 473 file = repo.wjoin(lfile)
474 474 if repo.wvfs.exists(lfile):
475 475 hash = hashfile(file)
476 476 executable = getexecutable(file)
477 477 writestandin(repo, standin, hash, executable)
478 478 else:
479 479 raise error.Abort(_(b'%s: file not found!') % lfile)
480 480
481 481
482 482 def readasstandin(fctx):
483 483 """read hex hash from given filectx of standin file
484 484
485 485 This encapsulates how "standin" data is stored into storage layer."""
486 486 return fctx.data().strip()
487 487
488 488
489 489 def writestandin(repo, standin, hash, executable):
490 490 '''write hash to <repo.root>/<standin>'''
491 491 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
492 492
493 493
494 494 def copyandhash(instream, outfile):
495 495 """Read bytes from instream (iterable) and write them to outfile,
496 496 computing the SHA-1 hash of the data along the way. Return the hash."""
497 497 hasher = hashutil.sha1(b'')
498 498 for data in instream:
499 499 hasher.update(data)
500 500 outfile.write(data)
501 501 return hex(hasher.digest())
502 502
503 503
504 504 def hashfile(file):
505 505 if not os.path.exists(file):
506 506 return b''
507 507 with open(file, b'rb') as fd:
508 508 return hexsha1(fd)
509 509
510 510
511 511 def getexecutable(filename):
512 512 mode = os.stat(filename).st_mode
513 513 return (
514 514 (mode & stat.S_IXUSR)
515 515 and (mode & stat.S_IXGRP)
516 516 and (mode & stat.S_IXOTH)
517 517 )
518 518
519 519
520 520 def urljoin(first, second, *arg):
521 521 def join(left, right):
522 522 if not left.endswith(b'/'):
523 523 left += b'/'
524 524 if right.startswith(b'/'):
525 525 right = right[1:]
526 526 return left + right
527 527
528 528 url = join(first, second)
529 529 for a in arg:
530 530 url = join(url, a)
531 531 return url
532 532
533 533
534 534 def hexsha1(fileobj):
535 535 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
536 536 object data"""
537 537 h = hashutil.sha1()
538 538 for chunk in util.filechunkiter(fileobj):
539 539 h.update(chunk)
540 540 return hex(h.digest())
541 541
542 542
543 543 def httpsendfile(ui, filename):
544 544 return httpconnection.httpsendfile(ui, filename, b'rb')
545 545
546 546
547 547 def unixpath(path):
548 548 '''Return a version of path normalized for use with the lfdirstate.'''
549 549 return util.pconvert(os.path.normpath(path))
550 550
551 551
552 552 def islfilesrepo(repo):
553 553 '''Return true if the repo is a largefile repo.'''
554 if b'largefiles' in repo.requirements and any(
555 shortnameslash in entry.unencoded_path
556 for entry in repo.store.datafiles()
557 ):
558 return True
554 if b'largefiles' in repo.requirements:
555 for entry in repo.store.datafiles():
556 if entry.is_revlog and shortnameslash in entry.target_id:
557 return True
559 558
560 559 return any(openlfdirstate(repo.ui, repo, False))
561 560
562 561
563 562 class storeprotonotcapable(Exception):
564 563 def __init__(self, storetypes):
565 564 self.storetypes = storetypes
566 565
567 566
568 567 def getstandinsstate(repo):
569 568 standins = []
570 569 matcher = getstandinmatcher(repo)
571 570 wctx = repo[None]
572 571 for standin in repo.dirstate.walk(
573 572 matcher, subrepos=[], unknown=False, ignored=False
574 573 ):
575 574 lfile = splitstandin(standin)
576 575 try:
577 576 hash = readasstandin(wctx[standin])
578 577 except IOError:
579 578 hash = None
580 579 standins.append((lfile, hash))
581 580 return standins
582 581
583 582
584 583 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
585 584 lfstandin = standin(lfile)
586 585 if lfstandin not in repo.dirstate:
587 586 lfdirstate.hacky_extension_update_file(
588 587 lfile,
589 588 p1_tracked=False,
590 589 wc_tracked=False,
591 590 )
592 591 else:
593 592 entry = repo.dirstate.get_entry(lfstandin)
594 593 lfdirstate.hacky_extension_update_file(
595 594 lfile,
596 595 wc_tracked=entry.tracked,
597 596 p1_tracked=entry.p1_tracked,
598 597 p2_info=entry.p2_info,
599 598 possibly_dirty=True,
600 599 )
601 600
602 601
603 602 def markcommitted(orig, ctx, node):
604 603 repo = ctx.repo()
605 604
606 605 with repo.dirstate.changing_parents(repo):
607 606 orig(node)
608 607
609 608 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
610 609 # because files coming from the 2nd parent are omitted in the latter.
611 610 #
612 611 # The former should be used to get targets of "synclfdirstate",
613 612 # because such files:
614 613 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
615 614 # - have to be marked as "n" after commit, but
616 615 # - aren't listed in "repo[node].files()"
617 616
618 617 lfdirstate = openlfdirstate(repo.ui, repo)
619 618 for f in ctx.files():
620 619 lfile = splitstandin(f)
621 620 if lfile is not None:
622 621 synclfdirstate(repo, lfdirstate, lfile, False)
623 622
624 623 # As part of committing, copy all of the largefiles into the cache.
625 624 #
626 625 # Using "node" instead of "ctx" implies additional "repo[node]"
627 626 # lookup while copyalltostore(), but can omit redundant check for
628 627 # files coming from the 2nd parent, which should exist in store
629 628 # at merging.
630 629 copyalltostore(repo, node)
631 630
632 631
633 632 def getlfilestoupdate(oldstandins, newstandins):
634 633 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
635 634 filelist = []
636 635 for f in changedstandins:
637 636 if f[0] not in filelist:
638 637 filelist.append(f[0])
639 638 return filelist
640 639
641 640
642 641 def getlfilestoupload(repo, missing, addfunc):
643 642 makeprogress = repo.ui.makeprogress
644 643 with makeprogress(
645 644 _(b'finding outgoing largefiles'),
646 645 unit=_(b'revisions'),
647 646 total=len(missing),
648 647 ) as progress:
649 648 for i, n in enumerate(missing):
650 649 progress.update(i)
651 650 parents = [p for p in repo[n].parents() if p != repo.nullid]
652 651
653 652 with lfstatus(repo, value=False):
654 653 ctx = repo[n]
655 654
656 655 files = set(ctx.files())
657 656 if len(parents) == 2:
658 657 mc = ctx.manifest()
659 658 mp1 = ctx.p1().manifest()
660 659 mp2 = ctx.p2().manifest()
661 660 for f in mp1:
662 661 if f not in mc:
663 662 files.add(f)
664 663 for f in mp2:
665 664 if f not in mc:
666 665 files.add(f)
667 666 for f in mc:
668 667 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
669 668 files.add(f)
670 669 for fn in files:
671 670 if isstandin(fn) and fn in ctx:
672 671 addfunc(fn, readasstandin(ctx[fn]))
673 672
674 673
675 674 def updatestandinsbymatch(repo, match):
676 675 """Update standins in the working directory according to specified match
677 676
678 677 This returns (possibly modified) ``match`` object to be used for
679 678 subsequent commit process.
680 679 """
681 680
682 681 ui = repo.ui
683 682
684 683 # Case 1: user calls commit with no specific files or
685 684 # include/exclude patterns: refresh and commit all files that
686 685 # are "dirty".
687 686 if match is None or match.always():
688 687 # Spend a bit of time here to get a list of files we know
689 688 # are modified so we can compare only against those.
690 689 # It can cost a lot of time (several seconds)
691 690 # otherwise to update all standins if the largefiles are
692 691 # large.
693 692 dirtymatch = matchmod.always()
694 693 with repo.dirstate.running_status(repo):
695 694 lfdirstate = openlfdirstate(ui, repo)
696 695 unsure, s, mtime_boundary = lfdirstate.status(
697 696 dirtymatch,
698 697 subrepos=[],
699 698 ignored=False,
700 699 clean=False,
701 700 unknown=False,
702 701 )
703 702 modifiedfiles = unsure + s.modified + s.added + s.removed
704 703 lfiles = listlfiles(repo)
705 704 # this only loops through largefiles that exist (not
706 705 # removed/renamed)
707 706 for lfile in lfiles:
708 707 if lfile in modifiedfiles:
709 708 fstandin = standin(lfile)
710 709 if repo.wvfs.exists(fstandin):
711 710 # this handles the case where a rebase is being
712 711 # performed and the working copy is not updated
713 712 # yet.
714 713 if repo.wvfs.exists(lfile):
715 714 updatestandin(repo, lfile, fstandin)
716 715
717 716 return match
718 717
719 718 lfiles = listlfiles(repo)
720 719 match._files = repo._subdirlfs(match.files(), lfiles)
721 720
722 721 # Case 2: user calls commit with specified patterns: refresh
723 722 # any matching big files.
724 723 smatcher = composestandinmatcher(repo, match)
725 724 standins = repo.dirstate.walk(
726 725 smatcher, subrepos=[], unknown=False, ignored=False
727 726 )
728 727
729 728 # No matching big files: get out of the way and pass control to
730 729 # the usual commit() method.
731 730 if not standins:
732 731 return match
733 732
734 733 # Refresh all matching big files. It's possible that the
735 734 # commit will end up failing, in which case the big files will
736 735 # stay refreshed. No harm done: the user modified them and
737 736 # asked to commit them, so sooner or later we're going to
738 737 # refresh the standins. Might as well leave them refreshed.
739 738 lfdirstate = openlfdirstate(ui, repo)
740 739 for fstandin in standins:
741 740 lfile = splitstandin(fstandin)
742 741 if lfdirstate.get_entry(lfile).tracked:
743 742 updatestandin(repo, lfile, fstandin)
744 743
745 744 # Cook up a new matcher that only matches regular files or
746 745 # standins corresponding to the big files requested by the
747 746 # user. Have to modify _files to prevent commit() from
748 747 # complaining "not tracked" for big files.
749 748 match = copy.copy(match)
750 749 origmatchfn = match.matchfn
751 750
752 751 # Check both the list of largefiles and the list of
753 752 # standins because if a largefile was removed, it
754 753 # won't be in the list of largefiles at this point
755 754 match._files += sorted(standins)
756 755
757 756 actualfiles = []
758 757 for f in match._files:
759 758 fstandin = standin(f)
760 759
761 760 # For largefiles, only one of the normal and standin should be
762 761 # committed (except if one of them is a remove). In the case of a
763 762 # standin removal, drop the normal file if it is unknown to dirstate.
764 763 # Thus, skip plain largefile names but keep the standin.
765 764 if f in lfiles or fstandin in standins:
766 765 if not repo.dirstate.get_entry(fstandin).removed:
767 766 if not repo.dirstate.get_entry(f).removed:
768 767 continue
769 768 elif not repo.dirstate.get_entry(f).any_tracked:
770 769 continue
771 770
772 771 actualfiles.append(f)
773 772 match._files = actualfiles
774 773
775 774 def matchfn(f):
776 775 if origmatchfn(f):
777 776 return f not in lfiles
778 777 else:
779 778 return f in standins
780 779
781 780 match.matchfn = matchfn
782 781
783 782 return match
784 783
785 784
786 785 class automatedcommithook:
787 786 """Stateful hook to update standins at the 1st commit of resuming
788 787
789 788 For efficiency, updating standins in the working directory should
790 789 be avoided while automated committing (like rebase, transplant and
791 790 so on), because they should be updated before committing.
792 791
793 792 But the 1st commit of resuming automated committing (e.g. ``rebase
794 793 --continue``) should update them, because largefiles may be
795 794 modified manually.
796 795 """
797 796
798 797 def __init__(self, resuming):
799 798 self.resuming = resuming
800 799
801 800 def __call__(self, repo, match):
802 801 if self.resuming:
803 802 self.resuming = False # avoids updating at subsequent commits
804 803 return updatestandinsbymatch(repo, match)
805 804 else:
806 805 return match
807 806
808 807
809 808 def getstatuswriter(ui, repo, forcibly=None):
810 809 """Return the function to write largefiles specific status out
811 810
812 811 If ``forcibly`` is ``None``, this returns the last element of
813 812 ``repo._lfstatuswriters`` as "default" writer function.
814 813
815 814 Otherwise, this returns the function to always write out (or
816 815 ignore if ``not forcibly``) status.
817 816 """
818 817 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
819 818 return repo._lfstatuswriters[-1]
820 819 else:
821 820 if forcibly:
822 821 return ui.status # forcibly WRITE OUT
823 822 else:
824 823 return lambda *msg, **opts: None # forcibly IGNORE
hgext/largefiles/reposetup.py
@@ -1,470 +1,474 @@
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''setup for largefiles repositories: reposetup'''
10 10
11 11 import copy
12 12
13 13 from mercurial.i18n import _
14 14
15 15 from mercurial import (
16 16 error,
17 17 extensions,
18 18 localrepo,
19 19 match as matchmod,
20 20 scmutil,
21 21 util,
22 22 )
23 23
24 24 from mercurial.dirstateutils import timestamp
25 25
26 26 from . import (
27 27 lfcommands,
28 28 lfutil,
29 29 )
30 30
31 31
32 32 def reposetup(ui, repo):
33 33 # wire repositories should be given new wireproto functions
34 34 # by "proto.wirereposetup()" via "hg.wirepeersetupfuncs"
35 35 if not repo.local():
36 36 return
37 37
38 38 class lfilesrepo(repo.__class__):
39 39 # the mark to examine whether "repo" object enables largefiles or not
40 40 _largefilesenabled = True
41 41
42 42 lfstatus = False
43 43
44 44 # When lfstatus is set, return a context that gives the names
45 45 # of largefiles instead of their corresponding standins and
46 46 # identifies the largefiles as always binary, regardless of
47 47 # their actual contents.
48 48 def __getitem__(self, changeid):
49 49 ctx = super(lfilesrepo, self).__getitem__(changeid)
50 50 if self.lfstatus:
51 51
52 52 def files(orig):
53 53 filenames = orig()
54 54 return [lfutil.splitstandin(f) or f for f in filenames]
55 55
56 56 extensions.wrapfunction(ctx, 'files', files)
57 57
58 58 def manifest(orig):
59 59 man1 = orig()
60 60
61 61 class lfilesmanifest(man1.__class__):
62 62 def __contains__(self, filename):
63 63 orig = super(lfilesmanifest, self).__contains__
64 64 return orig(filename) or orig(
65 65 lfutil.standin(filename)
66 66 )
67 67
68 68 man1.__class__ = lfilesmanifest
69 69 return man1
70 70
71 71 extensions.wrapfunction(ctx, 'manifest', manifest)
72 72
73 73 def filectx(orig, path, fileid=None, filelog=None):
74 74 try:
75 75 if filelog is not None:
76 76 result = orig(path, fileid, filelog)
77 77 else:
78 78 result = orig(path, fileid)
79 79 except error.LookupError:
80 80 # Adding a null character will cause Mercurial to
81 81 # identify this as a binary file.
82 82 if filelog is not None:
83 83 result = orig(lfutil.standin(path), fileid, filelog)
84 84 else:
85 85 result = orig(lfutil.standin(path), fileid)
86 86 olddata = result.data
87 87 result.data = lambda: olddata() + b'\0'
88 88 return result
89 89
90 90 extensions.wrapfunction(ctx, 'filectx', filectx)
91 91
92 92 return ctx
93 93
94 94 # Figure out the status of big files and insert them into the
95 95 # appropriate list in the result. Also removes standin files
96 96 # from the listing. Revert to the original status if
97 97 # self.lfstatus is False.
98 98 # XXX large file status is buggy when used on repo proxy.
99 99 # XXX this needs to be investigated.
100 100 @localrepo.unfilteredmethod
101 101 def status(
102 102 self,
103 103 node1=b'.',
104 104 node2=None,
105 105 match=None,
106 106 ignored=False,
107 107 clean=False,
108 108 unknown=False,
109 109 listsubrepos=False,
110 110 ):
111 111 listignored, listclean, listunknown = ignored, clean, unknown
112 112 orig = super(lfilesrepo, self).status
113 113 if not self.lfstatus:
114 114 return orig(
115 115 node1,
116 116 node2,
117 117 match,
118 118 listignored,
119 119 listclean,
120 120 listunknown,
121 121 listsubrepos,
122 122 )
123 123
124 124 # some calls in this function rely on the old version of status
125 125 self.lfstatus = False
126 126 ctx1 = self[node1]
127 127 ctx2 = self[node2]
128 128 working = ctx2.rev() is None
129 129 parentworking = working and ctx1 == self[b'.']
130 130
131 131 if match is None:
132 132 match = matchmod.always()
133 133
134 134 try:
135 135 # updating the dirstate is optional
136 136 # so we don't wait on the lock
137 137 wlock = self.wlock(False)
138 138 gotlock = True
139 139 except error.LockError:
140 140 wlock = util.nullcontextmanager()
141 141 gotlock = False
142 142 with wlock, self.dirstate.running_status(self):
143 143
144 144 # First check if paths or patterns were specified on the
145 145 # command line. If there were, and they don't match any
146 146 # largefiles, we should just bail here and let super
147 147 # handle it -- thus gaining a big performance boost.
148 148 lfdirstate = lfutil.openlfdirstate(ui, self)
149 149 if not match.always():
150 150 for f in lfdirstate:
151 151 if match(f):
152 152 break
153 153 else:
154 154 return orig(
155 155 node1,
156 156 node2,
157 157 match,
158 158 listignored,
159 159 listclean,
160 160 listunknown,
161 161 listsubrepos,
162 162 )
163 163
164 164 # Create a copy of match that matches standins instead
165 165 # of largefiles.
166 166 def tostandins(files):
167 167 if not working:
168 168 return files
169 169 newfiles = []
170 170 dirstate = self.dirstate
171 171 for f in files:
172 172 sf = lfutil.standin(f)
173 173 if sf in dirstate:
174 174 newfiles.append(sf)
175 175 elif dirstate.hasdir(sf):
176 176 # Directory entries could be regular or
177 177 # standin, check both
178 178 newfiles.extend((f, sf))
179 179 else:
180 180 newfiles.append(f)
181 181 return newfiles
182 182
183 183 m = copy.copy(match)
184 184 m._files = tostandins(m._files)
185 185
186 186 result = orig(
187 187 node1, node2, m, ignored, clean, unknown, listsubrepos
188 188 )
189 189 if working:
190 190
191 191 def sfindirstate(f):
192 192 sf = lfutil.standin(f)
193 193 dirstate = self.dirstate
194 194 return sf in dirstate or dirstate.hasdir(sf)
195 195
196 196 match._files = [f for f in match._files if sfindirstate(f)]
197 197 # Don't waste time getting the ignored and unknown
198 198 # files from lfdirstate
199 199 unsure, s, mtime_boundary = lfdirstate.status(
200 200 match,
201 201 subrepos=[],
202 202 ignored=False,
203 203 clean=listclean,
204 204 unknown=False,
205 205 )
206 206 (modified, added, removed, deleted, clean) = (
207 207 s.modified,
208 208 s.added,
209 209 s.removed,
210 210 s.deleted,
211 211 s.clean,
212 212 )
213 213 if parentworking:
214 214 wctx = repo[None]
215 215 for lfile in unsure:
216 216 standin = lfutil.standin(lfile)
217 217 if standin not in ctx1:
218 218 # from second parent
219 219 modified.append(lfile)
220 220 elif lfutil.readasstandin(
221 221 ctx1[standin]
222 222 ) != lfutil.hashfile(self.wjoin(lfile)):
223 223 modified.append(lfile)
224 224 else:
225 225 if listclean:
226 226 clean.append(lfile)
227 227 s = wctx[lfile].lstat()
228 228 mode = s.st_mode
229 229 size = s.st_size
230 230 mtime = timestamp.reliable_mtime_of(
231 231 s, mtime_boundary
232 232 )
233 233 if mtime is not None:
234 234 cache_data = (mode, size, mtime)
235 235 lfdirstate.set_clean(lfile, cache_data)
236 236 else:
237 237 tocheck = unsure + modified + added + clean
238 238 modified, added, clean = [], [], []
239 239 checkexec = self.dirstate._checkexec
240 240
241 241 for lfile in tocheck:
242 242 standin = lfutil.standin(lfile)
243 243 if standin in ctx1:
244 244 abslfile = self.wjoin(lfile)
245 245 if (
246 246 lfutil.readasstandin(ctx1[standin])
247 247 != lfutil.hashfile(abslfile)
248 248 ) or (
249 249 checkexec
250 250 and (b'x' in ctx1.flags(standin))
251 251 != bool(lfutil.getexecutable(abslfile))
252 252 ):
253 253 modified.append(lfile)
254 254 elif listclean:
255 255 clean.append(lfile)
256 256 else:
257 257 added.append(lfile)
258 258
259 259 # at this point, 'removed' contains largefiles
260 260 # marked as 'R' in the working context.
261 261 # then, largefiles not managed also in the target
262 262 # context should be excluded from 'removed'.
263 263 removed = [
264 264 lfile
265 265 for lfile in removed
266 266 if lfutil.standin(lfile) in ctx1
267 267 ]
268 268
269 269 # Standins no longer found in lfdirstate have been deleted
270 270 for standin in ctx1.walk(lfutil.getstandinmatcher(self)):
271 271 lfile = lfutil.splitstandin(standin)
272 272 if not match(lfile):
273 273 continue
274 274 if lfile not in lfdirstate:
275 275 deleted.append(lfile)
276 276 # Sync "largefile has been removed" back to the
277 277 # standin. Removing a file as a side effect of
278 278 # running status is gross, but the alternatives (if
279 279 # any) are worse.
280 280 self.wvfs.unlinkpath(standin, ignoremissing=True)
281 281
282 282 # Filter result lists
283 283 result = list(result)
284 284
285 285 # Largefiles are not really removed when they're
286 286 # still in the normal dirstate. Likewise, normal
287 287 # files are not really removed if they are still in
288 288 # lfdirstate. This happens in merges where files
289 289 # change type.
290 290 removed = [f for f in removed if f not in self.dirstate]
291 291 result[2] = [f for f in result[2] if f not in lfdirstate]
292 292
293 293 lfiles = set(lfdirstate)
294 294 # Unknown files
295 295 result[4] = set(result[4]).difference(lfiles)
296 296 # Ignored files
297 297 result[5] = set(result[5]).difference(lfiles)
298 298 # combine normal files and largefiles
299 299 normals = [
300 300 [fn for fn in filelist if not lfutil.isstandin(fn)]
301 301 for filelist in result
302 302 ]
303 303 lfstatus = (
304 304 modified,
305 305 added,
306 306 removed,
307 307 deleted,
308 308 [],
309 309 [],
310 310 clean,
311 311 )
312 312 result = [
313 313 sorted(list1 + list2)
314 314 for (list1, list2) in zip(normals, lfstatus)
315 315 ]
316 316 else: # not against working directory
317 317 result = [
318 318 [lfutil.splitstandin(f) or f for f in items]
319 319 for items in result
320 320 ]
321 321
322 322 if gotlock:
323 323 lfdirstate.write(self.currenttransaction())
324 324 else:
325 325 lfdirstate.invalidate()
326 326
327 327 self.lfstatus = True
328 328 return scmutil.status(*result)
329 329
330 330 def commitctx(self, ctx, *args, **kwargs):
331 331 node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)
332 332
333 333 class lfilesctx(ctx.__class__):
334 334 def markcommitted(self, node):
335 335 orig = super(lfilesctx, self).markcommitted
336 336 return lfutil.markcommitted(orig, self, node)
337 337
338 338 ctx.__class__ = lfilesctx
339 339 return node
340 340
341 341 # Before commit, largefile standins have not had their
342 342 # contents updated to reflect the hash of their largefile.
343 343 # Do that here.
344 344 def commit(
345 345 self,
346 346 text=b"",
347 347 user=None,
348 348 date=None,
349 349 match=None,
350 350 force=False,
351 351 editor=False,
352 352 extra=None,
353 353 ):
354 354 if extra is None:
355 355 extra = {}
356 356 orig = super(lfilesrepo, self).commit
357 357
358 358 with self.wlock():
359 359 lfcommithook = self._lfcommithooks[-1]
360 360 match = lfcommithook(self, match)
361 361 result = orig(
362 362 text=text,
363 363 user=user,
364 364 date=date,
365 365 match=match,
366 366 force=force,
367 367 editor=editor,
368 368 extra=extra,
369 369 )
370 370 return result
371 371
372 372 # TODO: _subdirlfs should be moved into "lfutil.py", because
373 373 # it is referred only from "lfutil.updatestandinsbymatch"
374 374 def _subdirlfs(self, files, lfiles):
375 375 """
376 376 Adjust matched file list
377 377 If we pass a directory to commit whose only committable files
378 378 are largefiles, the core commit code aborts before finding
379 379 the largefiles.
380 380 So we do the following:
381 381 For directories that only have largefiles as matches,
382 382 we explicitly add the largefiles to the match list and remove
383 383 the directory.
384 384 In other cases, we leave the match list unmodified.
385 385 """
386 386 actualfiles = []
387 387 dirs = []
388 388 regulars = []
389 389
390 390 for f in files:
391 391 if lfutil.isstandin(f + b'/'):
392 392 raise error.Abort(
393 393 _(b'file "%s" is a largefile standin') % f,
394 394 hint=b'commit the largefile itself instead',
395 395 )
396 396 # Scan directories
397 397 if self.wvfs.isdir(f):
398 398 dirs.append(f)
399 399 else:
400 400 regulars.append(f)
401 401
402 402 for f in dirs:
403 403 matcheddir = False
404 404 d = self.dirstate.normalize(f) + b'/'
405 405 # Check for matched normal files
406 406 for mf in regulars:
407 407 if self.dirstate.normalize(mf).startswith(d):
408 408 actualfiles.append(f)
409 409 matcheddir = True
410 410 break
411 411 if not matcheddir:
412 412 # If no normal match, manually append
413 413 # any matching largefiles
414 414 for lf in lfiles:
415 415 if self.dirstate.normalize(lf).startswith(d):
416 416 actualfiles.append(lf)
417 417 if not matcheddir:
418 418 # There may still be normal files in the dir, so
419 419 # add a directory to the list, which
420 420 # forces status/dirstate to walk all files and
421 421 # call the match function on the matcher, even
422 422 # on case sensitive filesystems.
423 423 actualfiles.append(b'.')
424 424 matcheddir = True
425 425 # Nothing in dir, so re-add it
426 426 # and let commit reject it
427 427 if not matcheddir:
428 428 actualfiles.append(f)
429 429
430 430 # Always add normal files
431 431 actualfiles += regulars
432 432 return actualfiles
433 433
434 434 repo.__class__ = lfilesrepo
435 435
436 436 # stack of hooks being executed before committing.
437 437 # only last element ("_lfcommithooks[-1]") is used for each committing.
438 438 repo._lfcommithooks = [lfutil.updatestandinsbymatch]
439 439
440 440 # Stack of status writer functions taking "*msg, **opts" arguments
441 441 # like "ui.status()". Only last element ("_lfstatuswriters[-1]")
442 442 # is used to write status out.
443 443 repo._lfstatuswriters = [ui.status]
444 444
445 445 def prepushoutgoinghook(pushop):
446 446 """Push largefiles for pushop before pushing revisions."""
447 447 lfrevs = pushop.lfrevs
448 448 if lfrevs is None:
449 449 lfrevs = pushop.outgoing.missing
450 450 if lfrevs:
451 451 toupload = set()
452 452 addfunc = lambda fn, lfhash: toupload.add(lfhash)
453 453 lfutil.getlfilestoupload(pushop.repo, lfrevs, addfunc)
454 454 lfcommands.uploadlfiles(ui, pushop.repo, pushop.remote, toupload)
455 455
456 456 repo.prepushoutgoinghooks.add(b"largefiles", prepushoutgoinghook)
457 457
458 458 def checkrequireslfiles(ui, repo, **kwargs):
459 459 with repo.lock():
460 if b'largefiles' not in repo.requirements and any(
461 lfutil.shortname + b'/' in entry.unencoded_path
462 for entry in repo.store.datafiles()
463 ):
464 repo.requirements.add(b'largefiles')
465 scmutil.writereporequirements(repo)
460 if b'largefiles' in repo.requirements:
461 return
462 marker = lfutil.shortnameslash
463 for entry in repo.store.datafiles():
463 # XXX note that this match is not rooted and can wrongly match
464 # a directory ending with ".hglf"
466 if entry.is_revlog and marker in entry.target_id:
467 repo.requirements.add(b'largefiles')
468 scmutil.writereporequirements(repo)
469 break
466 470
467 471 ui.setconfig(
468 472 b'hooks', b'changegroup.lfiles', checkrequireslfiles, b'largefiles'
469 473 )
470 474 ui.setconfig(b'hooks', b'commit.lfiles', checkrequireslfiles, b'largefiles')