##// END OF EJS Templates
largefile: use the proper "mtime boundary" logic during fixup...
marmoute -
r49219:a96a5d62 default draft
parent child Browse files
Show More
@@ -1,801 +1,798 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10 from __future__ import absolute_import
11 11
12 12 import contextlib
13 13 import copy
14 14 import os
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import hex
19 19 from mercurial.pycompat import open
20 20
21 21 from mercurial import (
22 22 dirstate,
23 23 encoding,
24 24 error,
25 25 httpconnection,
26 26 match as matchmod,
27 27 pycompat,
28 28 requirements,
29 29 scmutil,
30 30 sparse,
31 31 util,
32 32 vfs as vfsmod,
33 33 )
34 34 from mercurial.utils import hashutil
35 35 from mercurial.dirstateutils import timestamp
36 36
37 37 shortname = b'.hglf'
38 38 shortnameslash = shortname + b'/'
39 39 longname = b'largefiles'
40 40
41 41 # -- Private worker functions ------------------------------------------
42 42
43 43
@contextlib.contextmanager
def lfstatus(repo, value=True):
    """Temporarily set ``repo.lfstatus`` to *value*, restoring the previous
    value (default ``False`` when unset) on exit."""
    previous = getattr(repo, 'lfstatus', False)
    repo.lfstatus = value
    try:
        yield
    finally:
        repo.lfstatus = previous
52 52
53 53
def getminsize(ui, assumelfiles, opt, default=10):
    """Return the minimum largefile size as a float.

    ``opt`` (the command-line value) wins; otherwise, when ``assumelfiles``
    is set, fall back to the ``largefiles.minsize`` config value.  Aborts
    when the value is not a number or no size can be determined at all.
    """
    size = opt
    if not size and assumelfiles:
        size = ui.config(longname, b'minsize', default=default)
    if size:
        try:
            size = float(size)
        except ValueError:
            raise error.Abort(
                _(b'largefiles: size must be number (not %s)\n') % size
            )
    if size is None:
        raise error.Abort(_(b'minimum size for largefiles must be specified'))
    return size
68 68
69 69
def link(src, dest):
    """Try to create hardlink - if that fails, efficiently make a copy."""
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
        return
    except OSError:
        # hardlink failed (cross-device, unsupported filesystem, ...):
        # fall back on an atomic copy
        pass
    with open(src, b'rb') as infd, util.atomictempfile(dest) as outfd:
        for piece in util.filechunkiter(infd):
            outfd.write(piece)
    os.chmod(dest, os.stat(src).st_mode)
81 81
82 82
def usercachepath(ui, hash):
    """Return the correct location in the "global" largefiles cache for a file
    with the given hash.

    This cache is shared across repositories, both to preserve download
    bandwidth and storage space.
    """
    cachedir = _usercachedir(ui)
    return os.path.join(cachedir, hash)
89 89
90 90
def _usercachedir(ui, name=longname):
    '''Return the location of the "global" largefiles cache.'''
    # an explicit config setting always wins
    configured = ui.configpath(name, b'usercache')
    if configured:
        return configured

    if pycompat.iswindows:
        appdata = encoding.environ.get(
            b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
        )
        if appdata:
            return os.path.join(appdata, name)
        hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
            b"LOCALAPPDATA",
            b"APPDATA",
            name,
        )
    elif pycompat.isdarwin:
        home = encoding.environ.get(b'HOME')
        if home:
            return os.path.join(home, b'Library', b'Caches', name)
        hint = _(b"define %s in the environment, or set %s.usercache") % (
            b"HOME",
            name,
        )
    elif pycompat.isposix:
        # honor the XDG base-dir spec first, then fall back to ~/.cache
        xdg = encoding.environ.get(b'XDG_CACHE_HOME')
        if xdg:
            return os.path.join(xdg, name)
        home = encoding.environ.get(b'HOME')
        if home:
            return os.path.join(home, b'.cache', name)
        hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
            b"XDG_CACHE_HOME",
            b"HOME",
            name,
        )
    else:
        raise error.Abort(
            _(b'unknown operating system: %s\n') % pycompat.osname
        )

    raise error.Abort(_(b'unknown %s usercache location') % name, hint=hint)
139 139
140 140
def inusercache(ui, hash):
    """Report whether the largefile ``hash`` exists in the user cache."""
    return os.path.exists(usercachepath(ui, hash))
144 144
145 145
def findfile(repo, hash):
    """Return store path of the largefile with the specified hash.
    As a side effect, the file might be linked from user cache.
    Return None if the file can't be found locally."""
    path, exists = findstorepath(repo, hash)
    if exists:
        repo.ui.note(_(b'found %s in store\n') % hash)
        return path
    if inusercache(repo.ui, hash):
        repo.ui.note(_(b'found %s in system cache\n') % hash)
        # materialize the file in the store (hardlink when possible)
        path = storepath(repo, hash)
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
160 160
161 161
class largefilesdirstate(dirstate.dirstate):
    """dirstate subclass used for the standalone largefiles dirstate.

    Every filename argument is funnelled through ``unixpath()`` before being
    handed to the base class, so the lfdirstate consistently stores
    normalized, slash-separated paths.
    """

    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))

    def set_tracked(self, f):
        return super(largefilesdirstate, self).set_tracked(unixpath(f))

    def set_untracked(self, f):
        return super(largefilesdirstate, self).set_untracked(unixpath(f))

    def normal(self, f, parentfiledata=None):
        # not sure if we should pass the `parentfiledata` down or throw it
        # away. So throwing it away to stay on the safe side.
        return super(largefilesdirstate, self).normal(unixpath(f))

    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))

    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))

    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))

    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))

    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))

    def _ignore(self, f):
        # largefiles are never considered ignored
        return False

    def write(self, tr):
        # (1) disable PENDING mode always
        #     (lfdirstate isn't yet managed as a part of the transaction)
        # (2) avoid develwarn 'use dirstate.write with ....'
        if tr:
            tr.addbackup(b'largefiles/dirstate', location=b'plain')
        super(largefilesdirstate, self).write(None)
202 202
203 203
def openlfdirstate(ui, repo, create=True):
    """
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.

    When ``create`` is true and no lfdirstate file exists yet, it is
    populated from the standins currently tracked by the main dirstate.
    """
    vfs = repo.vfs
    lfstoredir = longname
    # the lfdirstate lives under .hg/largefiles/, so give it its own opener
    opener = vfsmod.vfs(vfs.join(lfstoredir))
    use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
    lfdirstate = largefilesdirstate(
        opener,
        ui,
        repo.root,
        repo.dirstate._validate,
        lambda: sparse.matcher(repo),
        repo.nodeconstants,
        use_dirstate_v2,
    )

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(
            matcher, subrepos=[], unknown=False, ignored=False
        )

        if len(standins) > 0:
            vfs.makedirs(lfstoredir)

        with lfdirstate.parentchange():
            for standin in standins:
                lfile = splitstandin(standin)
                # mark the entry possibly_dirty so its content gets
                # re-checked on the next status
                lfdirstate.update_file(
                    lfile, p1_tracked=True, wc_tracked=True, possibly_dirty=True
                )
    return lfdirstate
242 242
243 243
def lfdirstatestatus(lfdirstate, repo):
    """Run a status check on ``lfdirstate`` and resolve the "unsure" files.

    An unsure largefile is modified when its content hash no longer matches
    its standin in the parent context; otherwise it is clean.  For clean
    files, fresh (mode, size, mtime) cache data is recorded — but only when
    the mtime is outside the status "mtime boundary", so a write landing in
    the same filesystem-timestamp instant cannot go undetected.

    Returns the status object ``s`` with its ``modified``/``clean`` lists
    updated in place.
    """
    pctx = repo[b'.']
    match = matchmod.always()
    unsure, s, mtime_boundary = lfdirstate.status(
        match, subrepos=[], ignored=False, clean=False, unknown=False
    )
    modified, clean = s.modified, s.clean
    wctx = repo[None]
    for lfile in unsure:
        try:
            fctx = pctx[standin(lfile)]
        except LookupError:
            # no standin in the parent: the largefile is new/changed
            fctx = None
        if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            st = wctx[lfile].lstat()
            mode = st.st_mode
            size = st.st_size
            # reliable_mtime_of returns None for mtimes inside the
            # ambiguity window; skip caching in that case
            mtime = timestamp.reliable_mtime_of(st, mtime_boundary)
            if mtime is not None:
                cache_data = (mode, size, mtime)
                lfdirstate.set_clean(lfile, cache_data)
    return s
272 269
273 270
def listlfiles(repo, rev=None, matcher=None):
    """return a list of largefiles in the working copy or the
    specified changeset"""
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    # ignore unknown files in working directory
    for f in repo[rev].walk(matcher):
        if rev is not None or repo.dirstate.get_entry(f).any_tracked:
            lfiles.append(splitstandin(f))
    return lfiles
287 284
288 285
def instore(repo, hash, forcelocal=False):
    '''Return true if a largefile with the given hash exists in the store'''
    path = storepath(repo, hash, forcelocal)
    return os.path.exists(path)
292 289
293 290
def storepath(repo, hash, forcelocal=False):
    """Return the correct location in the repository largefiles store for a
    file with the given hash."""
    # a shared repo keeps its primary store at the share source unless the
    # caller explicitly asks for the local store
    if forcelocal or not repo.shared():
        return repo.vfs.join(longname, hash)
    return repo.vfs.reljoin(repo.sharedpath, longname, hash)
300 297
301 298
def findstorepath(repo, hash):
    """Search through the local store path(s) to find the file for the given
    hash.  If the file is not found, its path in the primary store is
    returned.  The return value is a tuple of (path, exists(path)).
    """
    primary = storepath(repo, hash, False)
    if instore(repo, hash):
        return (primary, True)
    # For shared repos, the primary store is in the share source.  But for
    # backward compatibility, force a lookup in the local store if the file
    # wasn't found in the share source.
    if repo.shared() and instore(repo, hash, True):
        return (storepath(repo, hash, True), True)
    return (primary, False)
318 315
319 316
def copyfromcache(repo, hash, filename):
    """Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happened:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache)."""
    wvfs = repo.wvfs
    srcpath = findfile(repo, hash)
    if srcpath is None:
        return False
    wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    with open(srcpath, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
        gothash = copyandhash(util.filechunkiter(srcfd), destfd)
    if gothash == hash:
        return True
    # content did not hash to what the standin promised: discard it
    repo.ui.warn(
        _(b'%s: data corruption in %s with hash %s\n')
        % (filename, srcpath, gothash)
    )
    wvfs.unlink(filename)
    return False
343 340
344 341
def copytostore(repo, ctx, file, fstandin):
    """Copy the largefile behind ``fstandin`` into the store, unless its
    content (keyed by hash) is already present."""
    wvfs = repo.wvfs
    hash = readasstandin(ctx[fstandin])
    if instore(repo, hash):
        return
    if not wvfs.exists(file):
        repo.ui.warn(
            _(b"%s: largefile %s not available from local store\n")
            % (file, hash)
        )
        return
    copytostoreabsolute(repo, wvfs.join(file), hash)
357 354
358 355
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    manifest = ctx.manifest()
    for filename in ctx.files():
        realfile = splitstandin(filename)
        # skip non-standins and files removed in this revision
        if realfile is not None and filename in manifest:
            copytostore(repo, ctx, realfile, filename)
367 364
368 365
def copytostoreabsolute(repo, file, hash):
    """Store the content of ``file`` (an absolute path) under ``hash``,
    hardlinking from the user cache when the content is already there."""
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
        return
    dest = storepath(repo, hash)
    util.makedirs(os.path.dirname(dest))
    with open(file, b'rb') as srcf:
        with util.atomictempfile(
            dest, createmode=repo.store.createmode
        ) as dstf:
            for chunk in util.filechunkiter(srcf):
                dstf.write(chunk)
    # make the freshly stored content available to other repos too
    linktousercache(repo, hash)
381 378
382 379
def linktousercache(repo, hash):
    """Link / copy the largefile with the specified hash from the store
    to the cache."""
    link(storepath(repo, hash), usercachepath(repo.ui, hash))
388 385
389 386
def getstandinmatcher(repo, rmatcher=None):
    '''Return a match object that applies rmatcher to the standin directory'''
    wvfs = repo.wvfs
    standindir = shortname

    # no warnings about missing files or directories
    def badfn(f, msg):
        return None

    if not rmatcher or rmatcher.always():
        # no patterns: relative to repo root
        pats = [wvfs.join(standindir)]
    else:
        # rewrite each user pattern to live under the standin directory
        pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
        if not pats:
            pats = [wvfs.join(standindir)]
    return scmutil.match(repo[None], pats, badfn=badfn)
407 404
408 405
def composestandinmatcher(repo, rmatcher):
    """Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user."""
    smatcher = getstandinmatcher(repo, rmatcher)
    isstandin = smatcher.matchfn

    def composedmatchfn(f):
        # a standin matches only when rmatcher accepts its largefile name
        return isstandin(f) and rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composedmatchfn
    return smatcher
422 419
423 420
def standin(filename):
    """Return the repo-relative path to the standin for the specified big
    file."""
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows.  Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
435 432
436 433
def isstandin(filename):
    """Return true if filename is a big file standin.  filename must be
    in Mercurial's internal form (slash-separated)."""
    # a standin always lives under the '.hglf/' prefix
    return filename.startswith(shortnameslash)
441 438
442 439
def splitstandin(filename):
    """Return the largefile name for ``filename`` if it is a standin,
    otherwise None."""
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    parts = util.pconvert(filename).split(b'/', 1)
    if len(parts) == 2 and parts[0] == shortname:
        return parts[1]
    return None
452 449
453 450
def updatestandin(repo, lfile, standin):
    """Re-calculate hash value of lfile and write it into standin

    This assumes that "lfutil.standin(lfile) == standin", for efficiency.
    """
    if not repo.wvfs.exists(lfile):
        raise error.Abort(_(b'%s: file not found!') % lfile)
    file = repo.wjoin(lfile)
    hash = hashfile(file)
    executable = getexecutable(file)
    writestandin(repo, standin, hash, executable)
466 463
467 464
def readasstandin(fctx):
    """read hex hash from given filectx of standin file

    This encapsulates how "standin" data is stored into storage layer."""
    # a standin's content is just the largefile hash plus a newline
    return fctx.data().strip()
473 470
474 471
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    flags = b'x' if executable else b''
    repo.wwrite(standin, hash + b'\n', flags)
478 475
479 476
def copyandhash(instream, outfile):
    """Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash."""
    hasher = hashutil.sha1(b'')
    for chunk in instream:
        hasher.update(chunk)
        outfile.write(chunk)
    return hex(hasher.digest())
488 485
489 486
def hashfile(file):
    """Return the hex SHA-1 of ``file``'s content, or b'' if it is missing."""
    if not os.path.exists(file):
        return b''
    with open(file, b'rb') as fd:
        return hexsha1(fd)
495 492
496 493
def getexecutable(filename):
    """Return truthy when user, group, AND other executable bits are all set
    on ``filename``."""
    mode = os.stat(filename).st_mode
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return (mode & exec_bits) == exec_bits
504 501
505 502
def urljoin(first, second, *arg):
    """Join URL components, guaranteeing exactly one b'/' between them."""

    def _join(left, right):
        if not left.endswith(b'/'):
            left += b'/'
        return left + (right[1:] if right.startswith(b'/') else right)

    url = _join(first, second)
    for piece in arg:
        url = _join(url, piece)
    return url
518 515
519 516
def hexsha1(fileobj):
    """Return the hex-encoded SHA-1 sum of the data in the file-like
    object ``fileobj``."""
    digest = hashutil.sha1()
    for block in util.filechunkiter(fileobj):
        digest.update(block)
    return hex(digest.digest())
527 524
528 525
def httpsendfile(ui, filename):
    """Return an ``httpsendfile`` wrapper opening ``filename`` for binary
    read (used to upload largefiles over HTTP)."""
    return httpconnection.httpsendfile(ui, filename, b'rb')
531 528
532 529
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    # normalize first, then force forward slashes (dirstate convention)
    return util.pconvert(os.path.normpath(path))
536 533
537 534
def islfilesrepo(repo):
    '''Return true if the repo is a largefile repo.'''
    if b'largefiles' in repo.requirements:
        # a store datafile under '.hglf/' proves largefiles are in use
        for f in repo.store.datafiles():
            if shortnameslash in f[1]:
                return True

    # fall back to checking for any entry in the largefiles dirstate
    return any(openlfdirstate(repo.ui, repo, False))
546 543
547 544
class storeprotonotcapable(Exception):
    """Raised when no remote store supports any of the required protocol
    types."""

    def __init__(self, storetypes):
        # protocol types the caller would have been able to handle
        self.storetypes = storetypes
551 548
552 549
def getstandinsstate(repo):
    """Return a list of ``(lfile, hash)`` pairs for every standin tracked by
    the main dirstate; ``hash`` is None when the standin cannot be read."""
    standins = []
    matcher = getstandinmatcher(repo)
    wctx = repo[None]
    tracked = repo.dirstate.walk(
        matcher, subrepos=[], unknown=False, ignored=False
    )
    for fstandin in tracked:
        lfile = splitstandin(fstandin)
        try:
            hash = readasstandin(wctx[fstandin])
        except IOError:
            hash = None
        standins.append((lfile, hash))
    return standins
567 564
568 565
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    """Mirror the state of ``lfile``'s standin from ``repo.dirstate`` into
    ``lfdirstate``.

    The entry is always marked ``possibly_dirty`` so the file content gets
    re-checked by the next status run.  (``normallookup`` is accepted for
    API compatibility but not consulted here.)
    """
    lfstandin = standin(lfile)
    if lfstandin not in repo.dirstate:
        # standin not tracked at all: drop the largefile entry as well
        lfdirstate.update_file(lfile, p1_tracked=False, wc_tracked=False)
    else:
        # copy the tracked/p1/p2 flags straight from the standin's entry
        entry = repo.dirstate.get_entry(lfstandin)
        lfdirstate.update_file(
            lfile,
            wc_tracked=entry.tracked,
            p1_tracked=entry.p1_tracked,
            p2_info=entry.p2_info,
            possibly_dirty=True,
        )
582 579
583 580
def markcommitted(orig, ctx, node):
    """Post-commit wrapper: sync the largefiles dirstate with the freshly
    committed changeset ``node`` and copy its largefiles into the store.

    ``orig`` is the wrapped markcommitted implementation, invoked inside the
    lfdirstate parentchange so the dirstate updates are grouped together.
    """
    repo = ctx.repo()

    lfdirstate = openlfdirstate(repo.ui, repo)
    with lfdirstate.parentchange():
        orig(node)

        # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
        # because files coming from the 2nd parent are omitted in the latter.
        #
        # The former should be used to get targets of "synclfdirstate",
        # because such files:
        # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
        # - have to be marked as "n" after commit, but
        # - aren't listed in "repo[node].files()"

        for f in ctx.files():
            lfile = splitstandin(f)
            if lfile is not None:
                synclfdirstate(repo, lfdirstate, lfile, False)
    lfdirstate.write(repo.currenttransaction())

    # As part of committing, copy all of the largefiles into the cache.
    #
    # Using "node" instead of "ctx" implies additional "repo[node]"
    # lookup while copyalltostore(), but can omit redundant check for
    # files coming from the 2nd parent, which should exist in store
    # at merging.
    copyalltostore(repo, node)
613 610
614 611
def getlfilestoupdate(oldstandins, newstandins):
    """Return the largefile names whose ``(lfile, hash)`` standin entries
    differ between the two lists, without duplicates.

    The symmetric difference yields every entry present in only one of the
    two states; a changed hash produces two entries for the same name, so
    names are de-duplicated (order follows set iteration, as before).
    """
    changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
    filelist = []
    # track seen names in a set: the original `f[0] not in filelist` list
    # scan was accidentally O(n^2)
    seen = set()
    for f in changedstandins:
        if f[0] not in seen:
            seen.add(f[0])
            filelist.append(f[0])
    return filelist
622 619
623 620
def getlfilestoupload(repo, missing, addfunc):
    """Call ``addfunc(standin, hash)`` for every largefile standin touched
    by the ``missing`` revisions (those that need their largefiles pushed)."""
    makeprogress = repo.ui.makeprogress
    with makeprogress(
        _(b'finding outgoing largefiles'),
        unit=_(b'revisions'),
        total=len(missing),
    ) as progress:
        for i, n in enumerate(missing):
            progress.update(i)
            parents = [p for p in repo[n].parents() if p != repo.nullid]

            # look at the raw standins, not the largefile view
            with lfstatus(repo, value=False):
                ctx = repo[n]

            files = set(ctx.files())
            if len(parents) == 2:
                # for merges, ctx.files() is not enough: also collect files
                # removed relative to the merged result or differing from
                # either parent
                mc = ctx.manifest()
                mp1 = ctx.p1().manifest()
                mp2 = ctx.p2().manifest()
                for f in mp1:
                    if f not in mc:
                        files.add(f)
                for f in mp2:
                    if f not in mc:
                        files.add(f)
                for f in mc:
                    if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                        files.add(f)
            for fn in files:
                if isstandin(fn) and fn in ctx:
                    addfunc(fn, readasstandin(ctx[fn]))
655 652
656 653
def updatestandinsbymatch(repo, match):
    """Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    """

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = matchmod.always()
        unsure, s, mtime_boundary = lfdirstate.status(
            dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False
        )
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile in modifiedfiles:
                fstandin = standin(lfile)
                if repo.wvfs.exists(fstandin):
                    # this handles the case where a rebase is being
                    # performed and the working copy is not updated
                    # yet.
                    if repo.wvfs.exists(lfile):
                        updatestandin(repo, lfile, fstandin)

        return match

    lfiles = listlfiles(repo)
    # expand directory patterns to the largefiles they contain
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(
        smatcher, subrepos=[], unknown=False, ignored=False
    )

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        if lfdirstate.get_entry(lfile).tracked:
            updatestandin(repo, lfile, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # For largefiles, only one of the normal and standin should be
        # committed (except if one of them is a remove).  In the case of a
        # standin removal, drop the normal file if it is unknown to dirstate.
        # Thus, skip plain largefile names but keep the standin.
        if f in lfiles or fstandin in standins:
            if not repo.dirstate.get_entry(fstandin).removed:
                if not repo.dirstate.get_entry(f).removed:
                    continue
            elif not repo.dirstate.get_entry(f).any_tracked:
                continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        # accept regular files matched by the original matcher, plus the
        # standins collected above; never the plain largefile names
        if origmatchfn(f):
            return f not in lfiles
        else:
            return f in standins

    match.matchfn = matchfn

    return match
761 758
762 759
class automatedcommithook(object):
    """Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    """

    def __init__(self, resuming):
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        # only the first commit after resuming needs the refresh
        self.resuming = False
        return updatestandinsbymatch(repo, match)
784 781
785 782
def getstatuswriter(ui, repo, forcibly=None):
    """Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    """
    if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    if forcibly:
        return ui.status  # forcibly WRITE OUT
    return lambda *msg, **opts: None  # forcibly IGNORE
@@ -1,468 +1,467 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''setup for largefiles repositories: reposetup'''
10 10 from __future__ import absolute_import
11 11
12 12 import copy
13 13
14 14 from mercurial.i18n import _
15 15
16 16 from mercurial import (
17 17 error,
18 18 extensions,
19 19 localrepo,
20 20 match as matchmod,
21 21 scmutil,
22 22 util,
23 23 )
24 24
25 25 from mercurial.dirstateutils import timestamp
26 26
27 27 from . import (
28 28 lfcommands,
29 29 lfutil,
30 30 )
31 31
32 32
def reposetup(ui, repo):
    """Make ``repo`` largefiles-aware by swapping in a wrapper class.

    Replaces ``repo.__class__`` with a subclass whose ``__getitem__``,
    ``status``, ``commitctx`` and ``commit`` translate between
    largefiles and their standins, and installs the commit/push hooks
    the extension needs.  Mutates ``repo`` in place; returns nothing.
    """
    # wire repositories should be given new wireproto functions
    # by "proto.wirereposetup()" via "hg.wirepeersetupfuncs"
    if not repo.local():
        return

    class lfilesrepo(repo.__class__):
        # the mark to examine whether "repo" object enables largefiles or not
        _largefilesenabled = True

        # toggled by callers (via lfutil.lfstatus) to request
        # largefile-translated results from status()/__getitem__()
        lfstatus = False

        # When lfstatus is set, return a context that gives the names
        # of largefiles instead of their corresponding standins and
        # identifies the largefiles as always binary, regardless of
        # their actual contents.
        def __getitem__(self, changeid):
            ctx = super(lfilesrepo, self).__getitem__(changeid)
            if self.lfstatus:

                def files(orig):
                    filenames = orig()
                    return [lfutil.splitstandin(f) or f for f in filenames]

                extensions.wrapfunction(ctx, 'files', files)

                def manifest(orig):
                    man1 = orig()

                    class lfilesmanifest(man1.__class__):
                        def __contains__(self, filename):
                            orig = super(lfilesmanifest, self).__contains__
                            return orig(filename) or orig(
                                lfutil.standin(filename)
                            )

                    man1.__class__ = lfilesmanifest
                    return man1

                extensions.wrapfunction(ctx, 'manifest', manifest)

                def filectx(orig, path, fileid=None, filelog=None):
                    try:
                        if filelog is not None:
                            result = orig(path, fileid, filelog)
                        else:
                            result = orig(path, fileid)
                    except error.LookupError:
                        # Adding a null character will cause Mercurial to
                        # identify this as a binary file.
                        if filelog is not None:
                            result = orig(lfutil.standin(path), fileid, filelog)
                        else:
                            result = orig(lfutil.standin(path), fileid)
                        olddata = result.data
                        result.data = lambda: olddata() + b'\0'
                    return result

                extensions.wrapfunction(ctx, 'filectx', filectx)

            return ctx

        # Figure out the status of big files and insert them into the
        # appropriate list in the result. Also removes standin files
        # from the listing. Revert to the original status if
        # self.lfstatus is False.
        # XXX large file status is buggy when used on repo proxy.
        # XXX this needs to be investigated.
        @localrepo.unfilteredmethod
        def status(
            self,
            node1=b'.',
            node2=None,
            match=None,
            ignored=False,
            clean=False,
            unknown=False,
            listsubrepos=False,
        ):
            listignored, listclean, listunknown = ignored, clean, unknown
            orig = super(lfilesrepo, self).status
            if not self.lfstatus:
                return orig(
                    node1,
                    node2,
                    match,
                    listignored,
                    listclean,
                    listunknown,
                    listsubrepos,
                )

            # some calls in this function rely on the old version of status
            self.lfstatus = False
            ctx1 = self[node1]
            ctx2 = self[node2]
            working = ctx2.rev() is None
            parentworking = working and ctx1 == self[b'.']

            if match is None:
                match = matchmod.always()

            try:
                # updating the dirstate is optional
                # so we don't wait on the lock
                wlock = self.wlock(False)
                gotlock = True
            except error.LockError:
                wlock = util.nullcontextmanager()
                gotlock = False
            with wlock:

                # First check if paths or patterns were specified on the
                # command line. If there were, and they don't match any
                # largefiles, we should just bail here and let super
                # handle it -- thus gaining a big performance boost.
                lfdirstate = lfutil.openlfdirstate(ui, self)
                if not match.always():
                    for f in lfdirstate:
                        if match(f):
                            break
                    else:
                        return orig(
                            node1,
                            node2,
                            match,
                            listignored,
                            listclean,
                            listunknown,
                            listsubrepos,
                        )

                # Create a copy of match that matches standins instead
                # of largefiles.
                def tostandins(files):
                    if not working:
                        return files
                    newfiles = []
                    dirstate = self.dirstate
                    for f in files:
                        sf = lfutil.standin(f)
                        if sf in dirstate:
                            newfiles.append(sf)
                        elif dirstate.hasdir(sf):
                            # Directory entries could be regular or
                            # standin, check both
                            newfiles.extend((f, sf))
                        else:
                            newfiles.append(f)
                    return newfiles

                m = copy.copy(match)
                m._files = tostandins(m._files)

                result = orig(
                    node1, node2, m, ignored, clean, unknown, listsubrepos
                )
                if working:

                    def sfindirstate(f):
                        sf = lfutil.standin(f)
                        dirstate = self.dirstate
                        return sf in dirstate or dirstate.hasdir(sf)

                    match._files = [f for f in match._files if sfindirstate(f)]
                    # Don't waste time getting the ignored and unknown
                    # files from lfdirstate
                    unsure, s, mtime_boundary = lfdirstate.status(
                        match,
                        subrepos=[],
                        ignored=False,
                        clean=listclean,
                        unknown=False,
                    )
                    (modified, added, removed, deleted, clean) = (
                        s.modified,
                        s.added,
                        s.removed,
                        s.deleted,
                        s.clean,
                    )
                    if parentworking:
                        wctx = repo[None]
                        for lfile in unsure:
                            standin = lfutil.standin(lfile)
                            if standin not in ctx1:
                                # from second parent
                                modified.append(lfile)
                            elif lfutil.readasstandin(
                                ctx1[standin]
                            ) != lfutil.hashfile(self.wjoin(lfile)):
                                modified.append(lfile)
                            else:
                                if listclean:
                                    clean.append(lfile)
                                s = wctx[lfile].lstat()
                                mode = s.st_mode
                                size = s.st_size
                                # Only record a cached mtime when it is
                                # unambiguous wrt. the status mtime
                                # boundary; reliable_mtime_of() returns
                                # None otherwise, in which case we skip
                                # caching rather than risk a racy entry.
                                mtime = timestamp.reliable_mtime_of(
                                    s, mtime_boundary
                                )
                                if mtime is not None:
                                    cache_data = (mode, size, mtime)
                                    lfdirstate.set_clean(lfile, cache_data)
                    else:
                        tocheck = unsure + modified + added + clean
                        modified, added, clean = [], [], []
                        checkexec = self.dirstate._checkexec

                        for lfile in tocheck:
                            standin = lfutil.standin(lfile)
                            if standin in ctx1:
                                abslfile = self.wjoin(lfile)
                                if (
                                    lfutil.readasstandin(ctx1[standin])
                                    != lfutil.hashfile(abslfile)
                                ) or (
                                    checkexec
                                    and (b'x' in ctx1.flags(standin))
                                    != bool(lfutil.getexecutable(abslfile))
                                ):
                                    modified.append(lfile)
                                elif listclean:
                                    clean.append(lfile)
                            else:
                                added.append(lfile)

                    # at this point, 'removed' contains largefiles
                    # marked as 'R' in the working context.
                    # then, largefiles not managed also in the target
                    # context should be excluded from 'removed'.
                    removed = [
                        lfile
                        for lfile in removed
                        if lfutil.standin(lfile) in ctx1
                    ]

                    # Standins no longer found in lfdirstate have been deleted
                    for standin in ctx1.walk(lfutil.getstandinmatcher(self)):
                        lfile = lfutil.splitstandin(standin)
                        if not match(lfile):
                            continue
                        if lfile not in lfdirstate:
                            deleted.append(lfile)
                            # Sync "largefile has been removed" back to the
                            # standin. Removing a file as a side effect of
                            # running status is gross, but the alternatives (if
                            # any) are worse.
                            self.wvfs.unlinkpath(standin, ignoremissing=True)

                    # Filter result lists
                    result = list(result)

                    # Largefiles are not really removed when they're
                    # still in the normal dirstate. Likewise, normal
                    # files are not really removed if they are still in
                    # lfdirstate. This happens in merges where files
                    # change type.
                    removed = [f for f in removed if f not in self.dirstate]
                    result[2] = [f for f in result[2] if f not in lfdirstate]

                    lfiles = set(lfdirstate)
                    # Unknown files
                    result[4] = set(result[4]).difference(lfiles)
                    # Ignored files
                    result[5] = set(result[5]).difference(lfiles)
                    # combine normal files and largefiles
                    normals = [
                        [fn for fn in filelist if not lfutil.isstandin(fn)]
                        for filelist in result
                    ]
                    lfstatus = (
                        modified,
                        added,
                        removed,
                        deleted,
                        [],
                        [],
                        clean,
                    )
                    result = [
                        sorted(list1 + list2)
                        for (list1, list2) in zip(normals, lfstatus)
                    ]
                else:  # not against working directory
                    result = [
                        [lfutil.splitstandin(f) or f for f in items]
                        for items in result
                    ]

                if gotlock:
                    lfdirstate.write(self.currenttransaction())

            self.lfstatus = True
            return scmutil.status(*result)

        def commitctx(self, ctx, *args, **kwargs):
            # Wrap the context class so markcommitted() also updates
            # largefile bookkeeping (via lfutil.markcommitted).
            node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)

            class lfilesctx(ctx.__class__):
                def markcommitted(self, node):
                    orig = super(lfilesctx, self).markcommitted
                    return lfutil.markcommitted(orig, self, node)

            ctx.__class__ = lfilesctx
            return node

        # Before commit, largefile standins have not had their
        # contents updated to reflect the hash of their largefile.
        # Do that here.
        def commit(
            self,
            text=b"",
            user=None,
            date=None,
            match=None,
            force=False,
            editor=False,
            extra=None,
        ):
            if extra is None:
                extra = {}
            orig = super(lfilesrepo, self).commit

            with self.wlock():
                # the top of the _lfcommithooks stack decides how the
                # matcher is adjusted for this commit
                lfcommithook = self._lfcommithooks[-1]
                match = lfcommithook(self, match)
                result = orig(
                    text=text,
                    user=user,
                    date=date,
                    match=match,
                    force=force,
                    editor=editor,
                    extra=extra,
                )
            return result

        # TODO: _subdirlfs should be moved into "lfutil.py", because
        # it is referred only from "lfutil.updatestandinsbymatch"
        def _subdirlfs(self, files, lfiles):
            """
            Adjust matched file list
            If we pass a directory to commit whose only committable files
            are largefiles, the core commit code aborts before finding
            the largefiles.
            So we do the following:
            For directories that only have largefiles as matches,
            we explicitly add the largefiles to the match list and remove
            the directory.
            In other cases, we leave the match list unmodified.
            """
            actualfiles = []
            dirs = []
            regulars = []

            for f in files:
                if lfutil.isstandin(f + b'/'):
                    raise error.Abort(
                        _(b'file "%s" is a largefile standin') % f,
                        hint=b'commit the largefile itself instead',
                    )
                # Scan directories
                if self.wvfs.isdir(f):
                    dirs.append(f)
                else:
                    regulars.append(f)

            for f in dirs:
                matcheddir = False
                d = self.dirstate.normalize(f) + b'/'
                # Check for matched normal files
                for mf in regulars:
                    if self.dirstate.normalize(mf).startswith(d):
                        actualfiles.append(f)
                        matcheddir = True
                        break
                if not matcheddir:
                    # If no normal match, manually append
                    # any matching largefiles
                    for lf in lfiles:
                        if self.dirstate.normalize(lf).startswith(d):
                            actualfiles.append(lf)
                            if not matcheddir:
                                # There may still be normal files in the dir, so
                                # add a directory to the list, which
                                # forces status/dirstate to walk all files and
                                # call the match function on the matcher, even
                                # on case sensitive filesystems.
                                actualfiles.append(b'.')
                                matcheddir = True
                # Nothing in dir, so readd it
                # and let commit reject it
                if not matcheddir:
                    actualfiles.append(f)

            # Always add normal files
            actualfiles += regulars
            return actualfiles

    repo.__class__ = lfilesrepo

    # stack of hooks being executed before committing.
    # only last element ("_lfcommithooks[-1]") is used for each committing.
    repo._lfcommithooks = [lfutil.updatestandinsbymatch]

    # Stack of status writer functions taking "*msg, **opts" arguments
    # like "ui.status()". Only last element ("_lfstatuswriters[-1]")
    # is used to write status out.
    repo._lfstatuswriters = [ui.status]

    def prepushoutgoinghook(pushop):
        """Push largefiles for pushop before pushing revisions."""
        lfrevs = pushop.lfrevs
        if lfrevs is None:
            lfrevs = pushop.outgoing.missing
        if lfrevs:
            toupload = set()
            addfunc = lambda fn, lfhash: toupload.add(lfhash)
            lfutil.getlfilestoupload(pushop.repo, lfrevs, addfunc)
            lfcommands.uploadlfiles(ui, pushop.repo, pushop.remote, toupload)

    repo.prepushoutgoinghooks.add(b"largefiles", prepushoutgoinghook)

    def checkrequireslfiles(ui, repo, **kwargs):
        # Add the 'largefiles' requirement once any standin appears in
        # the store, so older clients cannot misread the repo.
        if b'largefiles' not in repo.requirements and any(
            lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles()
        ):
            repo.requirements.add(b'largefiles')
            scmutil.writereporequirements(repo)

    ui.setconfig(
        b'hooks', b'changegroup.lfiles', checkrequireslfiles, b'largefiles'
    )
    ui.setconfig(b'hooks', b'commit.lfiles', checkrequireslfiles, b'largefiles')
General Comments 0
You need to be logged in to leave comments. Login now