##// END OF EJS Templates
largefiles: ensure that the standin files are available in getlfilestoupload()...
Matt Harbison -
r23657:95f238ca default
parent child Browse files
Show More
@@ -1,576 +1,583 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import platform
13 13 import shutil
14 14 import stat
15 15 import copy
16 16
17 17 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
18 18 from mercurial.i18n import _
19 19 from mercurial import node
20 20
# Name of the standin directory (relative to the repo root) and the same
# name with a trailing '/', used as the prefix of every standin path.
shortname = '.hglf'
shortnameslash = '%s/' % shortname
# Used both as the config section name and as the directory name joined
# onto the repository / user cache paths by the functions below.
longname = 'largefiles'
24 24
25 25
26 26 # -- Private worker functions ------------------------------------------
27 27
def getminsize(ui, assumelfiles, opt, default=10):
    '''Resolve the minimum size threshold for largefiles.

    ``opt`` is used when it is truthy.  Otherwise, if ``assumelfiles``
    is set, the ``minsize`` setting of the largefiles config section is
    consulted, with ``default`` as its fallback.  A truthy result is
    converted to float; a falsy result other than None is returned
    unchanged.

    Aborts when the value cannot be converted to a number, or when no
    value (None) could be determined at all.
    '''
    size = opt
    if assumelfiles and not size:
        size = ui.config(longname, 'minsize', default=default)
    if size is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    if not size:
        # falsy but not None (e.g. an empty option string): hand it back
        # untouched, exactly as it was given
        return size
    try:
        return float(size)
    except ValueError:
        raise util.Abort(_('largefiles: size must be number (not %s)\n')
                         % size)
41 41
def link(src, dest):
    '''Make ``dest`` a hardlink to ``src``, copying if that fails.

    The parent directory of ``dest`` is created first.  If hardlinking
    raises OSError, the content of ``src`` is copied to ``dest`` through
    an atomic temp file and the permission bits of ``src`` are applied
    to ``dest``.
    '''
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        srcf = open(src, 'rb')
        try:
            dst = util.atomictempfile(dest)
            for chunk in util.filechunkiter(srcf):
                dst.write(chunk)
            dst.close()
        finally:
            # always release the source handle, even if the copy fails
            srcf.close()
        os.chmod(dest, os.stat(src).st_mode)
53 53
def usercachepath(ui, hash):
    '''Return the path of ``hash`` inside the per-user largefiles cache.

    The ``usercache`` setting of the largefiles config section wins when
    set.  Otherwise a platform specific location is derived from the
    environment (LOCALAPPDATA/APPDATA on Windows, ~/Library/Caches on
    Darwin, XDG_CACHE_HOME or ~/.cache on other POSIX systems).  When
    the needed environment variable is missing, a falsy value is
    returned.

    Aborts on an operating system that is none of the above.
    '''
    configured = ui.configpath(longname, 'usercache', None)
    if configured:
        return os.path.join(configured, hash)

    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname, hash)
        return configured

    if platform.system() == 'Darwin':
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname, hash)
        return configured

    if os.name == 'posix':
        xdgcache = os.getenv('XDG_CACHE_HOME')
        if xdgcache:
            return os.path.join(xdgcache, longname, hash)
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, '.cache', longname, hash)
        # neither variable is usable: report the (falsy) XDG value
        return xdgcache

    raise util.Abort(_('unknown operating system: %s\n') % os.name)
79 79
def inusercache(ui, hash):
    '''Tell whether ``hash`` is present in the user cache.

    Returns the falsy cache path itself when no cache location could be
    determined, otherwise whether that path exists.
    '''
    cached = usercachepath(ui, hash)
    if not cached:
        return cached
    return os.path.exists(cached)
83 83
def findfile(repo, hash):
    '''Return the store path of the largefile ``hash``, or None.

    The repository store is checked first.  If the file is only found
    in the user cache, it is linked into the store before the store
    path is returned.
    '''
    ui = repo.ui
    if instore(repo, hash):
        ui.note(_('found %s in store\n') % hash)
        return storepath(repo, hash)
    if not inusercache(ui, hash):
        return None
    ui.note(_('found %s in system cache\n') % hash)
    target = storepath(repo, hash)
    link(usercachepath(ui, hash), target)
    return target
94 94
class largefilesdirstate(dirstate.dirstate):
    '''dirstate whose file arguments are normalized with unixpath()
    before being handed to the base class, and which ignores no file.'''

    def __getitem__(self, key):
        base = super(largefilesdirstate, self)
        return base.__getitem__(unixpath(key))

    def normal(self, f):
        base = super(largefilesdirstate, self)
        return base.normal(unixpath(f))

    def remove(self, f):
        base = super(largefilesdirstate, self)
        return base.remove(unixpath(f))

    def add(self, f):
        base = super(largefilesdirstate, self)
        return base.add(unixpath(f))

    def drop(self, f):
        base = super(largefilesdirstate, self)
        return base.drop(unixpath(f))

    def forget(self, f):
        base = super(largefilesdirstate, self)
        return base.forget(unixpath(f))

    def normallookup(self, f):
        base = super(largefilesdirstate, self)
        return base.normallookup(unixpath(f))

    def _ignore(self, f):
        # nothing is ever reported as ignored by this dirstate
        return False
112 112
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.

    When ``create`` is true and no such dirstate file exists yet, the
    returned object is populated from the standins known to the
    repository dirstate.
    '''
    lfstoredir = repo.join(longname)
    lfdirstate = largefilesdirstate(scmutil.opener(lfstoredir), ui,
                                    repo.root, repo.dirstate._validate)

    dirstatefile = os.path.join(lfstoredir, 'dirstate')
    if not create or os.path.exists(dirstatefile):
        return lfdirstate

    # The largefiles dirstate does not exist: populate and create it.
    # This ensures that we create it on the first meaningful largefiles
    # operation in a new clone.
    standins = repo.dirstate.walk(getstandinmatcher(repo), [], False, False)

    # only create the directory when there is something to record
    if standins:
        util.makedirs(lfstoredir)

    for fstandin in standins:
        lfdirstate.normallookup(splitstandin(fstandin))
    return lfdirstate
137 137
def lfdirstatestatus(lfdirstate, repo):
    '''Return the status of ``lfdirstate`` with "unsure" files resolved.

    Each unsure largefile is compared, by hash, against the standin
    recorded in ``repo['.']``.  It is appended to the ``modified`` list
    when the standin is missing or the hashes differ; otherwise it is
    appended to ``clean`` and marked normal in ``lfdirstate``.
    '''
    pctx = repo['.']
    always = match_.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(always, [], False, False, False)
    modified = s.modified
    clean = s.clean
    for lfile in unsure:
        try:
            fctx = pctx[standin(lfile)]
        except LookupError:
            fctx = None
        unchanged = (fctx and
                     fctx.data().strip() == hashfile(repo.wjoin(lfile)))
        if unchanged:
            clean.append(lfile)
            lfdirstate.normal(lfile)
        else:
            modified.append(lfile)
    return s
154 154
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset

    ``matcher`` defaults to a matcher on the whole standin directory.
    '''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is None and repo.dirstate[f] == '?':
            continue
        lfiles.append(splitstandin(f))
    return lfiles
166 166
def instore(repo, hash):
    '''Tell whether the largefile ``hash`` exists in the repo store.'''
    location = storepath(repo, hash)
    return os.path.exists(location)
169 169
def storepath(repo, hash):
    '''Return the path of the largefile ``hash`` in the repo store.'''
    relpath = os.path.join(longname, hash)
    return repo.join(relpath)
172 172
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happen:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    cached = findfile(repo, hash)
    if cached is None:
        return False
    dest = repo.wjoin(filename)
    util.makedirs(os.path.dirname(dest))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(cached, dest)
    return True
187 187
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the working-directory largefile ``file`` into the store.

    The hash is read from the standin of ``file`` at ``rev``; nothing is
    done when the store already holds that hash.  ``uploaded`` is
    accepted but not used here.
    '''
    hash = readstandin(repo, file, rev)
    if not instore(repo, hash):
        copytostoreabsolute(repo, repo.wjoin(file), hash)
193 193
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''

    ctx = repo[node]
    for filename in ctx.files():
        if not isstandin(filename):
            continue
        # files listed by the changeset but absent from its manifest
        # are skipped
        if filename not in ctx.manifest():
            continue
        realfile = splitstandin(filename)
        copytostore(repo, ctx.node(), realfile)
202 202
203 203
def copytostoreabsolute(repo, file, hash):
    '''Put the content of ``file`` into the store under ``hash``.

    If the user cache already holds ``hash``, the store entry is linked
    from there.  Otherwise ``file`` is copied into the store through an
    atomic temp file (created with the store's ``createmode``) and the
    new store entry is then linked into the user cache.
    '''
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        srcf = open(file, 'rb')
        try:
            dst = util.atomictempfile(storepath(repo, hash),
                                      createmode=repo.store.createmode)
            for chunk in util.filechunkiter(srcf):
                dst.write(chunk)
            dst.close()
        finally:
            # always release the source handle, even if the copy fails
            srcf.close()
        linktousercache(repo, hash)
215 215
def linktousercache(repo, hash):
    '''Link the store entry for ``hash`` into the user cache.

    Does nothing when no user cache location could be determined.
    '''
    cachepath = usercachepath(repo.ui, hash)
    if not cachepath:
        return
    link(storepath(repo, hash), cachepath)
220 220
def getstandinmatcher(repo, pats=[], opts={}):
    '''Return a match object that applies pats to the standin directory'''
    standindir = repo.wjoin(shortname)
    if not pats:
        # no patterns: relative to repo root
        standinpats = [standindir]
    else:
        standinpats = []
        for pat in pats:
            standinpats.append(os.path.join(standindir, pat))

    def ignorebad(f, msg):
        # no warnings about missing files or directories
        return None

    match = scmutil.match(repo[None], standinpats, opts)
    match.bad = ignorebad
    return match
233 233
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    standinmatchfn = smatcher.matchfn

    def composedmatchfn(f):
        # must be matched as a standin first, then the corresponding
        # largefile name must be accepted by rmatcher
        accepted = standinmatchfn(f)
        if accepted:
            accepted = rmatcher.matchfn(splitstandin(f))
        return accepted

    smatcher.matchfn = composedmatchfn
    return smatcher
245 245
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    converted = util.pconvert(filename)
    return shortnameslash + converted
257 257
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    prefix = shortnameslash
    return filename.startswith(prefix)
262 262
def splitstandin(filename):
    '''Return the largefile name for the standin ``filename``, or None
    if ``filename`` does not live directly under the standin directory.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    parts = util.pconvert(filename).split('/', 1)
    if len(parts) != 2 or parts[0] != shortname:
        return None
    return parts[1]
272 272
def updatestandin(repo, standin):
    '''Rewrite ``standin`` from the current state of its largefile.

    The hash and executable bit of the working-directory largefile are
    written to the standin.  Nothing happens if the largefile does not
    exist.
    '''
    lfpath = repo.wjoin(splitstandin(standin))
    if not os.path.exists(lfpath):
        return
    writestandin(repo, standin, hashfile(lfpath), getexecutable(lfpath))
279 279
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    ctx = repo[node]
    fctx = ctx[standin(filename)]
    return fctx.data().strip()
284 284
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    # the flags argument is 'x' for an executable largefile, else empty
    if executable:
        flags = 'x'
    else:
        flags = ''
    repo.wwrite(standin, hash + '\n', flags)
288 288
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    digest = util.sha1('')
    for chunk in instream:
        digest.update(chunk)
        outfile.write(chunk)
    hexhash = digest.hexdigest()
    return hexhash
297 297
def hashrepofile(repo, file):
    '''Return the hash of the working-directory file ``file``.'''
    abspath = repo.wjoin(file)
    return hashfile(abspath)
300 300
def hashfile(file):
    '''Return the hex SHA-1 digest of the content of ``file``.

    Returns the empty string if ``file`` does not exist.
    '''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        # read in 128 KiB chunks
        for data in util.filechunkiter(fd, 128 * 1024):
            hasher.update(data)
    finally:
        # close the handle even if reading fails part-way through
        fd.close()
    return hasher.hexdigest()
310 310
def getexecutable(filename):
    '''Return a true value only if ``filename`` is executable by user,
    group and others alike; otherwise return a false value.'''
    mode = os.stat(filename).st_mode
    result = 0
    for bit in (stat.S_IXUSR, stat.S_IXGRP, stat.S_IXOTH):
        result = mode & bit
        if not result:
            # one of the three execute bits is missing
            break
    return result
316 316
def urljoin(first, second, *arg):
    '''Join two or more URL components with exactly one '/' added
    between neighbours.

    A single leading '/' is stripped from each appended component, and
    a '/' is appended to the accumulated URL when it lacks one.
    '''
    url = first
    for part in (second,) + arg:
        if not url.endswith('/'):
            url += '/'
        if part.startswith('/'):
            part = part[1:]
        url += part
    return url
329 329
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    digest = util.sha1()
    for piece in util.filechunkiter(data):
        digest.update(piece)
    hexhash = digest.hexdigest()
    return hexhash
337 337
def httpsendfile(ui, filename):
    '''Return an httpconnection.httpsendfile for ``filename`` opened in
    binary read mode.'''
    mode = 'rb'
    return httpconnection.httpsendfile(ui, filename, mode)
340 340
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    normalized = os.path.normpath(path)
    return util.pconvert(normalized)
344 344
def islfilesrepo(repo):
    '''Tell whether ``repo`` contains largefiles.

    True when the repo has the 'largefiles' requirement and some store
    data file name contains the standin directory prefix; otherwise the
    answer is whether the largefiles dirstate (opened without creating
    it) has any true entry.
    '''
    if 'largefiles' in repo.requirements:
        if util.any(shortnameslash in f[0] for f in repo.store.datafiles()):
            return True

    lfdirstate = openlfdirstate(repo.ui, repo, False)
    return util.any(lfdirstate)
351 351
class storeprotonotcapable(Exception):
    '''Exception carrying the ``storetypes`` value given by the raiser.

    NOTE(review): presumably raised when a remote store supports none
    of the listed store types -- confirm against the callers.
    '''
    def __init__(self, storetypes):
        # initialize the base class so that args/str()/repr() carry the
        # payload instead of being empty
        Exception.__init__(self, storetypes)
        self.storetypes = storetypes
355 355
def getstandinsstate(repo):
    '''Return a list of ``(lfile, hash)`` pairs for every standin found
    by walking the repository dirstate.

    ``hash`` is None when reading the standin raises IOError.
    '''
    state = []
    walked = repo.dirstate.walk(getstandinmatcher(repo), [], False, False)
    for fstandin in walked:
        lfile = splitstandin(fstandin)
        try:
            lfhash = readstandin(repo, lfile)
        except IOError:
            lfhash = None
        state.append((lfile, lfhash))
    return state
367 367
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    '''Mirror the dirstate entry of the standin of ``lfile`` into
    ``lfdirstate``.

    A standin missing from the repository dirstate is treated as state
    '?' and the largefile is dropped.  For state 'n' the largefile is
    marked normal only when ``normallookup`` is false and the recorded
    mtime is not negative; otherwise it is marked for lookup.
    '''
    lfstandin = standin(lfile)
    if lfstandin not in repo.dirstate:
        state, mtime = '?', -1
    else:
        entry = repo.dirstate._map[lfstandin]
        state = entry[0]
        mtime = entry[3]

    if state == 'n' and not (normallookup or mtime < 0):
        lfdirstate.normal(lfile)
    elif state in ('n', 'm'):
        # for 'n' we only get here when state 'n' doesn't ensure 'clean'
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
389 389
def markcommitted(orig, ctx, node):
    '''Run ``orig(node)``, then sync the largefiles dirstate for the
    standins in ``ctx.files()`` and copy the largefiles of ``node`` into
    the store.'''
    repo = ctx._repo

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        if not isstandin(f):
            continue
        synclfdirstate(repo, lfdirstate, splitstandin(f), False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    copyalltostore(repo, node)
413 413
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the names of largefiles whose standin entry changed.

    Both arguments are iterables of hashable entries whose first item
    is the largefile name (e.g. ``(lfile, hash)`` pairs).  An entry
    present in only one of the two collections counts as changed.  Each
    name is listed once; the order follows the iteration order of the
    set of changed entries.
    '''
    changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
    # track already-listed names in a set: membership tests on the
    # result list would make this quadratic
    seen = set()
    filelist = []
    for f in changedstandins:
        lfile = f[0]
        if lfile not in seen:
            seen.add(lfile)
            filelist.append(lfile)
    return filelist
421 421
def getlfilestoupload(repo, missing, addfunc):
    '''Call ``addfunc(standin, hash)`` for the standins of each
    changeset in ``missing``.

    The candidate files are those listed by the changeset.  For a
    changeset with two non-null parents they are extended with every
    file that is in a parent manifest but not in the changeset's own
    manifest, and every file whose manifest entry differs from either
    parent.  Only candidates that are standins and exist in the
    changeset are reported.
    '''
    for n in missing:
        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]

        # Look the changeset up with repo.lfstatus switched off and
        # restore the previous value afterwards.
        # NOTE(review): presumably so that the context exposes the
        # standin files rather than the largefiles -- confirm.
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            mc = ctx.manifest()
            pctxs = ctx.parents()
            mp1 = pctxs[0].manifest()
            mp2 = pctxs[1].manifest()
            # files present in a parent but gone from the merge result
            for mp in (mp1, mp2):
                files.update(f for f in mp if f not in mc)
            # files whose entry differs from at least one parent
            for f in mc:
                entry = mc[f]
                if entry != mp1.get(f, None) or entry != mp2.get(f, None):
                    files.add(f)

        for fn in files:
            if not isstandin(fn) or fn not in ctx:
                continue
            addfunc(fn, ctx[fn].data().strip())
443 450
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    '''

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = match_.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, [], False, False, False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in listlfiles(repo):
            if lfile not in modifiedfiles:
                continue
            fstandin = standin(lfile)
            # Requiring both the standin and the largefile to exist
            # handles the case where a rebase is being performed and
            # the working copy is not updated yet.
            if (os.path.exists(repo.wjoin(fstandin)) and
                os.path.exists(repo.wjoin(lfile))):
                updatestandin(repo, fstandin)

        return match

    lfiles = listlfiles(repo)
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, [], False, False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files.  It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed.  No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins.  Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        if lfdirstate[splitstandin(fstandin)] != 'r':
            updatestandin(repo, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user.  Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    #
    # NOTE(review): "+=" extends the existing list object in place; if
    # copy.copy() made a shallow copy, that list is presumably still
    # shared with the matcher passed in -- confirm before changing.
    match._files += sorted(standins)

    # drop known largefiles and standins from the explicit file list
    match._files = [f for f in match._files
                    if f not in lfiles and standin(f) not in standins]

    def matchfn(f):
        if not origmatchfn(f):
            return f in standins
        return f not in lfiles

    match.matchfn = matchfn

    return match
539 546
class automatedcommithook(object):
    '''Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    '''
    def __init__(self, resuming):
        # true until the first call has updated the standins
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        # avoids updating at subsequent commits
        self.resuming = False
        return updatestandinsbymatch(repo, match)
560 567
def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    '''
    if forcibly is None:
        return repo._lfstatuswriters[-1]
    if forcibly:
        return ui.status # forcibly WRITE OUT

    def ignorestatus(*msg, **opts):
        # forcibly IGNORE
        return None
    return ignorestatus
General Comments 0
You need to be logged in to leave comments. Login now