##// END OF EJS Templates
largefiles: introduce lfutil.findstorepath()...
Matt Harbison -
r24629:8dc2533f default
parent child Browse files
Show More
@@ -1,586 +1,593 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import platform
13 13 import shutil
14 14 import stat
15 15 import copy
16 16
17 17 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
18 18 from mercurial.i18n import _
19 19 from mercurial import node
20 20
# directory name, inside the working copy, under which standin files live
shortname = '.hglf'
# precomputed prefix used for slash-separated (dirstate-form) path tests
shortnameslash = shortname + '/'
# config section name and store directory name for the extension
longname = 'largefiles'
24 24
25 25
26 26 # -- Private worker functions ------------------------------------------
27 27
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum largefile size, in MB, as a float.

    ``opt`` is the command-line value; when it is falsy and
    ``assumelfiles`` is set, fall back to the ``largefiles.minsize``
    config value (or ``default``).  Raises util.Abort when the size is
    malformed or missing entirely.
    '''
    lfsize = opt
    if not lfsize and assumelfiles:
        lfsize = ui.config(longname, 'minsize', default=default)
    if lfsize:
        try:
            lfsize = float(lfsize)
        except ValueError:
            # no trailing newline in Abort messages: the dispatcher adds
            # its own when printing (matches the Abort below)
            raise util.Abort(_('largefiles: size must be number (not %s)')
                             % lfsize)
    if lfsize is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    return lfsize
41 41
def link(src, dest):
    '''Hardlink ``src`` to ``dest``, falling back to an atomic copy when
    hardlinking is not possible.  The copy preserves the source's mode.'''
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        dst = util.atomictempfile(dest)
        # close the source explicitly: relying on garbage collection
        # leaks the descriptor on non-refcounting interpreters
        fp = open(src, 'rb')
        try:
            for chunk in util.filechunkiter(fp):
                dst.write(chunk)
        finally:
            fp.close()
        dst.close()
        os.chmod(dest, os.stat(src).st_mode)
53 53
def usercachepath(ui, hash):
    '''Return the user-cache path for ``hash``, or None when no cache
    location can be determined.

    The ``largefiles.usercache`` config knob wins when set; otherwise a
    per-platform default is used (LOCALAPPDATA/APPDATA on Windows,
    ~/Library/Caches on Darwin, XDG_CACHE_HOME or ~/.cache on other
    POSIX systems).
    '''
    path = ui.configpath(longname, 'usercache', None)
    if path:
        path = os.path.join(path, hash)
    else:
        if os.name == 'nt':
            appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
            if appdata:
                path = os.path.join(appdata, longname, hash)
        elif platform.system() == 'Darwin':
            home = os.getenv('HOME')
            if home:
                path = os.path.join(home, 'Library', 'Caches',
                                    longname, hash)
        elif os.name == 'posix':
            path = os.getenv('XDG_CACHE_HOME')
            if path:
                path = os.path.join(path, longname, hash)
            else:
                home = os.getenv('HOME')
                if home:
                    path = os.path.join(home, '.cache', longname, hash)
        else:
            # dropped the trailing newline: Abort messages are newline-
            # terminated by the caller's error formatting
            raise util.Abort(_('unknown operating system: %s') % os.name)
    return path
79 79
def inusercache(ui, hash):
    '''True when the user cache holds a file for ``hash``.'''
    cached = usercachepath(ui, hash)
    if not cached:
        # no usable cache location on this platform
        return cached
    return os.path.exists(cached)
83 83
def findfile(repo, hash):
    '''Return a local path whose content has the given hash, or None.

    Prefers the repo store; otherwise links the file in from the user
    cache (so the store ends up populated as a side effect).
    '''
    if instore(repo, hash):
        repo.ui.note(_('found %s in store\n') % hash)
        return storepath(repo, hash)
    if not inusercache(repo.ui, hash):
        return None
    repo.ui.note(_('found %s in system cache\n') % hash)
    path = storepath(repo, hash)
    link(usercachepath(repo.ui, hash), path)
    return path
94 94
class largefilesdirstate(dirstate.dirstate):
    '''dirstate subclass used for tracking largefiles.

    Every public operation normalizes its path argument with unixpath()
    (slash-separated, normpath'd) before delegating to the base class,
    so callers may pass OS-native paths.
    '''
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self, f):
        # largefiles are never ignored, regardless of ignore patterns
        return False
112 112
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.

    When ``create`` is true and no dirstate file exists yet, seed it
    from the standins currently tracked by the main dirstate.
    '''
    lfstoredir = repo.join(longname)
    opener = scmutil.opener(lfstoredir)
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                    repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not os.path.exists(os.path.join(lfstoredir, 'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(matcher, [], False, False)

        # only create the store directory if there is something to track
        if len(standins) > 0:
            util.makedirs(lfstoredir)

        for standin in standins:
            lfile = splitstandin(standin)
            # normallookup: entries will be verified on the next status
            lfdirstate.normallookup(lfile)
    return lfdirstate
137 137
def lfdirstatestatus(lfdirstate, repo):
    '''Resolve the 'unsure' entries of lfdirstate against the working
    parent: a largefile whose current content hash differs from its
    standin is moved to modified, otherwise to clean.  Returns the
    (mutated) status object.'''
    wctx = repo['.']
    match = match_.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(match, [], False, False, False)
    modified, clean = s.modified, s.clean
    for lfile in unsure:
        try:
            fctx = wctx[standin(lfile)]
        except LookupError:
            # standin absent from the parent: classified as modified below
            fctx = None
        if not fctx or fctx.data().strip() != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            # record the verification so the next status check is fast
            lfdirstate.normal(lfile)
    return s
154 154
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is not None or repo.dirstate[f] != '?':
            lfiles.append(splitstandin(f))
    return lfiles
166 166
def instore(repo, hash):
    '''True when the repo-local store already has a file for ``hash``.'''
    path = storepath(repo, hash)
    return os.path.exists(path)
169 169
def storepath(repo, hash):
    '''Return the path of the file for ``hash`` in the per-repo
    largefiles store.'''
    return repo.join(longname, hash)
172 172
def findstorepath(repo, hash):
    '''Search through the local store path(s) to find the file for the given
    hash.  If the file is not found, its path in the primary store is
    returned.  The return value is a tuple of (path, exists(path)).
    '''
    path = storepath(repo, hash)
    return path, instore(repo, hash)
179
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happened:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    cached = findfile(repo, hash)
    if cached is None:
        return False
    wpath = repo.wjoin(filename)
    util.makedirs(os.path.dirname(wpath))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(cached, wpath)
    return True
187 194
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the largefile for standin ``file`` at ``rev`` into the store
    (no-op when it is already present).  ``uploaded`` is accepted for
    interface compatibility but unused here.'''
    hash = readstandin(repo, file, rev)
    if not instore(repo, hash):
        copytostoreabsolute(repo, repo.wjoin(file), hash)
193 200
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    manifest = ctx.manifest()  # hoisted: one lookup instead of per-file
    for filename in ctx.files():
        if isstandin(filename) and filename in manifest:
            copytostore(repo, ctx.node(), splitstandin(filename))
202 209
203 210
def copytostoreabsolute(repo, file, hash):
    '''Copy the largefile at absolute path ``file`` into the store under
    ``hash``.  Hardlinks from the user cache when the content is already
    there; otherwise copies atomically and mirrors the result back into
    the user cache.'''
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        dst = util.atomictempfile(storepath(repo, hash),
                                  createmode=repo.store.createmode)
        # close the source explicitly instead of leaking the descriptor
        # until garbage collection
        fp = open(file, 'rb')
        try:
            for chunk in util.filechunkiter(fp):
                dst.write(chunk)
        finally:
            fp.close()
        dst.close()
        linktousercache(repo, hash)
215 222
def linktousercache(repo, hash):
    '''Mirror the store file for ``hash`` into the user cache, when a
    cache location is available for this platform/config.'''
    target = usercachepath(repo.ui, hash)
    if target:
        link(storepath(repo, hash), target)
220 227
def getstandinmatcher(repo, pats=None, opts=None):
    '''Return a match object that applies pats to the standin directory'''
    # avoid mutable default arguments ([] / {}): a shared default dict
    # could be mutated by a callee and bleed into later calls
    if opts is None:
        opts = {}
    standindir = repo.wjoin(shortname)
    if pats:
        pats = [os.path.join(standindir, pat) for pat in pats]
    else:
        # no patterns: relative to repo root
        pats = [standindir]
    # no warnings about missing files or directories
    match = scmutil.match(repo[None], pats, opts)
    match.bad = lambda f, msg: None
    return match
233 240
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    # renamed from 'isstandin' to avoid shadowing the module-level helper
    standinmatchfn = smatcher.matchfn
    def composedmatchfn(f):
        return standinmatchfn(f) and rmatcher.matchfn(splitstandin(f))
    smatcher.matchfn = composedmatchfn
    return smatcher
245 252
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute
    #    path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
257 264
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    # all standins live under the '.hglf/' prefix (shortnameslash)
    return filename.startswith(shortnameslash)
262 269
def splitstandin(filename):
    '''Return the largefile name for a standin path, or None when the
    path is not a standin.'''
    # Partition on / because that's what dirstate always uses, even on
    # Windows; convert the local separator first in case the name came
    # from an external source (like the command line).
    prefix, sep, rest = util.pconvert(filename).partition('/')
    if sep and prefix == shortname:
        return rest
    return None
272 279
def updatestandin(repo, standin):
    '''Re-hash the working-copy largefile behind ``standin`` and rewrite
    the standin content accordingly; no-op if the largefile is absent.'''
    lfile = repo.wjoin(splitstandin(standin))
    if os.path.exists(lfile):
        writestandin(repo, standin, hashfile(lfile), getexecutable(lfile))
279 286
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    ctx = repo[node]
    return ctx[standin(filename)].data().strip()
284 291
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    flags = 'x' if executable else ''
    repo.wwrite(standin, hash + '\n', flags)
288 295
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    digest = util.sha1('')
    for chunk in instream:
        digest.update(chunk)
        outfile.write(chunk)
    return digest.hexdigest()
297 304
def hashrepofile(repo, file):
    '''Return the content hash of the working-copy file at repo-relative
    path ``file`` (empty string when the file does not exist).'''
    return hashfile(repo.wjoin(file))
300 307
def hashfile(file):
    '''Return the hex SHA-1 digest of ``file``'s content, or '' when the
    file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        # 128k chunks bound memory use while hashing large files; the
        # try/finally guarantees the descriptor is released even if
        # reading fails partway through
        for data in util.filechunkiter(fd, 128 * 1024):
            hasher.update(data)
    finally:
        fd.close()
    return hasher.hexdigest()
310 317
def getexecutable(filename):
    '''True iff the user, group and other execute bits are all set on
    ``filename``.'''
    mode = os.stat(filename).st_mode
    required = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return (mode & required) == required
316 323
def urljoin(first, second, *arg):
    '''Join two or more URL components, ensuring exactly one '/' between
    each adjacent pair.'''
    def _glue(head, tail):
        # normalize the seam: head gains a trailing '/', tail loses a
        # leading one
        if not head.endswith('/'):
            head = head + '/'
        return head + (tail[1:] if tail.startswith('/') else tail)

    result = _glue(first, second)
    for piece in arg:
        result = _glue(result, piece)
    return result
329 336
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    digest = util.sha1()
    for block in util.filechunkiter(data):
        digest.update(block)
    return digest.hexdigest()
337 344
def httpsendfile(ui, filename):
    '''Return an httpconnection.httpsendfile wrapper for ``filename``
    opened in binary mode, suitable for use as an HTTP request body.'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
340 347
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    # normpath collapses redundant separators/'..'; pconvert then forces
    # '/' separators, which is what dirstate expects on all platforms
    return util.pconvert(os.path.normpath(path))
344 351
def islfilesrepo(repo):
    '''True when the repository actually contains largefiles, either in
    its store history or in the largefiles dirstate.'''
    if 'largefiles' in repo.requirements:
        # requirement alone is not enough: look for standin data files
        if util.any(shortnameslash in f[0]
                    for f in repo.store.datafiles()):
            return True

    return util.any(openlfdirstate(repo.ui, repo, False))
351 358
class storeprotonotcapable(Exception):
    '''Raised when no store supporting the wanted protocol types is
    available; ``storetypes`` carries the types that were requested.'''
    def __init__(self, storetypes):
        self.storetypes = storetypes
355 362
def getstandinsstate(repo):
    '''Return [(lfile, hash), ...] for every standin tracked by the
    dirstate; hash is None when the standin cannot be read.'''
    result = []
    matcher = getstandinmatcher(repo)
    for standinfile in repo.dirstate.walk(matcher, [], False, False):
        lfile = splitstandin(standinfile)
        try:
            hash = readstandin(repo, lfile)
        except IOError:
            hash = None
        result.append((lfile, hash))
    return result
367 374
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    '''Propagate the dirstate status of ``lfile``'s standin into
    ``lfdirstate``.

    ``normallookup`` forces 'n' entries to be re-verified on the next
    status run instead of being trusted as clean.
    '''
    lfstandin = standin(lfile)
    if lfstandin in repo.dirstate:
        stat = repo.dirstate._map[lfstandin]
        # dirstate map entries are (state, mode, size, mtime) tuples
        state, mtime = stat[0], stat[3]
    else:
        state, mtime = '?', -1
    if state == 'n':
        if normallookup or mtime < 0:
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'm':
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
389 396
def markcommitted(orig, ctx, node):
    '''Wrapper around ``markcommitted``: run the original, then sync the
    largefiles dirstate for every standin touched by ``ctx`` and copy
    the committed largefiles into the store.'''
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        if isstandin(f):
            lfile = splitstandin(f)
            synclfdirstate(repo, lfdirstate, lfile, False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    copyalltostore(repo, node)
413 420
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the largefile names whose (name, hash) entries differ
    between ``oldstandins`` and ``newstandins``.

    Both arguments are lists of (lfile, hash) pairs as produced by
    getstandinsstate().  Each name appears at most once in the result.
    '''
    changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
    filelist = []
    # track seen names in a set: the original list scan was O(n^2)
    seen = set()
    for f in changedstandins:
        if f[0] not in seen:
            seen.add(f[0])
            filelist.append(f[0])
    return filelist
421 428
def getlfilestoupload(repo, missing, addfunc):
    '''For each outgoing revision in ``missing``, invoke
    ``addfunc(standin, hash)`` for every standin present in that
    revision, showing a progress bar while scanning.'''
    for i, n in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), i,
                         unit=_('revision'), total=len(missing))
        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]

        # NOTE(review): lfstatus is switched off around the ctx lookup,
        # presumably to bypass largefiles-aware status wrappers — confirm
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            # merge commit: ctx.files() can miss files taken unchanged
            # from one parent, so recompute the touched set from the
            # three manifests
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            for f in mp1:
                if f not in mc:
                    files.add(f)
            for f in mp2:
                if f not in mc:
                    files.add(f)
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            if isstandin(fn) and fn in ctx:
                addfunc(fn, ctx[fn].data().strip())
    # closing the progress topic
    repo.ui.progress(_('finding outgoing largefiles'), None)
453 460
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    '''

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = match_.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, [], False, False,
                                      False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile in modifiedfiles:
                if os.path.exists(
                        repo.wjoin(standin(lfile))):
                    # this handles the case where a rebase is being
                    # performed and the working copy is not updated
                    # yet.
                    if os.path.exists(repo.wjoin(lfile)):
                        updatestandin(repo,
                                      standin(lfile))

        return match

    lfiles = listlfiles(repo)
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, [], False, False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        # skip standins whose largefile is marked removed ('r')
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # ignore known largefiles and standins
        if f in lfiles or fstandin in standins:
            continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        if origmatchfn(f):
            # matched by the user's pattern: accept unless it is a
            # largefile itself
            return f not in lfiles
        else:
            # not matched by the pattern: accept only the standins
            # refreshed above
            return f in standins

    match.matchfn = matchfn

    return match
549 556
class automatedcommithook(object):
    '''Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    '''
    def __init__(self, resuming):
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        # only the very first commit after resuming refreshes standins
        self.resuming = False
        return updatestandinsbymatch(repo, match)
570 577
def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    if forcibly:
        return ui.status  # forcibly WRITE OUT
    return lambda *msg, **opts: None  # forcibly IGNORE
General Comments 0
You need to be logged in to leave comments. Login now