##// END OF EJS Templates
largefiles: drop os.path reference in lfutil.storepath()...
Matt Harbison -
r24627:f33236c9 default
parent child Browse files
Show More
@@ -1,586 +1,586 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import platform
13 13 import shutil
14 14 import stat
15 15 import copy
16 16
17 17 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
18 18 from mercurial.i18n import _
19 19 from mercurial import node
20 20
# Name of the directory (relative to the repo root) that holds standin
# files, plus a pre-built slash-suffixed form for cheap prefix tests.
shortname = '.hglf'
shortnameslash = shortname + '/'
# Config section name and store directory name for largefiles.
longname = 'largefiles'


# -- Private worker functions ------------------------------------------
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum size (in MB) for files handled as largefiles.

    The command line value ``opt`` wins; otherwise, when ``assumelfiles``
    is set, the ``largefiles.minsize`` configuration entry is consulted.
    Aborts when the value cannot be parsed as a number, or when no value
    was supplied at all.
    '''
    size = opt
    if not size and assumelfiles:
        size = ui.config(longname, 'minsize', default=default)
    if size:
        try:
            size = float(size)
        except ValueError:
            raise util.Abort(_('largefiles: size must be number (not %s)\n')
                             % size)
    if size is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    return size
41 41
def link(src, dest):
    '''Hardlink ``src`` to ``dest``, creating parent directories as
    needed.  When hardlinking fails (e.g. cross-device), fall back to an
    atomic copy that preserves the source's permission bits.'''
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        dst = util.atomictempfile(dest)
        # explicitly close the source: the original leaked the file
        # descriptor until garbage collection
        fp = open(src, 'rb')
        try:
            for chunk in util.filechunkiter(fp):
                dst.write(chunk)
        finally:
            fp.close()
        dst.close()
        os.chmod(dest, os.stat(src).st_mode)
53 53
def usercachepath(ui, hash):
    '''Return the path of ``hash`` inside the per-user largefile cache,
    or None when no cache location can be determined.

    The ``largefiles.usercache`` config entry overrides the platform
    default: %LOCALAPPDATA%/%APPDATA% on Windows, ~/Library/Caches on
    Mac OS X, and $XDG_CACHE_HOME or ~/.cache on other POSIX systems.
    '''
    path = ui.configpath(longname, 'usercache', None)
    if path:
        return os.path.join(path, hash)
    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname, hash)
    elif platform.system() == 'Darwin':
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname, hash)
    elif os.name == 'posix':
        cachedir = os.getenv('XDG_CACHE_HOME')
        if cachedir:
            return os.path.join(cachedir, longname, hash)
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, '.cache', longname, hash)
    else:
        raise util.Abort(_('unknown operating system: %s\n') % os.name)
    # relevant environment variables were unset: no cache available
    return None
79 79
def inusercache(ui, hash):
    '''Report whether ``hash`` is present in the user cache.'''
    path = usercachepath(ui, hash)
    if not path:
        # no cache location configured/derivable; propagate the falsy value
        return path
    return os.path.exists(path)
83 83
def findfile(repo, hash):
    '''Return the store path of the largefile with ``hash``, linking it
    in from the user cache when necessary; None when absent from both.'''
    if instore(repo, hash):
        repo.ui.note(_('found %s in store\n') % hash)
        return storepath(repo, hash)
    if inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
94 94
class largefilesdirstate(dirstate.dirstate):
    '''A dirstate that normalizes every path with unixpath() before
    delegating to the base class, and that never ignores files.'''

    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))

    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))

    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))

    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))

    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))

    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))

    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))

    def _ignore(self, f):
        # largefiles are never ignored
        return False
112 112
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    lfstoredir = repo.join(longname)
    opener = scmutil.opener(lfstoredir)
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                    repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not os.path.exists(os.path.join(lfstoredir, 'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(matcher, [], False, False)

        if standins:
            util.makedirs(lfstoredir)

        for fstandin in standins:
            lfdirstate.normallookup(splitstandin(fstandin))
    return lfdirstate
137 137
def lfdirstatestatus(lfdirstate, repo):
    '''Resolve the 'unsure' entries of the largefiles dirstate against
    the working directory, moving each to modified or clean, and return
    the resulting status object.'''
    wctx = repo['.']
    match = match_.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(match, [], False, False, False)
    modified, clean = s.modified, s.clean
    for lfile in unsure:
        try:
            fctx = wctx[standin(lfile)]
        except LookupError:
            fctx = None
        # clean only when the standin's recorded hash matches the
        # working copy contents
        if fctx and fctx.data().strip() == hashfile(repo.wjoin(lfile)):
            clean.append(lfile)
            lfdirstate.normal(lfile)
        else:
            modified.append(lfile)
    return s
154 154
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is not None or repo.dirstate[f] != '?':
            lfiles.append(splitstandin(f))
    return lfiles
166 166
def instore(repo, hash):
    '''Report whether a largefile with ``hash`` exists in the repo store.'''
    return os.path.exists(storepath(repo, hash))
169 169
def storepath(repo, hash):
    '''Return the repo-local store path for the largefile with ``hash``.'''
    # repo.join accepts multiple path components directly; the extra
    # os.path.join wrapper was redundant.
    return repo.join(longname, hash)
172 172
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happened:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    path = findfile(repo, hash)
    if path is None:
        return False
    wdest = repo.wjoin(filename)
    util.makedirs(os.path.dirname(wdest))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(path, wdest)
    return True
187 187
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the largefile standing in for ``file`` at ``rev`` into the
    store, unless it is already present there.'''
    hash = readstandin(repo, file, rev)
    if not instore(repo, hash):
        copytostoreabsolute(repo, repo.wjoin(file), hash)
193 193
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    for filename in ctx.files():
        if isstandin(filename) and filename in ctx.manifest():
            copytostore(repo, ctx.node(), splitstandin(filename))
202 202
203 203
def copytostoreabsolute(repo, file, hash):
    '''Copy the largefile at absolute path ``file`` into the store under
    ``hash``; link from/to the user cache as appropriate.'''
    if inusercache(repo.ui, hash):
        # already cached: a hardlink (or copy) into the store suffices
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        dst = util.atomictempfile(storepath(repo, hash),
                                  createmode=repo.store.createmode)
        # explicitly close the source: the original leaked the file
        # descriptor until garbage collection
        fp = open(file, 'rb')
        try:
            for chunk in util.filechunkiter(fp):
                dst.write(chunk)
        finally:
            fp.close()
        dst.close()
        linktousercache(repo, hash)
215 215
def linktousercache(repo, hash):
    '''Link the store copy of the largefile with ``hash`` into the user
    cache, when a cache location is available.'''
    path = usercachepath(repo.ui, hash)
    if path:
        link(storepath(repo, hash), path)
220 220
def getstandinmatcher(repo, pats=None, opts=None):
    '''Return a match object that applies pats to the standin directory'''
    # None instead of mutable default arguments ([] / {}), which are
    # shared across calls; backward compatible with explicit lists/dicts.
    if opts is None:
        opts = {}
    standindir = repo.wjoin(shortname)
    if pats:
        pats = [os.path.join(standindir, pat) for pat in pats]
    else:
        # no patterns: relative to repo root
        pats = [standindir]
    # no warnings about missing files or directories
    match = scmutil.match(repo[None], pats, opts)
    match.bad = lambda f, msg: None
    return match
233 233
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    # local name chosen so as not to shadow the module-level isstandin()
    matchstandin = smatcher.matchfn

    def composedmatchfn(f):
        return matchstandin(f) and rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composedmatchfn
    return smatcher
245 245
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows.  Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
257 257
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    return filename.startswith(shortnameslash)
262 262
def splitstandin(filename):
    '''Return the largefile path for a standin path, or None when
    ``filename`` is not a standin.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    bits = util.pconvert(filename).split('/', 1)
    if len(bits) == 2 and bits[0] == shortname:
        return bits[1]
    return None
272 272
def updatestandin(repo, standin):
    '''Re-hash the largefile behind ``standin`` and rewrite the standin,
    when the largefile exists in the working directory.'''
    lfile = repo.wjoin(splitstandin(standin))
    if os.path.exists(lfile):
        writestandin(repo, standin, hashfile(lfile), getexecutable(lfile))
279 279
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    fctx = repo[node][standin(filename)]
    return fctx.data().strip()
284 284
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    flags = 'x' if executable else ''
    repo.wwrite(standin, hash + '\n', flags)
288 288
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    hasher = util.sha1('')
    for chunk in instream:
        hasher.update(chunk)
        outfile.write(chunk)
    return hasher.hexdigest()
297 297
def hashrepofile(repo, file):
    '''Return the SHA-1 of the working copy of ``file`` inside ``repo``.'''
    return hashfile(repo.wjoin(file))
300 300
def hashfile(file):
    '''Return the hex SHA-1 of the contents of ``file``, or the empty
    string when the file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    # close in a finally so the descriptor is not leaked when reading raises
    try:
        for data in util.filechunkiter(fd, 128 * 1024):
            hasher.update(data)
    finally:
        fd.close()
    return hasher.hexdigest()
310 310
def getexecutable(filename):
    '''Return a truthy value iff ``filename`` has the executable bit set
    for user, group, and other.'''
    mode = os.stat(filename).st_mode
    # guard-clause form of the original and-chain: the chain yields 0 as
    # soon as a bit is missing, else the value of (mode & S_IXOTH)
    if not (mode & stat.S_IXUSR) or not (mode & stat.S_IXGRP):
        return 0
    return mode & stat.S_IXOTH
316 316
def urljoin(first, second, *arg):
    '''Join URL components, ensuring exactly one slash between each pair.'''
    def _join(left, right):
        sep = '' if left.endswith('/') else '/'
        if right.startswith('/'):
            right = right[1:]
        return left + sep + right

    url = _join(first, second)
    for piece in arg:
        url = _join(url, piece)
    return url
329 329
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    hasher = util.sha1()
    for chunk in util.filechunkiter(data):
        hasher.update(chunk)
    return hasher.hexdigest()
337 337
def httpsendfile(ui, filename):
    '''Return an httpsendfile wrapper for ``filename`` opened in binary
    read mode.'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
340 340
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    normalized = os.path.normpath(path)
    return util.pconvert(normalized)
344 344
def islfilesrepo(repo):
    '''Report whether ``repo`` is largefiles-enabled and actually tracks
    at least one largefile.'''
    if ('largefiles' in repo.requirements and
            util.any(shortnameslash in f[0] for f in repo.store.datafiles())):
        return True
    # fall back to checking the largefiles dirstate without creating it
    return util.any(openlfdirstate(repo.ui, repo, False))
351 351
class storeprotonotcapable(Exception):
    '''Raised when a remote store supports none of the requested store
    types; carries the offending list as ``storetypes``.'''
    def __init__(self, storetypes):
        self.storetypes = storetypes
355 355
def getstandinsstate(repo):
    '''Return a list of (lfile, hash) pairs for every standin tracked by
    the dirstate; hash is None when the standin cannot be read.'''
    state = []
    matcher = getstandinmatcher(repo)
    for fstandin in repo.dirstate.walk(matcher, [], False, False):
        lfile = splitstandin(fstandin)
        try:
            lfhash = readstandin(repo, lfile)
        except IOError:
            lfhash = None
        state.append((lfile, lfhash))
    return state
367 367
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    '''Sync the lfdirstate entry for ``lfile`` with the state of its
    standin in the main dirstate.  ``normallookup`` forces 'n' entries to
    be re-checked on the next status run.'''
    lfstandin = standin(lfile)
    if lfstandin in repo.dirstate:
        # renamed from "stat", which shadowed the imported stat module
        entry = repo.dirstate._map[lfstandin]
        state, mtime = entry[0], entry[3]
    else:
        state, mtime = '?', -1
    if state == 'n':
        if normallookup or mtime < 0:
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'm':
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
389 389
def markcommitted(orig, ctx, node):
    '''Post-commit hook: sync lfdirstate entries for committed standins
    and copy the committed largefiles into the store.'''
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        if isstandin(f):
            synclfdirstate(repo, lfdirstate, splitstandin(f), False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    copyalltostore(repo, node)
413 413
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the names of largefiles whose (lfile, hash) entries differ
    between the two standin lists.'''
    changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
    filelist = []
    # track seen names in a set: the original tested membership on the
    # growing list, which is O(n**2)
    seen = set()
    for f in changedstandins:
        if f[0] not in seen:
            seen.add(f[0])
            filelist.append(f[0])
    return filelist
421 421
def getlfilestoupload(repo, missing, addfunc):
    '''For each missing revision, call addfunc(standin, hash) for every
    largefile standin it touches.'''
    for i, n in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), i,
                         unit=_('revision'), total=len(missing))
        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]

        # read the changectx with largefile status tracking disabled
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            # merge commit: also consider files removed relative to either
            # parent, and files whose content differs from either parent
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            for parentmf in (mp1, mp2):
                for f in parentmf:
                    if f not in mc:
                        files.add(f)
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            if isstandin(fn) and fn in ctx:
                addfunc(fn, ctx[fn].data().strip())
    repo.ui.progress(_('finding outgoing largefiles'), None)
453 453
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    '''
    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = match_.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, [], False, False, False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile not in modifiedfiles:
                continue
            # this handles the case where a rebase is being
            # performed and the working copy is not updated
            # yet.
            if (os.path.exists(repo.wjoin(standin(lfile))) and
                    os.path.exists(repo.wjoin(lfile))):
                updatestandin(repo, standin(lfile))
        return match

    lfiles = listlfiles(repo)
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, [], False, False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    # drop known largefiles and standins from the explicit file list
    match._files = [f for f in match._files
                    if f not in lfiles and standin(f) not in standins]

    def matchfn(f):
        if origmatchfn(f):
            return f not in lfiles
        return f in standins

    match.matchfn = matchfn
    return match
549 549
class automatedcommithook(object):
    '''Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    '''
    def __init__(self, resuming):
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        self.resuming = False  # avoids updating at subsequent commits
        return updatestandinsbymatch(repo, match)
570 570
def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    if forcibly:
        return ui.status  # forcibly WRITE OUT
    return lambda *msg, **opts: None  # forcibly IGNORE
General Comments 0
You need to be logged in to leave comments. Login now