##// END OF EJS Templates
largefiles: pass in whole matcher to getstandinmatcher()...
Martin von Zweigbergk -
r25292:31d543cd default
parent child Browse files
Show More
@@ -1,606 +1,606 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import platform
13 13 import shutil
14 14 import stat
15 15 import copy
16 16
17 17 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
18 18 from mercurial.i18n import _
19 19 from mercurial import node
20 20
# Directory (relative to the repo root) where largefile standin files live.
shortname = '.hglf'
# Same prefix with a trailing '/', for cheap startswith()/containment tests.
shortnameslash = shortname + '/'
# Name used for the config section and the store directory under .hg/.
longname = 'largefiles'
24 24
25 25
26 26 # -- Private worker functions ------------------------------------------
27 27
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum largefile size (in MB) as a float.

    The value comes from ``opt`` when given, otherwise from the
    ``largefiles.minsize`` config knob (only consulted when
    ``assumelfiles`` is set).  Aborts when no size can be determined
    or the value is not numeric.
    '''
    size = opt
    if not size and assumelfiles:
        size = ui.config(longname, 'minsize', default=default)
    if size:
        try:
            size = float(size)
        except ValueError:
            raise util.Abort(_('largefiles: size must be number (not %s)\n')
                             % size)
    if size is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    return size
41 41
def link(src, dest):
    '''Hardlink src to dest, falling back to an atomic copy when
    hardlinking fails (e.g. across filesystems or on FAT).'''
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        dst = util.atomictempfile(dest)
        # fix: close the source file deterministically instead of
        # leaking it until garbage collection
        fp = open(src, 'rb')
        try:
            for chunk in util.filechunkiter(fp):
                dst.write(chunk)
        finally:
            fp.close()
        dst.close()
        # preserve the source's permission bits (e.g. the executable bit)
        os.chmod(dest, os.stat(src).st_mode)
53 53
def usercachepath(ui, hash):
    '''Return the path of ``hash`` inside the per-user largefile cache,
    or None when no cache location can be determined for this platform.
    '''
    configured = ui.configpath(longname, 'usercache', None)
    if configured:
        return os.path.join(configured, hash)
    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname, hash)
        return None
    if platform.system() == 'Darwin':
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname, hash)
        return None
    if os.name == 'posix':
        # honour the XDG base-directory spec, falling back to ~/.cache
        cachehome = os.getenv('XDG_CACHE_HOME')
        if cachehome:
            return os.path.join(cachehome, longname, hash)
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, '.cache', longname, hash)
        return None
    raise util.Abort(_('unknown operating system: %s\n') % os.name)
79 79
def inusercache(ui, hash):
    '''Return a truthy value iff ``hash`` is present in the user cache.'''
    cached = usercachepath(ui, hash)
    return cached and os.path.exists(cached)
83 83
def findfile(repo, hash):
    '''Locate the largefile with the given hash, pulling it into the
    local store from the user cache if needed.  Return its store path,
    or None when it is in neither cache.'''
    path, exists = findstorepath(repo, hash)
    if exists:
        repo.ui.note(_('found %s in store\n') % hash)
        return path
    if inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
95 95
class largefilesdirstate(dirstate.dirstate):
    '''A dirstate that normalizes every path to the unix form used by
    the largefiles dirstate file before delegating to the base class.'''
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self, f):
        # largefiles are never ignored, regardless of .hgignore
        return False
113 113
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    storedir = repo.join(longname)
    opener = scmutil.opener(storedir)
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                    repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not os.path.exists(os.path.join(storedir, 'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(matcher, [], False, False)

        if standins:
            util.makedirs(storedir)

        for standinfile in standins:
            lfdirstate.normallookup(splitstandin(standinfile))
    return lfdirstate
138 138
def lfdirstatestatus(lfdirstate, repo):
    '''Resolve "unsure" largefiles against the working copy.

    Each unsure file whose standin hash differs from the hash of the
    working-copy largefile is classified as modified; the rest are
    marked clean (and normalized in lfdirstate).  Returns the status
    object with its lists updated in place.
    '''
    wctx = repo['.']
    match = match_.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(match, [], False, False, False)
    # NOTE: these names alias the lists inside ``s``; appending to them
    # mutates the returned status object.
    modified, clean = s.modified, s.clean
    for lfile in unsure:
        try:
            fctx = wctx[standin(lfile)]
        except LookupError:
            # no standin in the parent: treat as modified below
            fctx = None
        if not fctx or fctx.data().strip() != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            lfdirstate.normal(lfile)
    return s
155 155
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    ctx = repo[rev]
    lfiles = []
    for f in ctx.walk(matcher):
        # ignore unknown files in working directory
        if rev is not None or repo.dirstate[f] != '?':
            lfiles.append(splitstandin(f))
    return lfiles
167 167
def instore(repo, hash, forcelocal=False):
    '''Return True iff the largefile with ``hash`` exists in the store.'''
    path = storepath(repo, hash, forcelocal)
    return os.path.exists(path)
170 170
def storepath(repo, hash, forcelocal=False):
    '''Return the path of ``hash`` in the primary store (the share
    source for shared repos, unless ``forcelocal`` is set).'''
    if not forcelocal and repo.shared():
        return repo.vfs.reljoin(repo.sharedpath, longname, hash)
    else:
        return repo.join(longname, hash)
175 175
def findstorepath(repo, hash):
    '''Search through the local store path(s) to find the file for the given
    hash.  If the file is not found, its path in the primary store is returned.
    The return value is a tuple of (path, exists(path)).
    '''
    # For shared repos, the primary store is in the share source.  But for
    # backward compatibility, force a lookup in the local store if it wasn't
    # found in the share source.
    path = storepath(repo, hash, False)

    if instore(repo, hash):
        return (path, True)
    elif repo.shared() and instore(repo, hash, True):
        # BUG FIX: this branch used to return a bare path, violating the
        # documented (path, exists) contract and breaking tuple-unpacking
        # callers such as findfile().
        return (storepath(repo, hash, True), True)

    return (path, False)
192 192
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository.  Return True on success or False if the
    file was not found in either cache (which should not happen: this is
    meant to be called only after ensuring that the needed largefile
    exists in the cache).'''
    path = findfile(repo, hash)
    if path is None:
        return False
    wdest = repo.wjoin(filename)
    util.makedirs(os.path.dirname(wdest))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(path, wdest)
    return True
207 207
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the largefile standing in for ``file`` at ``rev`` into the
    local store, unless it is already there.'''
    hash = readstandin(repo, file, rev)
    if not instore(repo, hash):
        copytostoreabsolute(repo, repo.wjoin(file), hash)
213 213
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    for filename in ctx.files():
        realfile = splitstandin(filename)
        if realfile is not None and filename in ctx.manifest():
            copytostore(repo, ctx.node(), realfile)
222 222
223 223
def copytostoreabsolute(repo, file, hash):
    '''Copy the file at absolute path ``file`` into the store under
    ``hash``, hardlinking from the user cache when possible, and then
    link the stored copy back into the user cache.'''
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        dst = util.atomictempfile(storepath(repo, hash),
                                  createmode=repo.store.createmode)
        # fix: close the source file deterministically instead of
        # leaking it until garbage collection
        fp = open(file, 'rb')
        try:
            for chunk in util.filechunkiter(fp):
                dst.write(chunk)
        finally:
            fp.close()
        dst.close()
    linktousercache(repo, hash)
235 235
def linktousercache(repo, hash):
    '''Hardlink (or copy) the stored largefile into the user cache,
    if a user cache location exists on this platform.'''
    target = usercachepath(repo.ui, hash)
    if not target:
        return
    link(storepath(repo, hash), target)
240 240
def getstandinmatcher(repo, rmatcher=None):
    '''Return a match object that applies rmatcher to the standin directory'''
    standindir = repo.wjoin(shortname)
    files = rmatcher.files() if rmatcher else None
    if files:
        # narrow the match to the requested patterns, rebased under .hglf
        pats = [os.path.join(standindir, f) for f in files]
    else:
        # no patterns: match everything below the standin directory
        pats = [standindir]
    # silence warnings about missing files or directories
    matcher = scmutil.match(repo[None], pats)
    matcher.bad = lambda f, msg: None
    return matcher
253 253
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher)
    isstandinfn = smatcher.matchfn

    def composedmatchfn(f):
        # a path matches when it is a standin AND the corresponding
        # largefile matches the caller's matcher
        return isstandinfn(f) and rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composedmatchfn
    return smatcher
265 265
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
277 277
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    # standins all live under the '.hglf/' prefix
    return filename.startswith(shortnameslash)
282 282
def splitstandin(filename):
    '''Return the largefile path for a standin path, or None when
    ``filename`` is not a standin.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    parts = util.pconvert(filename).split('/', 1)
    if len(parts) == 2 and parts[0] == shortname:
        return parts[1]
    return None
292 292
def updatestandin(repo, standin):
    '''Re-hash the working-copy largefile for ``standin`` and rewrite
    the standin file with the new hash and executable bit.'''
    lfpath = repo.wjoin(splitstandin(standin))
    if os.path.exists(lfpath):
        hash = hashfile(lfpath)
        executable = getexecutable(lfpath)
        writestandin(repo, standin, hash, executable)
299 299
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    # standin files contain the hash followed by a newline; strip it
    return repo[node][standin(filename)].data().strip()
304 304
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    flags = 'x' if executable else ''
    repo.wwrite(standin, hash + '\n', flags)
308 308
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    hasher = util.sha1('')
    write = outfile.write
    for chunk in instream:
        hasher.update(chunk)
        write(chunk)
    return hasher.hexdigest()
317 317
def hashrepofile(repo, file):
    '''Return the SHA-1 hex digest of the repo-relative working-copy file.'''
    return hashfile(repo.wjoin(file))
320 320
def hashfile(file):
    '''Return the SHA-1 hex digest of the file's contents, or '' when
    the file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        # fix: guarantee the descriptor is closed even if reading raises
        for data in util.filechunkiter(fd, 128 * 1024):
            hasher.update(data)
    finally:
        fd.close()
    return hasher.hexdigest()
330 330
def getexecutable(filename):
    '''Return a true value iff user, group and other all have the
    execute bit set on ``filename``.'''
    mode = os.stat(filename).st_mode
    return all(mode & bit
               for bit in (stat.S_IXUSR, stat.S_IXGRP, stat.S_IXOTH))
336 336
def urljoin(first, second, *arg):
    '''Join two or more URL components, ensuring a single '/' separator
    between each adjacent pair (one leading slash of the right-hand
    component is stripped; additional slashes are preserved).'''
    def _join(left, right):
        sep = '' if left.endswith('/') else '/'
        if right.startswith('/'):
            right = right[1:]
        return left + sep + right

    result = _join(first, second)
    for part in arg:
        result = _join(result, part)
    return result
349 349
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    hasher = util.sha1()
    for chunk in util.filechunkiter(data):
        hasher.update(chunk)
    return hasher.hexdigest()
357 357
def httpsendfile(ui, filename):
    '''Return an httpsendfile wrapper for ``filename``, opened for
    binary reading (used when uploading largefiles over HTTP).'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
360 360
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    normalized = os.path.normpath(path)
    return util.pconvert(normalized)
364 364
def islfilesrepo(repo):
    '''Return True when the repo actually uses largefiles: either the
    store contains standins, or the largefiles dirstate is non-empty.'''
    if 'largefiles' in repo.requirements:
        if any(shortnameslash in f[0] for f in repo.store.datafiles()):
            return True
    return any(openlfdirstate(repo.ui, repo, False))
371 371
class storeprotonotcapable(Exception):
    '''Raised when a remote store supports none of the required store
    types; ``storetypes`` lists the types that were requested.'''
    def __init__(self, storetypes):
        self.storetypes = storetypes
375 375
def getstandinsstate(repo):
    '''Return [(lfile, hash-or-None)] for every standin tracked by the
    dirstate; hash is None when the standin cannot be read.'''
    result = []
    matcher = getstandinmatcher(repo)
    for standinfile in repo.dirstate.walk(matcher, [], False, False):
        lfile = splitstandin(standinfile)
        try:
            hash = readstandin(repo, lfile)
        except IOError:
            # standin missing or unreadable: record the file anyway
            hash = None
        result.append((lfile, hash))
    return result
387 387
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    '''Propagate the dirstate entry of lfile's standin into lfdirstate.'''
    lfstandin = standin(lfile)
    if lfstandin in repo.dirstate:
        entry = repo.dirstate._map[lfstandin]
        state, mtime = entry[0], entry[3]
    else:
        state, mtime = '?', -1
    if state == 'n':
        if normallookup or mtime < 0:
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    else:
        # map each remaining standin state onto the lfdirstate action
        action = {'m': lfdirstate.normallookup,
                  'r': lfdirstate.remove,
                  'a': lfdirstate.add,
                  '?': lfdirstate.drop}.get(state)
        if action is not None:
            action(lfile)
409 409
def markcommitted(orig, ctx, node):
    '''Wrapper for ctx.markcommitted(): also syncs the largefiles
    dirstate and copies committed largefiles into the store.'''
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        if not isstandin(f):
            continue
        synclfdirstate(repo, lfdirstate, splitstandin(f), False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    copyalltostore(repo, node)
433 433
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the names of largefiles whose (name, hash) entry differs
    between the two standin lists (duplicates removed).'''
    changed = set(oldstandins) ^ set(newstandins)
    filelist = []
    for entry in changed:
        name = entry[0]
        if name not in filelist:
            filelist.append(name)
    return filelist
441 441
def getlfilestoupload(repo, missing, addfunc):
    '''Call ``addfunc(standin, hash)`` for every largefile standin
    touched by any of the outgoing revisions in ``missing``.'''
    for i, n in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), i,
                         unit=_('revision'), total=len(missing))
        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]

        # temporarily disable lfstatus so ctx lookups see raw standins
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            # merge commit: ctx.files() omits files unchanged against one
            # parent, so recover the full set from the three manifests
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            for f in mp1:
                if f not in mc:
                    files.add(f)
            for f in mp2:
                if f not in mc:
                    files.add(f)
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            if isstandin(fn) and fn in ctx:
                addfunc(fn, ctx[fn].data().strip())
    repo.ui.progress(_('finding outgoing largefiles'), None)
473 473
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    '''

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = match_.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, [], False, False,
                                      False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile in modifiedfiles:
                if os.path.exists(
                        repo.wjoin(standin(lfile))):
                    # this handles the case where a rebase is being
                    # performed and the working copy is not updated
                    # yet.
                    if os.path.exists(repo.wjoin(lfile)):
                        updatestandin(repo,
                                      standin(lfile))

        return match

    lfiles = listlfiles(repo)
    # expand directory patterns into the contained largefiles
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, [], False, False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # ignore known largefiles and standins
        if f in lfiles or fstandin in standins:
            continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        # accept plain files the user asked for, but swap each
        # requested largefile for its standin
        if origmatchfn(f):
            return f not in lfiles
        else:
            return f in standins

    match.matchfn = matchfn

    return match
569 569
class automatedcommithook(object):
    '''Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    '''
    def __init__(self, resuming):
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        # only the first commit after resuming refreshes the standins
        self.resuming = False
        return updatestandinsbymatch(repo, match)
590 590
def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    if forcibly:
        return ui.status  # forcibly WRITE OUT
    return lambda *msg, **opts: None  # forcibly IGNORE
General Comments 0
You need to be logged in to leave comments. Login now