##// END OF EJS Templates
largefiles: use the optional badfn argument when building a matcher...
Matt Harbison -
r25470:378a8e70 default
parent child Browse files
Show More
@@ -1,608 +1,610 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import platform
13 13 import shutil
14 14 import stat
15 15 import copy
16 16
17 17 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
18 18 from mercurial.i18n import _
19 19 from mercurial import node
20 20
21 21 shortname = '.hglf'
22 22 shortnameslash = shortname + '/'
23 23 longname = 'largefiles'
24 24
25 25
26 26 # -- Private worker functions ------------------------------------------
27 27
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum size (in MB) for files to be handled as
    largefiles: ``opt`` if given, otherwise (when ``assumelfiles`` is
    set) the ``largefiles.minsize`` config value, defaulting to
    ``default``.  Raises util.Abort if the size is malformed or could
    not be determined at all.'''
    lfsize = opt
    if not lfsize and assumelfiles:
        # fall back to the configured minimum size
        lfsize = ui.config(longname, 'minsize', default=default)
    if lfsize:
        try:
            lfsize = float(lfsize)
        except ValueError:
            raise util.Abort(_('largefiles: size must be number (not %s)\n')
                             % lfsize)
    if lfsize is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    return lfsize
41 41
def link(src, dest):
    '''Hardlink ``src`` to ``dest``, creating parent directories as
    needed.  When hardlinking fails (e.g. across filesystems), fall back
    to an atomic copy that preserves ``src``'s mode bits.'''
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        dst = util.atomictempfile(dest)
        fd = open(src, 'rb')
        try:
            for chunk in util.filechunkiter(fd):
                dst.write(chunk)
        finally:
            # bug fix: the original never closed the source descriptor
            fd.close()
        dst.close()
        os.chmod(dest, os.stat(src).st_mode)
53 53
def usercachepath(ui, hash):
    '''Return the path of the given largefile hash in the per-user cache,
    honoring the ``largefiles.usercache`` config and otherwise falling
    back to the platform's conventional cache location.  May return a
    falsy value when no location can be determined (e.g. required
    environment variables are unset).'''
    path = ui.configpath(longname, 'usercache', None)
    if path:
        # explicit configuration wins
        path = os.path.join(path, hash)
    else:
        if os.name == 'nt':
            # Windows: %LOCALAPPDATA%, falling back to %APPDATA%
            appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
            if appdata:
                path = os.path.join(appdata, longname, hash)
        elif platform.system() == 'Darwin':
            # OS X: ~/Library/Caches
            home = os.getenv('HOME')
            if home:
                path = os.path.join(home, 'Library', 'Caches',
                                    longname, hash)
        elif os.name == 'posix':
            # other POSIX: $XDG_CACHE_HOME, falling back to ~/.cache
            path = os.getenv('XDG_CACHE_HOME')
            if path:
                path = os.path.join(path, longname, hash)
            else:
                home = os.getenv('HOME')
                if home:
                    path = os.path.join(home, '.cache', longname, hash)
        else:
            raise util.Abort(_('unknown operating system: %s\n') % os.name)
    return path
79 79
def inusercache(ui, hash):
    '''Report whether the largefile with the given hash exists in the
    per-user cache.'''
    path = usercachepath(ui, hash)
    if not path:
        # no usable cache location; propagate the falsy value, matching
        # the short-circuit behavior of ``path and os.path.exists(path)``
        return path
    return os.path.exists(path)
83 83
def findfile(repo, hash):
    '''Return the store path of the largefile with the given hash,
    linking it into the store from the user cache if necessary.
    Returns None when the file is in neither the store nor the user
    cache.'''
    path, exists = findstorepath(repo, hash)
    if exists:
        repo.ui.note(_('found %s in store\n') % hash)
        return path
    elif inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        # populate the store from the user cache (hardlink or copy)
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
95 95
class largefilesdirstate(dirstate.dirstate):
    '''dirstate subclass for tracking largefiles: every incoming path is
    normalized to the slash-separated form (via unixpath) before being
    handed to the regular dirstate machinery.'''
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self, f):
        # largefiles are never ignored
        return False
113 113
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.

    When ``create`` is true and no largefiles dirstate file exists yet,
    it is populated from the standins currently in the repo dirstate.
    '''
    lfstoredir = repo.join(longname)
    opener = scmutil.opener(lfstoredir)
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                    repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not os.path.exists(os.path.join(lfstoredir, 'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(matcher, [], False, False)

        if len(standins) > 0:
            util.makedirs(lfstoredir)

        for standin in standins:
            lfile = splitstandin(standin)
            # mark as possibly clean; a later status run will decide
            lfdirstate.normallookup(lfile)
    return lfdirstate
138 138
def lfdirstatestatus(lfdirstate, repo):
    '''Return a status object for the largefiles tracked by lfdirstate,
    re-hashing each "unsure" file against its standin in the '.'
    revision to classify it as modified or clean.'''
    wctx = repo['.']
    match = match_.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(match, [], False, False, False)
    modified, clean = s.modified, s.clean
    for lfile in unsure:
        try:
            fctx = wctx[standin(lfile)]
        except LookupError:
            fctx = None
        # missing standin, or content hash differs: the largefile changed
        if not fctx or fctx.data().strip() != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            # remember the file as clean so the next status is cheaper
            lfdirstate.normal(lfile)
    return s
155 155
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset

    ``matcher`` defaults to the standin matcher; largefile names are
    returned without the standin prefix.'''

    if matcher is None:
        matcher = getstandinmatcher(repo)

    # ignore unknown files in working directory
    return [splitstandin(f)
            for f in repo[rev].walk(matcher)
            if rev is not None or repo.dirstate[f] != '?']
167 167
def instore(repo, hash, forcelocal=False):
    '''Report whether the largefile with the given hash is present in
    the store (the local store only, when ``forcelocal`` is set).'''
    path = storepath(repo, hash, forcelocal)
    return os.path.exists(path)
170 170
def storepath(repo, hash, forcelocal=False):
    '''Return the path of the given largefile hash in the repo's store.
    For shared repos the primary store lives in the share source, unless
    ``forcelocal`` requests the repo-local store.'''
    if not forcelocal and repo.shared():
        return repo.vfs.reljoin(repo.sharedpath, longname, hash)
    return repo.join(longname, hash)
175 175
def findstorepath(repo, hash):
    '''Search through the local store path(s) to find the file for the given
    hash.  If the file is not found, its path in the primary store is returned.
    The return value is a tuple of (path, exists(path)).
    '''
    # For shared repos, the primary store is in the share source.  But for
    # backward compatibility, force a lookup in the local store if it wasn't
    # found in the share source.
    path = storepath(repo, hash, False)

    if instore(repo, hash):
        return (path, True)
    elif repo.shared() and instore(repo, hash, True):
        # bug fix: return the documented (path, exists) tuple here too;
        # the original returned a bare path, breaking tuple-unpacking
        # callers such as findfile()
        return (storepath(repo, hash, True), True)

    return (path, False)
192 192
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happened:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    path = findfile(repo, hash)
    if path is None:
        return False
    util.makedirs(os.path.dirname(repo.wjoin(filename)))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(path, repo.wjoin(filename))
    return True
207 207
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the largefile referenced by ``file``'s standin at ``rev``
    into the store, unless it is already there.  ``uploaded`` is unused
    and kept for interface compatibility.'''
    hash = readstandin(repo, file, rev)
    if instore(repo, hash):
        return
    copytostoreabsolute(repo, repo.wjoin(file), hash)
213 213
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''

    ctx = repo[node]
    for filename in ctx.files():
        # only standins actually present in this revision's manifest
        if isstandin(filename) and filename in ctx.manifest():
            realfile = splitstandin(filename)
            copytostore(repo, ctx.node(), realfile)
222 222
223 223
def copytostoreabsolute(repo, file, hash):
    '''Copy the largefile at absolute path ``file`` into the store under
    ``hash``, preferring a hardlink from the user cache when the file is
    already cached there, and link the result back into the user cache.'''
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        dst = util.atomictempfile(storepath(repo, hash),
                                  createmode=repo.store.createmode)
        fd = open(file, 'rb')
        try:
            for chunk in util.filechunkiter(fd):
                dst.write(chunk)
        finally:
            # bug fix: the original never closed the source descriptor
            fd.close()
        dst.close()
        linktousercache(repo, hash)
235 235
def linktousercache(repo, hash):
    '''Link the largefile with the given hash from the store into the
    per-user cache, if a user cache location is available.'''
    path = usercachepath(repo.ui, hash)
    if not path:
        # no user cache configured or derivable; nothing to do
        return
    link(storepath(repo, hash), path)
240 240
def getstandinmatcher(repo, rmatcher=None):
    '''Return a match object that applies rmatcher to the standin directory'''
    standindir = repo.wjoin(shortname)

    # no warnings about missing files or directories
    badfn = lambda f, msg: None

    if rmatcher and not rmatcher.always():
        # restrict to the standins corresponding to rmatcher's files
        pats = [os.path.join(standindir, pat) for pat in rmatcher.files()]
        match = scmutil.match(repo[None], pats, badfn=badfn)
        # if pats is empty, it would incorrectly always match, so clear _always
        match._always = False
    else:
        # no patterns: match the entire standin directory (relative to
        # the repo root)
        match = scmutil.match(repo[None], [standindir], badfn=badfn)
    return match
255 257
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher)
    isstandin = smatcher.matchfn
    def composedmatchfn(f):
        # f must be a standin AND its largefile must match rmatcher
        return isstandin(f) and rmatcher.matchfn(splitstandin(f))
    # replace the standin matcher's matchfn with the composed one
    smatcher.matchfn = composedmatchfn

    return smatcher
267 269
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows.  Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    normalized = util.pconvert(filename)
    return shortnameslash + normalized
279 281
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    return filename.startswith(shortnameslash)
284 286
def splitstandin(filename):
    '''Return the largefile path for the given standin path, or None when
    filename is not a standin.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    parts = util.pconvert(filename).split('/', 1)
    if len(parts) == 2 and parts[0] == shortname:
        return parts[1]
    return None
294 296
def updatestandin(repo, standin):
    '''Re-hash the working-directory largefile for ``standin`` and rewrite
    the standin file with the new hash and executable bit.  Does nothing
    when the largefile is absent from the working directory.'''
    file = repo.wjoin(splitstandin(standin))
    if os.path.exists(file):
        hash = hashfile(file)
        executable = getexecutable(file)
        writestandin(repo, standin, hash, executable)
301 303
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    return repo[node][standin(filename)].data().strip()
306 308
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>

    The executable flag is recorded via the file's 'x' permission.'''
    repo.wwrite(standin, hash + '\n', executable and 'x' or '')
310 312
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    hasher = util.sha1('')
    write = outfile.write
    for chunk in instream:
        hasher.update(chunk)
        write(chunk)
    return hasher.hexdigest()
319 321
def hashrepofile(repo, file):
    '''Return the hash of the repo-relative ``file`` in the working
    directory (empty string if the file does not exist).'''
    return hashfile(repo.wjoin(file))
322 324
def hashfile(file):
    '''Return the hex SHA-1 digest of the contents of ``file``, or the
    empty string if the file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        # stream in 128k chunks so huge largefiles do not exhaust memory
        for data in util.filechunkiter(fd, 128 * 1024):
            hasher.update(data)
    finally:
        # bug fix: close the descriptor even when reading/hashing raises
        fd.close()
    return hasher.hexdigest()
332 334
def getexecutable(filename):
    '''Report whether ``filename`` has the executable bit set for user,
    group AND other.  Truthy only when all three x bits are present.'''
    mode = os.stat(filename).st_mode
    userx = mode & stat.S_IXUSR
    groupx = mode & stat.S_IXGRP
    otherx = mode & stat.S_IXOTH
    return userx and groupx and otherx
338 340
def urljoin(first, second, *rest):
    '''Join two or more URL components so that exactly one '/' separates
    each adjacent pair (one trailing/leading slash is collapsed).'''
    def join(left, right):
        if not left.endswith('/'):
            left += '/'
        if right.startswith('/'):
            right = right[1:]
        return left + right

    url = first
    for piece in (second,) + rest:
        url = join(url, piece)
    return url
351 353
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    h = util.sha1()
    # read in chunks to avoid loading the whole file into memory
    for chunk in util.filechunkiter(data):
        h.update(chunk)
    return h.hexdigest()
359 361
def httpsendfile(ui, filename):
    '''Return an httpconnection.httpsendfile wrapping ``filename`` opened
    in binary-read mode.'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
362 364
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    # normpath collapses '..' etc.; pconvert switches to '/' separators
    return util.pconvert(os.path.normpath(path))
366 368
def islfilesrepo(repo):
    '''Report whether ``repo`` actually contains largefiles: either a
    standin exists in its store, or its largefiles dirstate is
    non-empty.'''
    if ('largefiles' in repo.requirements and
        any(shortnameslash in f[0] for f in repo.store.datafiles())):
        return True

    # fall back to inspecting the largefiles dirstate (without creating it)
    return any(openlfdirstate(repo.ui, repo, False))
373 375
class storeprotonotcapable(Exception):
    '''Raised when a remote store supports none of the requested store
    types; ``storetypes`` carries the types that were attempted.'''
    def __init__(self, storetypes):
        self.storetypes = storetypes
377 379
def getstandinsstate(repo):
    '''Return a list of (lfile, hash) pairs for every standin tracked in
    the repo dirstate; hash is None when the standin cannot be read.'''
    standins = []
    matcher = getstandinmatcher(repo)
    for standin in repo.dirstate.walk(matcher, [], False, False):
        lfile = splitstandin(standin)
        try:
            hash = readstandin(repo, lfile)
        except IOError:
            # standin missing or unreadable in the working directory
            hash = None
        standins.append((lfile, hash))
    return standins
389 391
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    '''Sync the lfdirstate entry for ``lfile`` with the state of its
    standin in the repo dirstate.  When ``normallookup`` is true, a
    clean ('n') standin still marks the largefile for re-examination.'''
    lfstandin = standin(lfile)
    if lfstandin in repo.dirstate:
        # dirstate map entries are tuples: [0] is the state character,
        # [3] is the recorded mtime
        stat = repo.dirstate._map[lfstandin]
        state, mtime = stat[0], stat[3]
    else:
        # standin not tracked at all
        state, mtime = '?', -1
    if state == 'n':
        if normallookup or mtime < 0:
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'm':
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
411 413
def markcommitted(orig, ctx, node):
    '''Extension wrapper around markcommitted: after calling ``orig``,
    sync the largefiles dirstate for every standin touched by ``ctx``
    and copy the committed largefiles into the store.'''
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        if isstandin(f):
            lfile = splitstandin(f)
            synclfdirstate(repo, lfdirstate, lfile, False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    copyalltostore(repo, node)
435 437
def getlfilestoupdate(oldstandins, newstandins):
    '''Given two lists of (lfile, hash) standin pairs, return the
    largefile names whose entry appears in one list but not the other,
    without duplicates.'''
    changed = set(oldstandins) ^ set(newstandins)
    filelist = []
    for entry in changed:
        lfile = entry[0]
        if lfile not in filelist:
            filelist.append(lfile)
    return filelist
443 445
def getlfilestoupload(repo, missing, addfunc):
    '''For each outgoing revision in ``missing``, call
    ``addfunc(standin, hash)`` for every largefile standin it touches
    (including changes only visible when comparing merge manifests).'''
    for i, n in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), i,
                         unit=_('revision'), total=len(missing))
        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]

        # temporarily disable largefiles status processing while looking
        # up the changectx, restoring it even on error
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            # for merges, compare the manifests directly, since
            # ctx.files() alone may omit files differing against only
            # one parent
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            for f in mp1:
                if f not in mc:
                    files.add(f)
            for f in mp2:
                if f not in mc:
                    files.add(f)
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            if isstandin(fn) and fn in ctx:
                addfunc(fn, ctx[fn].data().strip())
    repo.ui.progress(_('finding outgoing largefiles'), None)
475 477
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    '''

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = match_.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, [], False, False,
                                      False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile in modifiedfiles:
                if os.path.exists(
                        repo.wjoin(standin(lfile))):
                    # this handles the case where a rebase is being
                    # performed and the working copy is not updated
                    # yet.
                    if os.path.exists(repo.wjoin(lfile)):
                        updatestandin(repo,
                                      standin(lfile))

        return match

    lfiles = listlfiles(repo)
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, [], False, False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        # skip largefiles marked removed ('r') in the lfdirstate
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # ignore known largefiles and standins
        if f in lfiles or fstandin in standins:
            continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        # match regular files the original matcher accepted (excluding
        # largefiles) plus the standins collected above
        if origmatchfn(f):
            return f not in lfiles
        else:
            return f in standins

    match.matchfn = matchfn

    return match
571 573
class automatedcommithook(object):
    '''Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    '''
    def __init__(self, resuming):
        # true only until the first commit after resuming
        self.resuming = resuming

    def __call__(self, repo, match):
        if self.resuming:
            self.resuming = False # avoids updating at subsequent commits
            return updatestandinsbymatch(repo, match)
        else:
            # pass the matcher through unchanged
            return match
592 594
def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        # use whatever writer is currently on top of the stack
        return repo._lfstatuswriters[-1]
    else:
        if forcibly:
            return ui.status # forcibly WRITE OUT
        else:
            return lambda *msg, **opts: None # forcibly IGNORE
General Comments 0
You need to be logged in to leave comments. Login now