largefiles: avoid match.files() in conditions...
Martin von Zweigbergk
r25293:ab618e52 default
@@ -1,606 +1,608 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import platform
13 13 import shutil
14 14 import stat
15 15 import copy
16 16
17 17 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
18 18 from mercurial.i18n import _
19 19 from mercurial import node
20 20
21 21 shortname = '.hglf'
22 22 shortnameslash = shortname + '/'
23 23 longname = 'largefiles'
24 24
25 25
26 26 # -- Private worker functions ------------------------------------------
27 27
28 28 def getminsize(ui, assumelfiles, opt, default=10):
29 29 lfsize = opt
30 30 if not lfsize and assumelfiles:
31 31 lfsize = ui.config(longname, 'minsize', default=default)
32 32 if lfsize:
33 33 try:
34 34 lfsize = float(lfsize)
35 35 except ValueError:
36 36 raise util.Abort(_('largefiles: size must be a number (not %s)')
37 37 % lfsize)
38 38 if lfsize is None:
39 39 raise util.Abort(_('minimum size for largefiles must be specified'))
40 40 return lfsize
41 41
42 42 def link(src, dest):
43 43 util.makedirs(os.path.dirname(dest))
44 44 try:
45 45 util.oslink(src, dest)
46 46 except OSError:
47 47 # if hardlinks fail, fall back to an atomic copy
48 48 dst = util.atomictempfile(dest)
49 49 for chunk in util.filechunkiter(open(src, 'rb')):
50 50 dst.write(chunk)
51 51 dst.close()
52 52 os.chmod(dest, os.stat(src).st_mode)
53 53
54 54 def usercachepath(ui, hash):
55 55 path = ui.configpath(longname, 'usercache', None)
56 56 if path:
57 57 path = os.path.join(path, hash)
58 58 else:
59 59 if os.name == 'nt':
60 60 appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
61 61 if appdata:
62 62 path = os.path.join(appdata, longname, hash)
63 63 elif platform.system() == 'Darwin':
64 64 home = os.getenv('HOME')
65 65 if home:
66 66 path = os.path.join(home, 'Library', 'Caches',
67 67 longname, hash)
68 68 elif os.name == 'posix':
69 69 path = os.getenv('XDG_CACHE_HOME')
70 70 if path:
71 71 path = os.path.join(path, longname, hash)
72 72 else:
73 73 home = os.getenv('HOME')
74 74 if home:
75 75 path = os.path.join(home, '.cache', longname, hash)
76 76 else:
77 77 raise util.Abort(_('unknown operating system: %s') % os.name)
78 78 return path
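# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# A hedged, stdlib-only model of the cache-path resolution order above;
# the 'system' parameter, 'fakehash' and the environment values are made
# up for illustration, and the unknown-OS Abort is omitted:
#
#   import os.path
#   def modelusercachepath(hash, system, env):
#       if system == 'nt':
#           appdata = env.get('LOCALAPPDATA', env.get('APPDATA'))
#           return appdata and os.path.join(appdata, 'largefiles', hash)
#       if system == 'Darwin':
#           home = env.get('HOME')
#           return home and os.path.join(home, 'Library', 'Caches',
#                                        'largefiles', hash)
#       # POSIX: XDG_CACHE_HOME wins over ~/.cache
#       cache = env.get('XDG_CACHE_HOME')
#       if cache:
#           return os.path.join(cache, 'largefiles', hash)
#       home = env.get('HOME')
#       return home and os.path.join(home, '.cache', 'largefiles', hash)
#
#   modelusercachepath('fakehash', 'posix', {'HOME': '/home/u'})
#   # => '/home/u/.cache/largefiles/fakehash'
# ----------------------------------------------------------------------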
79 79
80 80 def inusercache(ui, hash):
81 81 path = usercachepath(ui, hash)
82 82 return path and os.path.exists(path)
83 83
84 84 def findfile(repo, hash):
85 85 path, exists = findstorepath(repo, hash)
86 86 if exists:
87 87 repo.ui.note(_('found %s in store\n') % hash)
88 88 return path
89 89 elif inusercache(repo.ui, hash):
90 90 repo.ui.note(_('found %s in system cache\n') % hash)
91 91 path = storepath(repo, hash)
92 92 link(usercachepath(repo.ui, hash), path)
93 93 return path
94 94 return None
95 95
96 96 class largefilesdirstate(dirstate.dirstate):
97 97 def __getitem__(self, key):
98 98 return super(largefilesdirstate, self).__getitem__(unixpath(key))
99 99 def normal(self, f):
100 100 return super(largefilesdirstate, self).normal(unixpath(f))
101 101 def remove(self, f):
102 102 return super(largefilesdirstate, self).remove(unixpath(f))
103 103 def add(self, f):
104 104 return super(largefilesdirstate, self).add(unixpath(f))
105 105 def drop(self, f):
106 106 return super(largefilesdirstate, self).drop(unixpath(f))
107 107 def forget(self, f):
108 108 return super(largefilesdirstate, self).forget(unixpath(f))
109 109 def normallookup(self, f):
110 110 return super(largefilesdirstate, self).normallookup(unixpath(f))
111 111 def _ignore(self, f):
112 112 return False
113 113
114 114 def openlfdirstate(ui, repo, create=True):
115 115 '''
116 116 Return a dirstate object that tracks largefiles: i.e. its root is
117 117 the repo root, but it is saved in .hg/largefiles/dirstate.
118 118 '''
119 119 lfstoredir = repo.join(longname)
120 120 opener = scmutil.opener(lfstoredir)
121 121 lfdirstate = largefilesdirstate(opener, ui, repo.root,
122 122 repo.dirstate._validate)
123 123
124 124 # If the largefiles dirstate does not exist, populate and create
125 125 # it. This ensures that we create it on the first meaningful
126 126 # largefiles operation in a new clone.
127 127 if create and not os.path.exists(os.path.join(lfstoredir, 'dirstate')):
128 128 matcher = getstandinmatcher(repo)
129 129 standins = repo.dirstate.walk(matcher, [], False, False)
130 130
131 131 if len(standins) > 0:
132 132 util.makedirs(lfstoredir)
133 133
134 134 for standin in standins:
135 135 lfile = splitstandin(standin)
136 136 lfdirstate.normallookup(lfile)
137 137 return lfdirstate
138 138
139 139 def lfdirstatestatus(lfdirstate, repo):
140 140 wctx = repo['.']
141 141 match = match_.always(repo.root, repo.getcwd())
142 142 unsure, s = lfdirstate.status(match, [], False, False, False)
143 143 modified, clean = s.modified, s.clean
144 144 for lfile in unsure:
145 145 try:
146 146 fctx = wctx[standin(lfile)]
147 147 except LookupError:
148 148 fctx = None
149 149 if not fctx or fctx.data().strip() != hashfile(repo.wjoin(lfile)):
150 150 modified.append(lfile)
151 151 else:
152 152 clean.append(lfile)
153 153 lfdirstate.normal(lfile)
154 154 return s
155 155
156 156 def listlfiles(repo, rev=None, matcher=None):
157 157 '''return a list of largefiles in the working copy or the
158 158 specified changeset'''
159 159
160 160 if matcher is None:
161 161 matcher = getstandinmatcher(repo)
162 162
163 163 # ignore unknown files in working directory
164 164 return [splitstandin(f)
165 165 for f in repo[rev].walk(matcher)
166 166 if rev is not None or repo.dirstate[f] != '?']
167 167
168 168 def instore(repo, hash, forcelocal=False):
169 169 return os.path.exists(storepath(repo, hash, forcelocal))
170 170
171 171 def storepath(repo, hash, forcelocal=False):
172 172 if not forcelocal and repo.shared():
173 173 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
174 174 return repo.join(longname, hash)
175 175
176 176 def findstorepath(repo, hash):
177 177 '''Search through the local store path(s) to find the file for the given
178 178 hash. If the file is not found, its path in the primary store is returned.
179 179 The return value is a tuple of (path, exists(path)).
180 180 '''
181 181 # For shared repos, the primary store is in the share source. But for
182 182 # backward compatibility, force a lookup in the local store if it wasn't
183 183 # found in the share source.
184 184 path = storepath(repo, hash, False)
185 185
186 186 if instore(repo, hash):
187 187 return (path, True)
188 188 elif repo.shared() and instore(repo, hash, True):
189 189 return (storepath(repo, hash, True), True)
190 190
191 191 return (path, False)
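# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# The documented contract, with made-up paths and hashes: every branch
# returns a (path, exists) pair, which the fix above restores for the
# shared-store case:
#
#   findstorepath(repo, hash)  # => ('/repo/.hg/largefiles/<hash>', True)
#   findstorepath(repo, miss)  # => ('/repo/.hg/largefiles/<miss>', False)
# ----------------------------------------------------------------------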
192 192
193 193 def copyfromcache(repo, hash, filename):
194 194 '''Copy the specified largefile from the repo or system cache to
195 195 filename in the repository. Return true on success or false if the
196 196 file was not found in either cache (which should not happen:
197 197 this is meant to be called only after ensuring that the needed
198 198 largefile exists in the cache).'''
199 199 path = findfile(repo, hash)
200 200 if path is None:
201 201 return False
202 202 util.makedirs(os.path.dirname(repo.wjoin(filename)))
203 203 # The write may fail before the file is fully written, but we
204 204 # don't use atomic writes in the working copy.
205 205 shutil.copy(path, repo.wjoin(filename))
206 206 return True
207 207
208 208 def copytostore(repo, rev, file, uploaded=False):
209 209 hash = readstandin(repo, file, rev)
210 210 if instore(repo, hash):
211 211 return
212 212 copytostoreabsolute(repo, repo.wjoin(file), hash)
213 213
214 214 def copyalltostore(repo, node):
215 215 '''Copy all largefiles in a given revision to the store'''
216 216
217 217 ctx = repo[node]
218 218 for filename in ctx.files():
219 219 if isstandin(filename) and filename in ctx.manifest():
220 220 realfile = splitstandin(filename)
221 221 copytostore(repo, ctx.node(), realfile)
222 222
223 223
224 224 def copytostoreabsolute(repo, file, hash):
225 225 if inusercache(repo.ui, hash):
226 226 link(usercachepath(repo.ui, hash), storepath(repo, hash))
227 227 else:
228 228 util.makedirs(os.path.dirname(storepath(repo, hash)))
229 229 dst = util.atomictempfile(storepath(repo, hash),
230 230 createmode=repo.store.createmode)
231 231 for chunk in util.filechunkiter(open(file, 'rb')):
232 232 dst.write(chunk)
233 233 dst.close()
234 234 linktousercache(repo, hash)
235 235
236 236 def linktousercache(repo, hash):
237 237 path = usercachepath(repo.ui, hash)
238 238 if path:
239 239 link(storepath(repo, hash), path)
240 240
241 241 def getstandinmatcher(repo, rmatcher=None):
242 242 '''Return a match object that applies rmatcher to the standin directory'''
243 243 standindir = repo.wjoin(shortname)
244 if rmatcher and rmatcher.files():
244 if rmatcher and not rmatcher.always():
245 245 pats = [os.path.join(standindir, pat) for pat in rmatcher.files()]
246 match = scmutil.match(repo[None], pats)
247 # if pats is empty, it would incorrectly always match, so clear _always
248 match._always = False
246 249 else:
247 250 # no patterns: relative to repo root
248 pats = [standindir]
251 match = scmutil.match(repo[None], [standindir])
249 252 # no warnings about missing files or directories
250 match = scmutil.match(repo[None], pats)
251 253 match.bad = lambda f, msg: None
252 254 return match
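# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# Why this commit replaces "if rmatcher and rmatcher.files():" with
# "if rmatcher and not rmatcher.always():": a matcher built purely from
# -I/-X patterns reports no files() yet is not an always-matcher, so
# truth-testing files() misclassifies it. A minimal model with made-up
# matchers:
#
#   class fakematcher(object):
#       def __init__(self, files, always):
#           self._files, self._always = files, always
#       def files(self):
#           return self._files
#       def always(self):
#           return self._always
#
#   everything = fakematcher([], always=True)    # e.g. plain "hg commit"
#   includeonly = fakematcher([], always=False)  # e.g. "-I 'glob:*.bin'"
#
#   bool(includeonly.files())   # => False: the old test takes the
#                               #    "match everything" branch
#   not includeonly.always()    # => True: the new test correctly
#                               #    narrows to the standin directory
# ----------------------------------------------------------------------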
253 255
254 256 def composestandinmatcher(repo, rmatcher):
255 257 '''Return a matcher that accepts standins corresponding to the
256 258 files accepted by rmatcher. Pass the list of files in the matcher
257 259 as the paths specified by the user.'''
258 260 smatcher = getstandinmatcher(repo, rmatcher)
259 261 isstandin = smatcher.matchfn
260 262 def composedmatchfn(f):
261 263 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
262 264 smatcher.matchfn = composedmatchfn
263 265
264 266 return smatcher
265 267
266 268 def standin(filename):
267 269 '''Return the repo-relative path to the standin for the specified big
268 270 file.'''
269 271 # Notes:
270 272 # 1) Some callers want an absolute path, but for instance addlargefiles
271 273 # needs it repo-relative so it can be passed to repo[None].add(). So
272 274 # leave it up to the caller to use repo.wjoin() to get an absolute path.
273 275 # 2) Join with '/' because that's what dirstate always uses, even on
274 276 # Windows. Change existing separator to '/' first in case we are
275 277 # passed filenames from an external source (like the command line).
276 278 return shortnameslash + util.pconvert(filename)
277 279
278 280 def isstandin(filename):
279 281 '''Return true if filename is a big file standin. filename must be
280 282 in Mercurial's internal form (slash-separated).'''
281 283 return filename.startswith(shortnameslash)
282 284
283 285 def splitstandin(filename):
284 286 # Split on / because that's what dirstate always uses, even on Windows.
285 287 # Change local separator to / first just in case we are passed filenames
286 288 # from an external source (like the command line).
287 289 bits = util.pconvert(filename).split('/', 1)
288 290 if len(bits) == 2 and bits[0] == shortname:
289 291 return bits[1]
290 292 else:
291 293 return None
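# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# Hedged round-trip between standin() and splitstandin(), assuming this
# module is importable as hgext.largefiles.lfutil; paths are made up:
#
#   from hgext.largefiles import lfutil
#   lfutil.standin('sub\\big.dat')            # => '.hglf/sub/big.dat'
#   lfutil.splitstandin('.hglf/sub/big.dat')  # => 'sub/big.dat'
#   lfutil.splitstandin('sub/big.dat')        # => None (not a standin)
# ----------------------------------------------------------------------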
292 294
293 295 def updatestandin(repo, standin):
294 296 file = repo.wjoin(splitstandin(standin))
295 297 if os.path.exists(file):
296 298 hash = hashfile(file)
297 299 executable = getexecutable(file)
298 300 writestandin(repo, standin, hash, executable)
299 301
300 302 def readstandin(repo, filename, node=None):
301 303 '''read hex hash from standin for filename at given node, or working
302 304 directory if no node is given'''
303 305 return repo[node][standin(filename)].data().strip()
304 306
305 307 def writestandin(repo, standin, hash, executable):
306 308 '''write hash to <repo.root>/<standin>'''
307 309 repo.wwrite(standin, hash + '\n', executable and 'x' or '')
308 310
309 311 def copyandhash(instream, outfile):
310 312 '''Read bytes from instream (iterable) and write them to outfile,
311 313 computing the SHA-1 hash of the data along the way. Return the hash.'''
312 314 hasher = util.sha1('')
313 315 for data in instream:
314 316 hasher.update(data)
315 317 outfile.write(data)
316 318 return hasher.hexdigest()
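# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# copyandhash() in miniature, using only the stdlib; the in-memory
# buffers and the 4-byte chunk size are made up for illustration:
#
#   import hashlib, io
#   src = io.BytesIO(b'payload')
#   dst = io.BytesIO()
#   hasher = hashlib.sha1()
#   for chunk in iter(lambda: src.read(4), b''):
#       hasher.update(chunk)
#       dst.write(chunk)
#   dst.getvalue() == b'payload'                                 # => True
#   hasher.hexdigest() == hashlib.sha1(b'payload').hexdigest()   # => True
# ----------------------------------------------------------------------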
317 319
318 320 def hashrepofile(repo, file):
319 321 return hashfile(repo.wjoin(file))
320 322
321 323 def hashfile(file):
322 324 if not os.path.exists(file):
323 325 return ''
324 326 hasher = util.sha1('')
325 327 fd = open(file, 'rb')
326 328 for data in util.filechunkiter(fd, 128 * 1024):
327 329 hasher.update(data)
328 330 fd.close()
329 331 return hasher.hexdigest()
330 332
331 333 def getexecutable(filename):
332 334 mode = os.stat(filename).st_mode
333 335 return ((mode & stat.S_IXUSR) and
334 336 (mode & stat.S_IXGRP) and
335 337 (mode & stat.S_IXOTH))
336 338
337 339 def urljoin(first, second, *arg):
338 340 def join(left, right):
339 341 if not left.endswith('/'):
340 342 left += '/'
341 343 if right.startswith('/'):
342 344 right = right[1:]
343 345 return left + right
344 346
345 347 url = join(first, second)
346 348 for a in arg:
347 349 url = join(url, a)
348 350 return url
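# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# What the slash normalization in urljoin() buys, assuming this module
# is importable as hgext.largefiles.lfutil; URLs are made up:
#
#   from hgext.largefiles import lfutil
#   lfutil.urljoin('http://h/', '/a/', 'b')   # => 'http://h/a/b'
#   lfutil.urljoin('http://h', 'a', 'b')      # => 'http://h/a/b'
# ----------------------------------------------------------------------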
349 351
350 352 def hexsha1(data):
351 353 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
352 354 object data"""
353 355 h = util.sha1()
354 356 for chunk in util.filechunkiter(data):
355 357 h.update(chunk)
356 358 return h.hexdigest()
357 359
358 360 def httpsendfile(ui, filename):
359 361 return httpconnection.httpsendfile(ui, filename, 'rb')
360 362
361 363 def unixpath(path):
362 364 '''Return a version of path normalized for use with the lfdirstate.'''
363 365 return util.pconvert(os.path.normpath(path))
364 366
365 367 def islfilesrepo(repo):
366 368 if ('largefiles' in repo.requirements and
367 369 any(shortnameslash in f[0] for f in repo.store.datafiles())):
368 370 return True
369 371
370 372 return any(openlfdirstate(repo.ui, repo, False))
371 373
372 374 class storeprotonotcapable(Exception):
373 375 def __init__(self, storetypes):
374 376 self.storetypes = storetypes
375 377
376 378 def getstandinsstate(repo):
377 379 standins = []
378 380 matcher = getstandinmatcher(repo)
379 381 for standin in repo.dirstate.walk(matcher, [], False, False):
380 382 lfile = splitstandin(standin)
381 383 try:
382 384 hash = readstandin(repo, lfile)
383 385 except IOError:
384 386 hash = None
385 387 standins.append((lfile, hash))
386 388 return standins
387 389
388 390 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
389 391 lfstandin = standin(lfile)
390 392 if lfstandin in repo.dirstate:
391 393 stat = repo.dirstate._map[lfstandin]
392 394 state, mtime = stat[0], stat[3]
393 395 else:
394 396 state, mtime = '?', -1
395 397 if state == 'n':
396 398 if normallookup or mtime < 0:
397 399 # state 'n' doesn't ensure 'clean' in this case
398 400 lfdirstate.normallookup(lfile)
399 401 else:
400 402 lfdirstate.normal(lfile)
401 403 elif state == 'm':
402 404 lfdirstate.normallookup(lfile)
403 405 elif state == 'r':
404 406 lfdirstate.remove(lfile)
405 407 elif state == 'a':
406 408 lfdirstate.add(lfile)
407 409 elif state == '?':
408 410 lfdirstate.drop(lfile)
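# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# The dispatch above, summarized; an editor's reading of the code, with
# a made-up dict standing in for the if/elif chain:
#
#   transitions = {
#       'm': 'normallookup',   # merged: needs another look
#       'r': 'remove',
#       'a': 'add',
#       '?': 'drop',
#   }
#   # state 'n' is special-cased: normal() when the entry is known
#   # clean, but normallookup() when normallookup=True or the recorded
#   # mtime is negative ('n' alone doesn't ensure clean then).
# ----------------------------------------------------------------------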
409 411
410 412 def markcommitted(orig, ctx, node):
411 413 repo = ctx.repo()
412 414
413 415 orig(node)
414 416
415 417 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
416 418 # because files coming from the 2nd parent are omitted in the latter.
417 419 #
418 420 # The former should be used to get targets of "synclfdirstate",
419 421 # because such files:
420 422 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
421 423 # - have to be marked as "n" after commit, but
422 424 # - aren't listed in "repo[node].files()"
423 425
424 426 lfdirstate = openlfdirstate(repo.ui, repo)
425 427 for f in ctx.files():
426 428 if isstandin(f):
427 429 lfile = splitstandin(f)
428 430 synclfdirstate(repo, lfdirstate, lfile, False)
429 431 lfdirstate.write()
430 432
431 433 # As part of committing, copy all of the largefiles into the cache.
432 434 copyalltostore(repo, node)
433 435
434 436 def getlfilestoupdate(oldstandins, newstandins):
435 437 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
436 438 filelist = []
437 439 for f in changedstandins:
438 440 if f[0] not in filelist:
439 441 filelist.append(f[0])
440 442 return filelist
441 443
442 444 def getlfilestoupload(repo, missing, addfunc):
443 445 for i, n in enumerate(missing):
444 446 repo.ui.progress(_('finding outgoing largefiles'), i,
445 447 unit=_('revision'), total=len(missing))
446 448 parents = [p for p in repo.changelog.parents(n) if p != node.nullid]
447 449
448 450 oldlfstatus = repo.lfstatus
449 451 repo.lfstatus = False
450 452 try:
451 453 ctx = repo[n]
452 454 finally:
453 455 repo.lfstatus = oldlfstatus
454 456
455 457 files = set(ctx.files())
456 458 if len(parents) == 2:
457 459 mc = ctx.manifest()
458 460 mp1 = ctx.parents()[0].manifest()
459 461 mp2 = ctx.parents()[1].manifest()
460 462 for f in mp1:
461 463 if f not in mc:
462 464 files.add(f)
463 465 for f in mp2:
464 466 if f not in mc:
465 467 files.add(f)
466 468 for f in mc:
467 469 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
468 470 files.add(f)
469 471 for fn in files:
470 472 if isstandin(fn) and fn in ctx:
471 473 addfunc(fn, ctx[fn].data().strip())
472 474 repo.ui.progress(_('finding outgoing largefiles'), None)
473 475
474 476 def updatestandinsbymatch(repo, match):
475 477 '''Update standins in the working directory according to specified match
476 478
477 479 This returns a (possibly modified) ``match`` object to be used for
478 480 the subsequent commit process.
479 481 '''
480 482
481 483 ui = repo.ui
482 484
483 485 # Case 1: user calls commit with no specific files or
484 486 # include/exclude patterns: refresh and commit all files that
485 487 # are "dirty".
486 488 if match is None or match.always():
487 489 # Spend a bit of time here to get a list of files we know
488 490 # are modified so we can compare only against those;
489 491 # otherwise, updating all standins can take a long time
490 492 # (several seconds) if the largefiles are large.
492 494 lfdirstate = openlfdirstate(ui, repo)
493 495 dirtymatch = match_.always(repo.root, repo.getcwd())
494 496 unsure, s = lfdirstate.status(dirtymatch, [], False, False,
495 497 False)
496 498 modifiedfiles = unsure + s.modified + s.added + s.removed
497 499 lfiles = listlfiles(repo)
498 500 # this only loops through largefiles that exist (not
499 501 # removed/renamed)
500 502 for lfile in lfiles:
501 503 if lfile in modifiedfiles:
502 504 if os.path.exists(
503 505 repo.wjoin(standin(lfile))):
504 506 # this handles the case where a rebase is being
505 507 # performed and the working copy is not updated
506 508 # yet.
507 509 if os.path.exists(repo.wjoin(lfile)):
508 510 updatestandin(repo,
509 511 standin(lfile))
510 512
511 513 return match
512 514
513 515 lfiles = listlfiles(repo)
514 516 match._files = repo._subdirlfs(match.files(), lfiles)
515 517
516 518 # Case 2: user calls commit with specified patterns: refresh
517 519 # any matching big files.
518 520 smatcher = composestandinmatcher(repo, match)
519 521 standins = repo.dirstate.walk(smatcher, [], False, False)
520 522
521 523 # No matching big files: get out of the way and pass control to
522 524 # the usual commit() method.
523 525 if not standins:
524 526 return match
525 527
526 528 # Refresh all matching big files. It's possible that the
527 529 # commit will end up failing, in which case the big files will
528 530 # stay refreshed. No harm done: the user modified them and
529 531 # asked to commit them, so sooner or later we're going to
530 532 # refresh the standins. Might as well leave them refreshed.
531 533 lfdirstate = openlfdirstate(ui, repo)
532 534 for fstandin in standins:
533 535 lfile = splitstandin(fstandin)
534 536 if lfdirstate[lfile] != 'r':
535 537 updatestandin(repo, fstandin)
536 538
537 539 # Cook up a new matcher that only matches regular files or
538 540 # standins corresponding to the big files requested by the
539 541 # user. Have to modify _files to prevent commit() from
540 542 # complaining "not tracked" for big files.
541 543 match = copy.copy(match)
542 544 origmatchfn = match.matchfn
543 545
544 546 # Check both the list of largefiles and the list of
545 547 # standins because if a largefile was removed, it
546 548 # won't be in the list of largefiles at this point
547 549 match._files += sorted(standins)
548 550
549 551 actualfiles = []
550 552 for f in match._files:
551 553 fstandin = standin(f)
552 554
553 555 # ignore known largefiles and standins
554 556 if f in lfiles or fstandin in standins:
555 557 continue
556 558
557 559 actualfiles.append(f)
558 560 match._files = actualfiles
559 561
560 562 def matchfn(f):
561 563 if origmatchfn(f):
562 564 return f not in lfiles
563 565 else:
564 566 return f in standins
565 567
566 568 match.matchfn = matchfn
567 569
568 570 return match
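# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# A stdlib-only model of the composed matchfn at the end of Case 2:
# regular files keep their original verdict unless they are largefiles,
# and standins match exactly when their largefile was requested. All
# names here are made up:
#
#   lfiles = {'big.dat'}
#   standins = {'.hglf/big.dat'}
#   origmatchfn = lambda f: f in {'big.dat', 'small.txt'}
#
#   def matchfn(f):
#       if origmatchfn(f):
#           return f not in lfiles
#       return f in standins
#
#   matchfn('small.txt')       # => True  (plain file still matches)
#   matchfn('big.dat')         # => False (largefile itself is excluded)
#   matchfn('.hglf/big.dat')   # => True  (its standin matches instead)
# ----------------------------------------------------------------------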
569 571
570 572 class automatedcommithook(object):
571 573 '''Stateful hook to update standins at the first commit after resuming
572 574
573 575 For efficiency, updating standins in the working directory should
574 576 be avoided during automated committing (by rebase, transplant and
575 577 so on), because they should already be updated before committing.
576 578
577 579 But the first commit after resuming automated committing (e.g.
578 580 ``rebase --continue``) should update them, because largefiles
579 581 may have been modified manually.
580 582 '''
581 583 def __init__(self, resuming):
582 584 self.resuming = resuming
583 585
584 586 def __call__(self, repo, match):
585 587 if self.resuming:
586 588 self.resuming = False # avoids updating at subsequent commits
587 589 return updatestandinsbymatch(repo, match)
588 590 else:
589 591 return match
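# --- illustrative sketch (editor's note, not part of lfutil.py) ------
# Hedged usage of the hook; 'repo' and 'match' stand for the real
# Mercurial objects an automated committer would hold:
#
#   hook = automatedcommithook(resuming=True)
#   m = hook(repo, match)   # first commit after e.g. --continue:
#                           # standins are refreshed via
#                           # updatestandinsbymatch()
#   m = hook(repo, match)   # later commits: match passes through
# ----------------------------------------------------------------------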
590 592
591 593 def getstatuswriter(ui, repo, forcibly=None):
592 594 '''Return the function to write largefiles-specific status out
593 595
594 596 If ``forcibly`` is ``None``, this returns the last element of
595 597 ``repo._lfstatuswriters`` as "default" writer function.
596 598
597 599 Otherwise, this returns a function that always writes the status
598 600 out (or always ignores it, if ``not forcibly``).
599 601 '''
600 602 if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
601 603 return repo._lfstatuswriters[-1]
602 604 else:
603 605 if forcibly:
604 606 return ui.status # forcibly WRITE OUT
605 607 else:
606 608 return lambda *msg, **opts: None # forcibly IGNORE