##// END OF EJS Templates
largefiles: ensure destination directory exists before findfile links to there...
Hao Lian -
r15408:db8b0ee7 stable
parent child Browse files
Show More
@@ -1,451 +1,453 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import errno
13 13 import platform
14 14 import shutil
15 15 import stat
16 16 import tempfile
17 17
18 18 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
19 19 from mercurial.i18n import _
20 20
21 21 shortname = '.hglf'
22 22 longname = 'largefiles'
23 23
24 24
25 25 # -- Portability wrappers ----------------------------------------------
26 26
def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
    '''Portability wrapper: walk `dirstate` with `matcher` and no
    subrepos, optionally including unknown and/or ignored files.'''
    subrepos = []
    return dirstate.walk(matcher, subrepos, unknown, ignored)
29 29
def repo_add(repo, list):
    '''Portability wrapper: schedule `list` for addition in the working
    context and return whatever the context's add() returns.'''
    return repo[None].add(list)
33 33
34 34 def repo_remove(repo, list, unlink=False):
35 35 def remove(list, unlink):
36 36 wlock = repo.wlock()
37 37 try:
38 38 if unlink:
39 39 for f in list:
40 40 try:
41 41 util.unlinkpath(repo.wjoin(f))
42 42 except OSError, inst:
43 43 if inst.errno != errno.ENOENT:
44 44 raise
45 45 repo[None].forget(list)
46 46 finally:
47 47 wlock.release()
48 48 return remove(list, unlink=unlink)
49 49
def repo_forget(repo, list):
    '''Portability wrapper: forget `list` in the working context and
    return whatever the context's forget() returns.'''
    return repo[None].forget(list)
53 53
def findoutgoing(repo, remote, force):
    '''Return the changesets present in repo but missing from remote.'''
    # Function-local import; presumably avoids a module-level import
    # cycle with mercurial.discovery -- TODO confirm.
    from mercurial import discovery
    common, _anyinc, _heads = discovery.findcommonincoming(repo,
        remote, force=force)
    return repo.changelog.findmissing(common)
59 59
60 60 # -- Private worker functions ------------------------------------------
61 61
def getminsize(ui, assumelfiles, opt, default=10):
    '''Resolve the minimum largefile size: prefer the command-line value
    `opt`, else (when `assumelfiles` is set) the largefiles.minsize
    config with the given default. Returns a float; aborts when the
    value is malformed or missing entirely.'''
    lfsize = opt
    if not lfsize and assumelfiles:
        lfsize = ui.config(longname, 'minsize', default=default)
    if lfsize:
        try:
            lfsize = float(lfsize)
        except ValueError:
            raise util.Abort(_('largefiles: size must be number (not %s)\n')
                             % lfsize)
    if lfsize is None:
        # neither opt nor config supplied a usable value
        raise util.Abort(_('minimum size for largefiles must be specified'))
    return lfsize
75 75
def link(src, dest):
    '''Hardlink src to dest; when hardlinking fails, fall back to a
    plain copy that preserves src's file mode.'''
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on copy
        shutil.copyfile(src, dest)
        os.chmod(dest, os.stat(src).st_mode)
83 83
def usercachepath(ui, hash):
    '''Return the per-user cache path for the largefile with the given
    hash: the largefiles.usercache config when set, otherwise the
    platform's conventional cache directory.'''
    path = ui.configpath(longname, 'usercache', None)
    if path:
        path = os.path.join(path, hash)
    else:
        if os.name == 'nt':
            # Windows: %LOCALAPPDATA%, falling back to %APPDATA%
            appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
            path = os.path.join(appdata, longname, hash)
        elif platform.system() == 'Darwin':
            # Mac OS X: ~/Library/Caches
            path = os.path.join(os.getenv('HOME'), 'Library', 'Caches',
                                longname, hash)
        elif os.name == 'posix':
            # other POSIX: $XDG_CACHE_HOME when set, else ~/.cache
            path = os.getenv('XDG_CACHE_HOME')
            if path:
                path = os.path.join(path, longname, hash)
            else:
                path = os.path.join(os.getenv('HOME'), '.cache', longname, hash)
        else:
            raise util.Abort(_('unknown operating system: %s\n') % os.name)
    return path
104 104
def inusercache(ui, hash):
    '''True when the largefile with this hash is present in the
    per-user cache.'''
    cached = usercachepath(ui, hash)
    return os.path.exists(cached)
107 107
def findfile(repo, hash):
    '''Locate the largefile with the given hash: return its path in the
    repo-local store (hardlinking it in from the user cache when it is
    only there), or None when it is in neither cache.'''
    if instore(repo, hash):
        repo.ui.note(_('Found %s in store\n') % hash)
    elif inusercache(repo.ui, hash):
        repo.ui.note(_('Found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        # The store directory may not exist yet (e.g. in a fresh clone);
        # create it before link() puts the cached file there.
        util.makedirs(os.path.dirname(path))
        link(usercachepath(repo.ui, hash), path)
    else:
        return None
    return storepath(repo, hash)
117 119
class largefiles_dirstate(dirstate.dirstate):
    '''dirstate subclass that normalizes every incoming path (via
    unixpath()) to the slash-separated form the largefiles dirstate
    stores, so callers may pass OS-native paths.'''
    def __getitem__(self, key):
        return super(largefiles_dirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefiles_dirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefiles_dirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefiles_dirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefiles_dirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefiles_dirstate, self).forget(unixpath(f))
131 133
def openlfdirstate(ui, repo):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    admin = repo.join(longname)
    opener = scmutil.opener(admin)
    lfdirstate = largefiles_dirstate(opener, ui, repo.root,
                                     repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone. It also gives us an easy
    # way to forcibly rebuild largefiles state:
    #   rm .hg/largefiles/dirstate && hg status
    # Or even, if things are really messed up:
    #   rm -rf .hg/largefiles && hg status
    if not os.path.exists(os.path.join(admin, 'dirstate')):
        util.makedirs(admin)
        matcher = getstandinmatcher(repo)
        for standin in dirstate_walk(repo.dirstate, matcher):
            lfile = splitstandin(standin)
            hash = readstandin(repo, lfile)
            lfdirstate.normallookup(lfile)
            try:
                # Mark clean only when the working-copy content still
                # matches the hash recorded in the standin.
                if hash == hashfile(lfile):
                    lfdirstate.normal(lfile)
            except IOError, err:
                # a missing largefile simply stays in normallookup state
                if err.errno != errno.ENOENT:
                    raise

        lfdirstate.write()

    return lfdirstate
166 168
def lfdirstate_status(lfdirstate, repo, rev):
    '''Compute the status of the largefiles tracked by lfdirstate
    against revision `rev`, re-hashing any file the dirstate is unsure
    about. Returns the tuple
    (modified, added, removed, missing, unknown, ignored, clean).'''
    wlock = repo.wlock()
    try:
        match = match_.always(repo.root, repo.getcwd())
        s = lfdirstate.status(match, [], False, False, False)
        unsure, modified, added, removed, missing, unknown, ignored, clean = s
        for lfile in unsure:
            # Settle each unsure file by comparing the hash stored in
            # its standin at `rev` with its current content hash.
            if repo[rev][standin(lfile)].data().strip() != \
                    hashfile(repo.wjoin(lfile)):
                modified.append(lfile)
            else:
                clean.append(lfile)
                lfdirstate.normal(lfile)
        lfdirstate.write()
    finally:
        wlock.release()
    return (modified, added, removed, missing, unknown, ignored, clean)
184 186
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''

    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # skip unknown files when looking at the working directory
        if rev is not None or repo.dirstate[f] != '?':
            lfiles.append(splitstandin(f))
    return lfiles
196 198
def instore(repo, hash):
    '''True when the largefile with this hash is present in the
    repo-local store.'''
    stored = storepath(repo, hash)
    return os.path.exists(stored)
199 201
def storepath(repo, hash):
    '''Return the path of the largefile with this hash inside the
    repository's local largefiles store.'''
    relpath = os.path.join(longname, hash)
    return repo.join(relpath)
202 204
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happened:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    path = findfile(repo, hash)
    if path is None:
        return False
    # make sure the destination directory exists before copying into it
    util.makedirs(os.path.dirname(repo.wjoin(filename)))
    shutil.copy(path, repo.wjoin(filename))
    return True
215 217
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the working-directory largefile behind standin `file` into
    the local store, unless its content is already there.'''
    # NOTE(review): `rev` and `uploaded` are unused in this body;
    # presumably kept for interface compatibility with callers.
    hash = readstandin(repo, file)
    if instore(repo, hash):
        return
    copytostoreabsolute(repo, repo.wjoin(file), hash)
221 223
def copytostoreabsolute(repo, file, hash):
    '''Place the content of the file at absolute path `file` into the
    repo store under `hash`, mirroring new content into the user
    cache as well.'''
    util.makedirs(os.path.dirname(storepath(repo, hash)))
    if inusercache(repo.ui, hash):
        # cheap path: hardlink (or copy) straight from the user cache
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        shutil.copyfile(file, storepath(repo, hash))
        os.chmod(storepath(repo, hash), os.stat(file).st_mode)
        # brand-new content: make it available to the user's other repos
        linktousercache(repo, hash)
230 232
def linktousercache(repo, hash):
    '''Hardlink (or copy) the stored largefile with this hash into the
    per-user cache, creating the cache directory as needed.'''
    target = usercachepath(repo.ui, hash)
    util.makedirs(os.path.dirname(target))
    link(storepath(repo, hash), target)
234 236
def getstandinmatcher(repo, pats=None, opts=None):
    '''Return a match object that applies pats to the standin directory.

    pats: optional list of patterns, interpreted relative to the current
    directory (or the repo root when cwd is absolute, as with -R).
    opts: optional options dict forwarded to getmatcher().
    '''
    # Use None defaults instead of the classic mutable-default pitfall
    # (a shared [], {} object would persist across calls).
    if pats is None:
        pats = []
    if opts is None:
        opts = {}
    standindir = repo.pathto(shortname)
    if pats:
        # patterns supplied: search standin directory relative to current dir
        cwd = repo.getcwd()
        if os.path.isabs(cwd):
            # cwd is an absolute path for hg -R <reponame>
            # work relative to the repository root in this case
            cwd = ''
        pats = [os.path.join(standindir, cwd, pat) for pat in pats]
    elif os.path.isdir(standindir):
        # no patterns: relative to repo root
        pats = [standindir]
    else:
        # no patterns and no standin dir: return matcher that matches nothing
        match = match_.match(repo.root, None, [], exact=True)
        match.matchfn = lambda f: False
        return match
    return getmatcher(repo, pats, opts, showbad=False)
255 257
def getmatcher(repo, pats=None, opts=None, showbad=True):
    '''Wrapper around scmutil.match() that adds showbad: if false,
    neuter the match object's bad() method so it does not print any
    warnings about missing files or directories.'''
    # Use None defaults instead of shared mutable [] / {} defaults.
    if pats is None:
        pats = []
    if opts is None:
        opts = {}
    match = scmutil.match(repo[None], pats, opts)

    if not showbad:
        match.bad = lambda f, msg: None
    return match
265 267
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    # Capture the original matchfn before we override it below; the
    # closure must call the un-composed version.
    isstandin = smatcher.matchfn
    def composed_matchfn(f):
        # accept f only if it is a standin AND its largefile matches
        return isstandin(f) and rmatcher.matchfn(splitstandin(f))
    smatcher.matchfn = composed_matchfn

    return smatcher
277 279
def standin(filename):
    '''Return the repo-relative path of the standin for the given big
    file.

    The result is repo-relative (not absolute) so _create_standin() can
    hand it to repo_add(); callers wanting an absolute path must apply
    repo.wjoin() themselves. Separators are always '/', matching what
    dirstate uses even on Windows; any OS-native separators in
    `filename` (e.g. from the command line) are converted first.
    '''
    return '/'.join((shortname, filename.replace(os.sep, '/')))
289 291
def isstandin(filename):
    '''True when filename (in Mercurial's slash-separated internal
    form) lives under the standin directory.'''
    prefix = shortname + '/'
    return filename.startswith(prefix)
294 296
def splitstandin(filename):
    '''Return the largefile path embedded in a standin path, or None
    when filename is not a standin. OS-native separators are converted
    to '/' first (dirstate always uses '/', even on Windows).'''
    normalized = filename.replace(os.sep, '/')
    prefix, sep, rest = normalized.partition('/')
    if sep and prefix == shortname:
        return rest
    return None
304 306
def updatestandin(repo, standin):
    '''Re-hash the working-directory largefile that `standin` points at
    and rewrite the standin with the new hash and executable bit.
    No-op when the largefile is absent from the working directory.'''
    file = repo.wjoin(splitstandin(standin))
    if os.path.exists(file):
        hash = hashfile(file)
        executable = getexecutable(file)
        writestandin(repo, standin, hash, executable)
311 313
def readstandin(repo, filename, node=None):
    '''Read the hex hash recorded in the standin for filename at the
    given node (working directory when node is None).'''
    fctx = repo[node][standin(filename)]
    return fctx.data().strip()
316 318
def writestandin(repo, standin, hash, executable):
    '''Write `hash` to <repo.root>/<standin>, marking the standin
    executable when `executable` is true.'''
    writehash(hash, repo.wjoin(standin), executable)
320 322
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Close outfile
    when done and return the binary hash.'''
    hasher = util.sha1('')
    for data in instream:
        hasher.update(data)
        outfile.write(data)

    # Blecch: closing a file that somebody else opened is rude and
    # wrong. But it's so darn convenient and practical! After all,
    # outfile was opened just to copy and hash.
    outfile.close()

    # binary digest, not hex -- see hexsha1() for the hex variant
    return hasher.digest()
336 338
def hashrepofile(repo, file):
    '''Return the hex SHA-1 hash of the working copy of `file`
    (hashfile() returns '' when the file does not exist).'''
    return hashfile(repo.wjoin(file))
339 341
def hashfile(file):
    '''Return the hex SHA-1 digest of the given file's content, or the
    empty string when the file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    for data in blockstream(fd):
        hasher.update(data)
    # blockstream() already closed fd at EOF; this second close is a
    # harmless no-op.
    fd.close()
    return hasher.hexdigest()
349 351
class limitreader(object):
    '''File-like wrapper that caps the total number of bytes readable
    from the underlying file object.'''
    def __init__(self, f, limit):
        # f: underlying file object; limit: bytes still allowed
        self.f = f
        self.limit = limit

    def read(self, length):
        '''Read up to `length` bytes, never exceeding the remaining
        limit; return '' once the limit is exhausted.'''
        if self.limit == 0:
            return ''
        if length > self.limit:
            length = self.limit
        self.limit -= length
        return self.f.read(length)

    def close(self):
        # deliberately do NOT close the underlying file
        pass
364 366
def blockstream(infile, blocksize=128 * 1024):
    """Yield successive chunks (at most blocksize bytes each) of infile,
    closing infile once it is exhausted."""
    while True:
        chunk = infile.read(blocksize)
        if not chunk:
            break
        yield chunk
    # same blecch as copyandhash() above: we close a file that someone
    # else opened, purely for convenience
    infile.close()
374 376
def readhash(filename):
    '''Return the 40-byte hex hash stored in `filename`.

    Raises util.Abort when the file holds fewer than 40 bytes,
    indicating a truncated or corrupt standin.
    '''
    rfile = open(filename, 'rb')
    try:
        # close the file even if read() raises (the previous version
        # leaked the descriptor on error)
        hash = rfile.read(40)
    finally:
        rfile.close()
    if len(hash) < 40:
        raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)')
                         % (filename, len(hash)))
    return hash
383 385
def writehash(hash, filename, executable):
    '''Write `hash` followed by a newline to `filename`, creating the
    parent directory as needed, replacing any existing file, and
    setting the file mode according to `executable`.'''
    util.makedirs(os.path.dirname(filename))
    if os.path.exists(filename):
        os.unlink(filename)
    wfile = open(filename, 'wb')

    try:
        wfile.write(hash)
        wfile.write('\n')
    finally:
        wfile.close()
    # re-check existence: chmod on a vanished file would raise
    if os.path.exists(filename):
        os.chmod(filename, getmode(executable))
397 399
def getexecutable(filename):
    '''Return a truthy value when filename is executable by user,
    group AND other, a falsy value otherwise.'''
    perms = os.stat(filename).st_mode
    return ((perms & stat.S_IXUSR) and
            (perms & stat.S_IXGRP) and
            (perms & stat.S_IXOTH))
403 405
def getmode(executable):
    '''Return the file mode to use for a largefile/standin:
    rwxr-xr-x when executable, rw-r--r-- otherwise.'''
    # 0o755/0o644: use the modern octal literal form (valid since
    # Python 2.6) rather than the legacy 0755/0644 spelling, which
    # Python 3 rejects outright. Values are unchanged.
    if executable:
        return 0o755
    else:
        return 0o644
409 411
def urljoin(first, second, *arg):
    '''Join URL fragments left to right, ensuring exactly one '/'
    between adjacent fragments (a trailing slash is added to the left
    part when missing; one leading slash is stripped from the right).'''
    url = first
    for piece in (second,) + arg:
        if not url.endswith('/'):
            url += '/'
        if piece.startswith('/'):
            piece = piece[1:]
        url += piece
    return url
422 424
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    h = util.sha1()
    # hash in chunks so arbitrarily large files need not fit in memory
    for chunk in util.filechunkiter(data):
        h.update(chunk)
    return h.hexdigest()
430 432
def httpsendfile(ui, filename):
    '''Return an httpconnection.httpsendfile wrapping `filename`,
    opened in binary read mode.'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
433 435
def unixpath(path):
    '''Return `path` normalized and with OS-native separators converted
    to '/', the form used by the lfdirstate.'''
    normalized = os.path.normpath(path)
    return normalized.replace(os.sep, '/')
437 439
def islfilesrepo(repo):
    '''True when repo carries the 'largefiles' requirement and its
    store contains at least one file under the standin directory.'''
    return ('largefiles' in repo.requirements and
            util.any(shortname + '/' in f[0] for f in repo.store.datafiles()))
441 443
def mkstemp(repo, prefix):
    '''Return (fd, filename) for a new temporary file created inside the
    repo's largefiles store, creating the store directory as needed.'''
    storedir = repo.join(longname)
    util.makedirs(storedir)
    return tempfile.mkstemp(prefix=prefix, dir=storedir)
448 450
class storeprotonotcapable(Exception):
    '''Exception carrying the store types that could not be provided.
    NOTE(review): semantics inferred from the name; the raise sites are
    elsewhere in this package -- confirm there.'''
    def __init__(self, storetypes):
        # the store types requested but unavailable
        self.storetypes = storetypes
General Comments 0
You need to be logged in to leave comments. Login now