##// END OF EJS Templates
merge with stable
Matt Mackall -
r15660:c7b0bedb merge default
parent child Browse files
Show More
@@ -1,452 +1,460
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import errno
13 13 import platform
14 14 import shutil
15 15 import stat
16 16 import tempfile
17 17
18 18 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
19 19 from mercurial.i18n import _
20 20
21 21 shortname = '.hglf'
22 22 longname = 'largefiles'
23 23
24 24
25 25 # -- Portability wrappers ----------------------------------------------
26 26
def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
    '''Walk dirstate with matcher and return whatever its walk() returns.

    An empty list is always passed as the second positional argument of
    walk(); unknown and ignored are forwarded unchanged.'''
    walk = dirstate.walk
    return walk(matcher, [], unknown, ignored)
29 29
def repo_add(repo, list):
    '''Call add(list) on repo[None] and return its result.'''
    ctx = repo[None]
    return ctx.add(list)
33 33
34 34 def repo_remove(repo, list, unlink=False):
35 35 def remove(list, unlink):
36 36 wlock = repo.wlock()
37 37 try:
38 38 if unlink:
39 39 for f in list:
40 40 try:
41 41 util.unlinkpath(repo.wjoin(f))
42 42 except OSError, inst:
43 43 if inst.errno != errno.ENOENT:
44 44 raise
45 45 repo[None].forget(list)
46 46 finally:
47 47 wlock.release()
48 48 return remove(list, unlink=unlink)
49 49
def repo_forget(repo, list):
    '''Call forget(list) on repo[None] and return its result.'''
    ctx = repo[None]
    return ctx.forget(list)
53 53
def findoutgoing(repo, remote, force):
    '''Return repo.changelog.findmissing() for the common set that
    discovery.findcommonincoming() reports between repo and remote.'''
    # imported lazily, as in the original code
    from mercurial import discovery
    incoming = discovery.findcommonincoming(repo, remote, force=force)
    # only the first of the three results is needed
    common, _anyincoming, _remoteheads = incoming
    return repo.changelog.findmissing(common)
59 59
60 60 # -- Private worker functions ------------------------------------------
61 61
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum largefile size as a float.

    opt is used when it is truthy; otherwise, if assumelfiles is set, the
    value comes from the 'minsize' setting of the largefiles config
    section (falling back to default).  Aborts if no value is available
    (None) or if the value cannot be converted to a number.  A falsy
    value other than None is returned unconverted.'''
    size = opt
    if assumelfiles and not size:
        size = ui.config(longname, 'minsize', default=default)
    if size is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    if not size:
        return size
    try:
        return float(size)
    except ValueError:
        raise util.Abort(_('largefiles: size must be number (not %s)\n')
                         % size)
75 75
def link(src, dest):
    '''Make dest a hardlink to src, copying the data if linking fails.

    The fallback copy goes through util.atomictempfile and then gives
    dest the permission bits of src.'''
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        dst = util.atomictempfile(dest)
        # largefiles are arbitrary binary data: read them in binary mode
        # so no newline translation happens on Windows
        srcfile = open(src, 'rb')
        try:
            for chunk in util.filechunkiter(srcfile):
                dst.write(chunk)
        finally:
            # do not leak the source handle, even if the copy fails
            srcfile.close()
        dst.close()
        os.chmod(dest, os.stat(src).st_mode)
86 86
def usercachepath(ui, hash):
    '''Return the path of hash in the per-user largefiles cache.

    The 'usercache' setting of the largefiles config section wins when
    set.  Otherwise a platform default is used: LOCALAPPDATA (or APPDATA)
    on Windows, HOME/Library/Caches on Mac OS X, and XDG_CACHE_HOME (or
    HOME/.cache) on other POSIX systems.

    Returns a false value when the needed environment variable is not
    set, so callers must check the result before using it.  Aborts on an
    operating system that is none of the above.'''
    path = ui.configpath(longname, 'usercache', None)
    if path:
        return os.path.join(path, hash)

    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
        if appdata:
            path = os.path.join(appdata, longname, hash)
    elif platform.system() == 'Darwin':
        home = os.getenv('HOME')
        if home:
            path = os.path.join(home, 'Library', 'Caches',
                                longname, hash)
    elif os.name == 'posix':
        path = os.getenv('XDG_CACHE_HOME')
        if path:
            path = os.path.join(path, longname, hash)
        else:
            # HOME may be unset (e.g. in a stripped-down environment);
            # never hand None to os.path.join
            home = os.getenv('HOME')
            if home:
                path = os.path.join(home, '.cache', longname, hash)
    else:
        raise util.Abort(_('unknown operating system: %s\n') % os.name)
    return path
107 112
def inusercache(ui, hash):
    '''Return a true value if hash is present in the user cache.

    The result is false when no user cache path can be determined.'''
    path = usercachepath(ui, hash)
    # usercachepath() may return a false value; os.path.exists(None)
    # would raise, so short-circuit first
    return path and os.path.exists(path)
110 116
def findfile(repo, hash):
    '''Return the store path of the largefile with the given hash, or
    None if it is in neither the repo store nor the user cache.

    A file found only in the user cache is linked into the store
    first.'''
    if instore(repo, hash):
        repo.ui.note(_('Found %s in store\n') % hash)
        return storepath(repo, hash)
    if inusercache(repo.ui, hash):
        repo.ui.note(_('Found %s in system cache\n') % hash)
        target = storepath(repo, hash)
        util.makedirs(os.path.dirname(target))
        link(usercachepath(repo.ui, hash), target)
        return storepath(repo, hash)
    return None
122 128
class largefiles_dirstate(dirstate.dirstate):
    '''dirstate subclass that passes every filename through unixpath()
    before delegating to the base class implementation.'''
    def __getitem__(self, key):
        normalized = unixpath(key)
        return super(largefiles_dirstate, self).__getitem__(normalized)
    def normal(self, f):
        normalized = unixpath(f)
        return super(largefiles_dirstate, self).normal(normalized)
    def remove(self, f):
        normalized = unixpath(f)
        return super(largefiles_dirstate, self).remove(normalized)
    def add(self, f):
        normalized = unixpath(f)
        return super(largefiles_dirstate, self).add(normalized)
    def drop(self, f):
        normalized = unixpath(f)
        return super(largefiles_dirstate, self).drop(normalized)
    def forget(self, f):
        normalized = unixpath(f)
        return super(largefiles_dirstate, self).forget(normalized)
136 142
137 143 def openlfdirstate(ui, repo):
138 144 '''
139 145 Return a dirstate object that tracks largefiles: i.e. its root is
140 146 the repo root, but it is saved in .hg/largefiles/dirstate.
141 147 '''
142 148 admin = repo.join(longname)
143 149 opener = scmutil.opener(admin)
144 150 lfdirstate = largefiles_dirstate(opener, ui, repo.root,
145 151 repo.dirstate._validate)
146 152
147 153 # If the largefiles dirstate does not exist, populate and create
148 154 # it. This ensures that we create it on the first meaningful
149 155 # largefiles operation in a new clone. It also gives us an easy
150 156 # way to forcibly rebuild largefiles state:
151 157 # rm .hg/largefiles/dirstate && hg status
152 158 # Or even, if things are really messed up:
153 159 # rm -rf .hg/largefiles && hg status
154 160 if not os.path.exists(os.path.join(admin, 'dirstate')):
155 161 util.makedirs(admin)
156 162 matcher = getstandinmatcher(repo)
157 163 for standin in dirstate_walk(repo.dirstate, matcher):
158 164 lfile = splitstandin(standin)
159 165 hash = readstandin(repo, lfile)
160 166 lfdirstate.normallookup(lfile)
161 167 try:
162 168 if hash == hashfile(repo.wjoin(lfile)):
163 169 lfdirstate.normal(lfile)
164 170 except OSError, err:
165 171 if err.errno != errno.ENOENT:
166 172 raise
167 173
168 174 lfdirstate.write()
169 175
170 176 return lfdirstate
171 177
def lfdirstate_status(lfdirstate, repo, rev):
    '''Return (modified, added, removed, missing, unknown, ignored, clean)
    for the largefiles tracked by lfdirstate.

    Files that lfdirstate.status() reports as unsure are resolved by
    comparing the hash stored in their standin at rev with the hash of
    the file in the working copy; matching files are recorded as normal
    and lfdirstate is written back.  Runs under repo's wlock.'''
    wlock = repo.wlock()
    try:
        everything = match_.always(repo.root, repo.getcwd())
        status = lfdirstate.status(everything, [], False, False, False)
        (unsure, modified, added, removed,
         missing, unknown, ignored, clean) = status
        for lfile in unsure:
            recorded = repo[rev][standin(lfile)].data().strip()
            if recorded == hashfile(repo.wjoin(lfile)):
                clean.append(lfile)
                lfdirstate.normal(lfile)
            else:
                modified.append(lfile)
        lfdirstate.write()
    finally:
        wlock.release()
    return (modified, added, removed, missing, unknown, ignored, clean)
189 195
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is not None or repo.dirstate[f] != '?':
            lfiles.append(splitstandin(f))
    return lfiles
201 207
def instore(repo, hash):
    '''Return True if a file for hash exists at storepath(repo, hash).'''
    location = storepath(repo, hash)
    return os.path.exists(location)
204 210
def storepath(repo, hash):
    '''Return repo.join() of the largefiles directory name and hash.'''
    relative = os.path.join(longname, hash)
    return repo.join(relative)
207 213
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happen:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    cached = findfile(repo, hash)
    if cached is None:
        return False
    target = repo.wjoin(filename)
    util.makedirs(os.path.dirname(target))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(cached, target)
    return True
222 228
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the working-copy largefile file into the repo store, keyed by
    the hash read from its standin, unless the store already has it.'''
    # NOTE(review): rev and uploaded are accepted but not used here; the
    # standin is read without a node argument -- confirm this is intended.
    hash = readstandin(repo, file)
    if not instore(repo, hash):
        copytostoreabsolute(repo, repo.wjoin(file), hash)
228 234
def copytostoreabsolute(repo, file, hash):
    '''Put the content of file into the repo store under hash.

    If the user cache already holds hash, the store entry is linked from
    there.  Otherwise file is copied into the store through
    util.atomictempfile, its mode is copied, and the new store entry is
    linked into the user cache.'''
    dest = storepath(repo, hash)
    util.makedirs(os.path.dirname(dest))
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), dest)
    else:
        dst = util.atomictempfile(dest)
        # largefiles are arbitrary binary data: read them in binary mode
        # so no newline translation happens on Windows
        src = open(file, 'rb')
        try:
            for chunk in util.filechunkiter(src):
                dst.write(chunk)
        finally:
            # do not leak the source handle, even if the copy fails
            src.close()
        dst.close()
        util.copymode(file, dest)
        linktousercache(repo, hash)
240 246
def linktousercache(repo, hash):
    '''Link the store copy of hash into the user cache.

    Does nothing when no user cache path can be determined.'''
    path = usercachepath(repo.ui, hash)
    if path:
        util.makedirs(os.path.dirname(path))
        link(storepath(repo, hash), path)
244 252
def getstandinmatcher(repo, pats=[], opts={}):
    '''Return a match object that applies pats to the standin directory'''
    standindir = repo.pathto(shortname)

    if not pats and not os.path.isdir(standindir):
        # no patterns and no standin dir: return matcher that matches nothing
        nothing = match_.match(repo.root, None, [], exact=True)
        nothing.matchfn = lambda f: False
        return nothing

    if pats:
        # patterns supplied: search standin directory relative to current dir
        cwd = repo.getcwd()
        if os.path.isabs(cwd):
            # cwd is an absolute path when hg is run with -R;
            # work relative to the repository root in this case
            cwd = ''
        pats = [os.path.join(standindir, cwd, pat) for pat in pats]
    else:
        # no patterns: relative to repo root
        pats = [standindir]
    return getmatcher(repo, pats, opts, showbad=False)
265 273
def getmatcher(repo, pats=[], opts={}, showbad=True):
    '''Wrapper around scmutil.match() that adds showbad: if false,
    neuter the match object's bad() method so it does not print any
    warnings about missing files or directories.'''
    matcher = scmutil.match(repo[None], pats, opts)
    if showbad:
        return matcher
    # replace bad() with a no-op
    matcher.bad = lambda f, msg: None
    return matcher
275 283
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    # keep the standin matcher's own test before overriding it below
    standinmatch = smatcher.matchfn

    def composed_matchfn(f):
        if not standinmatch(f):
            return standinmatch(f)
        return rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composed_matchfn
    return smatcher
287 295
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Most callers want an absolute path, but _create_standin() needs
    #    it repo-relative so lfadd() can pass it to repo_add().  So leave
    #    it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows.  Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    slashed = filename.replace(os.sep, '/')
    return shortname + '/' + slashed
299 307
def isstandin(filename):
    '''Return true if filename is a big file standin.  filename must be
    in Mercurial's internal form (slash-separated).'''
    standinprefix = shortname + '/'
    return filename.startswith(standinprefix)
304 312
def splitstandin(filename):
    '''Return the part of filename after the standin directory, or None
    if filename is not inside the standin directory.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    pieces = filename.replace(os.sep, '/').split('/', 1)
    if len(pieces) != 2 or pieces[0] != shortname:
        return None
    return pieces[1]
314 322
def updatestandin(repo, standin):
    '''Rewrite standin from the current hash and executable state of its
    largefile in the working copy.  Does nothing if that file does not
    exist.'''
    lfile = repo.wjoin(splitstandin(standin))
    if not os.path.exists(lfile):
        return
    hash = hashfile(lfile)
    executable = getexecutable(lfile)
    writestandin(repo, standin, hash, executable)
321 329
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    fctx = repo[node][standin(filename)]
    return fctx.data().strip()
326 334
def writestandin(repo, standin, hash, executable):
    '''write hash to the file at repo.wjoin(standin), with the mode that
    getmode(executable) selects'''
    target = repo.wjoin(standin)
    writehash(hash, target, executable)
330 338
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way.  Close outfile
    when done and return the binary hash.'''
    sha = util.sha1('')
    for block in instream:
        sha.update(block)
        outfile.write(block)

    # Closing a file that somebody else opened is questionable, but
    # outfile was opened just to copy and hash, so it is done here
    # for convenience.
    outfile.close()

    return sha.digest()
346 354
def hashrepofile(repo, file):
    '''Return hashfile() of file, resolved with repo.wjoin().'''
    absolute = repo.wjoin(file)
    return hashfile(absolute)
349 357
def hashfile(file):
    '''Return the hex SHA-1 digest of the content of file, or the empty
    string if file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        for data in blockstream(fd):
            hasher.update(data)
    finally:
        # blockstream() only closes fd once it is exhausted; make sure
        # the descriptor is released when reading fails part-way too
        fd.close()
    return hasher.hexdigest()
359 367
class limitreader(object):
    '''File-like wrapper that hands out at most limit bytes read from f.

    Once the limit is used up, read() returns the empty string.  close()
    is a no-op: the wrapped object is left open for its owner.'''
    def __init__(self, f, limit):
        self.f = f
        self.limit = limit

    def read(self, length):
        '''Read up to length bytes, never exceeding the remaining limit.
        A negative length means "everything that is left in the limit".'''
        if self.limit == 0:
            return ''
        if length < 0 or length > self.limit:
            length = self.limit
        data = self.f.read(length)
        # Charge only what was actually delivered: the underlying object
        # may return fewer bytes than requested, and those bytes must
        # remain available to later calls.
        self.limit -= len(data)
        return data

    def close(self):
        pass
374 382
def blockstream(infile, blocksize=128 * 1024):
    """Generator that yields blocks of data from infile and closes infile."""
    block = infile.read(blocksize)
    while block:
        yield block
        block = infile.read(blocksize)
    # Closing a file opened by the caller is questionable, but callers
    # rely on it; note it only happens once the data is exhausted.
    infile.close()
384 392
def readhash(filename):
    '''Return the first 40 bytes of filename.

    Aborts if the file holds fewer than 40 bytes.'''
    rfile = open(filename, 'rb')
    try:
        hash = rfile.read(40)
    finally:
        # release the handle even if the read fails
        rfile.close()
    if len(hash) < 40:
        raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)')
                         % (filename, len(hash)))
    return hash
393 401
def writehash(hash, filename, executable):
    '''Write hash followed by a newline to filename, creating its parent
    directory first, then set the mode chosen by getmode(executable).'''
    parent = os.path.dirname(filename)
    util.makedirs(parent)
    content = hash + '\n'
    util.writefile(filename, content)
    os.chmod(filename, getmode(executable))
398 406
def getexecutable(filename):
    '''Return a true value only if filename has the execute bit set for
    user, group and other alike.

    The result is 0 when any of the three bits is missing, otherwise the
    masked "other" execute bit.'''
    st_mode = os.stat(filename).st_mode
    for bit in (stat.S_IXUSR, stat.S_IXGRP):
        if not st_mode & bit:
            return 0
    return st_mode & stat.S_IXOTH
404 412
def getmode(executable):
    '''Return the permission bits for a standin: rwxr-xr-x (octal 755)
    if executable is true, rw-r--r-- (octal 644) otherwise.'''
    if executable:
        return (stat.S_IRWXU |
                stat.S_IRGRP | stat.S_IXGRP |
                stat.S_IROTH | stat.S_IXOTH)
    return (stat.S_IRUSR | stat.S_IWUSR |
            stat.S_IRGRP |
            stat.S_IROTH)
410 418
def urljoin(first, second, *arg):
    '''Join first, second and any further parts with exactly one '/'
    inserted at each seam.

    At each seam a '/' is appended to the left side if it lacks one, and
    at most one leading '/' is dropped from the right side.'''
    url = first
    for part in (second,) + arg:
        if not url.endswith('/'):
            url += '/'
        if part.startswith('/'):
            part = part[1:]
        url += part
    return url
423 431
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    digest = util.sha1()
    update = digest.update
    for chunk in util.filechunkiter(data):
        update(chunk)
    return digest.hexdigest()
431 439
def httpsendfile(ui, filename):
    '''Return httpconnection.httpsendfile() for filename, opened with
    mode 'rb'.'''
    mode = 'rb'
    return httpconnection.httpsendfile(ui, filename, mode)
434 442
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    normalized = os.path.normpath(path)
    # the lfdirstate uses '/' regardless of the local separator
    return normalized.replace(os.sep, '/')
438 446
def islfilesrepo(repo):
    '''Return a true value if repo lists 'largefiles' among its
    requirements and its store contains at least one data file whose
    first field includes the standin directory prefix.'''
    if 'largefiles' not in repo.requirements:
        return False
    standinprefix = shortname + '/'
    return util.any(standinprefix in f[0] for f in repo.store.datafiles())
442 450
def mkstemp(repo, prefix):
    '''Returns a file descriptor and a filename corresponding to a temporary
    file in the repo's largefiles store.'''
    storedir = repo.join(longname)
    # the store directory may not exist yet
    util.makedirs(storedir)
    return tempfile.mkstemp(dir=storedir, prefix=prefix)
449 457
class storeprotonotcapable(Exception):
    '''Exception that carries a list of store types in its storetypes
    attribute.'''
    def __init__(self, storetypes):
        # kept exactly as given; no copy is made
        self.storetypes = storetypes
General Comments 0
You need to be logged in to leave comments. Login now