##// END OF EJS Templates
largefiles: remove blecch from lfutil.copyandhash - don't close the passed fd
Mads Kiilerich -
r19002:5083baa6 default
parent child Browse files
Show More
@@ -1,408 +1,401 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import errno
13 13 import platform
14 14 import shutil
15 15 import stat
16 16
17 17 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
18 18 from mercurial.i18n import _
19 19
20 20 shortname = '.hglf'
21 21 shortnameslash = shortname + '/'
22 22 longname = 'largefiles'
23 23
24 24
25 25 # -- Private worker functions ------------------------------------------
26 26
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum size (in MB) above which files are treated as
    largefiles: the explicit command line option wins, then the
    largefiles.minsize config value when --lfiles was requested.
    Aborts when the value is malformed or entirely unspecified.'''
    lfsize = opt
    if assumelfiles and not lfsize:
        lfsize = ui.config(longname, 'minsize', default=default)
    if lfsize:
        try:
            lfsize = float(lfsize)
        except ValueError:
            raise util.Abort(_('largefiles: size must be number (not %s)\n')
                             % lfsize)
    if lfsize is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    return lfsize
40 40
def link(src, dest):
    '''Hardlink src to dest, creating missing parent directories and
    falling back to an atomic copy (preserving the file mode) when
    hardlinking fails, e.g. across filesystems.'''
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        dst = util.atomictempfile(dest)
        # filechunkiter does not close the file it reads from, so close
        # the source explicitly instead of leaking the file object
        fd = open(src, 'rb')
        try:
            for chunk in util.filechunkiter(fd):
                dst.write(chunk)
        finally:
            fd.close()
        dst.close()
        os.chmod(dest, os.stat(src).st_mode)
52 52
def usercachepath(ui, hash):
    '''Return the per-user cache location for the largefile with the
    given hash.  The largefiles.usercache config value takes priority;
    otherwise a platform-appropriate default is used.  May return a
    falsy value when no location can be determined.'''
    path = ui.configpath(longname, 'usercache', None)
    if path:
        return os.path.join(path, hash)
    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
        if appdata:
            path = os.path.join(appdata, longname, hash)
    elif platform.system() == 'Darwin':
        home = os.getenv('HOME')
        if home:
            path = os.path.join(home, 'Library', 'Caches',
                                longname, hash)
    elif os.name == 'posix':
        xdg = os.getenv('XDG_CACHE_HOME')
        if xdg:
            path = os.path.join(xdg, longname, hash)
        else:
            home = os.getenv('HOME')
            if home:
                path = os.path.join(home, '.cache', longname, hash)
    else:
        raise util.Abort(_('unknown operating system: %s\n') % os.name)
    return path
78 78
def inusercache(ui, hash):
    '''Report whether the largefile with the given hash is present in
    the user cache (falsy when no cache path could be determined).'''
    cached = usercachepath(ui, hash)
    return cached and os.path.exists(cached)
82 82
def findfile(repo, hash):
    '''Locate the largefile with the given hash: prefer the repo-local
    store, then the user cache (hardlinking a cache hit into the
    store).  Return the store path, or None when not found.'''
    if instore(repo, hash):
        repo.ui.note(_('found %s in store\n') % hash)
        return storepath(repo, hash)
    if inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
93 93
class largefilesdirstate(dirstate.dirstate):
    '''A dirstate subclass that tracks largefiles.

    Paths handed to the dirstate API may come from external sources
    (e.g. the command line), so every override normalizes its argument
    with unixpath() to the slash-separated form dirstate stores before
    delegating to the base class.'''
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self):
        # files tracked by the largefiles dirstate are never ignored
        return False
111 111
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.

    When create is true and the dirstate file does not exist yet, it is
    populated from the standins currently tracked by the repo dirstate.
    '''
    lfstoredir = repo.join(longname)
    opener = scmutil.opener(lfstoredir)
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                     repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not os.path.exists(os.path.join(lfstoredir, 'dirstate')):
        util.makedirs(lfstoredir)
        matcher = getstandinmatcher(repo)
        for standin in repo.dirstate.walk(matcher, [], False, False):
            lfile = splitstandin(standin)
            hash = readstandin(repo, lfile)
            lfdirstate.normallookup(lfile)
            try:
                # mark clean only when the working copy largefile
                # matches the hash recorded in the standin
                if hash == hashfile(repo.wjoin(lfile)):
                    lfdirstate.normal(lfile)
            except OSError, err:
                # a missing working copy file stays in 'lookup' state;
                # any other error is real and must propagate
                if err.errno != errno.ENOENT:
                    raise
    return lfdirstate
139 139
def lfdirstatestatus(lfdirstate, repo, rev):
    '''Compute status of all largefiles tracked by lfdirstate against
    rev, resolving 'unsure' entries by comparing the standin hash with
    the working copy content.  Return the usual 7-tuple of lists.'''
    match = match_.always(repo.root, repo.getcwd())
    (unsure, modified, added, removed,
     missing, unknown, ignored, clean) = lfdirstate.status(
        match, [], False, False, False)
    for lfile in unsure:
        try:
            fctx = repo[rev][standin(lfile)]
        except LookupError:
            fctx = None
        if (fctx is not None and
                fctx.data().strip() == hashfile(repo.wjoin(lfile))):
            clean.append(lfile)
            lfdirstate.normal(lfile)
        else:
            modified.append(lfile)
    return (modified, added, removed, missing, unknown, ignored, clean)
155 155
def listlfiles(repo, rev=None, matcher=None):
    '''Return a list of largefiles in the working copy or the
    specified changeset.'''
    if matcher is None:
        matcher = getstandinmatcher(repo)
    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is None and repo.dirstate[f] == '?':
            continue
        lfiles.append(splitstandin(f))
    return lfiles
167 167
def instore(repo, hash):
    '''Report whether the largefile with the given hash exists in the
    repo-local store.'''
    path = storepath(repo, hash)
    return os.path.exists(path)
170 170
def storepath(repo, hash):
    '''Return the repo-local store path for the given largefile hash.'''
    rel = os.path.join(longname, hash)
    return repo.join(rel)
173 173
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happened:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    cached = findfile(repo, hash)
    if cached is None:
        return False
    wpath = repo.wjoin(filename)
    util.makedirs(os.path.dirname(wpath))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(cached, wpath)
    return True
188 188
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the largefile behind the standin of file at rev into the
    store, unless the store already has it.'''
    hash = readstandin(repo, file, rev)
    if not instore(repo, hash):
        copytostoreabsolute(repo, repo.wjoin(file), hash)
194 194
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    for filename in ctx.files():
        if not isstandin(filename):
            continue
        if filename not in ctx.manifest():
            continue
        copytostore(repo, ctx.node(), splitstandin(filename))
203 203
204 204
def copytostoreabsolute(repo, file, hash):
    '''Copy the largefile at absolute path file into the store under
    hash, hardlinking from the user cache when possible, then link the
    stored copy back into the user cache.  Skipped entirely while a
    conversion is in progress.'''
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    elif not getattr(repo, "_isconverting", False):
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        dst = util.atomictempfile(storepath(repo, hash),
                                  createmode=repo.store.createmode)
        # filechunkiter does not close the file it reads from, so close
        # the source explicitly instead of leaking the file object
        fd = open(file, 'rb')
        try:
            for chunk in util.filechunkiter(fd):
                dst.write(chunk)
        finally:
            fd.close()
        dst.close()
        linktousercache(repo, hash)
216 216
def linktousercache(repo, hash):
    '''Hardlink the stored largefile into the user cache, when a user
    cache location is available.'''
    target = usercachepath(repo.ui, hash)
    if target:
        link(storepath(repo, hash), target)
221 221
def getstandinmatcher(repo, pats=None, opts=None):
    '''Return a match object that applies pats to the standin directory'''
    # avoid the mutable-default-argument pitfall: None means "no
    # patterns" / "no options"; callers passing lists/dicts are
    # unaffected
    if opts is None:
        opts = {}
    standindir = repo.wjoin(shortname)
    if pats:
        pats = [os.path.join(standindir, pat) for pat in pats]
    else:
        # no patterns: relative to repo root
        pats = [standindir]
    # no warnings about missing files or directories
    match = scmutil.match(repo[None], pats, opts)
    match.bad = lambda f, msg: None
    return match
234 234
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    standinmatch = smatcher.matchfn
    def composed(f):
        # a standin matches iff the corresponding largefile matches
        return standinmatch(f) and rmatcher.matchfn(splitstandin(f))
    smatcher.matchfn = composed
    return smatcher
246 246
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
258 258
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    # standins all live under the '.hglf/' prefix
    return filename.startswith(shortnameslash)
263 263
def splitstandin(filename):
    '''Return the largefile name for the given standin path, or None
    when filename is not a standin.  Split on '/' because that is what
    dirstate always uses, even on Windows; normalize local separators
    first in case the name came from an external source (like the
    command line).'''
    parts = util.pconvert(filename).split('/', 1)
    if len(parts) == 2 and parts[0] == shortname:
        return parts[1]
    return None
273 273
def updatestandin(repo, standin):
    '''Rewrite the given standin with the current hash and executable
    bit of its largefile in the working copy; no-op when the largefile
    is absent.'''
    lfile = repo.wjoin(splitstandin(standin))
    if os.path.exists(lfile):
        hash = hashfile(lfile)
        executable = getexecutable(lfile)
        writestandin(repo, standin, hash, executable)
280 280
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    fctx = repo[node][standin(filename)]
    return fctx.data().strip()
285 285
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    # the standin's mode mirrors the largefile's executable bit
    writehash(hash, repo.wjoin(standin), executable)
289 289
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    hasher = util.sha1('')
    for data in instream:
        hasher.update(data)
        outfile.write(data)
    # NOTE: outfile is deliberately left open -- the caller opened it,
    # so the caller owns closing it
    return hasher.hexdigest()
305 298
def hashrepofile(repo, file):
    '''Return the hex sha1 of the repo-relative file in the working copy.'''
    return hashfile(repo.wjoin(file))
308 301
def hashfile(file):
    '''Return the hex sha1 hash of the named file, or '' when the file
    does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        for data in util.filechunkiter(fd, 128 * 1024):
            hasher.update(data)
    finally:
        # close in a finally so a read error cannot leak the file object
        fd.close()
    return hasher.hexdigest()
318 311
class limitreader(object):
    '''Wrap a file-like object so that at most `limit` bytes can be
    read through it.  close() is intentionally a no-op: the wrapper
    does not own the underlying file.'''
    def __init__(self, f, limit):
        self.f = f
        self.limit = limit

    def read(self, length):
        # once the budget is spent, behave like EOF
        if self.limit == 0:
            return ''
        length = min(length, self.limit)
        self.limit -= length
        return self.f.read(length)

    def close(self):
        # the underlying file belongs to the caller
        pass
333 326
def writehash(hash, filename, executable):
    '''Write hash (newline-terminated) to filename, creating missing
    parent directories and setting the mode per the executable flag.'''
    util.makedirs(os.path.dirname(filename))
    util.writefile(filename, hash + '\n')
    os.chmod(filename, getmode(executable))
338 331
def getexecutable(filename):
    '''Return truthy when the user, group and other execute bits are
    all set on filename.'''
    mode = os.stat(filename).st_mode
    allexec = (stat.S_IXUSR & mode and
               stat.S_IXGRP & mode and
               stat.S_IXOTH & mode)
    return allexec
344 337
def getmode(executable):
    '''Return the file mode to apply to a standin/largefile:
    rwxr-xr-x when executable, rw-r--r-- otherwise.'''
    if executable:
        return 0755
    else:
        return 0644
350 343
def urljoin(first, second, *arg):
    '''Join two or more URL fragments, collapsing slashes at each
    boundary so exactly one '/' separates consecutive pieces.'''
    def join(left, right):
        if left.endswith('/'):
            left = left[:-1]
        if right.startswith('/'):
            right = right[1:]
        return left + '/' + right
    url = first
    for piece in (second,) + arg:
        url = join(url, piece)
    return url
363 356
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    hasher = util.sha1()
    for chunk in util.filechunkiter(data):
        hasher.update(chunk)
    return hasher.hexdigest()
371 364
def httpsendfile(ui, filename):
    '''Return a file-like object for uploading filename over HTTP.'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
374 367
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    # normpath collapses redundant separators; pconvert forces '/'
    return util.pconvert(os.path.normpath(path))
378 371
def islfilesrepo(repo):
    '''Report whether repo stores or tracks any largefiles.'''
    if 'largefiles' in repo.requirements:
        # any standin in the store means largefiles are in use
        if util.any(shortnameslash in f[0] for f in repo.store.datafiles()):
            return True
    # otherwise check whether the largefiles dirstate tracks anything
    return util.any(openlfdirstate(repo.ui, repo, False))
385 378
class storeprotonotcapable(Exception):
    '''Raised when a largefile store supports none of the requested
    store types.'''
    def __init__(self, storetypes):
        # storetypes: the store types that were asked for
        self.storetypes = storetypes
389 382
def getstandinsstate(repo):
    '''Return a list of (lfile, hash) pairs for every standin tracked
    in the dirstate; hash is None when the standin cannot be read.'''
    state = []
    matcher = getstandinmatcher(repo)
    for standinfile in repo.dirstate.walk(matcher, [], False, False):
        lfile = splitstandin(standinfile)
        try:
            hashval = readstandin(repo, lfile)
        except IOError:
            hashval = None
        state.append((lfile, hashval))
    return state
401 394
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the largefile names whose (lfile, hash) entries differ
    between the two standin lists, i.e. the files needing an update.
    Each name is reported once even when its hash changed (which puts
    it in the symmetric difference twice).'''
    changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
    filelist = []
    # track reported names in a set: O(1) membership instead of the
    # quadratic `f[0] not in filelist` list scan
    seen = set()
    for f in changedstandins:
        if f[0] not in seen:
            seen.add(f[0])
            filelist.append(f[0])
    return filelist
@@ -1,74 +1,75 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''store class for local filesystem'''
10 10
11 11 from mercurial.i18n import _
12 12
13 13 import lfutil
14 14 import basestore
15 15
class localstore(basestore.basestore):
    '''localstore first attempts to grab files out of the store in the remote
    Mercurial repository. Failing that, it attempts to grab the files from
    the user cache.'''

    def __init__(self, ui, repo, remote):
        # the "remote" here is another local repository
        self.remote = remote.local()
        super(localstore, self).__init__(ui, repo, self.remote.url())

    def put(self, source, hash):
        # hardlink the largefile into the remote store unless present
        if lfutil.instore(self.remote, hash):
            return
        lfutil.link(lfutil.storepath(self.repo, hash),
                lfutil.storepath(self.remote, hash))

    def exists(self, hashes):
        # map each hash to whether the remote store contains it
        retval = {}
        for hash in hashes:
            retval[hash] = lfutil.instore(self.remote, hash)
        return retval


    def _getfile(self, tmpfile, filename, hash):
        '''Copy the largefile with the given hash into tmpfile and
        return its hex sha1; raise StoreError when unavailable.'''
        path = lfutil.findfile(self.remote, hash)
        if not path:
            raise basestore.StoreError(filename, hash, self.url,
                _("can't get file locally"))
        fd = open(path, 'rb')
        try:
            return lfutil.copyandhash(fd, tmpfile)
        finally:
            # copyandhash closes neither file; we own both here
            fd.close()
            tmpfile.close()

    def _verifyfile(self, cctx, cset, contents, standin, verified):
        '''Verify one standin; return True when verification FAILED.'''
        filename = lfutil.splitstandin(standin)
        if not filename:
            return False
        fctx = cctx[standin]
        key = (filename, fctx.filenode())
        if key in verified:
            # already checked during this verify run
            return False

        # the standin's first 40 bytes are the expected hex sha1
        expecthash = fctx.data()[0:40]
        storepath = lfutil.storepath(self.remote, expecthash)
        verified.add(key)
        if not lfutil.instore(self.remote, expecthash):
            self.ui.warn(
                _('changeset %s: %s references missing %s\n')
                % (cset, filename, storepath))
            return True # failed

        if contents:
            # with --lfc, also re-hash the stored file
            actualhash = lfutil.hashfile(storepath)
            if actualhash != expecthash:
                self.ui.warn(
                    _('changeset %s: %s references corrupted %s\n')
                    % (cset, filename, storepath))
                return True # failed
        return False
@@ -1,115 +1,116 b''
1 1 # Copyright 2010-2011 Fog Creek Software
2 2 # Copyright 2010-2011 Unity Technologies
3 3 #
4 4 # This software may be used and distributed according to the terms of the
5 5 # GNU General Public License version 2 or any later version.
6 6
7 7 '''remote largefile store; the base class for wirestore'''
8 8
9 9 import urllib2
10 10
11 11 from mercurial import util
12 12 from mercurial.i18n import _
13 13 from mercurial.wireproto import remotebatch
14 14
15 15 import lfutil
16 16 import basestore
17 17
class remotestore(basestore.basestore):
    '''a largefile store accessed over a network'''
    def __init__(self, ui, repo, url):
        super(remotestore, self).__init__(ui, repo, url)

    def put(self, source, hash):
        '''Upload source to the remote store under hash; abort when the
        transfer reports failure.'''
        if self.sendfile(source, hash):
            raise util.Abort(
                _('remotestore: could not put %s to remote store %s')
                % (source, self.url))
        self.ui.debug(
            _('remotestore: put %s to remote store %s') % (source, self.url))

    def exists(self, hashes):
        # statlfile status 0 means "present and valid"
        return dict((h, s == 0) for (h, s) in self._stat(hashes).iteritems())

    def sendfile(self, filename, hash):
        '''Send one largefile to the remote; return the protocol status
        from _put (nonzero means failure).'''
        self.ui.debug('remotestore: sendfile(%s, %s)\n' % (filename, hash))
        fd = None
        try:
            try:
                fd = lfutil.httpsendfile(self.ui, filename)
            except IOError, e:
                raise util.Abort(
                    _('remotestore: could not open file %s: %s')
                    % (filename, str(e)))
            return self._put(hash, fd)
        finally:
            # always release the upload file object we opened
            if fd:
                fd.close()

    def _getfile(self, tmpfile, filename, hash):
        '''Download the largefile with the given hash into tmpfile and
        return its hex sha1; raise StoreError or Abort on failure.'''
        # quit if the largefile isn't there
        stat = self._stat([hash])[hash]
        if stat == 1:
            raise util.Abort(_('remotestore: largefile %s is invalid') % hash)
        elif stat == 2:
            raise util.Abort(_('remotestore: largefile %s is missing') % hash)
        elif stat != 0:
            raise RuntimeError('error getting file: unexpected response from '
                               'statlfile (%r)' % stat)

        try:
            length, infile = self._get(hash)
        except urllib2.HTTPError, e:
            # 401s get converted to util.Aborts; everything else is fine being
            # turned into a StoreError
            raise basestore.StoreError(filename, hash, self.url, str(e))
        except urllib2.URLError, e:
            # This usually indicates a connection problem, so don't
            # keep trying with the other files... they will probably
            # all fail too.
            raise util.Abort('%s: %s' % (self.url, e.reason))
        except IOError, e:
            raise basestore.StoreError(filename, hash, self.url, str(e))

        # Mercurial does not close its SSH connections after writing a stream
        if length is not None:
            infile = lfutil.limitreader(infile, length)
        try:
            return lfutil.copyandhash(util.filechunkiter(infile, 128 * 1024),
                                      tmpfile)
        finally:
            # copyandhash leaves both files open; close them here
            infile.close()
            tmpfile.close()

    def _verifyfile(self, cctx, cset, contents, standin, verified):
        '''Verify one standin against the remote; True means FAILED.'''
        filename = lfutil.splitstandin(standin)
        if not filename:
            return False
        fctx = cctx[standin]
        key = (filename, fctx.filenode())
        if key in verified:
            # already checked during this verify run
            return False

        verified.add(key)

        # the standin's first 40 bytes are the expected hex sha1
        expecthash = fctx.data()[0:40]
        stat = self._stat([expecthash])[expecthash]
        if not stat:
            return False
        elif stat == 1:
            self.ui.warn(
                _('changeset %s: %s: contents differ\n')
                % (cset, filename))
            return True # failed
        elif stat == 2:
            self.ui.warn(
                _('changeset %s: %s missing\n')
                % (cset, filename))
            return True # failed
        else:
            raise RuntimeError('verify failed: unexpected response from '
                               'statlfile (%r)' % stat)

    def batch(self):
        '''Support for remote batching.'''
        return remotebatch(self)
115 116
General Comments 0
You need to be logged in to leave comments. Login now