##// END OF EJS Templates
largefiles: refactoring - create destination dir in lfutil.link
Mads Kiilerich -
r18998:d035c390 default
parent child Browse files
Show More
@@ -1,419 +1,418 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import errno
13 13 import platform
14 14 import shutil
15 15 import stat
16 16
17 17 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
18 18 from mercurial.i18n import _
19 19
20 20 shortname = '.hglf'
21 21 shortnameslash = shortname + '/'
22 22 longname = 'largefiles'
23 23
24 24
25 25 # -- Private worker functions ------------------------------------------
26 26
def getminsize(ui, assumelfiles, opt, default=10):
    '''Determine the minimum size (in MB) for largefiles: prefer the
    explicit command line option, fall back to the configured
    [largefiles] minsize, and abort if neither yields a usable number.'''
    size = opt
    if not size and assumelfiles:
        size = ui.config(longname, 'minsize', default=default)
    if size:
        try:
            size = float(size)
        except ValueError:
            raise util.Abort(_('largefiles: size must be number (not %s)\n')
                             % size)
    if size is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    return size
40 40
def link(src, dest):
    '''Hardlink src to dest, creating the destination directory first.
    When hardlinking is unavailable, fall back to an atomic copy that
    preserves the source's permission bits.'''
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # hardlinks failed; fall back on an atomic copy instead
        out = util.atomictempfile(dest)
        srcfile = open(src, 'rb')
        for chunk in util.filechunkiter(srcfile):
            out.write(chunk)
        out.close()
        os.chmod(dest, os.stat(src).st_mode)
51 52
def usercachepath(ui, hash):
    '''Return the per-user cache path for the given largefile hash, or
    None when no suitable cache location can be determined. Raises
    util.Abort on an unrecognized operating system.'''
    configured = ui.configpath(longname, 'usercache', None)
    if configured:
        return os.path.join(configured, hash)
    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname, hash)
    elif platform.system() == 'Darwin':
        # Darwin reports os.name == 'posix', so check it first
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches',
                                longname, hash)
    elif os.name == 'posix':
        xdgcache = os.getenv('XDG_CACHE_HOME')
        if xdgcache:
            return os.path.join(xdgcache, longname, hash)
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, '.cache', longname, hash)
    else:
        raise util.Abort(_('unknown operating system: %s\n') % os.name)
    return None
77 78
def inusercache(ui, hash):
    '''Report whether the largefile with the given hash exists in the
    user cache.'''
    cached = usercachepath(ui, hash)
    return cached and os.path.exists(cached)
81 82
def findfile(repo, hash):
    '''Locate the largefile with the given hash: prefer the repo store,
    then the user cache (linking it into the store on a hit). Return
    the store path, or None if the file is in neither place.'''
    if instore(repo, hash):
        repo.ui.note(_('found %s in store\n') % hash)
        return storepath(repo, hash)
    if inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        # link() creates any missing parent directories itself
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
93 93
class largefilesdirstate(dirstate.dirstate):
    '''dirstate subclass that normalizes every incoming path with
    unixpath() before delegating to the base dirstate, so the
    largefiles dirstate always stores slash-separated names.'''
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self):
        # largefiles are never ignored
        return False
111 111
112 112 def openlfdirstate(ui, repo, create=True):
113 113 '''
114 114 Return a dirstate object that tracks largefiles: i.e. its root is
115 115 the repo root, but it is saved in .hg/largefiles/dirstate.
116 116 '''
117 117 lfstoredir = repo.join(longname)
118 118 opener = scmutil.opener(lfstoredir)
119 119 lfdirstate = largefilesdirstate(opener, ui, repo.root,
120 120 repo.dirstate._validate)
121 121
122 122 # If the largefiles dirstate does not exist, populate and create
123 123 # it. This ensures that we create it on the first meaningful
124 124 # largefiles operation in a new clone.
125 125 if create and not os.path.exists(os.path.join(lfstoredir, 'dirstate')):
126 126 util.makedirs(lfstoredir)
127 127 matcher = getstandinmatcher(repo)
128 128 for standin in repo.dirstate.walk(matcher, [], False, False):
129 129 lfile = splitstandin(standin)
130 130 hash = readstandin(repo, lfile)
131 131 lfdirstate.normallookup(lfile)
132 132 try:
133 133 if hash == hashfile(repo.wjoin(lfile)):
134 134 lfdirstate.normal(lfile)
135 135 except OSError, err:
136 136 if err.errno != errno.ENOENT:
137 137 raise
138 138 return lfdirstate
139 139
def lfdirstatestatus(lfdirstate, repo, rev):
    '''Return largefile status lists (modified, added, removed, missing,
    unknown, ignored, clean) against rev, resolving "unsure" entries by
    comparing working-copy hashes with the standins at rev.'''
    match = match_.always(repo.root, repo.getcwd())
    (unsure, modified, added, removed,
     missing, unknown, ignored, clean) = lfdirstate.status(
        match, [], False, False, False)
    for lfile in unsure:
        try:
            fctx = repo[rev][standin(lfile)]
        except LookupError:
            fctx = None
        if not fctx or fctx.data().strip() != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            # record the file as clean so we skip it next time
            lfdirstate.normal(lfile)
    return (modified, added, removed, missing, unknown, ignored, clean)
155 155
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is not None or repo.dirstate[f] != '?':
            lfiles.append(splitstandin(f))
    return lfiles
167 167
def instore(repo, hash):
    '''Report whether the largefile with the given hash is in the
    local store.'''
    path = storepath(repo, hash)
    return os.path.exists(path)
170 170
def storepath(repo, hash):
    '''Return the path of the largefile with the given hash inside the
    repository's local store.'''
    relpath = os.path.join(longname, hash)
    return repo.join(relpath)
173 173
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return True on success or False if the
    file was not found in either cache (which should not happen: this
    is meant to be called only after ensuring that the needed largefile
    exists in the cache).'''
    cached = findfile(repo, hash)
    if cached is None:
        return False
    wpath = repo.wjoin(filename)
    util.makedirs(os.path.dirname(wpath))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(cached, wpath)
    return True
188 188
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the largefile standing in for file at rev into the local
    store, unless it is already present there.'''
    hash = readstandin(repo, file, rev)
    if instore(repo, hash):
        return
    copytostoreabsolute(repo, repo.wjoin(file), hash)
194 194
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    for filename in ctx.files():
        # only standins that are actually part of the manifest matter
        if isstandin(filename) and filename in ctx.manifest():
            lfile = splitstandin(filename)
            copytostore(repo, ctx.node(), lfile)
203 203
204 204
def copytostoreabsolute(repo, file, hash):
    '''Put the largefile at the absolute path `file` into the local
    store, linking from the user cache when possible, and link the
    result back into the user cache.'''
    if inusercache(repo.ui, hash):
        # link() creates any missing parent directories itself
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    elif not getattr(repo, "_isconverting", False):
        storedpath = storepath(repo, hash)
        util.makedirs(os.path.dirname(storedpath))
        dst = util.atomictempfile(storedpath,
                                  createmode=repo.store.createmode)
        for chunk in util.filechunkiter(open(file, 'rb')):
            dst.write(chunk)
        dst.close()
        linktousercache(repo, hash)
216 216
def linktousercache(repo, hash):
    '''Hardlink the stored largefile into the user cache, when a user
    cache location is available; link() creates missing directories.'''
    cachepath = usercachepath(repo.ui, hash)
    if cachepath:
        link(storepath(repo, hash), cachepath)
222 221
def getstandinmatcher(repo, pats=None, opts=None):
    '''Return a match object that applies pats to the standin directory.

    Uses None sentinels instead of mutable default arguments ([]/{})
    so a list/dict is never shared between calls.'''
    if pats is None:
        pats = []
    if opts is None:
        opts = {}
    standindir = repo.wjoin(shortname)
    if pats:
        pats = [os.path.join(standindir, pat) for pat in pats]
    else:
        # no patterns: relative to repo root
        pats = [standindir]
    match = scmutil.match(repo[None], pats, opts)
    # no warnings about missing files or directories
    match.bad = lambda f, msg: None
    return match
235 234
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    origmatchfn = smatcher.matchfn

    def composedmatchfn(f):
        # f must be a standin AND its largefile must match rmatcher
        return origmatchfn(f) and rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composedmatchfn
    return smatcher
247 246
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add(). So
    #    leave it up to the caller to use repo.wjoin() to get an absolute
    #    path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
259 258
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    return filename.startswith(shortnameslash)
264 263
def splitstandin(filename):
    '''Return the largefile name for a standin path, or None when
    filename is not a standin.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    parts = util.pconvert(filename).split('/', 1)
    if len(parts) == 2 and parts[0] == shortname:
        return parts[1]
    return None
274 273
def updatestandin(repo, standin):
    '''Re-hash the working-copy largefile behind standin and rewrite the
    standin, when the largefile exists in the working directory.'''
    lfile = repo.wjoin(splitstandin(standin))
    if os.path.exists(lfile):
        hash = hashfile(lfile)
        executable = getexecutable(lfile)
        writestandin(repo, standin, hash, executable)
281 280
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    fctx = repo[node][standin(filename)]
    return fctx.data().strip()
286 285
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    standinpath = repo.wjoin(standin)
    writehash(hash, standinpath, executable)
290 289
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Close outfile
    when done and return the binary hash.'''
    sha = util.sha1('')
    for chunk in instream:
        sha.update(chunk)
        outfile.write(chunk)
    # Blecch: closing a file that somebody else opened is rude and
    # wrong. But it's so darn convenient and practical! After all,
    # outfile was opened just to copy and hash.
    outfile.close()
    return sha.digest()
306 305
def hashrepofile(repo, file):
    '''Return the hex SHA-1 of the given repo-relative working file.'''
    return hashfile(repo.wjoin(file))
309 308
def hashfile(file):
    '''Return the hex SHA-1 of file's contents, or the empty string when
    the file does not exist.'''
    if not os.path.exists(file):
        return ''
    sha = util.sha1('')
    fd = open(file, 'rb')
    for block in blockstream(fd):
        sha.update(block)
    fd.close()
    return sha.hexdigest()
319 318
class limitreader(object):
    '''File-like wrapper exposing at most `limit` bytes of f; close()
    deliberately leaves the underlying file open.'''
    def __init__(self, f, limit):
        self.f = f
        self.limit = limit

    def read(self, length):
        if self.limit == 0:
            return ''
        # never hand out more than the remaining budget
        if length > self.limit:
            length = self.limit
        self.limit -= length
        return self.f.read(length)

    def close(self):
        pass
334 333
def blockstream(infile, blocksize=128 * 1024):
    """Generator that yields blocks of data from infile and closes infile."""
    data = infile.read(blocksize)
    while data:
        yield data
        data = infile.read(blocksize)
    # same blecch as copyandhash() above
    infile.close()
344 343
def writehash(hash, filename, executable):
    '''Write hash (plus newline) to filename, creating parent
    directories and setting the mode per `executable`.'''
    util.makedirs(os.path.dirname(filename))
    util.writefile(filename, hash + '\n')
    os.chmod(filename, getmode(executable))
349 348
def getexecutable(filename):
    '''Return truthy iff filename is executable by user, group and
    other (all three x-bits set).'''
    filemode = os.stat(filename).st_mode
    return ((filemode & stat.S_IXUSR) and
            (filemode & stat.S_IXGRP) and
            (filemode & stat.S_IXOTH))
355 354
356 355 def getmode(executable):
357 356 if executable:
358 357 return 0755
359 358 else:
360 359 return 0644
361 360
def urljoin(first, second, *arg):
    '''Join URL components so that exactly one slash separates each
    adjacent pair.'''
    def _glue(left, right):
        if not left.endswith('/'):
            left += '/'
        if right.startswith('/'):
            right = right[1:]
        return left + right

    result = _glue(first, second)
    for piece in arg:
        result = _glue(result, piece)
    return result
374 373
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    hasher = util.sha1()
    for chunk in util.filechunkiter(data):
        hasher.update(chunk)
    return hasher.hexdigest()
382 381
def httpsendfile(ui, filename):
    '''Return an httpsendfile wrapper opened on filename for binary
    reading.'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
385 384
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    normalized = os.path.normpath(path)
    return util.pconvert(normalized)
389 388
def islfilesrepo(repo):
    '''Report whether repo actually uses largefiles: either its store
    contains standin data, or its largefiles dirstate is non-empty.'''
    if ('largefiles' in repo.requirements and
            util.any(shortnameslash in f[0]
                     for f in repo.store.datafiles())):
        return True
    return util.any(openlfdirstate(repo.ui, repo, False))
396 395
class storeprotonotcapable(Exception):
    '''Raised when no store matches any of the required store types.'''
    def __init__(self, storetypes):
        # the store types that were requested but unsupported
        self.storetypes = storetypes
400 399
def getstandinsstate(repo):
    '''Return a list of (lfile, hash) pairs for every standin tracked by
    the dirstate; hash is None when the standin cannot be read.'''
    standins = []
    matcher = getstandinmatcher(repo)
    for standin in repo.dirstate.walk(matcher, [], False, False):
        lfile = splitstandin(standin)
        try:
            hash = readstandin(repo, lfile)
        except IOError:
            hash = None
        standins.append((lfile, hash))
    return standins
412 411
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the names of largefiles whose (name, hash) standin entries
    differ between the two lists, without duplicates.'''
    changedstandins = set(oldstandins) ^ set(newstandins)
    filelist = []
    for entry in changedstandins:
        if entry[0] not in filelist:
            filelist.append(entry[0])
    return filelist
@@ -1,81 +1,77 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''store class for local filesystem'''
10 10
11 import os
12
13 from mercurial import util
14 11 from mercurial.i18n import _
15 12
16 13 import lfutil
17 14 import basestore
18 15
class localstore(basestore.basestore):
    '''localstore first attempts to grab files out of the store in the remote
    Mercurial repository. Failing that, it attempts to grab the files from
    the user cache.'''

    def __init__(self, ui, repo, remote):
        self.remote = remote.local()
        super(localstore, self).__init__(ui, repo, self.remote.url())

    def put(self, source, hash):
        # lfutil.link creates the destination directory if needed
        if lfutil.instore(self.remote, hash):
            return
        lfutil.link(lfutil.storepath(self.repo, hash),
                    lfutil.storepath(self.remote, hash))

    def exists(self, hashes):
        retval = {}
        for hash in hashes:
            retval[hash] = lfutil.instore(self.remote, hash)
        return retval

    def _getfile(self, tmpfile, filename, hash):
        # prefer the remote store, then the user cache
        if lfutil.instore(self.remote, hash):
            path = lfutil.storepath(self.remote, hash)
        elif lfutil.inusercache(self.ui, hash):
            path = lfutil.usercachepath(self.ui, hash)
        else:
            raise basestore.StoreError(filename, hash, self.url,
                                       _("can't get file locally"))
        fd = open(path, 'rb')
        try:
            return lfutil.copyandhash(fd, tmpfile)
        finally:
            fd.close()

    def _verifyfile(self, cctx, cset, contents, standin, verified):
        filename = lfutil.splitstandin(standin)
        if not filename:
            return False
        fctx = cctx[standin]
        key = (filename, fctx.filenode())
        if key in verified:
            # already checked in this run
            return False

        expecthash = fctx.data()[0:40]
        storepath = lfutil.storepath(self.remote, expecthash)
        verified.add(key)
        if not lfutil.instore(self.remote, expecthash):
            self.ui.warn(
                _('changeset %s: %s references missing %s\n')
                % (cset, filename, storepath))
            return True # failed

        if contents:
            actualhash = lfutil.hashfile(storepath)
            if actualhash != expecthash:
                self.ui.warn(
                    _('changeset %s: %s references corrupted %s\n')
                    % (cset, filename, storepath))
                return True # failed
        return False
General Comments 0
You need to be logged in to leave comments. Login now