##// END OF EJS Templates
largefiles: refactor lfutil.findfiles to be more logical
Na'Tosha Bard -
r15913:c35dcde2 default
parent child Browse files
Show More
@@ -1,460 +1,460 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import os
12 12 import errno
13 13 import platform
14 14 import shutil
15 15 import stat
16 16 import tempfile
17 17
18 18 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
19 19 from mercurial.i18n import _
20 20
21 21 shortname = '.hglf'
22 22 longname = 'largefiles'
23 23
24 24
25 25 # -- Portability wrappers ----------------------------------------------
26 26
def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
    '''Walk dirstate with matcher and return the result of its walk().

    unknown and ignored are forwarded unchanged; an empty list is always
    passed as walk()'s second positional argument.
    '''
    # NOTE(review): the empty list is presumably walk()'s subrepo list --
    # confirm against dirstate.walk().
    nothing = []
    return dirstate.walk(matcher, nothing, unknown, ignored)
29 29
def repo_add(repo, list):
    '''Pass list to the add() method of the working context (repo[None])
    and return its result.'''
    wctx = repo[None]
    return wctx.add(list)
33 33
def repo_remove(repo, list, unlink=False):
    '''Forget the files in list while holding the repository's wlock.

    When unlink is true, each file is first removed from the working
    directory with util.unlinkpath(); a file that is already missing
    (ENOENT) is tolerated, any other OSError is re-raised. The lock is
    released even if an error occurs. Returns None.
    '''
    wlock = repo.wlock()
    try:
        if unlink:
            for fname in list:
                try:
                    util.unlinkpath(repo.wjoin(fname))
                except OSError as err:
                    # only "no such file" is acceptable here
                    if err.errno != errno.ENOENT:
                        raise
        repo[None].forget(list)
    finally:
        wlock.release()
49 49
def repo_forget(repo, list):
    '''Pass list to the forget() method of the working context
    (repo[None]) and return its result.'''
    wctx = repo[None]
    return wctx.forget(list)
53 53
def findoutgoing(repo, remote, force):
    '''Return repo.changelog.findmissing() for the common set reported by
    discovery.findcommonincoming(repo, remote, force=force).'''
    # imported here rather than at module level, as before
    from mercurial import discovery
    found = discovery.findcommonincoming(repo, remote, force=force)
    # only the first element is needed; the unpacking still insists on
    # exactly three values
    common, _anyinc, _heads = found
    return repo.changelog.findmissing(common)
59 59
60 60 # -- Private worker functions ------------------------------------------
61 61
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum largefile size to use.

    opt is used when it is set. Otherwise, if assumelfiles is true, the
    'minsize' setting of the largefiles config section is read, with
    default as its fallback. A truthy value is converted to float; a
    falsy value other than None is returned untouched.

    Raises util.Abort when the value is not a number, or when no size
    is available at all (None).
    '''
    size = opt
    if assumelfiles and not size:
        size = ui.config(longname, 'minsize', default=default)
    if size is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    if not size:
        return size
    try:
        return float(size)
    except ValueError:
        raise util.Abort(_('largefiles: size must be number (not %s)\n')
                         % size)
75 75
def link(src, dest):
    '''Make dest a hardlink to src, or an atomic copy of it if
    hardlinking fails.

    When util.oslink() raises OSError, the contents of src are written
    to dest through util.atomictempfile and dest is given src's mode.
    '''
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        dst = util.atomictempfile(dest)
        srcfile = open(src, 'rb')
        try:
            for chunk in util.filechunkiter(srcfile):
                dst.write(chunk)
        finally:
            # close the source explicitly instead of relying on garbage
            # collection to release the handle
            srcfile.close()
        dst.close()
        os.chmod(dest, os.stat(src).st_mode)
86 86
def usercachepath(ui, hash):
    '''Return the path of hash in the per-user largefiles cache.

    The 'usercache' setting of the largefiles config section wins when
    set. Otherwise the location depends on the platform:

    - Windows (os.name 'nt'): under LOCALAPPDATA, else APPDATA
    - Darwin: under $HOME/Library/Caches
    - other posix: under XDG_CACHE_HOME, else $HOME/.cache

    If the needed environment variable is missing, a false value is
    returned instead of a path. Raises util.Abort on any other
    operating system.
    '''
    configured = ui.configpath(longname, 'usercache', None)
    if configured:
        return os.path.join(configured, hash)

    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname, hash)
        return configured

    if platform.system() == 'Darwin':
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname, hash)
        return configured

    if os.name == 'posix':
        xdg = os.getenv('XDG_CACHE_HOME')
        if xdg:
            return os.path.join(xdg, longname, hash)
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, '.cache', longname, hash)
        # no usable directory: hand back the (false) XDG value
        return xdg

    raise util.Abort(_('unknown operating system: %s\n') % os.name)
112 112
def inusercache(ui, hash):
    '''Return a true value if hash exists in the user cache.

    When no user cache path can be determined, the false value returned
    by usercachepath() is passed through as is.
    '''
    path = usercachepath(ui, hash)
    if not path:
        return path
    return os.path.exists(path)
116 116
def findfile(repo, hash):
    '''Return the store path of the largefile with the given hash, or
    None if it is in neither the repo store nor the user cache.

    A file found only in the user cache is linked into the store first
    (creating the store directory if needed). A note is written to the
    ui saying where the file was found.
    '''
    if instore(repo, hash):
        repo.ui.note(_('Found %s in store\n') % hash)
        return storepath(repo, hash)
    if inusercache(repo.ui, hash):
        repo.ui.note(_('Found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        util.makedirs(os.path.dirname(path))
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
128 128
class largefiles_dirstate(dirstate.dirstate):
    '''dirstate subclass whose per-file methods pass the file name
    through unixpath() before delegating to dirstate.dirstate.'''

    def __getitem__(self, key):
        key = unixpath(key)
        return super(largefiles_dirstate, self).__getitem__(key)

    def normal(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).normal(f)

    def remove(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).remove(f)

    def add(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).add(f)

    def drop(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).drop(f)

    def forget(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).forget(f)

    def normallookup(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).normallookup(f)
144 144
def openlfdirstate(ui, repo):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    admin = repo.join(longname)
    lfdirstate = largefiles_dirstate(scmutil.opener(admin), ui, repo.root,
                                     repo.dirstate._validate)

    if os.path.exists(os.path.join(admin, 'dirstate')):
        return lfdirstate

    # The largefiles dirstate does not exist yet: populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    util.makedirs(admin)
    for standinname in dirstate_walk(repo.dirstate, getstandinmatcher(repo)):
        lfile = splitstandin(standinname)
        expected = readstandin(repo, lfile)
        lfdirstate.normallookup(lfile)
        try:
            # upgrade to "normal" only when the working copy matches
            # the hash recorded in the standin
            if expected == hashfile(repo.wjoin(lfile)):
                lfdirstate.normal(lfile)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
    return lfdirstate
172 172
def lfdirstate_status(lfdirstate, repo, rev):
    '''Return (modified, added, removed, missing, unknown, ignored,
    clean) for the largefiles tracked by lfdirstate.

    Files the dirstate is unsure about are settled by comparing the
    hash stored in their standin at rev with the hash of the working
    copy; those that match are appended to clean and marked normal,
    the others are appended to modified.
    '''
    everything = match_.always(repo.root, repo.getcwd())
    status = lfdirstate.status(everything, [], False, False, False)
    (unsure, modified, added, removed,
     missing, unknown, ignored, clean) = status
    for lfile in unsure:
        recorded = repo[rev][standin(lfile)].data().strip()
        actual = hashfile(repo.wjoin(lfile))
        if recorded == actual:
            clean.append(lfile)
            lfdirstate.normal(lfile)
        else:
            modified.append(lfile)
    return (modified, added, removed, missing, unknown, ignored, clean)
185 185
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is None and repo.dirstate[f] == '?':
            continue
        lfiles.append(splitstandin(f))
    return lfiles
197 197
def instore(repo, hash):
    '''Return True if the store path for hash exists on disk.'''
    path = storepath(repo, hash)
    return os.path.exists(path)
200 200
def storepath(repo, hash):
    '''Return repo.join() of the largefiles directory name joined with
    hash.'''
    relpath = os.path.join(longname, hash)
    return repo.join(relpath)
203 203
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return True on success or False if the
    file was not found in either cache (which should not happen: this
    is meant to be called only after ensuring that the needed largefile
    exists in the cache).'''
    cached = findfile(repo, hash)
    if cached is None:
        return False
    target = repo.wjoin(filename)
    util.makedirs(os.path.dirname(target))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(cached, target)
    return True
218 218
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the working-copy largefile named file into the store, unless
    the hash recorded in its standin is already there.

    rev and uploaded are accepted but not used by this function.
    '''
    hash = readstandin(repo, file)
    if not instore(repo, hash):
        copytostoreabsolute(repo, repo.wjoin(file), hash)
224 224
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    for filename in ctx.files():
        if not isstandin(filename):
            continue
        # skip standins that are not in this revision's manifest
        if filename not in ctx.manifest():
            continue
        copytostore(repo, ctx.node(), splitstandin(filename))
233 233
234 234
def copytostoreabsolute(repo, file, hash):
    '''Put the largefile at path file into the store under hash.

    If the hash is already in the user cache it is linked from there.
    Otherwise file is copied into the store through an atomic temp
    file, its mode is copied, and the new store entry is linked into
    the user cache.
    '''
    dest = storepath(repo, hash)
    util.makedirs(os.path.dirname(dest))
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), dest)
    else:
        dst = util.atomictempfile(dest)
        srcfile = open(file, 'rb')
        try:
            for chunk in util.filechunkiter(srcfile):
                dst.write(chunk)
        finally:
            # close the source explicitly instead of relying on garbage
            # collection to release the handle
            srcfile.close()
        dst.close()
        util.copymode(file, dest)
        linktousercache(repo, hash)
246 246
def linktousercache(repo, hash):
    '''Link the store copy of hash into the user cache, creating the
    cache directory if needed. Does nothing when no user cache path is
    available.'''
    cachepath = usercachepath(repo.ui, hash)
    if not cachepath:
        return
    util.makedirs(os.path.dirname(cachepath))
    link(storepath(repo, hash), cachepath)
252 252
def getstandinmatcher(repo, pats=[], opts={}):
    '''Return a match object that applies pats to the standin directory'''
    standindir = repo.pathto(shortname)

    if not pats:
        if not os.path.isdir(standindir):
            # no patterns and no standin dir: return matcher that
            # matches nothing
            nothing = match_.match(repo.root, None, [], exact=True)
            nothing.matchfn = lambda f: False
            return nothing
        # no patterns: relative to repo root
        return getmatcher(repo, [standindir], opts, showbad=False)

    # patterns supplied: search standin directory relative to current dir
    cwd = repo.getcwd()
    if os.path.isabs(cwd):
        # cwd is an absolute path for hg -R <reponame>
        # work relative to the repository root in this case
        cwd = ''
    standinpats = [os.path.join(standindir, cwd, pat) for pat in pats]
    return getmatcher(repo, standinpats, opts, showbad=False)
273 273
def getmatcher(repo, pats=[], opts={}, showbad=True):
    '''Wrapper around scmutil.match() that adds showbad: if false,
    neuter the match object's bad() method so it does not print any
    warnings about missing files or directories.'''
    matcher = scmutil.match(repo[None], pats, opts)
    if showbad:
        return matcher
    # replace bad() with a callback that does nothing
    matcher.bad = lambda f, msg: None
    return matcher
283 283
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    # keep the standin matcher's own test before it is replaced below
    standinfn = smatcher.matchfn

    def composed_matchfn(f):
        accepted = standinfn(f)
        if accepted:
            # a standin counts only if its largefile is accepted too
            accepted = rmatcher.matchfn(splitstandin(f))
        return accepted

    smatcher.matchfn = composed_matchfn
    return smatcher
295 295
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) The result is deliberately repo-relative: most callers want an
    #    absolute path, but _create_standin() needs it repo-relative so
    #    lfadd() can pass it to repo_add(). Callers wanting an absolute
    #    path use repo.wjoin() themselves.
    # 2) The separator is always '/', because that's what dirstate uses,
    #    even on Windows. Local separators are converted first in case
    #    the name came from an external source (like the command line).
    normalized = filename.replace(os.sep, '/')
    return '/'.join((shortname, normalized))
307 307
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    standinprefix = shortname + '/'
    return filename.startswith(standinprefix)
312 312
def splitstandin(filename):
    '''Return the part of filename below the standin directory, or None
    if filename does not start with the standin directory.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    head, sep, tail = filename.replace(os.sep, '/').partition('/')
    if sep and head == shortname:
        return tail
    return None
322 322
def updatestandin(repo, standin):
    '''Rewrite standin from the current working-copy largefile: its hash
    and executable flag. Does nothing if the largefile does not exist.'''
    lfile = repo.wjoin(splitstandin(standin))
    if not os.path.exists(lfile):
        return
    newhash = hashfile(lfile)
    isexec = getexecutable(lfile)
    writestandin(repo, standin, newhash, isexec)
329 329
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    fctx = repo[node][standin(filename)]
    # strip surrounding whitespace from the standin's content
    return fctx.data().strip()
334 334
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    target = repo.wjoin(standin)
    writehash(hash, target, executable)
338 338
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Close outfile
    when done and return the binary hash.'''
    sha = util.sha1('')
    for block in instream:
        sha.update(block)
        outfile.write(block)

    # Blecch: closing a file that somebody else opened is rude and
    # wrong. But it's so darn convenient and practical! After all,
    # outfile was opened just to copy and hash.
    outfile.close()

    return sha.digest()
354 354
def hashrepofile(repo, file):
    '''Return hashfile() of file resolved with repo.wjoin().'''
    abspath = repo.wjoin(file)
    return hashfile(abspath)
357 357
def hashfile(file):
    '''Return the hex SHA-1 of the contents of the file at path file, or
    the empty string if no such file exists.'''
    if not os.path.exists(file):
        return ''
    sha = util.sha1('')
    infile = open(file, 'rb')
    for block in blockstream(infile):
        sha.update(block)
    infile.close()
    return sha.hexdigest()
367 367
class limitreader(object):
    '''Reader that hands out at most limit bytes from the file-like
    object f.'''

    def __init__(self, f, limit):
        self.f = f
        # number of bytes that may still be read from f
        self.limit = limit

    def read(self, length):
        '''Read up to length bytes from f without exceeding the
        remaining limit. Return '' once the limit is used up.'''
        if self.limit == 0:
            return ''
        length = min(length, self.limit)
        data = self.f.read(length)
        # Charge only what was actually delivered, so that a short read
        # from f does not eat into the remaining budget.
        self.limit -= len(data)
        return data

    def close(self):
        # deliberately a no-op: f is not closed by this wrapper
        pass
382 382
def blockstream(infile, blocksize=128 * 1024):
    """Generator that yields blocks of data from infile and closes infile.

    Blocks are read with infile.read(blocksize) until an empty result is
    returned. infile is closed when the data is exhausted, and also when
    the generator is closed early or a read raises.
    """
    try:
        while True:
            data = infile.read(blocksize)
            if not data:
                break
            yield data
    finally:
        # Closing a file that somebody else opened is rude, but it is
        # what this generator has always promised; do it on every exit
        # path so the handle cannot leak.
        infile.close()
392 392
def readhash(filename):
    '''Return the first 40 bytes of the file at filename.

    Raises util.Abort if the file holds fewer than 40 bytes.
    '''
    rfile = open(filename, 'rb')
    try:
        hash = rfile.read(40)
    finally:
        # release the handle even if the read fails
        rfile.close()
    if len(hash) < 40:
        raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)')
                         % (filename, len(hash)))
    return hash
401 401
def writehash(hash, filename, executable):
    '''Write hash followed by a newline to filename, creating its parent
    directory if needed, then set the mode given by getmode(executable).'''
    parent = os.path.dirname(filename)
    util.makedirs(parent)
    content = hash + '\n'
    util.writefile(filename, content)
    os.chmod(filename, getmode(executable))
406 406
def getexecutable(filename):
    '''Return a true value only if filename is executable by user, group
    and others alike; otherwise return 0.'''
    mode = os.stat(filename).st_mode
    for bit in (stat.S_IXUSR, stat.S_IXGRP, stat.S_IXOTH):
        masked = mode & bit
        if not masked:
            # first missing execute bit decides the answer
            return masked
    # all three bits are set: the last masked value is the result
    return masked
412 412
def getmode(executable):
    '''Return the permission bits for a standin: rwxr-xr-x when
    executable is true, rw-r--r-- otherwise.'''
    return 0o755 if executable else 0o644
418 418
def urljoin(first, second, *arg):
    '''Join two or more URL pieces so that exactly one '/' separates
    each adjacent pair.

    A missing trailing '/' is added to the left side and a single
    leading '/' is dropped from the right side of every join.
    '''
    url = first
    for piece in (second,) + arg:
        if not url.endswith('/'):
            url += '/'
        if piece.startswith('/'):
            piece = piece[1:]
        url += piece
    return url
431 431
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    sha = util.sha1()
    # feed the hash chunk by chunk rather than reading everything at once
    for block in util.filechunkiter(data):
        sha.update(block)
    return sha.hexdigest()
439 439
def httpsendfile(ui, filename):
    '''Return an httpconnection.httpsendfile for filename, opened in
    binary read mode.'''
    mode = 'rb'
    return httpconnection.httpsendfile(ui, filename, mode)
442 442
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    # normalize first, then switch the local separator to '/'
    normalized = os.path.normpath(path)
    return '/'.join(normalized.split(os.sep))
446 446
def islfilesrepo(repo):
    '''Return a true value if repo has the 'largefiles' requirement and
    at least one store data file whose first field contains the standin
    directory prefix.'''
    if 'largefiles' not in repo.requirements:
        return False
    standinprefix = shortname + '/'
    return util.any(standinprefix in f[0] for f in repo.store.datafiles())
450 450
def mkstemp(repo, prefix):
    '''Returns a file descriptor and a filename corresponding to a temporary
    file in the repo's largefiles store.'''
    storedir = repo.join(longname)
    # the store directory may not exist yet
    util.makedirs(storedir)
    return tempfile.mkstemp(dir=storedir, prefix=prefix)
457 457
class storeprotonotcapable(Exception):
    '''Exception that carries a collection of store types.'''

    def __init__(self, storetypes):
        # NOTE(review): presumably the store types involved in the
        # failure -- confirm against the code that raises this.
        self.storetypes = storetypes
General Comments 0
You need to be logged in to leave comments. Login now