largefiles: add copytostore() fstandin argument to replace readstandin() (API)...
FUJIWARA Katsunori
r31736:dd2079fa default
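In short: copytostore() used to recompute the hash via readstandin(), which re-resolves the standin path from the plain filename on every call, while callers such as copyalltostore() already iterate over standin filenames. The new fstandin argument lets callers pass that path in, and the hash is read directly from the file context with readasstandin(). A minimal sketch of the before/after calling convention (repo, revorctx, file, and fstandin stand for the usual Mercurial objects and paths; they are not defined here):

    # before this change: the standin path is re-derived inside the call
    hash = readstandin(repo, file, revorctx)
    # (equivalent to readasstandin(repo[revorctx][standin(file)]))

    # after this change: the caller supplies the standin path it already holds
    hash = readasstandin(repo[revorctx][fstandin])

    # call sites forward the standin they iterate over, e.g. in copyalltostore():
    copytostore(repo, ctx, realfile, filename)  # filename is the standin path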
@@ -1,677 +1,677 @@
# Copyright 2009-2010 Gregory P. Ward
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
# Copyright 2010-2011 Fog Creek Software
# Copyright 2010-2011 Unity Technologies
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''largefiles utility code: must not import other modules in this package.'''
from __future__ import absolute_import

import copy
import hashlib
import os
import platform
import stat

from mercurial.i18n import _

from mercurial import (
    dirstate,
    encoding,
    error,
    httpconnection,
    match as matchmod,
    node,
    pycompat,
    scmutil,
    util,
    vfs as vfsmod,
)

shortname = '.hglf'
shortnameslash = shortname + '/'
longname = 'largefiles'

# -- Private worker functions ------------------------------------------

def getminsize(ui, assumelfiles, opt, default=10):
    lfsize = opt
    if not lfsize and assumelfiles:
        lfsize = ui.config(longname, 'minsize', default=default)
    if lfsize:
        try:
            lfsize = float(lfsize)
        except ValueError:
            raise error.Abort(_('largefiles: size must be number (not %s)\n')
                              % lfsize)
    if lfsize is None:
        raise error.Abort(_('minimum size for largefiles must be specified'))
    return lfsize

def link(src, dest):
    """Try to create hardlink - if that fails, efficiently make a copy."""
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        with open(src, 'rb') as srcf:
            with util.atomictempfile(dest) as dstf:
                for chunk in util.filechunkiter(srcf):
                    dstf.write(chunk)
        os.chmod(dest, os.stat(src).st_mode)

def usercachepath(ui, hash):
    '''Return the correct location in the "global" largefiles cache for a file
    with the given hash.
    This cache is used for sharing of largefiles across repositories - both
    to preserve download bandwidth and storage space.'''
    return os.path.join(_usercachedir(ui), hash)

def _usercachedir(ui):
    '''Return the location of the "global" largefiles cache.'''
    path = ui.configpath(longname, 'usercache', None)
    if path:
        return path
    if pycompat.osname == 'nt':
        appdata = encoding.environ.get('LOCALAPPDATA',\
                encoding.environ.get('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname)
    elif platform.system() == 'Darwin':
        home = encoding.environ.get('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname)
    elif pycompat.osname == 'posix':
        path = encoding.environ.get('XDG_CACHE_HOME')
        if path:
            return os.path.join(path, longname)
        home = encoding.environ.get('HOME')
        if home:
            return os.path.join(home, '.cache', longname)
    else:
        raise error.Abort(_('unknown operating system: %s\n')
                          % pycompat.osname)
    raise error.Abort(_('unknown %s usercache location') % longname)

def inusercache(ui, hash):
    path = usercachepath(ui, hash)
    return os.path.exists(path)

def findfile(repo, hash):
    '''Return store path of the largefile with the specified hash.
    As a side effect, the file might be linked from user cache.
    Return None if the file can't be found locally.'''
    path, exists = findstorepath(repo, hash)
    if exists:
        repo.ui.note(_('found %s in store\n') % hash)
        return path
    elif inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        link(usercachepath(repo.ui, hash), path)
        return path
    return None

class largefilesdirstate(dirstate.dirstate):
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self, f):
        return False
    def write(self, tr=False):
        # (1) disable PENDING mode always
        #     (lfdirstate isn't yet managed as a part of the transaction)
        # (2) avoid develwarn 'use dirstate.write with ....'
        super(largefilesdirstate, self).write(None)

def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    vfs = repo.vfs
    lfstoredir = longname
    opener = vfsmod.vfs(vfs.join(lfstoredir))
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                    repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not vfs.exists(vfs.join(lfstoredir, 'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(matcher, [], False, False)

        if len(standins) > 0:
            vfs.makedirs(lfstoredir)

        for standin in standins:
            lfile = splitstandin(standin)
            lfdirstate.normallookup(lfile)
    return lfdirstate

def lfdirstatestatus(lfdirstate, repo):
    pctx = repo['.']
    match = matchmod.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(match, [], False, False, False)
    modified, clean = s.modified, s.clean
    for lfile in unsure:
        try:
            fctx = pctx[standin(lfile)]
        except LookupError:
            fctx = None
        if not fctx or fctx.data().strip() != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            lfdirstate.normal(lfile)
    return s

def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''

    if matcher is None:
        matcher = getstandinmatcher(repo)

    # ignore unknown files in working directory
    return [splitstandin(f)
            for f in repo[rev].walk(matcher)
            if rev is not None or repo.dirstate[f] != '?']

def instore(repo, hash, forcelocal=False):
    '''Return true if a largefile with the given hash exists in the store'''
    return os.path.exists(storepath(repo, hash, forcelocal))

def storepath(repo, hash, forcelocal=False):
    '''Return the correct location in the repository largefiles store for a
    file with the given hash.'''
    if not forcelocal and repo.shared():
        return repo.vfs.reljoin(repo.sharedpath, longname, hash)
    return repo.vfs.join(longname, hash)

def findstorepath(repo, hash):
    '''Search through the local store path(s) to find the file for the given
    hash. If the file is not found, its path in the primary store is returned.
    The return value is a tuple of (path, exists(path)).
    '''
    # For shared repos, the primary store is in the share source. But for
    # backward compatibility, force a lookup in the local store if it wasn't
    # found in the share source.
    path = storepath(repo, hash, False)

    if instore(repo, hash):
        return (path, True)
    elif repo.shared() and instore(repo, hash, True):
        return storepath(repo, hash, True), True

    return (path, False)

def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happen:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    wvfs = repo.wvfs
    path = findfile(repo, hash)
    if path is None:
        return False
    wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    with open(path, 'rb') as srcfd:
        with wvfs(filename, 'wb') as destfd:
            gothash = copyandhash(
                util.filechunkiter(srcfd), destfd)
    if gothash != hash:
        repo.ui.warn(_('%s: data corruption in %s with hash %s\n')
                     % (filename, path, gothash))
        wvfs.unlink(filename)
        return False
    return True

-def copytostore(repo, revorctx, file, uploaded=False):
+def copytostore(repo, revorctx, file, fstandin, uploaded=False):
    wvfs = repo.wvfs
-    hash = readstandin(repo, file, revorctx)
+    hash = readasstandin(repo[revorctx][fstandin])
    if instore(repo, hash):
        return
    if wvfs.exists(file):
        copytostoreabsolute(repo, wvfs.join(file), hash)
    else:
        repo.ui.warn(_("%s: largefile %s not available from local store\n") %
                     (file, hash))

def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''

    ctx = repo[node]
    for filename in ctx.files():
        realfile = splitstandin(filename)
        if realfile is not None and filename in ctx.manifest():
-            copytostore(repo, ctx, realfile)
+            copytostore(repo, ctx, realfile, filename)

def copytostoreabsolute(repo, file, hash):
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        with open(file, 'rb') as srcf:
            with util.atomictempfile(storepath(repo, hash),
                                     createmode=repo.store.createmode) as dstf:
                for chunk in util.filechunkiter(srcf):
                    dstf.write(chunk)
    linktousercache(repo, hash)

def linktousercache(repo, hash):
    '''Link / copy the largefile with the specified hash from the store
    to the cache.'''
    path = usercachepath(repo.ui, hash)
    link(storepath(repo, hash), path)

def getstandinmatcher(repo, rmatcher=None):
    '''Return a match object that applies rmatcher to the standin directory'''
    wvfs = repo.wvfs
    standindir = shortname

    # no warnings about missing files or directories
    badfn = lambda f, msg: None

    if rmatcher and not rmatcher.always():
        pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
        if not pats:
            pats = [wvfs.join(standindir)]
        match = scmutil.match(repo[None], pats, badfn=badfn)
        # if pats is empty, it would incorrectly always match, so clear _always
        match._always = False
    else:
        # no patterns: relative to repo root
        match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
    return match

def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher)
    isstandin = smatcher.matchfn
    def composedmatchfn(f):
        return isstandin(f) and rmatcher.matchfn(splitstandin(f))
    smatcher.matchfn = composedmatchfn

    return smatcher

def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add(). So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)

def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    return filename.startswith(shortnameslash)

def splitstandin(filename):
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    bits = util.pconvert(filename).split('/', 1)
    if len(bits) == 2 and bits[0] == shortname:
        return bits[1]
    else:
        return None

def updatestandin(repo, lfile, standin):
    """Re-calculate hash value of lfile and write it into standin

    This assumes that "lfutil.standin(lfile) == standin", for efficiency.
    """
    file = repo.wjoin(lfile)
    if repo.wvfs.exists(lfile):
        hash = hashfile(file)
        executable = getexecutable(file)
        writestandin(repo, standin, hash, executable)
    else:
        raise error.Abort(_('%s: file not found!') % lfile)

def readasstandin(fctx):
    '''read hex hash from given filectx of standin file

    This encapsulates how "standin" data is stored into storage layer.'''
    return fctx.data().strip()

def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    return readasstandin(repo[node][standin(filename)])

def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    repo.wwrite(standin, hash + '\n', executable and 'x' or '')

def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    hasher = hashlib.sha1('')
    for data in instream:
        hasher.update(data)
        outfile.write(data)
    return hasher.hexdigest()

def hashfile(file):
    if not os.path.exists(file):
        return ''
    with open(file, 'rb') as fd:
        return hexsha1(fd)

def getexecutable(filename):
    mode = os.stat(filename).st_mode
    return ((mode & stat.S_IXUSR) and
            (mode & stat.S_IXGRP) and
            (mode & stat.S_IXOTH))

def urljoin(first, second, *arg):
    def join(left, right):
        if not left.endswith('/'):
            left += '/'
        if right.startswith('/'):
            right = right[1:]
        return left + right

    url = join(first, second)
    for a in arg:
        url = join(url, a)
    return url

def hexsha1(fileobj):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    h = hashlib.sha1()
    for chunk in util.filechunkiter(fileobj):
        h.update(chunk)
    return h.hexdigest()

def httpsendfile(ui, filename):
    return httpconnection.httpsendfile(ui, filename, 'rb')

def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    return util.pconvert(os.path.normpath(path))

def islfilesrepo(repo):
    '''Return true if the repo is a largefile repo.'''
    if ('largefiles' in repo.requirements and
        any(shortnameslash in f[0] for f in repo.store.datafiles())):
        return True

    return any(openlfdirstate(repo.ui, repo, False))

class storeprotonotcapable(Exception):
    def __init__(self, storetypes):
        self.storetypes = storetypes

def getstandinsstate(repo):
    standins = []
    matcher = getstandinmatcher(repo)
    wctx = repo[None]
    for standin in repo.dirstate.walk(matcher, [], False, False):
        lfile = splitstandin(standin)
        try:
            hash = readasstandin(wctx[standin])
        except IOError:
            hash = None
        standins.append((lfile, hash))
    return standins

def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    lfstandin = standin(lfile)
    if lfstandin in repo.dirstate:
        stat = repo.dirstate._map[lfstandin]
        state, mtime = stat[0], stat[3]
    else:
        state, mtime = '?', -1
    if state == 'n':
        if (normallookup or mtime < 0 or
            not repo.wvfs.exists(lfile)):
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'm':
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)

def markcommitted(orig, ctx, node):
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        lfile = splitstandin(f)
        if lfile is not None:
            synclfdirstate(repo, lfdirstate, lfile, False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    #
    # Using "node" instead of "ctx" implies additional "repo[node]"
    # lookup while copyalltostore(), but can omit redundant check for
    # files coming from the 2nd parent, which should exist in store
    # at merging.
    copyalltostore(repo, node)

def getlfilestoupdate(oldstandins, newstandins):
    changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
    filelist = []
    for f in changedstandins:
        if f[0] not in filelist:
            filelist.append(f[0])
    return filelist

def getlfilestoupload(repo, missing, addfunc):
    for i, n in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), i,
                         unit=_('revisions'), total=len(missing))
        parents = [p for p in repo[n].parents() if p != node.nullid]

        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            for f in mp1:
                if f not in mc:
                    files.add(f)
            for f in mp2:
                if f not in mc:
                    files.add(f)
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            if isstandin(fn) and fn in ctx:
                addfunc(fn, ctx[fn].data().strip())
    repo.ui.progress(_('finding outgoing largefiles'), None)

def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    '''

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = matchmod.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, [], False, False,
                                      False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile in modifiedfiles:
                fstandin = standin(lfile)
                if repo.wvfs.exists(fstandin):
                    # this handles the case where a rebase is being
                    # performed and the working copy is not updated
                    # yet.
                    if repo.wvfs.exists(lfile):
                        updatestandin(repo, lfile, fstandin)

        return match

    lfiles = listlfiles(repo)
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, [], False, False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, lfile, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # For largefiles, only one of the normal and standin should be
        # committed (except if one of them is a remove). In the case of a
        # standin removal, drop the normal file if it is unknown to dirstate.
        # Thus, skip plain largefile names but keep the standin.
        if f in lfiles or fstandin in standins:
            if repo.dirstate[fstandin] != 'r':
                if repo.dirstate[f] != 'r':
                    continue
            elif repo.dirstate[f] == '?':
                continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        if origmatchfn(f):
            return f not in lfiles
        else:
            return f in standins

    match.matchfn = matchfn

    return match

class automatedcommithook(object):
    '''Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    '''
    def __init__(self, resuming):
        self.resuming = resuming

    def __call__(self, repo, match):
        if self.resuming:
            self.resuming = False # avoids updating at subsequent commits
            return updatestandinsbymatch(repo, match)
        else:
            return match

def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    else:
        if forcibly:
            return ui.status # forcibly WRITE OUT
        else:
            return lambda *msg, **opts: None # forcibly IGNORE
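As background for the code above, the standin scheme maps every largefile to a small tracked file under .hglf/ whose content is the largefile's hex SHA-1 hash. A self-contained sketch of the path mapping implemented by standin() and splitstandin() (plain Python, no Mercurial imports; simplified in that it skips the Windows separator conversion done by util.pconvert()):

    shortname = '.hglf'
    shortnameslash = shortname + '/'

    def standin(filename):
        # a largefile is tracked via a small standin file under .hglf/
        return shortnameslash + filename

    def splitstandin(filename):
        # invert standin(); return None for paths outside .hglf/
        bits = filename.split('/', 1)
        if len(bits) == 2 and bits[0] == shortname:
            return bits[1]
        return None

    assert standin('foo/bar.bin') == '.hglf/foo/bar.bin'
    assert splitstandin('.hglf/foo/bar.bin') == 'foo/bar.bin'
    assert splitstandin('foo/bar.bin') is None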