lfs: use sysstr to check for attribute presence...
marmoute
r51794:6543469a default
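Background for the change: on Python 3, attribute names passed to getattr()/hasattr() must be native str, not bytes, so checks like util.safehasattr(inst, b'reason') only work through a bytes-accepting compatibility wrapper. A minimal sketch of the underlying Python behavior this changeset relies on (the Demo class is hypothetical, not part of the patch):

class Demo:
    reason = "connection reset"

demo = Demo()

print(hasattr(demo, 'reason'))   # True: attribute names are native str
try:
    hasattr(demo, b'reason')     # bytes attribute names raise on Python 3
except TypeError as exc:
    print(exc)                   # "attribute name must be string, not 'bytes'"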
diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
@@ -1,789 +1,789 @@
# blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
#
# Copyright 2017 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import contextlib
import errno
import hashlib
import json
import os
import re
import socket

from mercurial.i18n import _
from mercurial.pycompat import getattr
from mercurial.node import hex

from mercurial import (
    encoding,
    error,
    httpconnection as httpconnectionmod,
    pathutil,
    pycompat,
    url as urlmod,
    util,
    vfs as vfsmod,
    worker,
)

from mercurial.utils import (
    stringutil,
    urlutil,
)

from ..largefiles import lfutil

# 64 bytes for SHA256
_lfsre = re.compile(br'\A[a-f0-9]{64}\Z')


class lfsvfs(vfsmod.vfs):
    def join(self, path):
        """split the path at first two characters, like: XX/XXXXX..."""
        if not _lfsre.match(path):
            raise error.ProgrammingError(b'unexpected lfs path: %s' % path)
        return super(lfsvfs, self).join(path[0:2], path[2:])

    def walk(self, path=None, onerror=None):
        """Yield (dirpath, [], oids) tuple for blobs under path

        Oids only exist in the root of this vfs, so dirpath is always ''.
        """
        root = os.path.normpath(self.base)
        # when dirpath == root, dirpath[prefixlen:] becomes empty
        # because len(dirpath) < prefixlen.
        prefixlen = len(pathutil.normasprefix(root))
        oids = []

        for dirpath, dirs, files in os.walk(
            self.reljoin(self.base, path or b''), onerror=onerror
        ):
            dirpath = dirpath[prefixlen:]

            # Silently skip unexpected files and directories
            if len(dirpath) == 2:
                oids.extend(
                    [dirpath + f for f in files if _lfsre.match(dirpath + f)]
                )

        yield (b'', [], oids)


class nullvfs(lfsvfs):
    def __init__(self):
        pass

    def exists(self, oid):
        return False

    def read(self, oid):
        # store.read() calls into here if the blob doesn't exist in its
        # self.vfs. Raise the same error as a normal vfs when asked to read a
        # file that doesn't exist. The only difference is the full file path
        # isn't available in the error.
        raise IOError(
            errno.ENOENT,
            pycompat.sysstr(b'%s: No such file or directory' % oid),
        )

    def walk(self, path=None, onerror=None):
        return (b'', [], [])

    def write(self, oid, data):
        pass


class lfsuploadfile(httpconnectionmod.httpsendfile):
    """a file-like object that supports keepalive."""

    def __init__(self, ui, filename):
        super(lfsuploadfile, self).__init__(ui, filename, b'rb')
        self.read = self._data.read

    def _makeprogress(self):
        return None  # progress is handled by the worker client


class local:
    """Local blobstore for large file contents.

    This blobstore is used both as a cache and as a staging area for large blobs
    to be uploaded to the remote blobstore.
    """

    def __init__(self, repo):
        fullpath = repo.svfs.join(b'lfs/objects')
        self.vfs = lfsvfs(fullpath)

        if repo.ui.configbool(b'experimental', b'lfs.disableusercache'):
            self.cachevfs = nullvfs()
        else:
            usercache = lfutil._usercachedir(repo.ui, b'lfs')
            self.cachevfs = lfsvfs(usercache)
        self.ui = repo.ui

    def open(self, oid):
        """Open a read-only file descriptor to the named blob, in either the
        usercache or the local store."""
        return open(self.path(oid), 'rb')

    def path(self, oid):
        """Build the path for the given blob ``oid``.

        If the blob exists locally, the path may point to either the usercache
        or the local store. If it doesn't, it will point to the local store.
        This is meant for situations where existing code that isn't LFS aware
        needs to open a blob. Generally, prefer the ``open`` method on this
        class.
        """
        # The usercache is the most likely place to hold the file. Commit will
        # write to both it and the local store, as will anything that downloads
        # the blobs. However, things like clone without an update won't
        # populate the local store. For an init + push of a local clone,
        # the usercache is the only place it _could_ be. If not present, the
        # missing file msg here will indicate the local repo, not the usercache.
        if self.cachevfs.exists(oid):
            return self.cachevfs.join(oid)

        return self.vfs.join(oid)

    def download(self, oid, src, content_length):
        """Read the blob from the remote source in chunks, verify the content,
        and write to this local blobstore."""
        sha256 = hashlib.sha256()
        size = 0

        with self.vfs(oid, b'wb', atomictemp=True) as fp:
            for chunk in util.filechunkiter(src, size=1048576):
                fp.write(chunk)
                sha256.update(chunk)
                size += len(chunk)

            # If the server advertised a length longer than what we actually
            # received, then we should expect that the server crashed while
            # producing the response (but the server has no way of telling us
            # that), and we really don't need to try to write the response to
            # the localstore, because it's not going to match the expected.
            # The server also uses this method to store data uploaded by the
            # client, so if this happens on the server side, it's possible
            # that the client crashed or an antivirus interfered with the
            # upload.
            if content_length is not None and int(content_length) != size:
                msg = (
                    b"Response length (%d) does not match Content-Length "
                    b"header (%d) for %s"
                )
                raise LfsRemoteError(_(msg) % (size, int(content_length), oid))

            realoid = hex(sha256.digest())
            if realoid != oid:
                raise LfsCorruptionError(
                    _(b'corrupt remote lfs object: %s') % oid
                )

        self._linktousercache(oid)

    def write(self, oid, data):
        """Write blob to local blobstore.

        This should only be called from the filelog during a commit or similar.
        As such, there is no need to verify the data. Imports from a remote
        store must use ``download()`` instead."""
        with self.vfs(oid, b'wb', atomictemp=True) as fp:
            fp.write(data)

        self._linktousercache(oid)

    def linkfromusercache(self, oid):
        """Link blobs found in the user cache into this store.

        The server module needs to do this when it lets the client know not to
        upload the blob, to ensure it is always available in this store.
        Normally this is done implicitly when the client reads or writes the
        blob, but that doesn't happen when the server tells the client that it
        already has the blob.
        """
        if not isinstance(self.cachevfs, nullvfs) and not self.vfs.exists(oid):
            self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
            lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))

    def _linktousercache(self, oid):
        # XXX: should we verify the content of the cache, and hardlink back to
        # the local store on success, but truncate, write and link on failure?
        if not self.cachevfs.exists(oid) and not isinstance(
            self.cachevfs, nullvfs
        ):
            self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid)
            lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))

    def read(self, oid, verify=True):
        """Read blob from local blobstore."""
        if not self.vfs.exists(oid):
            blob = self._read(self.cachevfs, oid, verify)

            # Even if revlog will verify the content, it needs to be verified
            # now before making the hardlink to avoid propagating corrupt blobs.
            # Don't abort if corruption is detected, because `hg verify` will
            # give more useful info about the corruption- simply don't add the
            # hardlink.
            if verify or hex(hashlib.sha256(blob).digest()) == oid:
                self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
                lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
        else:
            self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid)
            blob = self._read(self.vfs, oid, verify)
        return blob

    def _read(self, vfs, oid, verify):
        """Read blob (after verifying) from the given store"""
        blob = vfs.read(oid)
        if verify:
            _verify(oid, blob)
        return blob

    def verify(self, oid):
        """Indicate whether or not the hash of the underlying file matches its
        name."""
        sha256 = hashlib.sha256()

        with self.open(oid) as fp:
            for chunk in util.filechunkiter(fp, size=1048576):
                sha256.update(chunk)

        return oid == hex(sha256.digest())

    def has(self, oid):
        """Returns True if the local blobstore contains the requested blob,
        False otherwise."""
        return self.cachevfs.exists(oid) or self.vfs.exists(oid)


def _urlerrorreason(urlerror):
    """Create a friendly message for the given URLError to be used in an
    LfsRemoteError message.
    """
    inst = urlerror

    if isinstance(urlerror.reason, Exception):
        inst = urlerror.reason

-    if util.safehasattr(inst, b'reason'):
+    if util.safehasattr(inst, 'reason'):
        try:  # usually it is in the form (errno, strerror)
            reason = inst.reason.args[1]
        except (AttributeError, IndexError):
            # it might be anything, for example a string
            reason = inst.reason
        if isinstance(reason, str):
            # SSLError of Python 2.7.9 contains a unicode
            reason = encoding.unitolocal(reason)
        return reason
    elif getattr(inst, "strerror", None):
        return encoding.strtolocal(inst.strerror)
    else:
        return stringutil.forcebytestr(urlerror)


class lfsauthhandler(util.urlreq.basehandler):
    handler_order = 480  # Before HTTPDigestAuthHandler (== 490)

    def http_error_401(self, req, fp, code, msg, headers):
        """Enforces that any authentication performed is HTTP Basic
        Authentication. No authentication is also acceptable.
        """
        authreq = headers.get('www-authenticate', None)
        if authreq:
            scheme = authreq.split()[0]

            if scheme.lower() != 'basic':
                msg = _(b'the server must support Basic Authentication')
                raise util.urlerr.httperror(
                    req.get_full_url(),
                    code,
                    encoding.strfromlocal(msg),
                    headers,
                    fp,
                )
        return None


class _gitlfsremote:
    def __init__(self, repo, url):
        ui = repo.ui
        self.ui = ui
        baseurl, authinfo = url.authinfo()
        self.baseurl = baseurl.rstrip(b'/')
        useragent = repo.ui.config(b'experimental', b'lfs.user-agent')
        if not useragent:
            useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version()
        self.urlopener = urlmod.opener(ui, authinfo, useragent)
        self.urlopener.add_handler(lfsauthhandler())
        self.retry = ui.configint(b'lfs', b'retry')

    def writebatch(self, pointers, fromstore):
        """Batch upload from local to remote blobstore."""
        self._batch(_deduplicate(pointers), fromstore, b'upload')

    def readbatch(self, pointers, tostore):
        """Batch download from remote to local blostore."""
        self._batch(_deduplicate(pointers), tostore, b'download')

    def _batchrequest(self, pointers, action):
        """Get metadata about objects pointed by pointers for given action

        Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
        """
        objects = [
            {'oid': pycompat.strurl(p.oid()), 'size': p.size()}
            for p in pointers
        ]
        requestdata = pycompat.bytesurl(
            json.dumps(
                {
                    'objects': objects,
                    'operation': pycompat.strurl(action),
                }
            )
        )
        url = b'%s/objects/batch' % self.baseurl
        batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata)
        batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
        batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
        try:
            with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
                rawjson = rsp.read()
        except util.urlerr.httperror as ex:
            hints = {
                400: _(
                    b'check that lfs serving is enabled on %s and "%s" is '
                    b'supported'
                )
                % (self.baseurl, action),
                404: _(b'the "lfs.url" config may be used to override %s')
                % self.baseurl,
            }
            hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action))
            raise LfsRemoteError(
                _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex),
                hint=hint,
            )
        except util.urlerr.urlerror as ex:
            hint = (
                _(b'the "lfs.url" config may be used to override %s')
                % self.baseurl
            )
            raise LfsRemoteError(
                _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
            )
        try:
            response = pycompat.json_loads(rawjson)
        except ValueError:
            raise LfsRemoteError(
                _(b'LFS server returns invalid JSON: %s')
                % rawjson.encode("utf-8")
            )

        if self.ui.debugflag:
            self.ui.debug(b'Status: %d\n' % rsp.status)
            # lfs-test-server and hg serve return headers in different order
            headers = pycompat.bytestr(rsp.info()).strip()
            self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))

            if 'objects' in response:
                response['objects'] = sorted(
                    response['objects'], key=lambda p: p['oid']
                )
            self.ui.debug(
                b'%s\n'
                % pycompat.bytesurl(
                    json.dumps(
                        response,
                        indent=2,
                        separators=('', ': '),
                        sort_keys=True,
                    )
                )
            )

        def encodestr(x):
            if isinstance(x, str):
                return x.encode('utf-8')
            return x

        return pycompat.rapply(encodestr, response)

    def _checkforservererror(self, pointers, responses, action):
        """Scans errors from objects

        Raises LfsRemoteError if any objects have an error"""
        for response in responses:
            # The server should return 404 when objects cannot be found. Some
            # server implementation (ex. lfs-test-server) does not set "error"
            # but just removes "download" from "actions". Treat that case
            # as the same as 404 error.
            if b'error' not in response:
                if action == b'download' and action not in response.get(
                    b'actions', []
                ):
                    code = 404
                else:
                    continue
            else:
                # An error dict without a code doesn't make much sense, so
                # treat as a server error.
                code = response.get(b'error').get(b'code', 500)

            ptrmap = {p.oid(): p for p in pointers}
            p = ptrmap.get(response[b'oid'], None)
            if p:
                filename = getattr(p, 'filename', b'unknown')
                errors = {
                    404: b'The object does not exist',
                    410: b'The object was removed by the owner',
                    422: b'Validation error',
                    500: b'Internal server error',
                }
                msg = errors.get(code, b'status code %d' % code)
                raise LfsRemoteError(
                    _(b'LFS server error for "%s": %s') % (filename, msg)
                )
            else:
                raise LfsRemoteError(
                    _(b'LFS server error. Unsolicited response for oid %s')
                    % response[b'oid']
                )

    def _extractobjects(self, response, pointers, action):
        """extract objects from response of the batch API

        response: parsed JSON object returned by batch API
        return response['objects'] filtered by action
        raise if any object has an error
        """
        # Scan errors from objects - fail early
        objects = response.get(b'objects', [])
        self._checkforservererror(pointers, objects, action)

        # Filter objects with given action. Practically, this skips uploading
        # objects which exist in the server.
        filteredobjects = [
            o for o in objects if action in o.get(b'actions', [])
        ]

        return filteredobjects

    def _basictransfer(self, obj, action, localstore):
        """Download or upload a single object using basic transfer protocol

        obj: dict, an object description returned by batch API
        action: string, one of ['upload', 'download']
        localstore: blobstore.local

        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
        basic-transfers.md
        """
        oid = obj[b'oid']
        href = obj[b'actions'][action].get(b'href')
        headers = obj[b'actions'][action].get(b'header', {}).items()

        request = util.urlreq.request(pycompat.strurl(href))
        if action == b'upload':
            # If uploading blobs, read data from local blobstore.
            if not localstore.verify(oid):
                raise error.Abort(
                    _(b'detected corrupt lfs object: %s') % oid,
                    hint=_(b'run hg verify'),
                )

        for k, v in headers:
            request.add_header(pycompat.strurl(k), pycompat.strurl(v))

        try:
            if action == b'upload':
                request.data = lfsuploadfile(self.ui, localstore.path(oid))
                request.get_method = lambda: 'PUT'
                request.add_header('Content-Type', 'application/octet-stream')
                request.add_header('Content-Length', request.data.length)

            with contextlib.closing(self.urlopener.open(request)) as res:
                contentlength = res.info().get(b"content-length")
                ui = self.ui  # Shorten debug lines
                if self.ui.debugflag:
                    ui.debug(b'Status: %d\n' % res.status)
                    # lfs-test-server and hg serve return headers in different
                    # order
                    headers = pycompat.bytestr(res.info()).strip()
                    ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))

                if action == b'download':
                    # If downloading blobs, store downloaded data to local
                    # blobstore
                    localstore.download(oid, res, contentlength)
                else:
                    blocks = []
                    while True:
                        data = res.read(1048576)
                        if not data:
                            break
                        blocks.append(data)

                    response = b"".join(blocks)
                    if response:
                        ui.debug(b'lfs %s response: %s' % (action, response))
        except util.urlerr.httperror as ex:
            if self.ui.debugflag:
                self.ui.debug(
                    b'%s: %s\n' % (oid, ex.read())
                )  # XXX: also bytes?
            raise LfsRemoteError(
                _(b'LFS HTTP error: %s (oid=%s, action=%s)')
                % (stringutil.forcebytestr(ex), oid, action)
            )
        except util.urlerr.urlerror as ex:
            hint = _(b'attempted connection to %s') % pycompat.bytesurl(
                util.urllibcompat.getfullurl(request)
            )
            raise LfsRemoteError(
                _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
            )
        finally:
            if request.data:
                request.data.close()

    def _batch(self, pointers, localstore, action):
        if action not in [b'upload', b'download']:
            raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action)

        response = self._batchrequest(pointers, action)
        objects = self._extractobjects(response, pointers, action)
        total = sum(x.get(b'size', 0) for x in objects)
        sizes = {}
        for obj in objects:
            sizes[obj.get(b'oid')] = obj.get(b'size', 0)
        topic = {
            b'upload': _(b'lfs uploading'),
            b'download': _(b'lfs downloading'),
        }[action]
        if len(objects) > 1:
            self.ui.note(
                _(b'lfs: need to transfer %d objects (%s)\n')
                % (len(objects), util.bytecount(total))
            )

        def transfer(chunk):
            for obj in chunk:
                objsize = obj.get(b'size', 0)
                if self.ui.verbose:
                    if action == b'download':
                        msg = _(b'lfs: downloading %s (%s)\n')
                    elif action == b'upload':
                        msg = _(b'lfs: uploading %s (%s)\n')
                    self.ui.note(
                        msg % (obj.get(b'oid'), util.bytecount(objsize))
                    )
                retry = self.retry
                while True:
                    try:
                        self._basictransfer(obj, action, localstore)
                        yield 1, obj.get(b'oid')
                        break
                    except socket.error as ex:
                        if retry > 0:
                            self.ui.note(
                                _(b'lfs: failed: %r (remaining retry %d)\n')
                                % (stringutil.forcebytestr(ex), retry)
                            )
                            retry -= 1
                            continue
                        raise

        # Until https multiplexing gets sorted out. It's not clear if
        # ConnectionManager.set_ready() is externally synchronized for thread
        # safety with Windows workers.
        if self.ui.configbool(b'experimental', b'lfs.worker-enable'):
            # The POSIX workers are forks of this process, so before spinning
            # them up, close all pooled connections. Otherwise, there's no way
            # to coordinate between them about who is using what, and the
            # transfers will get corrupted.
            #
            # TODO: add a function to keepalive.ConnectionManager to mark all
            # ready connections as in use, and roll that back after the fork?
            # That would allow the existing pool of connections in this process
            # to be preserved.
            def prefork():
                for h in self.urlopener.handlers:
                    getattr(h, "close_all", lambda: None)()

            oids = worker.worker(
                self.ui,
                0.1,
                transfer,
                (),
                sorted(objects, key=lambda o: o.get(b'oid')),
                prefork=prefork,
            )
        else:
            oids = transfer(sorted(objects, key=lambda o: o.get(b'oid')))

        with self.ui.makeprogress(
            topic, unit=_(b"bytes"), total=total
        ) as progress:
            progress.update(0)
            processed = 0
            blobs = 0
            for _one, oid in oids:
                processed += sizes[oid]
                blobs += 1
                progress.update(processed)
                self.ui.note(_(b'lfs: processed: %s\n') % oid)

        if blobs > 0:
            if action == b'upload':
                self.ui.status(
                    _(b'lfs: uploaded %d files (%s)\n')
                    % (blobs, util.bytecount(processed))
                )
            elif action == b'download':
                self.ui.status(
                    _(b'lfs: downloaded %d files (%s)\n')
                    % (blobs, util.bytecount(processed))
                )

    def __del__(self):
        # copied from mercurial/httppeer.py
        urlopener = getattr(self, 'urlopener', None)
        if urlopener:
            for h in urlopener.handlers:
                h.close()
                getattr(h, "close_all", lambda: None)()


class _dummyremote:
    """Dummy store storing blobs to temp directory."""

    def __init__(self, repo, url):
        fullpath = repo.vfs.join(b'lfs', url.path)
        self.vfs = lfsvfs(fullpath)

    def writebatch(self, pointers, fromstore):
        for p in _deduplicate(pointers):
            content = fromstore.read(p.oid(), verify=True)
            with self.vfs(p.oid(), b'wb', atomictemp=True) as fp:
                fp.write(content)

    def readbatch(self, pointers, tostore):
        for p in _deduplicate(pointers):
            with self.vfs(p.oid(), b'rb') as fp:
                tostore.download(p.oid(), fp, None)


class _nullremote:
    """Null store storing blobs to /dev/null."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore):
        pass

    def readbatch(self, pointers, tostore):
        pass


class _promptremote:
    """Prompt user to set lfs.url when accessed."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore, ui=None):
        self._prompt()

    def readbatch(self, pointers, tostore, ui=None):
        self._prompt()

    def _prompt(self):
        raise error.Abort(_(b'lfs.url needs to be configured'))


_storemap = {
    b'https': _gitlfsremote,
    b'http': _gitlfsremote,
    b'file': _dummyremote,
    b'null': _nullremote,
    None: _promptremote,
}


def _deduplicate(pointers):
    """Remove any duplicate oids that exist in the list"""
    reduced = util.sortdict()
    for p in pointers:
        reduced[p.oid()] = p
    return reduced.values()


def _verify(oid, content):
    realoid = hex(hashlib.sha256(content).digest())
    if realoid != oid:
        raise LfsCorruptionError(
            _(b'detected corrupt lfs object: %s') % oid,
            hint=_(b'run hg verify'),
        )


def remote(repo, remote=None):
    """remotestore factory. return a store in _storemap depending on config

    If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
    infer the endpoint, based on the remote repository using the same path
    adjustments as git. As an extension, 'http' is supported as well so that
    ``hg serve`` works out of the box.

    https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
    """
    lfsurl = repo.ui.config(b'lfs', b'url')
    url = urlutil.url(lfsurl or b'')
    if lfsurl is None:
        if remote:
            path = remote
-        elif util.safehasattr(repo, b'_subtoppath'):
+        elif util.safehasattr(repo, '_subtoppath'):
            # The pull command sets this during the optional update phase, which
            # tells exactly where the pull originated, whether 'paths.default'
            # or explicit.
            path = repo._subtoppath
        else:
            # TODO: investigate 'paths.remote:lfsurl' style path customization,
            # and fall back to inferring from 'paths.remote' if unspecified.
            path = repo.ui.config(b'paths', b'default') or b''

        defaulturl = urlutil.url(path)

        # TODO: support local paths as well.
        # TODO: consider the ssh -> https transformation that git applies
        if defaulturl.scheme in (b'http', b'https'):
            if defaulturl.path and defaulturl.path[:-1] != b'/':
                defaulturl.path += b'/'
            defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'

            url = urlutil.url(bytes(defaulturl))
            repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url)

    scheme = url.scheme
    if scheme not in _storemap:
        raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme)
    return _storemap[scheme](repo, url)


class LfsRemoteError(error.StorageError):
    pass


class LfsCorruptionError(error.Abort):
    """Raised when a corrupt blob is detected, aborting an operation

    It exists to allow specialized handling on the server side."""
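The same bytes-to-str conversion repeats in wrapper.py below. For context, util.safehasattr() is essentially a sentinel-based getattr() probe; a minimal sketch of such a helper (an approximation, not necessarily mercurial's exact mercurial/util.py implementation):

_notset = object()  # unique sentinel, so an attribute stored as None still counts as present

def safehasattr(thing, attr):
    # `attr` must be a native str on Python 3; passing bytes here is
    # exactly what this changeset stops doing.
    return getattr(thing, attr, _notset) is not _notset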
diff --git a/hgext/lfs/wrapper.py b/hgext/lfs/wrapper.py
@@ -1,545 +1,545 @@
1 # wrapper.py - methods wrapping core mercurial logic
1 # wrapper.py - methods wrapping core mercurial logic
2 #
2 #
3 # Copyright 2017 Facebook, Inc.
3 # Copyright 2017 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import hashlib
9 import hashlib
10
10
11 from mercurial.i18n import _
11 from mercurial.i18n import _
12 from mercurial.node import bin, hex, short
12 from mercurial.node import bin, hex, short
13 from mercurial.pycompat import (
13 from mercurial.pycompat import (
14 getattr,
14 getattr,
15 setattr,
15 setattr,
16 )
16 )
17
17
18 from mercurial import (
18 from mercurial import (
19 bundle2,
19 bundle2,
20 changegroup,
20 changegroup,
21 cmdutil,
21 cmdutil,
22 context,
22 context,
23 error,
23 error,
24 exchange,
24 exchange,
25 exthelper,
25 exthelper,
26 localrepo,
26 localrepo,
27 revlog,
27 revlog,
28 scmutil,
28 scmutil,
29 util,
29 util,
30 vfs as vfsmod,
30 vfs as vfsmod,
31 wireprotov1server,
31 wireprotov1server,
32 )
32 )
33
33
34 from mercurial.upgrade_utils import (
34 from mercurial.upgrade_utils import (
35 actions as upgrade_actions,
35 actions as upgrade_actions,
36 engine as upgrade_engine,
36 engine as upgrade_engine,
37 )
37 )
38
38
39 from mercurial.interfaces import repository
39 from mercurial.interfaces import repository
40
40
41 from mercurial.utils import (
41 from mercurial.utils import (
42 storageutil,
42 storageutil,
43 stringutil,
43 stringutil,
44 )
44 )
45
45
46 from ..largefiles import lfutil
46 from ..largefiles import lfutil
47
47
48 from . import (
48 from . import (
49 blobstore,
49 blobstore,
50 pointer,
50 pointer,
51 )
51 )
52
52
53 eh = exthelper.exthelper()
53 eh = exthelper.exthelper()
54
54
55
55
56 @eh.wrapfunction(localrepo, 'makefilestorage')
56 @eh.wrapfunction(localrepo, 'makefilestorage')
57 def localrepomakefilestorage(orig, requirements, features, **kwargs):
57 def localrepomakefilestorage(orig, requirements, features, **kwargs):
58 if b'lfs' in requirements:
58 if b'lfs' in requirements:
59 features.add(repository.REPO_FEATURE_LFS)
59 features.add(repository.REPO_FEATURE_LFS)
60
60
61 return orig(requirements=requirements, features=features, **kwargs)
61 return orig(requirements=requirements, features=features, **kwargs)
62
62
63
63
64 @eh.wrapfunction(changegroup, 'allsupportedversions')
64 @eh.wrapfunction(changegroup, 'allsupportedversions')
65 def allsupportedversions(orig, ui):
65 def allsupportedversions(orig, ui):
66 versions = orig(ui)
66 versions = orig(ui)
67 versions.add(b'03')
67 versions.add(b'03')
68 return versions
68 return versions
69
69
70
70
71 @eh.wrapfunction(wireprotov1server, '_capabilities')
71 @eh.wrapfunction(wireprotov1server, '_capabilities')
72 def _capabilities(orig, repo, proto):
72 def _capabilities(orig, repo, proto):
73 '''Wrap server command to announce lfs server capability'''
73 '''Wrap server command to announce lfs server capability'''
74 caps = orig(repo, proto)
74 caps = orig(repo, proto)
75 if util.safehasattr(repo.svfs, b'lfslocalblobstore'):
75 if util.safehasattr(repo.svfs, 'lfslocalblobstore'):
76 # Advertise a slightly different capability when lfs is *required*, so
76 # Advertise a slightly different capability when lfs is *required*, so
77 # that the client knows it MUST load the extension. If lfs is not
77 # that the client knows it MUST load the extension. If lfs is not
78 # required on the server, there's no reason to autoload the extension
78 # required on the server, there's no reason to autoload the extension
79 # on the client.
79 # on the client.
80 if b'lfs' in repo.requirements:
80 if b'lfs' in repo.requirements:
81 caps.append(b'lfs-serve')
81 caps.append(b'lfs-serve')
82
82
83 caps.append(b'lfs')
83 caps.append(b'lfs')
84 return caps
84 return caps
85
85
86
86
87 def bypasscheckhash(self, text):
87 def bypasscheckhash(self, text):
88 return False
88 return False
89
89
90
90
91 def readfromstore(self, text):
91 def readfromstore(self, text):
92 """Read filelog content from local blobstore transform for flagprocessor.
92 """Read filelog content from local blobstore transform for flagprocessor.
93
93
94 Default tranform for flagprocessor, returning contents from blobstore.
94 Default tranform for flagprocessor, returning contents from blobstore.
95 Returns a 2-typle (text, validatehash) where validatehash is True as the
95 Returns a 2-typle (text, validatehash) where validatehash is True as the
96 contents of the blobstore should be checked using checkhash.
96 contents of the blobstore should be checked using checkhash.
97 """
97 """
98 p = pointer.deserialize(text)
98 p = pointer.deserialize(text)
99 oid = p.oid()
99 oid = p.oid()
100 store = self.opener.lfslocalblobstore
100 store = self.opener.lfslocalblobstore
101 if not store.has(oid):
101 if not store.has(oid):
102 p.filename = self.filename
102 p.filename = self.filename
103 self.opener.lfsremoteblobstore.readbatch([p], store)
103 self.opener.lfsremoteblobstore.readbatch([p], store)
104
104
105 # The caller will validate the content
105 # The caller will validate the content
106 text = store.read(oid, verify=False)
106 text = store.read(oid, verify=False)
107
107
108 # pack hg filelog metadata
108 # pack hg filelog metadata
109 hgmeta = {}
109 hgmeta = {}
110 for k in p.keys():
110 for k in p.keys():
111 if k.startswith(b'x-hg-'):
111 if k.startswith(b'x-hg-'):
112 name = k[len(b'x-hg-') :]
112 name = k[len(b'x-hg-') :]
113 hgmeta[name] = p[k]
113 hgmeta[name] = p[k]
114 if hgmeta or text.startswith(b'\1\n'):
114 if hgmeta or text.startswith(b'\1\n'):
115 text = storageutil.packmeta(hgmeta, text)
115 text = storageutil.packmeta(hgmeta, text)
116
116
117 return (text, True)
117 return (text, True)


def writetostore(self, text):
    # hg filelog metadata (includes rename, etc)
    hgmeta, offset = storageutil.parsemeta(text)
    if offset and offset > 0:
        # lfs blob does not contain hg filelog metadata
        text = text[offset:]

    # git-lfs only supports sha256
    oid = hex(hashlib.sha256(text).digest())
    self.opener.lfslocalblobstore.write(oid, text)

    # replace contents with metadata
    longoid = b'sha256:%s' % oid
    metadata = pointer.gitlfspointer(oid=longoid, size=b'%d' % len(text))

    # by default, we expect the content to be binary. however, LFS could also
    # be used for non-binary content. add a special entry for non-binary data.
    # this will be used by filectx.isbinary().
    if not stringutil.binary(text):
        # not hg filelog metadata (affecting commit hash), no "x-hg-" prefix
        metadata[b'x-is-binary'] = b'0'

    # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
    if hgmeta is not None:
        for k, v in hgmeta.items():
            metadata[b'x-hg-%s' % k] = v

    rawtext = metadata.serialize()
    return (rawtext, False)
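
# The serialized pointer replaces the file content in the revlog. A standalone
# sketch of such a pointer, assuming the canonical git-lfs text layout
# (version, oid, size lines); the real serializer also emits any x-hg-* /
# x-is-binary keys added above:
import hashlib

def _makepointer_sketch(blob):
    oid = hashlib.sha256(blob).hexdigest().encode('ascii')
    return (
        b'version https://git-lfs.github.com/spec/v1\n'
        b'oid sha256:%s\n'
        b'size %d\n'
    ) % (oid, len(blob))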


def _islfs(rlog, node=None, rev=None):
    if rev is None:
        if node is None:
            # both None - likely working copy content where node is not ready
            return False
        rev = rlog.rev(node)
    else:
        node = rlog.node(rev)
    if node == rlog.nullid:
        return False
    flags = rlog.flags(rev)
    return bool(flags & revlog.REVIDX_EXTSTORED)
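
# REVIDX_EXTSTORED is a single bit in the per-revision flag word, so the test
# above is plain bitmask arithmetic. Standalone illustration (the bit position
# here is illustrative, not Mercurial's actual constant):
_EXTSTORED_SKETCH = 1 << 13

def _flag_set(flags):
    return bool(flags & _EXTSTORED_SKETCH)

assert _flag_set(_EXTSTORED_SKETCH | 0b1)
assert not _flag_set(0b1)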


# Wrapping may also be applied by remotefilelog
def filelogaddrevision(
    orig,
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=revlog.REVIDX_DEFAULT_FLAGS,
    **kwds
):
    # The matcher isn't available if reposetup() wasn't called.
    lfstrack = self._revlog.opener.options.get(b'lfstrack')

    if lfstrack:
        textlen = len(text)
        # exclude hg rename meta from file size
        meta, offset = storageutil.parsemeta(text)
        if offset:
            textlen -= offset

        if lfstrack(self._revlog.filename, textlen):
            flags |= revlog.REVIDX_EXTSTORED

    return orig(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=cachedelta,
        node=node,
        flags=flags,
        **kwds
    )
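
# 'lfstrack' is a callable (path, size) -> bool derived from the lfs.track
# configuration. A stand-in predicate with the same shape (the extension and
# 10 MiB threshold are illustrative policy, not defaults):
def _lfstrack_sketch(path, size):
    return path.endswith(b'.bin') or size > 10 * 1024 * 1024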


# Wrapping may also be applied by remotefilelog
def filelogrenamed(orig, self, node):
    if _islfs(self._revlog, node):
        rawtext = self._revlog.rawdata(node)
        if not rawtext:
            return False
        metadata = pointer.deserialize(rawtext)
        if b'x-hg-copy' in metadata and b'x-hg-copyrev' in metadata:
            return metadata[b'x-hg-copy'], bin(metadata[b'x-hg-copyrev'])
        else:
            return False
    return orig(self, node)


# Wrapping may also be applied by remotefilelog
def filelogsize(orig, self, rev):
    if _islfs(self._revlog, rev=rev):
        # fast path: use lfs metadata to answer size
        rawtext = self._revlog.rawdata(rev)
        metadata = pointer.deserialize(rawtext)
        return int(metadata[b'size'])
    return orig(self, rev)
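
# The fast path works because the file's length is recorded in the pointer
# itself, so no blob needs to be fetched. A standalone sketch of pulling the
# b'size' key out of raw pointer text:
def _size_from_pointer_sketch(rawtext):
    for line in rawtext.splitlines():
        if line.startswith(b'size '):
            return int(line[len(b'size '):])
    raise ValueError('pointer has no size key')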


@eh.wrapfunction(revlog, '_verify_revision')
def _verify_revision(orig, rl, skipflags, state, node):
    if _islfs(rl, node=node):
        rawtext = rl.rawdata(node)
        metadata = pointer.deserialize(rawtext)

        # Don't skip blobs that are stored locally, as local verification is
        # relatively cheap and there's no other way to verify the raw data in
        # the revlog.
        if rl.opener.lfslocalblobstore.has(metadata.oid()):
            skipflags &= ~revlog.REVIDX_EXTSTORED
        elif skipflags & revlog.REVIDX_EXTSTORED:
            # The wrapped method will set `skipread`, but there's enough local
            # info to check renames.
            state[b'safe_renamed'].add(node)

    orig(rl, skipflags, state, node)
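
# skipflags is a bitmask of flags whose validation may be skipped; clearing
# the EXTSTORED bit re-enables hash checking for blobs that are present
# locally. Standalone illustration (flag values are illustrative):
_EXTSTORED, _OTHER = 1 << 13, 1 << 15
skipflags_sketch = _EXTSTORED | _OTHER
skipflags_sketch &= ~_EXTSTORED  # do verify externally-stored revisions
assert skipflags_sketch == _OTHER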


@eh.wrapfunction(context.basefilectx, 'cmp')
def filectxcmp(orig, self, fctx):
    """returns True if text is different from fctx"""
    # some fctx instances (e.g. hg-git) are not based on basefilectx and do
    # not have islfs
    if self.islfs() and getattr(fctx, 'islfs', lambda: False)():
        # fast path: check LFS oid
        p1 = pointer.deserialize(self.rawdata())
        p2 = pointer.deserialize(fctx.rawdata())
        return p1.oid() != p2.oid()
    return orig(self, fctx)


@eh.wrapfunction(context.basefilectx, 'isbinary')
def filectxisbinary(orig, self):
    if self.islfs():
        # fast path: use lfs metadata to answer isbinary
        metadata = pointer.deserialize(self.rawdata())
        # if lfs metadata says nothing, assume it's binary by default
        return bool(int(metadata.get(b'x-is-binary', 1)))
    return orig(self)
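
# The b'x-is-binary' entry is only written for non-binary content (with value
# b'0'); absence defaults to binary. A standalone check of that convention:
def _isbinary_sketch(metadata):
    return bool(int(metadata.get(b'x-is-binary', 1)))

assert _isbinary_sketch({}) is True
assert _isbinary_sketch({b'x-is-binary': b'0'}) is False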


def filectxislfs(self):
    return _islfs(self.filelog()._revlog, self.filenode())


@eh.wrapfunction(cmdutil, '_updatecatformatter')
def _updatecatformatter(orig, fm, ctx, matcher, path, decode):
    orig(fm, ctx, matcher, path, decode)
    fm.data(rawdata=ctx[path].rawdata())


@eh.wrapfunction(scmutil, 'wrapconvertsink')
def convertsink(orig, sink):
    sink = orig(sink)
    if sink.repotype == b'hg':

        class lfssink(sink.__class__):
            def putcommit(
                self,
                files,
                copies,
                parents,
                commit,
                source,
                revmap,
                full,
                cleanp2,
            ):
                pc = super(lfssink, self).putcommit
                node = pc(
                    files,
                    copies,
                    parents,
                    commit,
                    source,
                    revmap,
                    full,
                    cleanp2,
                )

                if b'lfs' not in self.repo.requirements:
                    ctx = self.repo[node]

                    # The file list may contain removed files, so check for
                    # membership before assuming it is in the context.
                    if any(f in ctx and ctx[f].islfs() for f, n in files):
                        self.repo.requirements.add(b'lfs')
                        scmutil.writereporequirements(self.repo)

                return node

        sink.__class__ = lfssink

    return sink
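
# The sink gains LFS awareness by swapping __class__ on an object that has
# already been constructed. A standalone sketch of that upgrade-in-place
# pattern:
class _Base:
    def put(self):
        return 'base'

class _Lfs(_Base):
    def put(self):
        return super().put() + '+lfs'

_obj = _Base()
_obj.__class__ = _Lfs  # existing instance now uses the subclass methods
assert _obj.put() == 'base+lfs'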


# bundlerepo uses "vfsmod.readonlyvfs(othervfs)", so we need to make sure lfs
# options and blob stores are passed from othervfs to the new readonlyvfs.
@eh.wrapfunction(vfsmod.readonlyvfs, '__init__')
def vfsinit(orig, self, othervfs):
    orig(self, othervfs)
    # copy lfs related options
    for k, v in othervfs.options.items():
        if k.startswith(b'lfs'):
            self.options[k] = v
    # also copy lfs blobstores. note: this can run before reposetup, so lfs
    # blobstore attributes are not always ready at this time.
    for name in ['lfslocalblobstore', 'lfsremoteblobstore']:
        if util.safehasattr(othervfs, name):
            setattr(self, name, getattr(othervfs, name))


def _prefetchfiles(repo, revmatches):
    """Ensure that required LFS blobs are present, fetching them as a group if
    needed."""
    if not util.safehasattr(repo.svfs, 'lfslocalblobstore'):
        return

    pointers = []
    oids = set()
    localstore = repo.svfs.lfslocalblobstore

    for rev, match in revmatches:
        ctx = repo[rev]
        for f in ctx.walk(match):
            p = pointerfromctx(ctx, f)
            if p and p.oid() not in oids and not localstore.has(p.oid()):
                p.filename = f
                pointers.append(p)
                oids.add(p.oid())

    if pointers:
        # Recalculating the repo store here allows 'paths.default' that is set
        # on the repo by a clone command to be used for the update.
        blobstore.remote(repo).readbatch(pointers, localstore)
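
# Deduplicating by oid before one readbatch() call turns N per-file fetches
# into a single batched download. A standalone sketch of the gather step over
# (oid, path) pairs, given a set of oids already present locally:
def _gather_missing_sketch(entries, have):
    seen, batch = set(), []
    for oid, path in entries:
        if oid not in seen and oid not in have:
            seen.add(oid)
            batch.append((oid, path))
    return batch

assert _gather_missing_sketch(
    [(b'a', b'f1'), (b'a', b'f2'), (b'b', b'f3')], have={b'b'}
) == [(b'a', b'f1')]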


def _canskipupload(repo):
    # Skip if this hasn't been passed to reposetup()
    if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
        return True

    # if remotestore is a null store, upload is a no-op and can be skipped
    return isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)


def candownload(repo):
    # Skip if this hasn't been passed to reposetup()
    if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
        return False

    # if remotestore is a null store, downloads will lead to nothing
    return not isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
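
# blobstore._nullremote is a null object: pointing the store at 'null' turns
# transfers into no-ops, which the two helpers above detect via isinstance().
# A minimal sketch of the pattern:
class _NullRemoteSketch:
    def writebatch(self, pointers, fromstore):
        pass  # intentionally discard uploads

def _canskipupload_sketch(remote):
    return isinstance(remote, _NullRemoteSketch)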


def uploadblobsfromrevs(repo, revs):
    """upload lfs blobs introduced by revs"""
    if _canskipupload(repo):
        return
    pointers = extractpointers(repo, revs)
    uploadblobs(repo, pointers)


def prepush(pushop):
    """Prepush hook.

    Read through the revisions to push, looking for filelog entries that can be
    deserialized into metadata so that we can block the push on their upload to
    the remote blobstore.
    """
    return uploadblobsfromrevs(pushop.repo, pushop.outgoing.missing)


@eh.wrapfunction(exchange, 'push')
def push(orig, repo, remote, *args, **kwargs):
    """bail on push if the extension isn't enabled on remote when needed, and
    update the remote store based on the destination path."""
    if b'lfs' in repo.requirements:
        # If the remote peer is for a local repo, the requirement tests in the
        # base class method enforce lfs support. Otherwise, some revisions in
        # this repo use lfs, and the remote repo needs the extension loaded.
        if not remote.local() and not remote.capable(b'lfs'):
            # This is a copy of the message in exchange.push() when requirements
            # are missing between local repos.
            m = _(b"required features are not supported in the destination: %s")
            raise error.Abort(
                m % b'lfs', hint=_(b'enable the lfs extension on the server')
            )

        # Repositories where this extension is disabled won't have the field.
        # But if there's a requirement, then the extension must be loaded AND
        # there may be blobs to push.
        remotestore = repo.svfs.lfsremoteblobstore
        try:
            repo.svfs.lfsremoteblobstore = blobstore.remote(repo, remote.url())
            return orig(repo, remote, *args, **kwargs)
        finally:
            repo.svfs.lfsremoteblobstore = remotestore
    else:
        return orig(repo, remote, *args, **kwargs)
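
# The store swap above is the classic save/set/restore-in-finally pattern;
# the same idea expressed as a reusable context manager (a sketch, not an API
# this module provides):
import contextlib

@contextlib.contextmanager
def _swappedattr(obj, name, value):
    saved = getattr(obj, name)
    setattr(obj, name, value)
    try:
        yield
    finally:
        setattr(obj, name, saved)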


# when writing a bundle via "hg bundle" command, upload related LFS blobs
@eh.wrapfunction(bundle2, 'writenewbundle')
def writenewbundle(
    orig, ui, repo, source, filename, bundletype, outgoing, *args, **kwargs
):
    """upload LFS blobs added by outgoing revisions on 'hg bundle'"""
    uploadblobsfromrevs(repo, outgoing.missing)
    return orig(
        ui, repo, source, filename, bundletype, outgoing, *args, **kwargs
    )


def extractpointers(repo, revs):
    """return a list of lfs pointers added by given revs"""
    repo.ui.debug(b'lfs: computing set of blobs to upload\n')
    pointers = {}

    makeprogress = repo.ui.makeprogress
    with makeprogress(
        _(b'lfs search'), _(b'changesets'), len(revs)
    ) as progress:
        for r in revs:
            ctx = repo[r]
            for p in pointersfromctx(ctx).values():
                pointers[p.oid()] = p
            progress.increment()
    return sorted(pointers.values(), key=lambda p: p.oid())


def pointerfromctx(ctx, f, removed=False):
    """return a pointer for the named file from the given changectx, or None if
    the file isn't LFS.

    Optionally, the pointer for a file deleted from the context can be returned.
    Since no such pointer is actually stored, and to distinguish from a non-LFS
    file, this pointer is represented by an empty dict.
    """
    _ctx = ctx
    if f not in ctx:
        if not removed:
            return None
        if f in ctx.p1():
            _ctx = ctx.p1()
        elif f in ctx.p2():
            _ctx = ctx.p2()
        else:
            return None
    fctx = _ctx[f]
    if not _islfs(fctx.filelog()._revlog, fctx.filenode()):
        return None
    try:
        p = pointer.deserialize(fctx.rawdata())
        if ctx == _ctx:
            return p
        return {}
    except pointer.InvalidPointer as ex:
        raise error.Abort(
            _(b'lfs: corrupted pointer (%s@%s): %s\n')
            % (f, short(_ctx.node()), ex)
        )
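
# Callers get a three-way result: None for a non-LFS file, an empty dict for
# an LFS file removed from ctx, and a live pointer otherwise. Sketch of how a
# caller distinguishes the cases:
def _describe_sketch(p):
    if p is None:
        return 'not an lfs file'
    if not p:  # empty dict stands in for a removed LFS file
        return 'removed lfs file'
    return 'lfs pointer'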


def pointersfromctx(ctx, removed=False):
    """return a dict {path: pointer} for given single changectx.

    If ``removed`` == True and the LFS file was removed from ``ctx``, the value
    stored for the path is an empty dict.
    """
    result = {}
    m = ctx.repo().narrowmatch()

    # TODO: consider manifest.fastread() instead
    for f in ctx.files():
        if not m(f):
            continue
        p = pointerfromctx(ctx, f, removed=removed)
        if p is not None:
            result[f] = p
    return result


def uploadblobs(repo, pointers):
    """upload given pointers from local blobstore"""
    if not pointers:
        return

    remoteblob = repo.svfs.lfsremoteblobstore
    remoteblob.writebatch(pointers, repo.svfs.lfslocalblobstore)


@eh.wrapfunction(upgrade_engine, 'finishdatamigration')
def upgradefinishdatamigration(orig, ui, srcrepo, dstrepo, requirements):
    orig(ui, srcrepo, dstrepo, requirements)

    # Skip if this hasn't been passed to reposetup()
    if util.safehasattr(srcrepo.svfs, 'lfslocalblobstore') and util.safehasattr(
        dstrepo.svfs, 'lfslocalblobstore'
    ):
        srclfsvfs = srcrepo.svfs.lfslocalblobstore.vfs
        dstlfsvfs = dstrepo.svfs.lfslocalblobstore.vfs

        for dirpath, dirs, files in srclfsvfs.walk():
            for oid in files:
                ui.write(_(b'copying lfs blob %s\n') % oid)
                lfutil.link(srclfsvfs.join(oid), dstlfsvfs.join(oid))
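
# Because blobs are content-addressed by oid, hardlinking them into the
# upgraded repository is safe: equal names imply equal bytes. A standalone
# sketch of link-with-copy-fallback (a hypothetical helper, not lfutil's
# exact logic):
import os
import shutil

def _link_or_copy_sketch(src, dst):
    d = os.path.dirname(dst)
    if d:
        os.makedirs(d, exist_ok=True)
    try:
        os.link(src, dst)  # cheap hardlink when src/dst share a filesystem
    except OSError:
        shutil.copy2(src, dst)  # fall back to a real copy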


@eh.wrapfunction(upgrade_actions, 'preservedrequirements')
@eh.wrapfunction(upgrade_actions, 'supporteddestrequirements')
def upgraderequirements(orig, repo):
    reqs = orig(repo)
    if b'lfs' in repo.requirements:
        reqs.add(b'lfs')
    return reqs