# Mercurial changeset r37583:9c7a25ef (branch: default), by Matt Harbison:
# "lfs: handle paths that don't end with '/' when inferring the blob store"
# blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
#
# Copyright 2017 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import hashlib
import json
import os
import re
import socket

from mercurial.i18n import _

from mercurial import (
    error,
    pathutil,
    pycompat,
    url as urlmod,
    util,
    vfs as vfsmod,
    worker,
)

from ..largefiles import lfutil

31 # 64 bytes for SHA256
31 # 64 bytes for SHA256
32 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
32 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
33
33
class lfsvfs(vfsmod.vfs):
    """A vfs that stores blobs named by oid as ``xx/xxxx...`` files."""

    def join(self, path):
        """split the path at first two characters, like: XX/XXXXX..."""
        if not _lfsre.match(path):
            raise error.ProgrammingError('unexpected lfs path: %s' % path)
        return super(lfsvfs, self).join(path[0:2], path[2:])

    def walk(self, path=None, onerror=None):
        """Yield (dirpath, [], oids) tuple for blobs under path

        Oids only exist in the root of this vfs, so dirpath is always ''.
        """
        root = os.path.normpath(self.base)
        # when dirpath == root, dirpath[prefixlen:] becomes empty
        # because len(dirpath) < prefixlen.
        prefixlen = len(pathutil.normasprefix(root))
        oids = []

        start = self.reljoin(self.base, path or '')
        for dirpath, dirs, files in os.walk(start, onerror=onerror):
            dirpath = dirpath[prefixlen:]

            # Silently skip unexpected files and directories: only the
            # two-character oid-prefix directories can hold blobs.
            if len(dirpath) == 2:
                oids.extend(dirpath + f for f in files
                            if _lfsre.match(dirpath + f))

        yield ('', [], oids)
62
62
class nullvfs(lfsvfs):
    """A blackhole blob store: nothing exists, reads fail, writes vanish."""

    def __init__(self):
        # Deliberately skip lfsvfs/vfs __init__: there is no base directory.
        pass

    def exists(self, oid):
        return False

    def read(self, oid):
        # store.read() calls into here if the blob doesn't exist in its
        # self.vfs.  Raise the same error as a normal vfs when asked to read a
        # file that doesn't exist.  The only difference is the full file path
        # isn't available in the error.
        raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)

    def walk(self, path=None, onerror=None):
        return ('', [], [])

    def write(self, oid, data):
        pass
82
82
class filewithprogress(object):
    """a file-like object that supports __len__ and read.

    Useful to provide progress information for how many bytes are read.
    """

    def __init__(self, fp, callback):
        self._fp = fp
        self._callback = callback  # func(readsize)
        # Determine the total size up front so __len__ is O(1).
        fp.seek(0, os.SEEK_END)
        self._len = fp.tell()
        fp.seek(0)

    def __len__(self):
        return self._len

    def read(self, size):
        # After EOF the underlying file has been released; keep returning b''.
        if self._fp is None:
            return b''
        data = self._fp.read(size)
        if not data:
            # EOF: close and drop the file so it cannot be read again.
            self._fp.close()
            self._fp = None
        elif self._callback:
            self._callback(len(data))
        return data
110
110
class local(object):
    """Local blobstore for large file contents.

    This blobstore is used both as a cache and as a staging area for large
    blobs to be uploaded to the remote blobstore.
    """

    def __init__(self, repo):
        fullpath = repo.svfs.join('lfs/objects')
        self.vfs = lfsvfs(fullpath)

        if repo.ui.configbool('experimental', 'lfs.disableusercache'):
            self.cachevfs = nullvfs()
        else:
            usercache = lfutil._usercachedir(repo.ui, 'lfs')
            self.cachevfs = lfsvfs(usercache)
        self.ui = repo.ui

    def open(self, oid):
        """Open a read-only file descriptor to the named blob, in either the
        usercache or the local store."""
        # The usercache is the most likely place to hold the file.  Commit will
        # write to both it and the local store, as will anything that downloads
        # the blobs.  However, things like clone without an update won't
        # populate the local store.  For an init + push of a local clone,
        # the usercache is the only place it _could_ be.  If not present, the
        # missing file msg here will indicate the local repo, not the usercache.
        if self.cachevfs.exists(oid):
            return self.cachevfs(oid, 'rb')

        return self.vfs(oid, 'rb')

    def download(self, oid, src):
        """Read the blob from the remote source in chunks, verify the content,
        and write to this local blobstore."""
        sha256 = hashlib.sha256()

        with self.vfs(oid, 'wb', atomictemp=True) as fp:
            for chunk in util.filechunkiter(src, size=1048576):
                fp.write(chunk)
                sha256.update(chunk)

        realoid = sha256.hexdigest()
        if realoid != oid:
            raise error.Abort(_('corrupt remote lfs object: %s') % oid)

        self._linktousercache(oid)

    def write(self, oid, data):
        """Write blob to local blobstore.

        This should only be called from the filelog during a commit or similar.
        As such, there is no need to verify the data.  Imports from a remote
        store must use ``download()`` instead."""
        with self.vfs(oid, 'wb', atomictemp=True) as fp:
            fp.write(data)

        self._linktousercache(oid)

    def _linktousercache(self, oid):
        # XXX: should we verify the content of the cache, and hardlink back to
        # the local store on success, but truncate, write and link on failure?
        if (not self.cachevfs.exists(oid)
            and not isinstance(self.cachevfs, nullvfs)):
            self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
            lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))

    def read(self, oid, verify=True):
        """Read blob from local blobstore."""
        if not self.vfs.exists(oid):
            blob = self._read(self.cachevfs, oid, verify)

            # Even if revlog will verify the content, it needs to be verified
            # now before making the hardlink to avoid propagating corrupt blobs.
            # Don't abort if corruption is detected, because `hg verify` will
            # give more useful info about the corruption- simply don't add the
            # hardlink.
            if verify or hashlib.sha256(blob).hexdigest() == oid:
                self.ui.note(_('lfs: found %s in the usercache\n') % oid)
                lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
        else:
            self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
            blob = self._read(self.vfs, oid, verify)
        return blob

    def _read(self, vfs, oid, verify):
        """Read blob (after verifying) from the given store"""
        blob = vfs.read(oid)
        if verify:
            _verify(oid, blob)
        return blob

    def verify(self, oid):
        """Indicate whether or not the hash of the underlying file matches its
        name."""
        sha256 = hashlib.sha256()

        with self.open(oid) as fp:
            for chunk in util.filechunkiter(fp, size=1048576):
                sha256.update(chunk)

        return oid == sha256.hexdigest()

    def has(self, oid):
        """Returns True if the local blobstore contains the requested blob,
        False otherwise."""
        return self.cachevfs.exists(oid) or self.vfs.exists(oid)
218
218
class _gitlfsremote(object):
    """Remote blobstore speaking the Git-LFS batch + basic transfer protocol
    over http(s)."""

    def __init__(self, repo, url):
        ui = repo.ui
        self.ui = ui
        baseurl, authinfo = url.authinfo()
        self.baseurl = baseurl.rstrip('/')
        useragent = repo.ui.config('experimental', 'lfs.user-agent')
        if not useragent:
            useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
        self.urlopener = urlmod.opener(ui, authinfo, useragent)
        self.retry = ui.configint('lfs', 'retry')

    def writebatch(self, pointers, fromstore):
        """Batch upload from local to remote blobstore."""
        self._batch(_deduplicate(pointers), fromstore, 'upload')

    def readbatch(self, pointers, tostore):
        """Batch download from remote to local blostore."""
        self._batch(_deduplicate(pointers), tostore, 'download')

    def _batchrequest(self, pointers, action):
        """Get metadata about objects pointed by pointers for given action

        Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
        """
        objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
        requestdata = json.dumps({
            'objects': objects,
            'operation': action,
        })
        batchreq = util.urlreq.request('%s/objects/batch' % self.baseurl,
                                       data=requestdata)
        batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
        batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
        try:
            rsp = self.urlopener.open(batchreq)
            rawjson = rsp.read()
        except util.urlerr.httperror as ex:
            raise LfsRemoteError(_('LFS HTTP error: %s (action=%s)')
                                 % (ex, action))
        try:
            response = json.loads(rawjson)
        except ValueError:
            raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
                                 % rawjson)

        if self.ui.debugflag:
            self.ui.debug('Status: %d\n' % rsp.status)
            # lfs-test-server and hg serve return headers in different order
            self.ui.debug('%s\n'
                          % '\n'.join(sorted(str(rsp.info()).splitlines())))

            if 'objects' in response:
                response['objects'] = sorted(response['objects'],
                                             key=lambda p: p['oid'])
            self.ui.debug('%s\n'
                          % json.dumps(response, indent=2,
                                       separators=('', ': '), sort_keys=True))

        return response

    def _checkforservererror(self, pointers, responses, action):
        """Scans errors from objects

        Raises LfsRemoteError if any objects have an error"""
        # Loop-invariant: build the oid -> pointer map once, not per response.
        ptrmap = {p.oid(): p for p in pointers}
        for response in responses:
            # The server should return 404 when objects cannot be found. Some
            # server implementation (ex. lfs-test-server)  does not set "error"
            # but just removes "download" from "actions". Treat that case
            # as the same as 404 error.
            if 'error' not in response:
                if (action == 'download'
                    and action not in response.get('actions', [])):
                    code = 404
                else:
                    continue
            else:
                # An error dict without a code doesn't make much sense, so
                # treat as a server error.
                code = response.get('error').get('code', 500)

            p = ptrmap.get(response['oid'], None)
            if p:
                filename = getattr(p, 'filename', 'unknown')
                errors = {
                    404: 'The object does not exist',
                    410: 'The object was removed by the owner',
                    422: 'Validation error',
                    500: 'Internal server error',
                }
                msg = errors.get(code, 'status code %d' % code)
                raise LfsRemoteError(_('LFS server error for "%s": %s')
                                     % (filename, msg))
            else:
                raise LfsRemoteError(
                    _('LFS server error. Unsolicited response for oid %s')
                    % response['oid'])

    def _extractobjects(self, response, pointers, action):
        """extract objects from response of the batch API

        response: parsed JSON object returned by batch API
        return response['objects'] filtered by action
        raise if any object has an error
        """
        # Scan errors from objects - fail early
        objects = response.get('objects', [])
        self._checkforservererror(pointers, objects, action)

        # Filter objects with given action. Practically, this skips uploading
        # objects which exist in the server.
        filteredobjects = [o for o in objects if action in o.get('actions', [])]

        return filteredobjects

    def _basictransfer(self, obj, action, localstore):
        """Download or upload a single object using basic transfer protocol

        obj: dict, an object description returned by batch API
        action: string, one of ['upload', 'download']
        localstore: blobstore.local

        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
        basic-transfers.md
        """
        oid = pycompat.bytestr(obj['oid'])

        href = pycompat.bytestr(obj['actions'][action].get('href'))
        headers = obj['actions'][action].get('header', {}).items()

        request = util.urlreq.request(href)
        if action == 'upload':
            # If uploading blobs, read data from local blobstore.
            if not localstore.verify(oid):
                raise error.Abort(_('detected corrupt lfs object: %s') % oid,
                                  hint=_('run hg verify'))
            request.data = filewithprogress(localstore.open(oid), None)
            request.get_method = lambda: 'PUT'
            request.add_header('Content-Type', 'application/octet-stream')

        for k, v in headers:
            request.add_header(k, v)

        response = b''
        try:
            req = self.urlopener.open(request)

            if self.ui.debugflag:
                self.ui.debug('Status: %d\n' % req.status)
                # lfs-test-server and hg serve return headers in different order
                self.ui.debug('%s\n'
                              % '\n'.join(sorted(str(req.info()).splitlines())))

            if action == 'download':
                # If downloading blobs, store downloaded data to local blobstore
                localstore.download(oid, req)
            else:
                while True:
                    data = req.read(1048576)
                    if not data:
                        break
                    response += data
                if response:
                    self.ui.debug('lfs %s response: %s' % (action, response))
        except util.urlerr.httperror as ex:
            if self.ui.debugflag:
                self.ui.debug('%s: %s\n' % (oid, ex.read()))
            raise LfsRemoteError(_('HTTP error: %s (oid=%s, action=%s)')
                                 % (ex, oid, action))

    def _batch(self, pointers, localstore, action):
        if action not in ['upload', 'download']:
            raise error.ProgrammingError('invalid Git-LFS action: %s' % action)

        response = self._batchrequest(pointers, action)
        objects = self._extractobjects(response, pointers, action)
        total = sum(x.get('size', 0) for x in objects)
        sizes = {}
        for obj in objects:
            sizes[obj.get('oid')] = obj.get('size', 0)
        topic = {'upload': _('lfs uploading'),
                 'download': _('lfs downloading')}[action]
        if len(objects) > 1:
            self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
                         % (len(objects), util.bytecount(total)))
        self.ui.progress(topic, 0, total=total)
        def transfer(chunk):
            for obj in chunk:
                objsize = obj.get('size', 0)
                if self.ui.verbose:
                    if action == 'download':
                        msg = _('lfs: downloading %s (%s)\n')
                    elif action == 'upload':
                        msg = _('lfs: uploading %s (%s)\n')
                    self.ui.note(msg % (obj.get('oid'),
                                        util.bytecount(objsize)))
                retry = self.retry
                while True:
                    try:
                        self._basictransfer(obj, action, localstore)
                        yield 1, obj.get('oid')
                        break
                    except socket.error as ex:
                        if retry > 0:
                            self.ui.note(
                                _('lfs: failed: %r (remaining retry %d)\n')
                                % (ex, retry))
                            retry -= 1
                            continue
                        raise

        # Until https multiplexing gets sorted out
        if self.ui.configbool('experimental', 'lfs.worker-enable'):
            oids = worker.worker(self.ui, 0.1, transfer, (),
                                 sorted(objects, key=lambda o: o.get('oid')))
        else:
            oids = transfer(sorted(objects, key=lambda o: o.get('oid')))

        processed = 0
        blobs = 0
        for _one, oid in oids:
            processed += sizes[oid]
            blobs += 1
            self.ui.progress(topic, processed, total=total)
            self.ui.note(_('lfs: processed: %s\n') % oid)
        self.ui.progress(topic, pos=None, total=total)

        if blobs > 0:
            if action == 'upload':
                self.ui.status(_('lfs: uploaded %d files (%s)\n')
                               % (blobs, util.bytecount(processed)))
            # TODO: coalesce the download requests, and comment this in
            #elif action == 'download':
            #    self.ui.status(_('lfs: downloaded %d files (%s)\n')
            #                   % (blobs, util.bytecount(processed)))

    def __del__(self):
        # copied from mercurial/httppeer.py
        urlopener = getattr(self, 'urlopener', None)
        if urlopener:
            for h in urlopener.handlers:
                h.close()
                getattr(h, "close_all", lambda : None)()
465
465
class _dummyremote(object):
    """Dummy store storing blobs to temp directory."""

    def __init__(self, repo, url):
        fullpath = repo.vfs.join('lfs', url.path)
        self.vfs = lfsvfs(fullpath)

    def writebatch(self, pointers, fromstore):
        # Copy each unique blob out of the local store, verifying on read.
        for p in _deduplicate(pointers):
            content = fromstore.read(p.oid(), verify=True)
            with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
                fp.write(content)

    def readbatch(self, pointers, tostore):
        # download() verifies the content as it writes to the local store.
        for p in _deduplicate(pointers):
            with self.vfs(p.oid(), 'rb') as fp:
                tostore.download(p.oid(), fp)
483
483
484 class _nullremote(object):
484 class _nullremote(object):
485 """Null store storing blobs to /dev/null."""
485 """Null store storing blobs to /dev/null."""
486
486
487 def __init__(self, repo, url):
487 def __init__(self, repo, url):
488 pass
488 pass
489
489
490 def writebatch(self, pointers, fromstore):
490 def writebatch(self, pointers, fromstore):
491 pass
491 pass
492
492
493 def readbatch(self, pointers, tostore):
493 def readbatch(self, pointers, tostore):
494 pass
494 pass
495
495
class _promptremote(object):
    """Prompt user to set lfs.url when accessed."""

    def __init__(self, repo, url):
        """Nothing to set up; any batch operation aborts."""

    def writebatch(self, pointers, fromstore, ui=None):
        self._prompt()

    def readbatch(self, pointers, tostore, ui=None):
        self._prompt()

    def _prompt(self):
        # No usable endpoint could be determined; tell the user what to do.
        raise error.Abort(_('lfs.url needs to be configured'))
510
510
# Map url scheme -> remote store implementation.  The ``None`` entry is hit
# when no scheme could be determined at all, and prompts the user to set
# ``lfs.url`` (see _promptremote).
_storemap = {
    'https': _gitlfsremote,
    'http': _gitlfsremote,
    'file': _dummyremote,
    'null': _nullremote,
    None: _promptremote,
}
518
518
def _deduplicate(pointers):
    """Remove any duplicate oids that exist in the list"""
    # Later pointers with the same oid overwrite earlier ones; sortdict
    # keeps a deterministic ordering of the surviving entries.
    byoid = util.sortdict()
    for pointer in pointers:
        byoid[pointer.oid()] = pointer
    return byoid.values()
524 return reduced.values()
525
525
526 def _verify(oid, content):
526 def _verify(oid, content):
527 realoid = hashlib.sha256(content).hexdigest()
527 realoid = hashlib.sha256(content).hexdigest()
528 if realoid != oid:
528 if realoid != oid:
529 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
529 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
530 hint=_('run hg verify'))
530 hint=_('run hg verify'))
531
531
def remote(repo, remote=None):
    """remotestore factory. return a store in _storemap depending on config

    If ``lfs.url`` is specified, use that remote endpoint.  Otherwise, try to
    infer the endpoint, based on the remote repository using the same path
    adjustments as git.  As an extension, 'http' is supported as well so that
    ``hg serve`` works out of the box.

    https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
    """
    lfsurl = repo.ui.config('lfs', 'url')
    url = util.url(lfsurl or '')
    if lfsurl is None:
        if remote:
            path = remote
        elif util.safehasattr(repo, '_subtoppath'):
            # The pull command sets this during the optional update phase, which
            # tells exactly where the pull originated, whether 'paths.default'
            # or explicit.
            path = repo._subtoppath
        else:
            # TODO: investigate 'paths.remote:lfsurl' style path customization,
            # and fall back to inferring from 'paths.remote' if unspecified.
            path = repo.ui.config('paths', 'default') or ''

        defaulturl = util.url(path)

        # TODO: support local paths as well.
        # TODO: consider the ssh -> https transformation that git applies
        if defaulturl.scheme in (b'http', b'https'):
            # Ensure exactly one '/' before the suffix, whether or not the
            # configured path already ends with one.  [-1:] inspects the
            # final byte (the previous [:-1] sliced the last byte *off*,
            # appending '/' even to paths that already ended with one).
            if defaulturl.path and defaulturl.path[-1:] != b'/':
                defaulturl.path += b'/'
            # Parentheses matter: '+' binds tighter than 'or', so without
            # them a non-empty path short-circuits the 'or' and the
            # '.git/info/lfs' suffix is never appended.
            defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'

            url = util.url(bytes(defaulturl))
            repo.ui.note(_('lfs: assuming remote store: %s\n') % url)

    scheme = url.scheme
    if scheme not in _storemap:
        raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
    return _storemap[scheme](repo, url)
568
573
class LfsRemoteError(error.RevlogError):
    """An lfs-specific error (a ``RevlogError`` subclass)."""
General Comments 0
You need to be logged in to leave comments. Login now