lfs: use a context manager to control the progress bar lifetime
Matt Harbison
r39426:b26350d9 default
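The point of the change: _batch() previously created the progress bar by hand
and had to remember to call progress.complete() at the end, so an exception
during a transfer could leave a stale bar on the terminal. Wrapping the
reporting loop in `with ui.makeprogress(...)` ties the bar's teardown to scope
exit. A minimal sketch of the before/after pattern (illustrative only:
`transfer` here is a hypothetical generator yielding running byte counts, and
only ui.makeprogress() is Mercurial API):

    # Before: manual lifetime management.  If the loop raises,
    # complete() is skipped and the bar is left on screen.
    progress = ui.makeprogress(topic, total=total)
    progress.update(0)
    for processed in transfer():
        progress.update(processed)
    progress.complete()

    # After: __exit__ tears the bar down however the block ends.
    with ui.makeprogress(topic, total=total) as progress:
        progress.update(0)
        for processed in transfer():
            progress.update(processed)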
@@ -1,581 +1,581 @@
# blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
#
# Copyright 2017 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import hashlib
import json
import os
import re
import socket

from mercurial.i18n import _

from mercurial import (
    error,
    pathutil,
    pycompat,
    url as urlmod,
    util,
    vfs as vfsmod,
    worker,
)

from ..largefiles import lfutil

# 64 bytes for SHA256
_lfsre = re.compile(br'\A[a-f0-9]{64}\Z')

class lfsvfs(vfsmod.vfs):
    def join(self, path):
        """split the path at first two characters, like: XX/XXXXX..."""
        if not _lfsre.match(path):
            raise error.ProgrammingError('unexpected lfs path: %s' % path)
        return super(lfsvfs, self).join(path[0:2], path[2:])

    def walk(self, path=None, onerror=None):
        """Yield (dirpath, [], oids) tuple for blobs under path

        Oids only exist in the root of this vfs, so dirpath is always ''.
        """
        root = os.path.normpath(self.base)
        # when dirpath == root, dirpath[prefixlen:] becomes empty
        # because len(dirpath) < prefixlen.
        prefixlen = len(pathutil.normasprefix(root))
        oids = []

        for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
                                            onerror=onerror):
            dirpath = dirpath[prefixlen:]

            # Silently skip unexpected files and directories
            if len(dirpath) == 2:
                oids.extend([dirpath + f for f in files
                             if _lfsre.match(dirpath + f)])

        yield ('', [], oids)

class nullvfs(lfsvfs):
    def __init__(self):
        pass

    def exists(self, oid):
        return False

    def read(self, oid):
        # store.read() calls into here if the blob doesn't exist in its
        # self.vfs.  Raise the same error as a normal vfs when asked to read a
        # file that doesn't exist.  The only difference is the full file path
        # isn't available in the error.
        raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)

    def walk(self, path=None, onerror=None):
        return ('', [], [])

    def write(self, oid, data):
        pass

class filewithprogress(object):
    """a file-like object that supports __len__ and read.

    Useful to provide progress information for how many bytes are read.
    """

    def __init__(self, fp, callback):
        self._fp = fp
        self._callback = callback # func(readsize)
        fp.seek(0, os.SEEK_END)
        self._len = fp.tell()
        fp.seek(0)

    def __len__(self):
        return self._len

    def read(self, size):
        if self._fp is None:
            return b''
        data = self._fp.read(size)
        if data:
            if self._callback:
                self._callback(len(data))
        else:
            self._fp.close()
            self._fp = None
        return data

class local(object):
    """Local blobstore for large file contents.

    This blobstore is used both as a cache and as a staging area for large blobs
    to be uploaded to the remote blobstore.
    """

    def __init__(self, repo):
        fullpath = repo.svfs.join('lfs/objects')
        self.vfs = lfsvfs(fullpath)

        if repo.ui.configbool('experimental', 'lfs.disableusercache'):
            self.cachevfs = nullvfs()
        else:
            usercache = lfutil._usercachedir(repo.ui, 'lfs')
            self.cachevfs = lfsvfs(usercache)
        self.ui = repo.ui

    def open(self, oid):
        """Open a read-only file descriptor to the named blob, in either the
        usercache or the local store."""
        # The usercache is the most likely place to hold the file.  Commit will
        # write to both it and the local store, as will anything that downloads
        # the blobs.  However, things like clone without an update won't
        # populate the local store.  For an init + push of a local clone,
        # the usercache is the only place it _could_ be.  If not present, the
        # missing file msg here will indicate the local repo, not the usercache.
        if self.cachevfs.exists(oid):
            return self.cachevfs(oid, 'rb')

        return self.vfs(oid, 'rb')

    def download(self, oid, src):
        """Read the blob from the remote source in chunks, verify the content,
        and write to this local blobstore."""
        sha256 = hashlib.sha256()

        with self.vfs(oid, 'wb', atomictemp=True) as fp:
            for chunk in util.filechunkiter(src, size=1048576):
                fp.write(chunk)
                sha256.update(chunk)

            realoid = sha256.hexdigest()
            if realoid != oid:
                raise LfsCorruptionError(_('corrupt remote lfs object: %s')
                                         % oid)

        self._linktousercache(oid)

    def write(self, oid, data):
        """Write blob to local blobstore.

        This should only be called from the filelog during a commit or similar.
        As such, there is no need to verify the data.  Imports from a remote
        store must use ``download()`` instead."""
        with self.vfs(oid, 'wb', atomictemp=True) as fp:
            fp.write(data)

        self._linktousercache(oid)

    def _linktousercache(self, oid):
        # XXX: should we verify the content of the cache, and hardlink back to
        # the local store on success, but truncate, write and link on failure?
        if (not self.cachevfs.exists(oid)
            and not isinstance(self.cachevfs, nullvfs)):
            self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
            lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))

    def read(self, oid, verify=True):
        """Read blob from local blobstore."""
        if not self.vfs.exists(oid):
            blob = self._read(self.cachevfs, oid, verify)

            # Even if revlog will verify the content, it needs to be verified
            # now before making the hardlink to avoid propagating corrupt blobs.
            # Don't abort if corruption is detected, because `hg verify` will
            # give more useful info about the corruption- simply don't add the
            # hardlink.
            if verify or hashlib.sha256(blob).hexdigest() == oid:
                self.ui.note(_('lfs: found %s in the usercache\n') % oid)
                lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
        else:
            self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
            blob = self._read(self.vfs, oid, verify)
        return blob

    def _read(self, vfs, oid, verify):
        """Read blob (after verifying) from the given store"""
        blob = vfs.read(oid)
        if verify:
            _verify(oid, blob)
        return blob

    def verify(self, oid):
        """Indicate whether or not the hash of the underlying file matches its
        name."""
        sha256 = hashlib.sha256()

        with self.open(oid) as fp:
            for chunk in util.filechunkiter(fp, size=1048576):
                sha256.update(chunk)

        return oid == sha256.hexdigest()

    def has(self, oid):
        """Returns True if the local blobstore contains the requested blob,
        False otherwise."""
        return self.cachevfs.exists(oid) or self.vfs.exists(oid)

class _gitlfsremote(object):

    def __init__(self, repo, url):
        ui = repo.ui
        self.ui = ui
        baseurl, authinfo = url.authinfo()
        self.baseurl = baseurl.rstrip('/')
        useragent = repo.ui.config('experimental', 'lfs.user-agent')
        if not useragent:
            useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
        self.urlopener = urlmod.opener(ui, authinfo, useragent)
        self.retry = ui.configint('lfs', 'retry')

    def writebatch(self, pointers, fromstore):
        """Batch upload from local to remote blobstore."""
        self._batch(_deduplicate(pointers), fromstore, 'upload')

    def readbatch(self, pointers, tostore):
238 """Batch download from remote to local blostore."""
238 """Batch download from remote to local blostore."""
        self._batch(_deduplicate(pointers), tostore, 'download')

    def _batchrequest(self, pointers, action):
        """Get metadata about objects pointed by pointers for given action

        Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
        """
        objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
        requestdata = json.dumps({
            'objects': objects,
            'operation': action,
        })
        batchreq = util.urlreq.request('%s/objects/batch' % self.baseurl,
                                       data=requestdata)
        batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
        batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
        try:
            rsp = self.urlopener.open(batchreq)
            rawjson = rsp.read()
        except util.urlerr.httperror as ex:
            raise LfsRemoteError(_('LFS HTTP error: %s (action=%s)')
                                 % (ex, action))
        try:
            response = json.loads(rawjson)
        except ValueError:
            raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
                                 % rawjson)

        if self.ui.debugflag:
            self.ui.debug('Status: %d\n' % rsp.status)
            # lfs-test-server and hg serve return headers in different order
            self.ui.debug('%s\n'
                          % '\n'.join(sorted(str(rsp.info()).splitlines())))

            if 'objects' in response:
                response['objects'] = sorted(response['objects'],
                                             key=lambda p: p['oid'])
            self.ui.debug('%s\n'
                          % json.dumps(response, indent=2,
                                       separators=('', ': '), sort_keys=True))

        return response

    def _checkforservererror(self, pointers, responses, action):
        """Scans errors from objects

        Raises LfsRemoteError if any objects have an error"""
        for response in responses:
            # The server should return 404 when objects cannot be found.  Some
            # server implementations (e.g. lfs-test-server) do not set "error"
            # but just remove "download" from "actions".  Treat that case the
            # same as a 404 error.
            if 'error' not in response:
                if (action == 'download'
                    and action not in response.get('actions', [])):
                    code = 404
                else:
                    continue
            else:
                # An error dict without a code doesn't make much sense, so
                # treat as a server error.
                code = response.get('error').get('code', 500)

            ptrmap = {p.oid(): p for p in pointers}
            p = ptrmap.get(response['oid'], None)
            if p:
                filename = getattr(p, 'filename', 'unknown')
                errors = {
                    404: 'The object does not exist',
                    410: 'The object was removed by the owner',
                    422: 'Validation error',
                    500: 'Internal server error',
                }
                msg = errors.get(code, 'status code %d' % code)
                raise LfsRemoteError(_('LFS server error for "%s": %s')
                                     % (filename, msg))
            else:
                raise LfsRemoteError(
                    _('LFS server error. Unsolicited response for oid %s')
                    % response['oid'])

    def _extractobjects(self, response, pointers, action):
        """extract objects from response of the batch API

        response: parsed JSON object returned by batch API
        return response['objects'] filtered by action
        raise if any object has an error
        """
        # Scan errors from objects - fail early
        objects = response.get('objects', [])
        self._checkforservererror(pointers, objects, action)

        # Filter objects with given action. Practically, this skips uploading
        # objects which exist in the server.
        filteredobjects = [o for o in objects if action in o.get('actions', [])]

        return filteredobjects

    def _basictransfer(self, obj, action, localstore):
        """Download or upload a single object using basic transfer protocol

        obj: dict, an object description returned by batch API
        action: string, one of ['upload', 'download']
        localstore: blobstore.local

        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
basic-transfers.md
        """
        oid = pycompat.bytestr(obj['oid'])

        href = pycompat.bytestr(obj['actions'][action].get('href'))
        headers = obj['actions'][action].get('header', {}).items()

        request = util.urlreq.request(href)
        if action == 'upload':
            # If uploading blobs, read data from local blobstore.
            if not localstore.verify(oid):
                raise error.Abort(_('detected corrupt lfs object: %s') % oid,
                                  hint=_('run hg verify'))
            request.data = filewithprogress(localstore.open(oid), None)
            request.get_method = lambda: 'PUT'
            request.add_header('Content-Type', 'application/octet-stream')

        for k, v in headers:
            request.add_header(k, v)

        response = b''
        try:
            req = self.urlopener.open(request)

            if self.ui.debugflag:
                self.ui.debug('Status: %d\n' % req.status)
                # lfs-test-server and hg serve return headers in different order
                self.ui.debug('%s\n'
                              % '\n'.join(sorted(str(req.info()).splitlines())))

            if action == 'download':
                # If downloading blobs, store downloaded data to local blobstore
                localstore.download(oid, req)
            else:
                while True:
                    data = req.read(1048576)
                    if not data:
                        break
                    response += data
                if response:
                    self.ui.debug('lfs %s response: %s' % (action, response))
        except util.urlerr.httperror as ex:
            if self.ui.debugflag:
                self.ui.debug('%s: %s\n' % (oid, ex.read()))
            raise LfsRemoteError(_('HTTP error: %s (oid=%s, action=%s)')
                                 % (ex, oid, action))

    def _batch(self, pointers, localstore, action):
        if action not in ['upload', 'download']:
            raise error.ProgrammingError('invalid Git-LFS action: %s' % action)

        response = self._batchrequest(pointers, action)
        objects = self._extractobjects(response, pointers, action)
        total = sum(x.get('size', 0) for x in objects)
        sizes = {}
        for obj in objects:
            sizes[obj.get('oid')] = obj.get('size', 0)
        topic = {'upload': _('lfs uploading'),
                 'download': _('lfs downloading')}[action]
        if len(objects) > 1:
            self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
                         % (len(objects), util.bytecount(total)))
-        progress = self.ui.makeprogress(topic, total=total)
-        progress.update(0)
+
        def transfer(chunk):
            for obj in chunk:
                objsize = obj.get('size', 0)
                if self.ui.verbose:
                    if action == 'download':
                        msg = _('lfs: downloading %s (%s)\n')
                    elif action == 'upload':
                        msg = _('lfs: uploading %s (%s)\n')
                    self.ui.note(msg % (obj.get('oid'),
                                        util.bytecount(objsize)))
                retry = self.retry
                while True:
                    try:
                        self._basictransfer(obj, action, localstore)
                        yield 1, obj.get('oid')
                        break
                    except socket.error as ex:
                        if retry > 0:
                            self.ui.note(
                                _('lfs: failed: %r (remaining retry %d)\n')
                                % (ex, retry))
                            retry -= 1
                            continue
                        raise

        # Until https multiplexing gets sorted out
        if self.ui.configbool('experimental', 'lfs.worker-enable'):
            oids = worker.worker(self.ui, 0.1, transfer, (),
                                 sorted(objects, key=lambda o: o.get('oid')))
        else:
            oids = transfer(sorted(objects, key=lambda o: o.get('oid')))

-        processed = 0
-        blobs = 0
-        for _one, oid in oids:
-            processed += sizes[oid]
-            blobs += 1
-            progress.update(processed)
-            self.ui.note(_('lfs: processed: %s\n') % oid)
-        progress.complete()
+        with self.ui.makeprogress(topic, total=total) as progress:
+            progress.update(0)
+            processed = 0
+            blobs = 0
+            for _one, oid in oids:
+                processed += sizes[oid]
+                blobs += 1
+                progress.update(processed)
+                self.ui.note(_('lfs: processed: %s\n') % oid)

        if blobs > 0:
            if action == 'upload':
                self.ui.status(_('lfs: uploaded %d files (%s)\n')
                               % (blobs, util.bytecount(processed)))
            elif action == 'download':
                self.ui.status(_('lfs: downloaded %d files (%s)\n')
                               % (blobs, util.bytecount(processed)))

    def __del__(self):
        # copied from mercurial/httppeer.py
        urlopener = getattr(self, 'urlopener', None)
        if urlopener:
            for h in urlopener.handlers:
                h.close()
                getattr(h, "close_all", lambda : None)()

class _dummyremote(object):
    """Dummy store storing blobs to temp directory."""

    def __init__(self, repo, url):
        fullpath = repo.vfs.join('lfs', url.path)
        self.vfs = lfsvfs(fullpath)

    def writebatch(self, pointers, fromstore):
        for p in _deduplicate(pointers):
            content = fromstore.read(p.oid(), verify=True)
            with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
                fp.write(content)

    def readbatch(self, pointers, tostore):
        for p in _deduplicate(pointers):
            with self.vfs(p.oid(), 'rb') as fp:
                tostore.download(p.oid(), fp)

class _nullremote(object):
    """Null store storing blobs to /dev/null."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore):
        pass

    def readbatch(self, pointers, tostore):
        pass

class _promptremote(object):
    """Prompt user to set lfs.url when accessed."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore, ui=None):
        self._prompt()

    def readbatch(self, pointers, tostore, ui=None):
        self._prompt()

    def _prompt(self):
        raise error.Abort(_('lfs.url needs to be configured'))

_storemap = {
    'https': _gitlfsremote,
    'http': _gitlfsremote,
    'file': _dummyremote,
    'null': _nullremote,
    None: _promptremote,
}

def _deduplicate(pointers):
    """Remove any duplicate oids that exist in the list"""
    reduced = util.sortdict()
    for p in pointers:
        reduced[p.oid()] = p
    return reduced.values()

def _verify(oid, content):
    realoid = hashlib.sha256(content).hexdigest()
    if realoid != oid:
        raise LfsCorruptionError(_('detected corrupt lfs object: %s') % oid,
                                 hint=_('run hg verify'))

def remote(repo, remote=None):
    """remotestore factory. return a store in _storemap depending on config

    If ``lfs.url`` is specified, use that remote endpoint.  Otherwise, try to
    infer the endpoint, based on the remote repository using the same path
    adjustments as git.  As an extension, 'http' is supported as well so that
    ``hg serve`` works out of the box.

    https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
    """
    lfsurl = repo.ui.config('lfs', 'url')
    url = util.url(lfsurl or '')
    if lfsurl is None:
        if remote:
            path = remote
        elif util.safehasattr(repo, '_subtoppath'):
            # The pull command sets this during the optional update phase, which
            # tells exactly where the pull originated, whether 'paths.default'
            # or explicit.
            path = repo._subtoppath
        else:
            # TODO: investigate 'paths.remote:lfsurl' style path customization,
            # and fall back to inferring from 'paths.remote' if unspecified.
            path = repo.ui.config('paths', 'default') or ''

        defaulturl = util.url(path)

        # TODO: support local paths as well.
        # TODO: consider the ssh -> https transformation that git applies
        if defaulturl.scheme in (b'http', b'https'):
            if defaulturl.path and defaulturl.path[-1:] != b'/':
                defaulturl.path += b'/'
            defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'

            url = util.url(bytes(defaulturl))
            repo.ui.note(_('lfs: assuming remote store: %s\n') % url)

    scheme = url.scheme
    if scheme not in _storemap:
        raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
    return _storemap[scheme](repo, url)

class LfsRemoteError(error.RevlogError):
    pass

class LfsCorruptionError(error.Abort):
    """Raised when a corrupt blob is detected, aborting an operation

    It exists to allow specialized handling on the server side."""
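For context on the new `with` usage in _batch() above: all the object
returned by ui.makeprogress() needs in order to work as a context manager is
an __enter__/__exit__ pair whose exit path calls complete(). The sketch below
is an illustrative stand-in for that shape, not Mercurial's actual helper
(which lives elsewhere in core and tracks more state than shown here):

    class progresssketch(object):
        """Illustrative stand-in for the object ui.makeprogress() returns."""

        def __init__(self, ui, topic, total=None):
            self.ui = ui
            self.topic = topic
            self.total = total

        def __enter__(self):
            return self

        def __exit__(self, exctype, excvalue, traceback):
            # Guarantees teardown even when the managed block raises,
            # which is exactly what the _batch() change relies on.
            self.complete()

        def update(self, pos):
            # Draw or refresh the bar for this topic (sketch only).
            self.ui.progress(self.topic, pos, total=self.total)

        def complete(self):
            # A pos of None closes out the topic in the classic
            # ui.progress() API.
            self.ui.progress(self.topic, None)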
@@ -1,428 +1,425 @@
# wrapper.py - methods wrapping core mercurial logic
#
# Copyright 2017 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import hashlib

from mercurial.i18n import _
from mercurial.node import bin, hex, nullid, short

from mercurial import (
    error,
    revlog,
    util,
)

from mercurial.utils import (
    stringutil,
)

from ..largefiles import lfutil

from . import (
    blobstore,
    pointer,
)

def allsupportedversions(orig, ui):
    versions = orig(ui)
    versions.add('03')
    return versions

def _capabilities(orig, repo, proto):
    '''Wrap server command to announce lfs server capability'''
    caps = orig(repo, proto)
    if util.safehasattr(repo.svfs, 'lfslocalblobstore'):
        # XXX: change to 'lfs=serve' when separate git server isn't required?
        caps.append('lfs')
    return caps

def bypasscheckhash(self, text):
    return False

def readfromstore(self, text):
49 """Read filelog content from local blobstore transform for flagprocessor.
49 """Read filelog content from local blobstore transform for flagprocessor.
50
50
51 Default tranform for flagprocessor, returning contents from blobstore.
51 Default tranform for flagprocessor, returning contents from blobstore.
52 Returns a 2-typle (text, validatehash) where validatehash is True as the
52 Returns a 2-typle (text, validatehash) where validatehash is True as the
53 contents of the blobstore should be checked using checkhash.
53 contents of the blobstore should be checked using checkhash.
54 """
54 """
    p = pointer.deserialize(text)
    oid = p.oid()
    store = self.opener.lfslocalblobstore
    if not store.has(oid):
        p.filename = self.filename
        self.opener.lfsremoteblobstore.readbatch([p], store)

    # The caller will validate the content
    text = store.read(oid, verify=False)

    # pack hg filelog metadata
    hgmeta = {}
    for k in p.keys():
        if k.startswith('x-hg-'):
            name = k[len('x-hg-'):]
            hgmeta[name] = p[k]
    if hgmeta or text.startswith('\1\n'):
        text = revlog.packmeta(hgmeta, text)

    return (text, True)

def writetostore(self, text):
    # hg filelog metadata (includes rename, etc)
    hgmeta, offset = revlog.parsemeta(text)
    if offset and offset > 0:
        # lfs blob does not contain hg filelog metadata
        text = text[offset:]

    # git-lfs only supports sha256
    oid = hex(hashlib.sha256(text).digest())
    self.opener.lfslocalblobstore.write(oid, text)

    # replace contents with metadata
    longoid = 'sha256:%s' % oid
    metadata = pointer.gitlfspointer(oid=longoid, size='%d' % len(text))

    # by default, we expect the content to be binary. however, LFS could also
    # be used for non-binary content. add a special entry for non-binary data.
    # this will be used by filectx.isbinary().
    if not stringutil.binary(text):
        # not hg filelog metadata (affecting commit hash), no "x-hg-" prefix
        metadata['x-is-binary'] = '0'

    # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
    if hgmeta is not None:
        for k, v in hgmeta.iteritems():
            metadata['x-hg-%s' % k] = v

    rawtext = metadata.serialize()
    return (rawtext, False)

def _islfs(rlog, node=None, rev=None):
    if rev is None:
        if node is None:
            # both None - likely working copy content where node is not ready
            return False
        rev = rlog.rev(node)
    else:
        node = rlog.node(rev)
    if node == nullid:
        return False
    flags = rlog.flags(rev)
    return bool(flags & revlog.REVIDX_EXTSTORED)

def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
                       cachedelta=None, node=None,
                       flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
    # The matcher isn't available if reposetup() wasn't called.
    lfstrack = self.opener.options.get('lfstrack')

    if lfstrack:
        textlen = len(text)
        # exclude hg rename meta from file size
        meta, offset = revlog.parsemeta(text)
        if offset:
            textlen -= offset

        if lfstrack(self.filename, textlen):
            flags |= revlog.REVIDX_EXTSTORED

    return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
                node=node, flags=flags, **kwds)

def filelogrenamed(orig, self, node):
    if _islfs(self, node):
        rawtext = self.revision(node, raw=True)
        if not rawtext:
            return False
        metadata = pointer.deserialize(rawtext)
        if 'x-hg-copy' in metadata and 'x-hg-copyrev' in metadata:
            return metadata['x-hg-copy'], bin(metadata['x-hg-copyrev'])
        else:
            return False
    return orig(self, node)

def filelogsize(orig, self, rev):
    if _islfs(self, rev=rev):
        # fast path: use lfs metadata to answer size
        rawtext = self.revision(rev, raw=True)
        metadata = pointer.deserialize(rawtext)
        return int(metadata['size'])
    return orig(self, rev)

def filectxcmp(orig, self, fctx):
    """returns True if text is different than fctx"""
    # some fctx (e.g. hg-git) are not based on basefilectx and do not have islfs
    if self.islfs() and getattr(fctx, 'islfs', lambda: False)():
        # fast path: check LFS oid
        p1 = pointer.deserialize(self.rawdata())
        p2 = pointer.deserialize(fctx.rawdata())
        return p1.oid() != p2.oid()
    return orig(self, fctx)

def filectxisbinary(orig, self):
    if self.islfs():
        # fast path: use lfs metadata to answer isbinary
        metadata = pointer.deserialize(self.rawdata())
        # if lfs metadata says nothing, assume it's binary by default
        return bool(int(metadata.get('x-is-binary', 1)))
    return orig(self)

def filectxislfs(self):
    return _islfs(self.filelog(), self.filenode())

def _updatecatformatter(orig, fm, ctx, matcher, path, decode):
    orig(fm, ctx, matcher, path, decode)
    fm.data(rawdata=ctx[path].rawdata())

def convertsink(orig, sink):
    sink = orig(sink)
    if sink.repotype == 'hg':
        class lfssink(sink.__class__):
            def putcommit(self, files, copies, parents, commit, source, revmap,
                          full, cleanp2):
                pc = super(lfssink, self).putcommit
                node = pc(files, copies, parents, commit, source, revmap, full,
                          cleanp2)

                if 'lfs' not in self.repo.requirements:
                    ctx = self.repo[node]

                    # The file list may contain removed files, so check for
                    # membership before assuming it is in the context.
                    if any(f in ctx and ctx[f].islfs() for f, n in files):
                        self.repo.requirements.add('lfs')
                        self.repo._writerequirements()

                        # Permanently enable lfs locally
                        self.repo.vfs.append(
                            'hgrc', util.tonativeeol('\n[extensions]\nlfs=\n'))

                return node

        sink.__class__ = lfssink

    return sink

def vfsinit(orig, self, othervfs):
    orig(self, othervfs)
    # copy lfs related options
    for k, v in othervfs.options.items():
        if k.startswith('lfs'):
            self.options[k] = v
    # also copy lfs blobstores. note: this can run before reposetup, so lfs
    # blobstore attributes are not always ready at this time.
    for name in ['lfslocalblobstore', 'lfsremoteblobstore']:
        if util.safehasattr(othervfs, name):
            setattr(self, name, getattr(othervfs, name))

def hgclone(orig, ui, opts, *args, **kwargs):
    result = orig(ui, opts, *args, **kwargs)

    if result is not None:
        sourcerepo, destrepo = result
        repo = destrepo.local()

        # When cloning to a remote repo (like through SSH), no repo is available
        # from the peer.  Therefore the hgrc can't be updated.
        if not repo:
            return result

        # If lfs is required for this repo, permanently enable it locally
        if 'lfs' in repo.requirements:
            repo.vfs.append('hgrc',
                            util.tonativeeol('\n[extensions]\nlfs=\n'))

    return result

def hgpostshare(orig, sourcerepo, destrepo, bookmarks=True, defaultpath=None):
    orig(sourcerepo, destrepo, bookmarks, defaultpath)

    # If lfs is required for this repo, permanently enable it locally
    if 'lfs' in destrepo.requirements:
        destrepo.vfs.append('hgrc', util.tonativeeol('\n[extensions]\nlfs=\n'))

def _prefetchfiles(repo, revs, match):
    """Ensure that required LFS blobs are present, fetching them as a group if
    needed."""
    if not util.safehasattr(repo.svfs, 'lfslocalblobstore'):
        return

    pointers = []
    oids = set()
    localstore = repo.svfs.lfslocalblobstore

    for rev in revs:
        ctx = repo[rev]
        for f in ctx.walk(match):
            p = pointerfromctx(ctx, f)
            if p and p.oid() not in oids and not localstore.has(p.oid()):
                p.filename = f
                pointers.append(p)
                oids.add(p.oid())

    if pointers:
        # Recalculating the repo store here allows 'paths.default' that is set
        # on the repo by a clone command to be used for the update.
        blobstore.remote(repo).readbatch(pointers, localstore)

def _canskipupload(repo):
    # Skip if this hasn't been passed to reposetup()
    if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
        return True

    # if remotestore is a null store, upload is a no-op and can be skipped
    return isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)

def candownload(repo):
    # Skip if this hasn't been passed to reposetup()
    if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
        return False

    # if remotestore is a null store, downloads will lead to nothing
    return not isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)

def uploadblobsfromrevs(repo, revs):
    '''upload lfs blobs introduced by revs

    Note: also used by other extensions, e.g. infinitepush.  Avoid renaming.
294 '''
294 '''
295 if _canskipupload(repo):
295 if _canskipupload(repo):
296 return
296 return
297 pointers = extractpointers(repo, revs)
297 pointers = extractpointers(repo, revs)
298 uploadblobs(repo, pointers)
298 uploadblobs(repo, pointers)

def prepush(pushop):
    """Prepush hook.

    Read through the revisions to push, looking for filelog entries that can be
    deserialized into metadata so that we can block the push on their upload to
    the remote blobstore.
    """
    return uploadblobsfromrevs(pushop.repo, pushop.outgoing.missing)

def push(orig, repo, remote, *args, **kwargs):
    """bail on push if the extension isn't enabled on remote when needed, and
    update the remote store based on the destination path."""
    if 'lfs' in repo.requirements:
        # If the remote peer is for a local repo, the requirement tests in the
        # base class method enforce lfs support. Otherwise, some revisions in
        # this repo use lfs, and the remote repo needs the extension loaded.
        if not remote.local() and not remote.capable('lfs'):
            # This is a copy of the message in exchange.push() when requirements
            # are missing between local repos.
            m = _("required features are not supported in the destination: %s")
            raise error.Abort(m % 'lfs',
                              hint=_('enable the lfs extension on the server'))

        # Repositories where this extension is disabled won't have the field.
        # But if there's a requirement, then the extension must be loaded AND
        # there may be blobs to push.
        remotestore = repo.svfs.lfsremoteblobstore
        try:
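            # Temporarily point the remote store at the push destination so
            # blobs are uploaded where the revisions are going; the original
            # store is restored below no matter how the push ends.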
            repo.svfs.lfsremoteblobstore = blobstore.remote(repo, remote.url())
            return orig(repo, remote, *args, **kwargs)
        finally:
            repo.svfs.lfsremoteblobstore = remotestore
    else:
        return orig(repo, remote, *args, **kwargs)

def writenewbundle(orig, ui, repo, source, filename, bundletype, outgoing,
                   *args, **kwargs):
    """upload LFS blobs added by outgoing revisions on 'hg bundle'"""
    uploadblobsfromrevs(repo, outgoing.missing)
    return orig(ui, repo, source, filename, bundletype, outgoing, *args,
                **kwargs)

def extractpointers(repo, revs):
    """return a list of lfs pointers added by given revs"""
    repo.ui.debug('lfs: computing set of blobs to upload\n')
    pointers = {}

    makeprogress = repo.ui.makeprogress
    with makeprogress(_('lfs search'), _('changesets'), len(revs)) as progress:
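        # The context manager completes the progress bar when the 'with' block
        # exits, even if an exception escapes the loop below.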
        for r in revs:
            ctx = repo[r]
            for p in pointersfromctx(ctx).values():
                pointers[p.oid()] = p
            progress.increment()

        return sorted(pointers.values())

def pointerfromctx(ctx, f, removed=False):
    """return a pointer for the named file from the given changectx, or None if
    the file isn't LFS.

    Optionally, the pointer for a file deleted from the context can be returned.
    Since no such pointer is actually stored, and to distinguish from a non-LFS
    file, this pointer is represented by an empty dict.
    """
    _ctx = ctx
    if f not in ctx:
        if not removed:
            return None
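        # The file was removed in ctx; read the pointer metadata from whichever
        # parent still has the file.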
        if f in ctx.p1():
            _ctx = ctx.p1()
        elif f in ctx.p2():
            _ctx = ctx.p2()
        else:
            return None
    fctx = _ctx[f]
    if not _islfs(fctx.filelog(), fctx.filenode()):
        return None
    try:
        p = pointer.deserialize(fctx.rawdata())
        if ctx == _ctx:
            return p
        return {}
    except pointer.InvalidPointer as ex:
        raise error.Abort(_('lfs: corrupted pointer (%s@%s): %s\n')
                          % (f, short(_ctx.node()), ex))

def pointersfromctx(ctx, removed=False):
    """return a dict {path: pointer} for given single changectx.

    If ``removed`` == True and the LFS file was removed from ``ctx``, the value
    stored for the path is an empty dict.
    """
    result = {}
    for f in ctx.files():
        p = pointerfromctx(ctx, f, removed=removed)
        if p is not None:
            result[f] = p
    return result

def uploadblobs(repo, pointers):
    """upload given pointers from local blobstore"""
    if not pointers:
        return

    remoteblob = repo.svfs.lfsremoteblobstore
    remoteblob.writebatch(pointers, repo.svfs.lfslocalblobstore)

def upgradefinishdatamigration(orig, ui, srcrepo, dstrepo, requirements):
    orig(ui, srcrepo, dstrepo, requirements)

    # Skip if this hasn't been passed to reposetup()
    if (util.safehasattr(srcrepo.svfs, 'lfslocalblobstore') and
            util.safehasattr(dstrepo.svfs, 'lfslocalblobstore')):
        srclfsvfs = srcrepo.svfs.lfslocalblobstore.vfs
        dstlfsvfs = dstrepo.svfs.lfslocalblobstore.vfs
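
        # lfutil.link() hardlinks when the filesystem allows it and falls back
        # to an atomic copy, so blobs aren't duplicated on disk by the upgrade.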
        for dirpath, dirs, files in srclfsvfs.walk():
            for oid in files:
                ui.write(_('copying lfs blob %s\n') % oid)
                lfutil.link(srclfsvfs.join(oid), dstlfsvfs.join(oid))

def upgraderequirements(orig, repo):
    reqs = orig(repo)
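    # The upgrade produces a fresh repository; carry the lfs requirement over
    # so the extension stays permanently enabled there.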
    if 'lfs' in repo.requirements:
        reqs.add('lfs')
    return reqs