lfs: add an experimental config to override User-Agent for the blob transfer...
Matt Harbison
r35456:e333d275 default
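
Once this lands, the override is set like any other option shown in the module docstring below; a minimal hgrc sketch (the User-Agent value is illustrative, not from the patch)::

    [experimental]
    # replace the default "mercurial/<version> git/2.15.1" header
    lfs.user-agent = git-lfs/2.4.0 (custom-client)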
@@ -1,201 +1,205 @@
1 1 # lfs - hash-preserving large file support using Git-LFS protocol
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """lfs - large file support (EXPERIMENTAL)
9 9
10 10 Configs::
11 11
12 12 [lfs]
13 13 # Remote endpoint. Multiple protocols are supported:
14 14 # - http(s)://user:pass@example.com/path
15 15 # git-lfs endpoint
16 16 # - file:///tmp/path
17 17 # local filesystem, usually for testing
18 18 # if unset, lfs will prompt to set this when it must use this value.
19 19 # (default: unset)
20 20 url = https://example.com/lfs
21 21
22 22 # size threshold for storing a file using LFS
23 23 threshold = 10M
24 24
25 25 # how many times to retry before giving up on transferring an object
26 26 retry = 5
27 27
28 28 # the local directory to store lfs files for sharing across local clones.
29 29 # If not set, the cache is located in an OS specific cache location.
30 30 usercache = /path/to/global/cache
31 31 """
32 32
33 33 from __future__ import absolute_import
34 34
35 35 from mercurial.i18n import _
36 36
37 37 from mercurial import (
38 38 bundle2,
39 39 changegroup,
40 40 context,
41 41 exchange,
42 42 extensions,
43 43 filelog,
44 44 hg,
45 45 localrepo,
46 46 registrar,
47 47 revlog,
48 48 scmutil,
49 49 upgrade,
50 50 vfs as vfsmod,
51 51 )
52 52
53 53 from . import (
54 54 blobstore,
55 55 wrapper,
56 56 )
57 57
58 58 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
59 59 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
60 60 # be specifying the version(s) of Mercurial they are tested with, or
61 61 # leave the attribute unspecified.
62 62 testedwith = 'ships-with-hg-core'
63 63
64 64 configtable = {}
65 65 configitem = registrar.configitem(configtable)
66 66
67 configitem('experimental', 'lfs.user-agent',
68 default=None,
69 )
70
67 71 configitem('lfs', 'url',
68 72 default=configitem.dynamicdefault,
69 73 )
70 74 configitem('lfs', 'usercache',
71 75 default=None,
72 76 )
73 77 configitem('lfs', 'threshold',
74 78 default=None,
75 79 )
76 80 configitem('lfs', 'retry',
77 81 default=5,
78 82 )
79 83 # Deprecated
80 84 configitem('lfs', 'remotestore',
81 85 default=None,
82 86 )
83 87 # Deprecated
84 88 configitem('lfs', 'dummy',
85 89 default=None,
86 90 )
87 91 # Deprecated
88 92 configitem('lfs', 'git-lfs',
89 93 default=None,
90 94 )
91 95
92 96 cmdtable = {}
93 97 command = registrar.command(cmdtable)
94 98
95 99 templatekeyword = registrar.templatekeyword()
96 100
97 101 def featuresetup(ui, supported):
98 102 # don't die on seeing a repo with the lfs requirement
99 103 supported |= {'lfs'}
100 104
101 105 def uisetup(ui):
102 106 localrepo.localrepository.featuresetupfuncs.add(featuresetup)
103 107
104 108 def reposetup(ui, repo):
105 109 # Nothing to do with a remote repo
106 110 if not repo.local():
107 111 return
108 112
109 113 threshold = repo.ui.configbytes('lfs', 'threshold')
110 114
111 115 repo.svfs.options['lfsthreshold'] = threshold
112 116 repo.svfs.lfslocalblobstore = blobstore.local(repo)
113 117 repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
114 118
115 119 # Push hook
116 120 repo.prepushoutgoinghooks.add('lfs', wrapper.prepush)
117 121
118 122 if 'lfs' not in repo.requirements:
119 123 def checkrequireslfs(ui, repo, **kwargs):
120 124 if 'lfs' not in repo.requirements:
121 125 ctx = repo[kwargs['node']]
122 126 # TODO: is there a way to just walk the files in the commit?
123 127 if any(ctx[f].islfs() for f in ctx.files()):
124 128 repo.requirements.add('lfs')
125 129 repo._writerequirements()
126 130
127 131 ui.setconfig('hooks', 'commit.lfs', checkrequireslfs, 'lfs')
128 132
129 133 def wrapfilelog(filelog):
130 134 wrapfunction = extensions.wrapfunction
131 135
132 136 wrapfunction(filelog, 'addrevision', wrapper.filelogaddrevision)
133 137 wrapfunction(filelog, 'renamed', wrapper.filelogrenamed)
134 138 wrapfunction(filelog, 'size', wrapper.filelogsize)
135 139
136 140 def extsetup(ui):
137 141 wrapfilelog(filelog.filelog)
138 142
139 143 wrapfunction = extensions.wrapfunction
140 144
141 145 wrapfunction(scmutil, 'wrapconvertsink', wrapper.convertsink)
142 146
143 147 wrapfunction(upgrade, '_finishdatamigration',
144 148 wrapper.upgradefinishdatamigration)
145 149
146 150 wrapfunction(upgrade, 'preservedrequirements',
147 151 wrapper.upgraderequirements)
148 152
149 153 wrapfunction(upgrade, 'supporteddestrequirements',
150 154 wrapper.upgraderequirements)
151 155
152 156 wrapfunction(changegroup,
153 157 'supportedoutgoingversions',
154 158 wrapper.supportedoutgoingversions)
155 159 wrapfunction(changegroup,
156 160 'allsupportedversions',
157 161 wrapper.allsupportedversions)
158 162
159 163 wrapfunction(context.basefilectx, 'cmp', wrapper.filectxcmp)
160 164 wrapfunction(context.basefilectx, 'isbinary', wrapper.filectxisbinary)
161 165 context.basefilectx.islfs = wrapper.filectxislfs
162 166
163 167 revlog.addflagprocessor(
164 168 revlog.REVIDX_EXTSTORED,
165 169 (
166 170 wrapper.readfromstore,
167 171 wrapper.writetostore,
168 172 wrapper.bypasscheckhash,
169 173 ),
170 174 )
171 175
172 176 wrapfunction(hg, 'clone', wrapper.hgclone)
173 177 wrapfunction(hg, 'postshare', wrapper.hgpostshare)
174 178
175 179 # Make bundle choose changegroup3 instead of changegroup2. This affects
176 180 # "hg bundle" command. Note: it does not cover all bundle formats like
177 181 # "packed1". Using "packed1" with lfs will likely cause trouble.
178 182 names = [k for k, v in exchange._bundlespeccgversions.items() if v == '02']
179 183 for k in names:
180 184 exchange._bundlespeccgversions[k] = '03'
181 185
182 186 # bundlerepo uses "vfsmod.readonlyvfs(othervfs)", so we need to make sure lfs
183 187 # options and blob stores are passed from othervfs to the new readonlyvfs.
184 188 wrapfunction(vfsmod.readonlyvfs, '__init__', wrapper.vfsinit)
185 189
186 190 # when writing a bundle via "hg bundle" command, upload related LFS blobs
187 191 wrapfunction(bundle2, 'writenewbundle', wrapper.writenewbundle)
188 192
189 193 @templatekeyword('lfs_files')
190 194 def lfsfiles(repo, ctx, **args):
191 195 """List of strings. LFS files added or modified by the changeset."""
192 196 pointers = wrapper.pointersfromctx(ctx) # {path: pointer}
193 197 return sorted(pointers.keys())
194 198
195 199 @command('debuglfsupload',
196 200 [('r', 'rev', [], _('upload large files introduced by REV'))])
197 201 def debuglfsupload(ui, repo, **opts):
198 202 """upload lfs blobs added by the working copy parent or given revisions"""
199 203 revs = opts.get('rev', [])
200 204 pointers = wrapper.extractpointers(repo, scmutil.revrange(repo, revs))
201 205 wrapper.uploadblobs(repo, pointers)
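
The blobstore.py hunk below wires the new config in; as a standalone sketch of that fallback (``getconfig`` stands in for ``repo.ui.config`` and ``hgversion`` for ``util.version()``, both hypothetical parameter names)::

    def _useragent(getconfig, hgversion):
        # prefer the experimental override when it is set
        ua = getconfig('experimental', 'lfs.user-agent')
        if not ua:
            # otherwise fall back to the hardcoded default used in
            # _gitlfsremote.__init__ below
            ua = 'mercurial/%s git/2.15.1' % hgversion
        return ua

    # e.g. with the config unset and hgversion '4.4.2':
    # _useragent(...) -> 'mercurial/4.4.2 git/2.15.1'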
@@ -1,387 +1,389 @@
1 1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import json
11 11 import os
12 12 import re
13 13
14 14 from mercurial.i18n import _
15 15
16 16 from mercurial import (
17 17 error,
18 18 pathutil,
19 19 url as urlmod,
20 20 util,
21 21 vfs as vfsmod,
22 22 worker,
23 23 )
24 24
25 25 from ..largefiles import lfutil
26 26
27 27 # 64 bytes for SHA256
28 28 _lfsre = re.compile(r'\A[a-f0-9]{64}\Z')
29 29
30 30 class lfsvfs(vfsmod.vfs):
31 31 def join(self, path):
32 32 """split the path at first two characters, like: XX/XXXXX..."""
33 33 if not _lfsre.match(path):
34 34 raise error.ProgrammingError('unexpected lfs path: %s' % path)
35 35 return super(lfsvfs, self).join(path[0:2], path[2:])
36 36
37 37 def walk(self, path=None, onerror=None):
38 38 """Yield (dirpath, [], oids) tuple for blobs under path
39 39
40 40 Oids only exist in the root of this vfs, so dirpath is always ''.
41 41 """
42 42 root = os.path.normpath(self.base)
43 43 # when dirpath == root, dirpath[prefixlen:] becomes empty
44 44 # because len(dirpath) < prefixlen.
45 45 prefixlen = len(pathutil.normasprefix(root))
46 46 oids = []
47 47
48 48 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
49 49 onerror=onerror):
50 50 dirpath = dirpath[prefixlen:]
51 51
52 52 # Silently skip unexpected files and directories
53 53 if len(dirpath) == 2:
54 54 oids.extend([dirpath + f for f in files
55 55 if _lfsre.match(dirpath + f)])
56 56
57 57 yield ('', [], oids)
58 58
59 59 class filewithprogress(object):
60 60 """a file-like object that supports __len__ and read.
61 61
62 62 Useful to provide progress information for how many bytes are read.
63 63 """
64 64
65 65 def __init__(self, fp, callback):
66 66 self._fp = fp
67 67 self._callback = callback # func(readsize)
68 68 fp.seek(0, os.SEEK_END)
69 69 self._len = fp.tell()
70 70 fp.seek(0)
71 71
72 72 def __len__(self):
73 73 return self._len
74 74
75 75 def read(self, size):
76 76 if self._fp is None:
77 77 return b''
78 78 data = self._fp.read(size)
79 79 if data:
80 80 if self._callback:
81 81 self._callback(len(data))
82 82 else:
83 83 self._fp.close()
84 84 self._fp = None
85 85 return data
86 86
87 87 class local(object):
88 88 """Local blobstore for large file contents.
89 89
90 90 This blobstore is used both as a cache and as a staging area for large blobs
91 91 to be uploaded to the remote blobstore.
92 92 """
93 93
94 94 def __init__(self, repo):
95 95 fullpath = repo.svfs.join('lfs/objects')
96 96 self.vfs = lfsvfs(fullpath)
97 97 usercache = lfutil._usercachedir(repo.ui, 'lfs')
98 98 self.cachevfs = lfsvfs(usercache)
99 99
100 100 def write(self, oid, data):
101 101 """Write blob to local blobstore."""
102 102 with self.vfs(oid, 'wb', atomictemp=True) as fp:
103 103 fp.write(data)
104 104
105 105 # XXX: should we verify the content of the cache, and hardlink back to
106 106 # the local store on success, but truncate, write and link on failure?
107 107 if not self.cachevfs.exists(oid):
108 108 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
109 109
110 110 def read(self, oid):
111 111 """Read blob from local blobstore."""
112 112 if not self.vfs.exists(oid):
113 113 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
114 114 return self.vfs.read(oid)
115 115
116 116 def has(self, oid):
117 117 """Returns True if the local blobstore contains the requested blob,
118 118 False otherwise."""
119 119 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
120 120
121 121 class _gitlfsremote(object):
122 122
123 123 def __init__(self, repo, url):
124 124 ui = repo.ui
125 125 self.ui = ui
126 126 baseurl, authinfo = url.authinfo()
127 127 self.baseurl = baseurl.rstrip('/')
128 useragent = 'mercurial/%s git/2.15.1' % util.version()
128 useragent = repo.ui.config('experimental', 'lfs.user-agent')
129 if not useragent:
130 useragent = 'mercurial/%s git/2.15.1' % util.version()
129 131 self.urlopener = urlmod.opener(ui, authinfo, useragent)
130 132 self.retry = ui.configint('lfs', 'retry')
131 133
132 134 def writebatch(self, pointers, fromstore):
133 135 """Batch upload from local to remote blobstore."""
134 136 self._batch(pointers, fromstore, 'upload')
135 137
136 138 def readbatch(self, pointers, tostore):
137 139 """Batch download from remote to local blostore."""
138 140 self._batch(pointers, tostore, 'download')
139 141
140 142 def _batchrequest(self, pointers, action):
141 143 """Get metadata about objects pointed by pointers for given action
142 144
143 145 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
144 146 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
145 147 """
146 148 objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
147 149 requestdata = json.dumps({
148 150 'objects': objects,
149 151 'operation': action,
150 152 })
151 153 batchreq = util.urlreq.request('%s/objects/batch' % self.baseurl,
152 154 data=requestdata)
153 155 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
154 156 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
155 157 try:
156 158 rawjson = self.urlopener.open(batchreq).read()
157 159 except util.urlerr.httperror as ex:
158 160 raise LfsRemoteError(_('LFS HTTP error: %s (action=%s)')
159 161 % (ex, action))
160 162 try:
161 163 response = json.loads(rawjson)
162 164 except ValueError:
163 165 raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
164 166 % rawjson)
165 167 return response
166 168
167 169 def _checkforservererror(self, pointers, responses):
168 170 """Scans errors from objects
169 171
170 172 Raises LfsRemoteError if any object has an error"""
171 173 for response in responses:
172 174 error = response.get('error')
173 175 if error:
174 176 ptrmap = {p.oid(): p for p in pointers}
175 177 p = ptrmap.get(response['oid'], None)
176 178 if error['code'] == 404 and p:
177 179 filename = getattr(p, 'filename', 'unknown')
178 180 raise LfsRemoteError(
179 181 _(('LFS server error. Remote object '
180 182 'for file %s not found: %r')) % (filename, response))
181 183 raise LfsRemoteError(_('LFS server error: %r') % response)
182 184
183 185 def _extractobjects(self, response, pointers, action):
184 186 """extract objects from response of the batch API
185 187
186 188 response: parsed JSON object returned by batch API
187 189 return response['objects'] filtered by action
188 190 raise if any object has an error
189 191 """
190 192 # Scan errors from objects - fail early
191 193 objects = response.get('objects', [])
192 194 self._checkforservererror(pointers, objects)
193 195
194 196 # Filter objects with given action. Practically, this skips uploading
195 197 # objects which already exist on the server.
196 198 filteredobjects = [o for o in objects if action in o.get('actions', [])]
197 199 # But for downloading, we want all objects. Therefore missing objects
198 200 # should be considered an error.
199 201 if action == 'download':
200 202 if len(filteredobjects) < len(objects):
201 203 missing = [o.get('oid', '?')
202 204 for o in objects
203 205 if action not in o.get('actions', [])]
204 206 raise LfsRemoteError(
205 207 _('LFS server claims required objects do not exist:\n%s')
206 208 % '\n'.join(missing))
207 209
208 210 return filteredobjects
209 211
210 212 def _basictransfer(self, obj, action, localstore):
211 213 """Download or upload a single object using basic transfer protocol
212 214
213 215 obj: dict, an object description returned by batch API
214 216 action: string, one of ['upload', 'download']
215 217 localstore: blobstore.local
216 218
217 219 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
218 220 basic-transfers.md
219 221 """
220 222 oid = str(obj['oid'])
221 223
222 224 href = str(obj['actions'][action].get('href'))
223 225 headers = obj['actions'][action].get('header', {}).items()
224 226
225 227 request = util.urlreq.request(href)
226 228 if action == 'upload':
227 229 # If uploading blobs, read data from local blobstore.
228 230 request.data = filewithprogress(localstore.vfs(oid), None)
229 231 request.get_method = lambda: 'PUT'
230 232
231 233 for k, v in headers:
232 234 request.add_header(k, v)
233 235
234 236 response = b''
235 237 try:
236 238 req = self.urlopener.open(request)
237 239 while True:
238 240 data = req.read(1048576)
239 241 if not data:
240 242 break
241 243 response += data
242 244 except util.urlerr.httperror as ex:
243 245 raise LfsRemoteError(_('HTTP error: %s (oid=%s, action=%s)')
244 246 % (ex, oid, action))
245 247
246 248 if action == 'download':
247 249 # If downloading blobs, store downloaded data to local blobstore
248 250 localstore.write(oid, response)
249 251
250 252 def _batch(self, pointers, localstore, action):
251 253 if action not in ['upload', 'download']:
252 254 raise error.ProgrammingError('invalid Git-LFS action: %s' % action)
253 255
254 256 response = self._batchrequest(pointers, action)
255 257 objects = self._extractobjects(response, pointers, action)
256 258 total = sum(x.get('size', 0) for x in objects)
257 259 sizes = {}
258 260 for obj in objects:
259 261 sizes[obj.get('oid')] = obj.get('size', 0)
260 262 topic = {'upload': _('lfs uploading'),
261 263 'download': _('lfs downloading')}[action]
262 264 if self.ui.verbose and len(objects) > 1:
263 265 self.ui.write(_('lfs: need to transfer %d objects (%s)\n')
264 266 % (len(objects), util.bytecount(total)))
265 267 self.ui.progress(topic, 0, total=total)
266 268 def transfer(chunk):
267 269 for obj in chunk:
268 270 objsize = obj.get('size', 0)
269 271 if self.ui.verbose:
270 272 if action == 'download':
271 273 msg = _('lfs: downloading %s (%s)\n')
272 274 elif action == 'upload':
273 275 msg = _('lfs: uploading %s (%s)\n')
274 276 self.ui.write(msg % (obj.get('oid'),
275 277 util.bytecount(objsize)))
276 278 retry = self.retry
277 279 while True:
278 280 try:
279 281 self._basictransfer(obj, action, localstore)
280 282 yield 1, obj.get('oid')
281 283 break
282 284 except Exception as ex:
283 285 if retry > 0:
284 286 if self.ui.verbose:
285 287 self.ui.write(
286 288 _('lfs: failed: %r (remaining retry %d)\n')
287 289 % (ex, retry))
288 290 retry -= 1
289 291 continue
290 292 raise
291 293
292 294 oids = worker.worker(self.ui, 0.1, transfer, (),
293 295 sorted(objects, key=lambda o: o.get('oid')))
294 296 processed = 0
295 297 for _one, oid in oids:
296 298 processed += sizes[oid]
297 299 self.ui.progress(topic, processed, total=total)
298 300 if self.ui.verbose:
299 301 self.ui.write(_('lfs: processed: %s\n') % oid)
300 302 self.ui.progress(topic, pos=None, total=total)
301 303
302 304 def __del__(self):
303 305 # copied from mercurial/httppeer.py
304 306 urlopener = getattr(self, 'urlopener', None)
305 307 if urlopener:
306 308 for h in urlopener.handlers:
307 309 h.close()
308 310 getattr(h, "close_all", lambda : None)()
309 311
310 312 class _dummyremote(object):
311 313 """Dummy store storing blobs to temp directory."""
312 314
313 315 def __init__(self, repo, url):
314 316 fullpath = repo.vfs.join('lfs', url.path)
315 317 self.vfs = lfsvfs(fullpath)
316 318
317 319 def writebatch(self, pointers, fromstore):
318 320 for p in pointers:
319 321 content = fromstore.read(p.oid())
320 322 with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
321 323 fp.write(content)
322 324
323 325 def readbatch(self, pointers, tostore):
324 326 for p in pointers:
325 327 content = self.vfs.read(p.oid())
326 328 tostore.write(p.oid(), content)
327 329
328 330 class _nullremote(object):
329 331 """Null store storing blobs to /dev/null."""
330 332
331 333 def __init__(self, repo, url):
332 334 pass
333 335
334 336 def writebatch(self, pointers, fromstore):
335 337 pass
336 338
337 339 def readbatch(self, pointers, tostore):
338 340 pass
339 341
340 342 class _promptremote(object):
341 343 """Prompt user to set lfs.url when accessed."""
342 344
343 345 def __init__(self, repo, url):
344 346 pass
345 347
346 348 def writebatch(self, pointers, fromstore, ui=None):
347 349 self._prompt()
348 350
349 351 def readbatch(self, pointers, tostore, ui=None):
350 352 self._prompt()
351 353
352 354 def _prompt(self):
353 355 raise error.Abort(_('lfs.url needs to be configured'))
354 356
355 357 _storemap = {
356 358 'https': _gitlfsremote,
357 359 'http': _gitlfsremote,
358 360 'file': _dummyremote,
359 361 'null': _nullremote,
360 362 None: _promptremote,
361 363 }
362 364
363 365 def remote(repo):
364 366 """remotestore factory. return a store in _storemap depending on config"""
365 367 defaulturl = ''
366 368
367 369 # convert deprecated configs to the new url. TODO: remove this if other
368 370 # places are migrated to the new url config.
369 371 # deprecated config: lfs.remotestore
370 372 deprecatedstore = repo.ui.config('lfs', 'remotestore')
371 373 if deprecatedstore == 'dummy':
372 374 # deprecated config: lfs.remotepath
373 375 defaulturl = 'file://' + repo.ui.config('lfs', 'remotepath')
374 376 elif deprecatedstore == 'git-lfs':
375 377 # deprecated config: lfs.remoteurl
376 378 defaulturl = repo.ui.config('lfs', 'remoteurl')
377 379 elif deprecatedstore == 'null':
378 380 defaulturl = 'null://'
379 381
380 382 url = util.url(repo.ui.config('lfs', 'url', defaulturl))
381 383 scheme = url.scheme
382 384 if scheme not in _storemap:
383 385 raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
384 386 return _storemap[scheme](repo, url)
385 387
386 388 class LfsRemoteError(error.RevlogError):
387 389 pass
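
For reference, the ``lfsvfs.join`` scheme above fans blobs out by the first two hex digits of the SHA256 oid; a self-contained sketch of that layout (``oidpath`` is a hypothetical helper, not part of the patch)::

    import os
    import re

    _lfsre = re.compile(r'\A[a-f0-9]{64}\Z')

    def oidpath(oid):
        # content-addressed layout: the blob for oid XXYYYY... lives at XX/YYYY...
        if not _lfsre.match(oid):
            raise ValueError('unexpected lfs path: %s' % oid)
        return os.path.join(oid[:2], oid[2:])

    # oidpath('ab' + '0' * 62) -> 'ab/000...0'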