##// END OF EJS Templates
py3: byteify the LFS blobstore module...
Matt Harbison -
r41471:02d0a777 default
parent child Browse files
Show More
@@ -42,7 +42,7 class lfsvfs(vfsmod.vfs):
42 42 def join(self, path):
43 43 """split the path at first two characters, like: XX/XXXXX..."""
44 44 if not _lfsre.match(path):
45 raise error.ProgrammingError('unexpected lfs path: %s' % path)
45 raise error.ProgrammingError(b'unexpected lfs path: %s' % path)
46 46 return super(lfsvfs, self).join(path[0:2], path[2:])
47 47
48 48 def walk(self, path=None, onerror=None):
@@ -56,7 +56,8 class lfsvfs(vfsmod.vfs):
56 56 prefixlen = len(pathutil.normasprefix(root))
57 57 oids = []
58 58
59 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
59 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path
60 or b''),
60 61 onerror=onerror):
61 62 dirpath = dirpath[prefixlen:]
62 63
@@ -79,10 +80,11 class nullvfs(lfsvfs):
79 80 # self.vfs. Raise the same error as a normal vfs when asked to read a
80 81 # file that doesn't exist. The only difference is the full file path
81 82 # isn't available in the error.
82 raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)
83 raise IOError(errno.ENOENT,
84 pycompat.sysstr(b'%s: No such file or directory' % oid))
83 85
84 86 def walk(self, path=None, onerror=None):
85 return ('', [], [])
87 return (b'', [], [])
86 88
87 89 def write(self, oid, data):
88 90 pass
@@ -123,13 +125,13 class local(object):
123 125 """
124 126
125 127 def __init__(self, repo):
126 fullpath = repo.svfs.join('lfs/objects')
128 fullpath = repo.svfs.join(b'lfs/objects')
127 129 self.vfs = lfsvfs(fullpath)
128 130
129 if repo.ui.configbool('experimental', 'lfs.disableusercache'):
131 if repo.ui.configbool(b'experimental', b'lfs.disableusercache'):
130 132 self.cachevfs = nullvfs()
131 133 else:
132 usercache = lfutil._usercachedir(repo.ui, 'lfs')
134 usercache = lfutil._usercachedir(repo.ui, b'lfs')
133 135 self.cachevfs = lfsvfs(usercache)
134 136 self.ui = repo.ui
135 137
@@ -143,23 +145,23 class local(object):
143 145 # the usercache is the only place it _could_ be. If not present, the
144 146 # missing file msg here will indicate the local repo, not the usercache.
145 147 if self.cachevfs.exists(oid):
146 return self.cachevfs(oid, 'rb')
148 return self.cachevfs(oid, b'rb')
147 149
148 return self.vfs(oid, 'rb')
150 return self.vfs(oid, b'rb')
149 151
150 152 def download(self, oid, src):
151 153 """Read the blob from the remote source in chunks, verify the content,
152 154 and write to this local blobstore."""
153 155 sha256 = hashlib.sha256()
154 156
155 with self.vfs(oid, 'wb', atomictemp=True) as fp:
157 with self.vfs(oid, b'wb', atomictemp=True) as fp:
156 158 for chunk in util.filechunkiter(src, size=1048576):
157 159 fp.write(chunk)
158 160 sha256.update(chunk)
159 161
160 162 realoid = node.hex(sha256.digest())
161 163 if realoid != oid:
162 raise LfsCorruptionError(_('corrupt remote lfs object: %s')
164 raise LfsCorruptionError(_(b'corrupt remote lfs object: %s')
163 165 % oid)
164 166
165 167 self._linktousercache(oid)
@@ -170,7 +172,7 class local(object):
170 172 This should only be called from the filelog during a commit or similar.
171 173 As such, there is no need to verify the data. Imports from a remote
172 174 store must use ``download()`` instead."""
173 with self.vfs(oid, 'wb', atomictemp=True) as fp:
175 with self.vfs(oid, b'wb', atomictemp=True) as fp:
174 176 fp.write(data)
175 177
176 178 self._linktousercache(oid)
@@ -186,7 +188,7 class local(object):
186 188 """
187 189 if (not isinstance(self.cachevfs, nullvfs)
188 190 and not self.vfs.exists(oid)):
189 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
191 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
190 192 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
191 193
192 194 def _linktousercache(self, oid):
@@ -194,7 +196,7 class local(object):
194 196 # the local store on success, but truncate, write and link on failure?
195 197 if (not self.cachevfs.exists(oid)
196 198 and not isinstance(self.cachevfs, nullvfs)):
197 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
199 self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid)
198 200 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
199 201
200 202 def read(self, oid, verify=True):
@@ -208,10 +210,10 class local(object):
208 210 # give more useful info about the corruption- simply don't add the
209 211 # hardlink.
210 212 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
211 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
213 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
212 214 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
213 215 else:
214 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
216 self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid)
215 217 blob = self._read(self.vfs, oid, verify)
216 218 return blob
217 219
@@ -268,20 +270,20 class _gitlfsremote(object):
268 270 ui = repo.ui
269 271 self.ui = ui
270 272 baseurl, authinfo = url.authinfo()
271 self.baseurl = baseurl.rstrip('/')
272 useragent = repo.ui.config('experimental', 'lfs.user-agent')
273 self.baseurl = baseurl.rstrip(b'/')
274 useragent = repo.ui.config(b'experimental', b'lfs.user-agent')
273 275 if not useragent:
274 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
276 useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version()
275 277 self.urlopener = urlmod.opener(ui, authinfo, useragent)
276 self.retry = ui.configint('lfs', 'retry')
278 self.retry = ui.configint(b'lfs', b'retry')
277 279
278 280 def writebatch(self, pointers, fromstore):
279 281 """Batch upload from local to remote blobstore."""
280 self._batch(_deduplicate(pointers), fromstore, 'upload')
282 self._batch(_deduplicate(pointers), fromstore, b'upload')
281 283
282 284 def readbatch(self, pointers, tostore):
283 285 """Batch download from remote to local blostore."""
284 self._batch(_deduplicate(pointers), tostore, 'download')
286 self._batch(_deduplicate(pointers), tostore, b'download')
285 287
286 288 def _batchrequest(self, pointers, action):
287 289 """Get metadata about objects pointed by pointers for given action
@@ -294,8 +296,8 class _gitlfsremote(object):
294 296 'objects': objects,
295 297 'operation': action,
296 298 })
297 url = '%s/objects/batch' % self.baseurl
298 batchreq = util.urlreq.request(url, data=requestdata)
299 url = b'%s/objects/batch' % self.baseurl
300 batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata)
299 301 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
300 302 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
301 303 try:
@@ -303,29 +305,32 class _gitlfsremote(object):
303 305 rawjson = rsp.read()
304 306 except util.urlerr.httperror as ex:
305 307 hints = {
306 400: _('check that lfs serving is enabled on %s and "%s" is '
308 400: _(b'check that lfs serving is enabled on %s and "%s" is '
307 309 'supported') % (self.baseurl, action),
308 404: _('the "lfs.url" config may be used to override %s')
310 404: _(b'the "lfs.url" config may be used to override %s')
309 311 % self.baseurl,
310 312 }
311 hint = hints.get(ex.code, _('api=%s, action=%s') % (url, action))
312 raise LfsRemoteError(_('LFS HTTP error: %s') % ex, hint=hint)
313 hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action))
314 raise LfsRemoteError(
315 _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex),
316 hint=hint)
313 317 except util.urlerr.urlerror as ex:
314 hint = (_('the "lfs.url" config may be used to override %s')
318 hint = (_(b'the "lfs.url" config may be used to override %s')
315 319 % self.baseurl)
316 raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
320 raise LfsRemoteError(_(b'LFS error: %s') % _urlerrorreason(ex),
317 321 hint=hint)
318 322 try:
319 323 response = json.loads(rawjson)
320 324 except ValueError:
321 raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
322 % rawjson)
325 raise LfsRemoteError(_(b'LFS server returns invalid JSON: %s')
326 % rawjson.encode("utf-8"))
323 327
324 328 if self.ui.debugflag:
325 self.ui.debug('Status: %d\n' % rsp.status)
329 self.ui.debug(b'Status: %d\n' % rsp.status)
326 330 # lfs-test-server and hg serve return headers in different order
327 self.ui.debug('%s\n'
328 % '\n'.join(sorted(str(rsp.info()).splitlines())))
331 headers = pycompat.bytestr(rsp.info())
332 self.ui.debug(b'%s\n'
333 % b'\n'.join(sorted(headers.splitlines())))
329 334
330 335 if 'objects' in response:
331 336 response['objects'] = sorted(response['objects'],
@@ -345,34 +350,34 class _gitlfsremote(object):
345 350 # server implementation (ex. lfs-test-server) does not set "error"
346 351 # but just removes "download" from "actions". Treat that case
347 352 # as the same as 404 error.
348 if 'error' not in response:
349 if (action == 'download'
350 and action not in response.get('actions', [])):
353 if b'error' not in response:
354 if (action == b'download'
355 and action not in response.get(b'actions', [])):
351 356 code = 404
352 357 else:
353 358 continue
354 359 else:
355 360 # An error dict without a code doesn't make much sense, so
356 361 # treat as a server error.
357 code = response.get('error').get('code', 500)
362 code = response.get(b'error').get(b'code', 500)
358 363
359 364 ptrmap = {p.oid(): p for p in pointers}
360 p = ptrmap.get(response['oid'], None)
365 p = ptrmap.get(response[b'oid'], None)
361 366 if p:
362 filename = getattr(p, 'filename', 'unknown')
367 filename = getattr(p, 'filename', b'unknown')
363 368 errors = {
364 404: 'The object does not exist',
365 410: 'The object was removed by the owner',
366 422: 'Validation error',
367 500: 'Internal server error',
369 404: b'The object does not exist',
370 410: b'The object was removed by the owner',
371 422: b'Validation error',
372 500: b'Internal server error',
368 373 }
369 msg = errors.get(code, 'status code %d' % code)
370 raise LfsRemoteError(_('LFS server error for "%s": %s')
374 msg = errors.get(code, b'status code %d' % code)
375 raise LfsRemoteError(_(b'LFS server error for "%s": %s')
371 376 % (filename, msg))
372 377 else:
373 378 raise LfsRemoteError(
374 _('LFS server error. Unsolicited response for oid %s')
375 % response['oid'])
379 _(b'LFS server error. Unsolicited response for oid %s')
380 % response[b'oid'])
376 381
377 382 def _extractobjects(self, response, pointers, action):
378 383 """extract objects from response of the batch API
@@ -382,12 +387,13 class _gitlfsremote(object):
382 387 raise if any object has an error
383 388 """
384 389 # Scan errors from objects - fail early
385 objects = response.get('objects', [])
390 objects = response.get(b'objects', [])
386 391 self._checkforservererror(pointers, objects, action)
387 392
388 393 # Filter objects with given action. Practically, this skips uploading
389 394 # objects which exist in the server.
390 filteredobjects = [o for o in objects if action in o.get('actions', [])]
395 filteredobjects = [o for o in objects
396 if action in o.get(b'actions', [])]
391 397
392 398 return filteredobjects
393 399
@@ -407,11 +413,11 class _gitlfsremote(object):
407 413 headers = obj['actions'][action].get('header', {}).items()
408 414
409 415 request = util.urlreq.request(href)
410 if action == 'upload':
416 if action == b'upload':
411 417 # If uploading blobs, read data from local blobstore.
412 418 if not localstore.verify(oid):
413 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
414 hint=_('run hg verify'))
419 raise error.Abort(_(b'detected corrupt lfs object: %s') % oid,
420 hint=_(b'run hg verify'))
415 421 request.data = filewithprogress(localstore.open(oid), None)
416 422 request.get_method = lambda: 'PUT'
417 423 request.add_header('Content-Type', 'application/octet-stream')
@@ -424,13 +430,14 class _gitlfsremote(object):
424 430 with contextlib.closing(self.urlopener.open(request)) as req:
425 431 ui = self.ui # Shorten debug lines
426 432 if self.ui.debugflag:
427 ui.debug('Status: %d\n' % req.status)
433 ui.debug(b'Status: %d\n' % req.status)
428 434 # lfs-test-server and hg serve return headers in different
429 435 # order
430 ui.debug('%s\n'
431 % '\n'.join(sorted(str(req.info()).splitlines())))
436 headers = pycompat.bytestr(req.info())
437 ui.debug(b'%s\n'
438 % b'\n'.join(sorted(headers.splitlines())))
432 439
433 if action == 'download':
440 if action == b'download':
434 441 # If downloading blobs, store downloaded data to local
435 442 # blobstore
436 443 localstore.download(oid, req)
@@ -441,65 +448,65 class _gitlfsremote(object):
441 448 break
442 449 response += data
443 450 if response:
444 ui.debug('lfs %s response: %s' % (action, response))
451 ui.debug(b'lfs %s response: %s' % (action, response))
445 452 except util.urlerr.httperror as ex:
446 453 if self.ui.debugflag:
447 self.ui.debug('%s: %s\n' % (oid, ex.read()))
448 raise LfsRemoteError(_('LFS HTTP error: %s (oid=%s, action=%s)')
449 % (ex, oid, action))
454 self.ui.debug(b'%s: %s\n' % (oid, ex.read())) # XXX: also bytes?
455 raise LfsRemoteError(_(b'LFS HTTP error: %s (oid=%s, action=%s)')
456 % (stringutil.forcebytestr(ex), oid, action))
450 457 except util.urlerr.urlerror as ex:
451 hint = (_('attempted connection to %s')
452 % util.urllibcompat.getfullurl(request))
453 raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
458 hint = (_(b'attempted connection to %s')
459 % pycompat.bytesurl(util.urllibcompat.getfullurl(request)))
460 raise LfsRemoteError(_(b'LFS error: %s') % _urlerrorreason(ex),
454 461 hint=hint)
455 462
456 463 def _batch(self, pointers, localstore, action):
457 if action not in ['upload', 'download']:
458 raise error.ProgrammingError('invalid Git-LFS action: %s' % action)
464 if action not in [b'upload', b'download']:
465 raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action)
459 466
460 467 response = self._batchrequest(pointers, action)
461 468 objects = self._extractobjects(response, pointers, action)
462 total = sum(x.get('size', 0) for x in objects)
469 total = sum(x.get(b'size', 0) for x in objects)
463 470 sizes = {}
464 471 for obj in objects:
465 sizes[obj.get('oid')] = obj.get('size', 0)
466 topic = {'upload': _('lfs uploading'),
467 'download': _('lfs downloading')}[action]
472 sizes[obj.get(b'oid')] = obj.get(b'size', 0)
473 topic = {b'upload': _(b'lfs uploading'),
474 b'download': _(b'lfs downloading')}[action]
468 475 if len(objects) > 1:
469 self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
476 self.ui.note(_(b'lfs: need to transfer %d objects (%s)\n')
470 477 % (len(objects), util.bytecount(total)))
471 478
472 479 def transfer(chunk):
473 480 for obj in chunk:
474 objsize = obj.get('size', 0)
481 objsize = obj.get(b'size', 0)
475 482 if self.ui.verbose:
476 if action == 'download':
477 msg = _('lfs: downloading %s (%s)\n')
478 elif action == 'upload':
479 msg = _('lfs: uploading %s (%s)\n')
480 self.ui.note(msg % (obj.get('oid'),
483 if action == b'download':
484 msg = _(b'lfs: downloading %s (%s)\n')
485 elif action == b'upload':
486 msg = _(b'lfs: uploading %s (%s)\n')
487 self.ui.note(msg % (obj.get(b'oid'),
481 488 util.bytecount(objsize)))
482 489 retry = self.retry
483 490 while True:
484 491 try:
485 492 self._basictransfer(obj, action, localstore)
486 yield 1, obj.get('oid')
493 yield 1, obj.get(b'oid')
487 494 break
488 495 except socket.error as ex:
489 496 if retry > 0:
490 497 self.ui.note(
491 _('lfs: failed: %r (remaining retry %d)\n')
492 % (ex, retry))
498 _(b'lfs: failed: %r (remaining retry %d)\n')
499 % (stringutil.forcebytestr(ex), retry))
493 500 retry -= 1
494 501 continue
495 502 raise
496 503
497 504 # Until https multiplexing gets sorted out
498 if self.ui.configbool('experimental', 'lfs.worker-enable'):
505 if self.ui.configbool(b'experimental', b'lfs.worker-enable'):
499 506 oids = worker.worker(self.ui, 0.1, transfer, (),
500 sorted(objects, key=lambda o: o.get('oid')))
507 sorted(objects, key=lambda o: o.get(b'oid')))
501 508 else:
502 oids = transfer(sorted(objects, key=lambda o: o.get('oid')))
509 oids = transfer(sorted(objects, key=lambda o: o.get(b'oid')))
503 510
504 511 with self.ui.makeprogress(topic, total=total) as progress:
505 512 progress.update(0)
@@ -509,14 +516,14 class _gitlfsremote(object):
509 516 processed += sizes[oid]
510 517 blobs += 1
511 518 progress.update(processed)
512 self.ui.note(_('lfs: processed: %s\n') % oid)
519 self.ui.note(_(b'lfs: processed: %s\n') % oid)
513 520
514 521 if blobs > 0:
515 if action == 'upload':
516 self.ui.status(_('lfs: uploaded %d files (%s)\n')
522 if action == b'upload':
523 self.ui.status(_(b'lfs: uploaded %d files (%s)\n')
517 524 % (blobs, util.bytecount(processed)))
518 elif action == 'download':
519 self.ui.status(_('lfs: downloaded %d files (%s)\n')
525 elif action == b'download':
526 self.ui.status(_(b'lfs: downloaded %d files (%s)\n')
520 527 % (blobs, util.bytecount(processed)))
521 528
522 529 def __del__(self):
@@ -531,18 +538,18 class _dummyremote(object):
531 538 """Dummy store storing blobs to temp directory."""
532 539
533 540 def __init__(self, repo, url):
534 fullpath = repo.vfs.join('lfs', url.path)
541 fullpath = repo.vfs.join(b'lfs', url.path)
535 542 self.vfs = lfsvfs(fullpath)
536 543
537 544 def writebatch(self, pointers, fromstore):
538 545 for p in _deduplicate(pointers):
539 546 content = fromstore.read(p.oid(), verify=True)
540 with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
547 with self.vfs(p.oid(), b'wb', atomictemp=True) as fp:
541 548 fp.write(content)
542 549
543 550 def readbatch(self, pointers, tostore):
544 551 for p in _deduplicate(pointers):
545 with self.vfs(p.oid(), 'rb') as fp:
552 with self.vfs(p.oid(), b'rb') as fp:
546 553 tostore.download(p.oid(), fp)
547 554
548 555 class _nullremote(object):
@@ -570,13 +577,13 class _promptremote(object):
570 577 self._prompt()
571 578
572 579 def _prompt(self):
573 raise error.Abort(_('lfs.url needs to be configured'))
580 raise error.Abort(_(b'lfs.url needs to be configured'))
574 581
575 582 _storemap = {
576 'https': _gitlfsremote,
577 'http': _gitlfsremote,
578 'file': _dummyremote,
579 'null': _nullremote,
583 b'https': _gitlfsremote,
584 b'http': _gitlfsremote,
585 b'file': _dummyremote,
586 b'null': _nullremote,
580 587 None: _promptremote,
581 588 }
582 589
@@ -590,8 +597,8 def _deduplicate(pointers):
590 597 def _verify(oid, content):
591 598 realoid = node.hex(hashlib.sha256(content).digest())
592 599 if realoid != oid:
593 raise LfsCorruptionError(_('detected corrupt lfs object: %s') % oid,
594 hint=_('run hg verify'))
600 raise LfsCorruptionError(_(b'detected corrupt lfs object: %s') % oid,
601 hint=_(b'run hg verify'))
595 602
596 603 def remote(repo, remote=None):
597 604 """remotestore factory. return a store in _storemap depending on config
@@ -603,7 +610,7 def remote(repo, remote=None):
603 610
604 611 https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
605 612 """
606 lfsurl = repo.ui.config('lfs', 'url')
613 lfsurl = repo.ui.config(b'lfs', b'url')
607 614 url = util.url(lfsurl or '')
608 615 if lfsurl is None:
609 616 if remote:
@@ -616,7 +623,7 def remote(repo, remote=None):
616 623 else:
617 624 # TODO: investigate 'paths.remote:lfsurl' style path customization,
618 625 # and fall back to inferring from 'paths.remote' if unspecified.
619 path = repo.ui.config('paths', 'default') or ''
626 path = repo.ui.config(b'paths', b'default') or b''
620 627
621 628 defaulturl = util.url(path)
622 629
@@ -628,11 +635,11 def remote(repo, remote=None):
628 635 defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'
629 636
630 637 url = util.url(bytes(defaulturl))
631 repo.ui.note(_('lfs: assuming remote store: %s\n') % url)
638 repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url)
632 639
633 640 scheme = url.scheme
634 641 if scheme not in _storemap:
635 raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
642 raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme)
636 643 return _storemap[scheme](repo, url)
637 644
638 645 class LfsRemoteError(error.StorageError):
@@ -374,7 +374,7 Test a checksum failure during the proce
374 374 $LOCALIP - - [$ERRDATE$] HG error: res.setbodybytes(localstore.read(oid)) (glob)
375 375 $LOCALIP - - [$ERRDATE$] HG error: blob = self._read(self.vfs, oid, verify) (glob)
376 376 $LOCALIP - - [$ERRDATE$] HG error: blobstore._verify(oid, b'dummy content') (glob)
377 $LOCALIP - - [$ERRDATE$] HG error: hint=_('run hg verify')) (glob)
377 $LOCALIP - - [$ERRDATE$] HG error: hint=_(b'run hg verify')) (glob)
378 378 $LOCALIP - - [$ERRDATE$] HG error: LfsCorruptionError: detected corrupt lfs object: 276f73cfd75f9fb519810df5f5d96d6594ca2521abd86cbcd92122f7d51a1f3d (glob)
379 379 $LOCALIP - - [$ERRDATE$] HG error: (glob)
380 380
General Comments 0
You need to be logged in to leave comments. Login now