diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py --- a/hgext/lfs/blobstore.py +++ b/hgext/lfs/blobstore.py @@ -7,6 +7,7 @@ from __future__ import absolute_import +import hashlib import json import os import re @@ -99,8 +100,11 @@ class local(object): self.cachevfs = lfsvfs(usercache) self.ui = repo.ui - def write(self, oid, data): + def write(self, oid, data, verify=True): """Write blob to local blobstore.""" + if verify: + _verify(oid, data) + with self.vfs(oid, 'wb', atomictemp=True) as fp: fp.write(data) @@ -110,14 +114,23 @@ class local(object): self.ui.note(_('lfs: adding %s to the usercache\n') % oid) lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid)) - def read(self, oid): + def read(self, oid, verify=True): """Read blob from local blobstore.""" if not self.vfs.exists(oid): + blob = self._read(self.cachevfs, oid, verify) + self.ui.note(_('lfs: found %s in the usercache\n') % oid) lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid)) - self.ui.note(_('lfs: found %s in the usercache\n') % oid) else: self.ui.note(_('lfs: found %s in the local lfs store\n') % oid) - return self.vfs.read(oid) + blob = self._read(self.vfs, oid, verify) + return blob + + def _read(self, vfs, oid, verify): + """Read blob (after verifying) from the given store""" + blob = vfs.read(oid) + if verify: + _verify(oid, blob) + return blob def has(self, oid): """Returns True if the local blobstore contains the requested blob, @@ -233,6 +246,8 @@ class _gitlfsremote(object): request = util.urlreq.request(href) if action == 'upload': # If uploading blobs, read data from local blobstore. + with localstore.vfs(oid) as fp: + _verifyfile(oid, fp) request.data = filewithprogress(localstore.vfs(oid), None) request.get_method = lambda: 'PUT' @@ -253,7 +268,7 @@ class _gitlfsremote(object): if action == 'download': # If downloading blobs, store downloaded data to local blobstore - localstore.write(oid, response) + localstore.write(oid, response, verify=True) def _batch(self, pointers, localstore, action): if action not in ['upload', 'download']: @@ -324,14 +339,14 @@ class _dummyremote(object): def writebatch(self, pointers, fromstore): for p in pointers: - content = fromstore.read(p.oid()) + content = fromstore.read(p.oid(), verify=True) with self.vfs(p.oid(), 'wb', atomictemp=True) as fp: fp.write(content) def readbatch(self, pointers, tostore): for p in pointers: content = self.vfs.read(p.oid()) - tostore.write(p.oid(), content) + tostore.write(p.oid(), content, verify=True) class _nullremote(object): """Null store storing blobs to /dev/null.""" @@ -368,6 +383,24 @@ class _promptremote(object): None: _promptremote, } +def _verify(oid, content): + realoid = hashlib.sha256(content).hexdigest() + if realoid != oid: + raise error.Abort(_('detected corrupt lfs object: %s') % oid, + hint=_('run hg verify')) + +def _verifyfile(oid, fp): + sha256 = hashlib.sha256() + while True: + data = fp.read(1024 * 1024) + if not data: + break + sha256.update(data) + realoid = sha256.hexdigest() + if realoid != oid: + raise error.Abort(_('detected corrupt lfs object: %s') % oid, + hint=_('run hg verify')) + def remote(repo): """remotestore factory. return a store in _storemap depending on config""" defaulturl = '' diff --git a/hgext/lfs/wrapper.py b/hgext/lfs/wrapper.py --- a/hgext/lfs/wrapper.py +++ b/hgext/lfs/wrapper.py @@ -54,7 +54,9 @@ def readfromstore(self, text): if not store.has(oid): p.filename = getattr(self, 'indexfile', None) self.opener.lfsremoteblobstore.readbatch([p], store) - text = store.read(oid) + + # The caller will validate the content + text = store.read(oid, verify=False) # pack hg filelog metadata hgmeta = {} @@ -76,7 +78,7 @@ def writetostore(self, text): # git-lfs only supports sha256 oid = hashlib.sha256(text).hexdigest() - self.opener.lfslocalblobstore.write(oid, text) + self.opener.lfslocalblobstore.write(oid, text, verify=False) # replace contents with metadata longoid = 'sha256:%s' % oid diff --git a/tests/test-lfs-test-server.t b/tests/test-lfs-test-server.t --- a/tests/test-lfs-test-server.t +++ b/tests/test-lfs-test-server.t @@ -105,16 +105,15 @@ Clear the cache to force a download lfs: found 37a65ab78d5ecda767e8622c248b5dbff1e68b1678ab0e730d5eb8601ec8ad19 in the local lfs store 3 files updated, 0 files merged, 0 files removed, 0 files unresolved -Test a corrupt file download, but clear the cache first to force a download - -XXX: ideally, the validation would occur before polluting the usercache and -local store, with a clearer error message. +Test a corrupt file download, but clear the cache first to force a download. $ rm -rf `hg config lfs.usercache` $ cp $TESTTMP/lfs-content/d1/1e/1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 blob $ echo 'damage' > $TESTTMP/lfs-content/d1/1e/1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 $ rm ../repo1/.hg/store/lfs/objects/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 $ rm ../repo1/* + +XXX: suggesting `hg verify` won't help with a corrupt file on the lfs server. $ hg --repo ../repo1 update -C tip -v resolving manifests getting a @@ -123,18 +122,16 @@ local store, with a clearer error messag lfs: found 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b in the local lfs store getting c lfs: downloading d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 (19 bytes) - lfs: adding d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 to the usercache - lfs: processed: d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 - lfs: found d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 in the local lfs store - abort: integrity check failed on data/c.i:0! + abort: detected corrupt lfs object: d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 + (run hg verify) [255] -BUG: the corrupted blob was added to the usercache and local store +The corrupted blob is not added to the usercache or local store - $ cat ../repo1/.hg/store/lfs/objects/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 | $TESTDIR/f --sha256 - sha256=fa82ca222fc9813afad3559637960bf311170cdd80ed35287f4623eb2320a660 - $ cat `hg config lfs.usercache`/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 | $TESTDIR/f --sha256 - sha256=fa82ca222fc9813afad3559637960bf311170cdd80ed35287f4623eb2320a660 + $ test -f ../repo1/.hg/store/lfs/objects/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 + [1] + $ test -f `hg config lfs.usercache`/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 + [1] $ cp blob $TESTTMP/lfs-content/d1/1e/1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 Test a corrupted file upload @@ -146,7 +143,8 @@ Test a corrupted file upload pushing to ../repo1 searching for changes lfs: uploading e659058e26b07b39d2a9c7145b3f99b41f797b6621c8076600e9cb7ee88291f0 (17 bytes) - abort: HTTP error: HTTP Error 500: Internal Server Error (oid=e659058e26b07b39d2a9c7145b3f99b41f797b6621c8076600e9cb7ee88291f0, action=upload)! + abort: detected corrupt lfs object: e659058e26b07b39d2a9c7145b3f99b41f797b6621c8076600e9cb7ee88291f0 + (run hg verify) [255] Check error message when the remote missed a blob: diff --git a/tests/test-lfs.t b/tests/test-lfs.t --- a/tests/test-lfs.t +++ b/tests/test-lfs.t @@ -704,18 +704,17 @@ Damaging a file required by the update d updating to branch default resolving manifests getting l - lfs: adding 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b to the usercache - lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store - abort: integrity check failed on data/l.i:3! + abort: detected corrupt lfs object: 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b + (run hg verify) [255] -BUG: A corrupted lfs blob either shouldn't be created after a transfer from a -file://remotestore, or it shouldn't be left behind. +A corrupted lfs blob is not transferred from a file://remotestore to the +usercache or local store. - $ cat emptycache/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256 - sha256=40f67c7e91d554db4bc500f8f62c2e40f9f61daa5b62388e577bbae26f5396ff - $ cat fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256 - sha256=40f67c7e91d554db4bc500f8f62c2e40f9f61daa5b62388e577bbae26f5396ff + $ test -f emptycache/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b + [1] + $ test -f fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b + [1] $ hg -R fromcorrupt2 verify checking changesets @@ -723,14 +722,13 @@ file://remotestore, or it shouldn't be l crosschecking files in changesets and manifests checking files l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0 - l@4: unpacking 6f1ff1f39c11: integrity check failed on data/l.i:3 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0 4 files, 5 changesets, 10 total revisions - 3 integrity errors encountered! + 2 integrity errors encountered! (first damaged changeset appears to be 0) [1] -BUG: push will happily send corrupt files upstream. (The alternate dummy remote +Corrupt local files are not sent upstream. (The alternate dummy remote avoids the corrupt lfs object in the original remote.) $ mkdir $TESTTMP/dummy-remote2 @@ -742,18 +740,9 @@ avoids the corrupt lfs object in the ori lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store - 5 changesets found - uncompressed size of bundle content: - 997 (changelog) - 1032 (manifests) - 841 l - 272 large - 788 s - 139 small - adding changesets - adding manifests - adding file changes - added 5 changesets with 10 changes to 4 files + abort: detected corrupt lfs object: 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e + (run hg verify) + [255] $ hg -R fromcorrupt2 --config lfs.url=file:///$TESTTMP/dummy-remote2 verify -v repository uses revlog format 1 @@ -764,27 +753,21 @@ avoids the corrupt lfs object in the ori lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store - l@4: unpacking 6f1ff1f39c11: integrity check failed on data/l.i:3 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store 4 files, 5 changesets, 10 total revisions - 3 integrity errors encountered! + 2 integrity errors encountered! (first damaged changeset appears to be 0) [1] $ cat $TESTTMP/dummy-remote2/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256 - sha256=40f67c7e91d554db4bc500f8f62c2e40f9f61daa5b62388e577bbae26f5396ff + sha256=22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b $ cat fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256 - sha256=40f67c7e91d554db4bc500f8f62c2e40f9f61daa5b62388e577bbae26f5396ff - - $ cat $TESTTMP/dummy-remote2/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e - LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS - damage - $ cat $TESTTMP/dummy-remote2/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b - RESTORE-TO-BE-LARGE - damage + sha256=22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b + $ test -f $TESTTMP/dummy-remote2/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e + [1] Accessing a corrupt file will complain