##// END OF EJS Templates
lfs: add a progress bar when searching for blobs to upload...
Matt Harbison -
r39306:37e56607 default
parent child Browse files
Show More
@@ -1,421 +1,428 b''
1 1 # wrapper.py - methods wrapping core mercurial logic
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial.node import bin, hex, nullid, short
14 14
15 15 from mercurial import (
16 16 error,
17 17 revlog,
18 18 util,
19 19 )
20 20
21 21 from mercurial.utils import (
22 22 stringutil,
23 23 )
24 24
25 25 from ..largefiles import lfutil
26 26
27 27 from . import (
28 28 blobstore,
29 29 pointer,
30 30 )
31 31
32 32 def allsupportedversions(orig, ui):
33 33 versions = orig(ui)
34 34 versions.add('03')
35 35 return versions
36 36
37 37 def _capabilities(orig, repo, proto):
38 38 '''Wrap server command to announce lfs server capability'''
39 39 caps = orig(repo, proto)
40 40 if util.safehasattr(repo.svfs, 'lfslocalblobstore'):
41 41 # XXX: change to 'lfs=serve' when separate git server isn't required?
42 42 caps.append('lfs')
43 43 return caps
44 44
45 45 def bypasscheckhash(self, text):
46 46 return False
47 47
48 48 def readfromstore(self, text):
49 49 """Read filelog content from local blobstore transform for flagprocessor.
50 50
51 51 Default transform for flagprocessor, returning contents from blobstore.
52 52 Returns a 2-tuple (text, validatehash) where validatehash is True as the
53 53 contents of the blobstore should be checked using checkhash.
54 54 """
55 55 p = pointer.deserialize(text)
56 56 oid = p.oid()
57 57 store = self.opener.lfslocalblobstore
58 58 if not store.has(oid):
59 59 p.filename = self.filename
60 60 self.opener.lfsremoteblobstore.readbatch([p], store)
61 61
62 62 # The caller will validate the content
63 63 text = store.read(oid, verify=False)
64 64
65 65 # pack hg filelog metadata
66 66 hgmeta = {}
67 67 for k in p.keys():
68 68 if k.startswith('x-hg-'):
69 69 name = k[len('x-hg-'):]
70 70 hgmeta[name] = p[k]
71 71 if hgmeta or text.startswith('\1\n'):
72 72 text = revlog.packmeta(hgmeta, text)
73 73
74 74 return (text, True)
75 75
76 76 def writetostore(self, text):
77 77 # hg filelog metadata (includes rename, etc)
78 78 hgmeta, offset = revlog.parsemeta(text)
79 79 if offset and offset > 0:
80 80 # lfs blob does not contain hg filelog metadata
81 81 text = text[offset:]
82 82
83 83 # git-lfs only supports sha256
84 84 oid = hex(hashlib.sha256(text).digest())
85 85 self.opener.lfslocalblobstore.write(oid, text)
86 86
87 87 # replace contents with metadata
88 88 longoid = 'sha256:%s' % oid
89 89 metadata = pointer.gitlfspointer(oid=longoid, size='%d' % len(text))
90 90
91 91 # by default, we expect the content to be binary. however, LFS could also
92 92 # be used for non-binary content. add a special entry for non-binary data.
93 93 # this will be used by filectx.isbinary().
94 94 if not stringutil.binary(text):
95 95 # not hg filelog metadata (affecting commit hash), no "x-hg-" prefix
96 96 metadata['x-is-binary'] = '0'
97 97
98 98 # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
99 99 if hgmeta is not None:
100 100 for k, v in hgmeta.iteritems():
101 101 metadata['x-hg-%s' % k] = v
102 102
103 103 rawtext = metadata.serialize()
104 104 return (rawtext, False)
105 105
106 106 def _islfs(rlog, node=None, rev=None):
107 107 if rev is None:
108 108 if node is None:
109 109 # both None - likely working copy content where node is not ready
110 110 return False
111 111 rev = rlog.rev(node)
112 112 else:
113 113 node = rlog.node(rev)
114 114 if node == nullid:
115 115 return False
116 116 flags = rlog.flags(rev)
117 117 return bool(flags & revlog.REVIDX_EXTSTORED)
118 118
119 119 def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
120 120 cachedelta=None, node=None,
121 121 flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
122 122 # The matcher isn't available if reposetup() wasn't called.
123 123 lfstrack = self.opener.options.get('lfstrack')
124 124
125 125 if lfstrack:
126 126 textlen = len(text)
127 127 # exclude hg rename meta from file size
128 128 meta, offset = revlog.parsemeta(text)
129 129 if offset:
130 130 textlen -= offset
131 131
132 132 if lfstrack(self.filename, textlen):
133 133 flags |= revlog.REVIDX_EXTSTORED
134 134
135 135 return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
136 136 node=node, flags=flags, **kwds)
137 137
138 138 def filelogrenamed(orig, self, node):
139 139 if _islfs(self, node):
140 140 rawtext = self.revision(node, raw=True)
141 141 if not rawtext:
142 142 return False
143 143 metadata = pointer.deserialize(rawtext)
144 144 if 'x-hg-copy' in metadata and 'x-hg-copyrev' in metadata:
145 145 return metadata['x-hg-copy'], bin(metadata['x-hg-copyrev'])
146 146 else:
147 147 return False
148 148 return orig(self, node)
149 149
150 150 def filelogsize(orig, self, rev):
151 151 if _islfs(self, rev=rev):
152 152 # fast path: use lfs metadata to answer size
153 153 rawtext = self.revision(rev, raw=True)
154 154 metadata = pointer.deserialize(rawtext)
155 155 return int(metadata['size'])
156 156 return orig(self, rev)
157 157
158 158 def filectxcmp(orig, self, fctx):
159 159 """returns True if text is different than fctx"""
160 160 # some fctx (e.g. hg-git) are not based on basefilectx and do not have islfs
161 161 if self.islfs() and getattr(fctx, 'islfs', lambda: False)():
162 162 # fast path: check LFS oid
163 163 p1 = pointer.deserialize(self.rawdata())
164 164 p2 = pointer.deserialize(fctx.rawdata())
165 165 return p1.oid() != p2.oid()
166 166 return orig(self, fctx)
167 167
168 168 def filectxisbinary(orig, self):
169 169 if self.islfs():
170 170 # fast path: use lfs metadata to answer isbinary
171 171 metadata = pointer.deserialize(self.rawdata())
172 172 # if lfs metadata says nothing, assume it's binary by default
173 173 return bool(int(metadata.get('x-is-binary', 1)))
174 174 return orig(self)
175 175
176 176 def filectxislfs(self):
177 177 return _islfs(self.filelog(), self.filenode())
178 178
179 179 def _updatecatformatter(orig, fm, ctx, matcher, path, decode):
180 180 orig(fm, ctx, matcher, path, decode)
181 181 fm.data(rawdata=ctx[path].rawdata())
182 182
183 183 def convertsink(orig, sink):
184 184 sink = orig(sink)
185 185 if sink.repotype == 'hg':
186 186 class lfssink(sink.__class__):
187 187 def putcommit(self, files, copies, parents, commit, source, revmap,
188 188 full, cleanp2):
189 189 pc = super(lfssink, self).putcommit
190 190 node = pc(files, copies, parents, commit, source, revmap, full,
191 191 cleanp2)
192 192
193 193 if 'lfs' not in self.repo.requirements:
194 194 ctx = self.repo[node]
195 195
196 196 # The file list may contain removed files, so check for
197 197 # membership before assuming it is in the context.
198 198 if any(f in ctx and ctx[f].islfs() for f, n in files):
199 199 self.repo.requirements.add('lfs')
200 200 self.repo._writerequirements()
201 201
202 202 # Permanently enable lfs locally
203 203 self.repo.vfs.append(
204 204 'hgrc', util.tonativeeol('\n[extensions]\nlfs=\n'))
205 205
206 206 return node
207 207
208 208 sink.__class__ = lfssink
209 209
210 210 return sink
211 211
212 212 def vfsinit(orig, self, othervfs):
213 213 orig(self, othervfs)
214 214 # copy lfs related options
215 215 for k, v in othervfs.options.items():
216 216 if k.startswith('lfs'):
217 217 self.options[k] = v
218 218 # also copy lfs blobstores. note: this can run before reposetup, so lfs
219 219 # blobstore attributes are not always ready at this time.
220 220 for name in ['lfslocalblobstore', 'lfsremoteblobstore']:
221 221 if util.safehasattr(othervfs, name):
222 222 setattr(self, name, getattr(othervfs, name))
223 223
224 224 def hgclone(orig, ui, opts, *args, **kwargs):
225 225 result = orig(ui, opts, *args, **kwargs)
226 226
227 227 if result is not None:
228 228 sourcerepo, destrepo = result
229 229 repo = destrepo.local()
230 230
231 231 # When cloning to a remote repo (like through SSH), no repo is available
232 232 # from the peer. Therefore the hgrc can't be updated.
233 233 if not repo:
234 234 return result
235 235
236 236 # If lfs is required for this repo, permanently enable it locally
237 237 if 'lfs' in repo.requirements:
238 238 repo.vfs.append('hgrc',
239 239 util.tonativeeol('\n[extensions]\nlfs=\n'))
240 240
241 241 return result
242 242
243 243 def hgpostshare(orig, sourcerepo, destrepo, bookmarks=True, defaultpath=None):
244 244 orig(sourcerepo, destrepo, bookmarks, defaultpath)
245 245
246 246 # If lfs is required for this repo, permanently enable it locally
247 247 if 'lfs' in destrepo.requirements:
248 248 destrepo.vfs.append('hgrc', util.tonativeeol('\n[extensions]\nlfs=\n'))
249 249
250 250 def _prefetchfiles(repo, revs, match):
251 251 """Ensure that required LFS blobs are present, fetching them as a group if
252 252 needed."""
253 253 if not util.safehasattr(repo.svfs, 'lfslocalblobstore'):
254 254 return
255 255
256 256 pointers = []
257 257 oids = set()
258 258 localstore = repo.svfs.lfslocalblobstore
259 259
260 260 for rev in revs:
261 261 ctx = repo[rev]
262 262 for f in ctx.walk(match):
263 263 p = pointerfromctx(ctx, f)
264 264 if p and p.oid() not in oids and not localstore.has(p.oid()):
265 265 p.filename = f
266 266 pointers.append(p)
267 267 oids.add(p.oid())
268 268
269 269 if pointers:
270 270 # Recalculating the repo store here allows 'paths.default' that is set
271 271 # on the repo by a clone command to be used for the update.
272 272 blobstore.remote(repo).readbatch(pointers, localstore)
273 273
274 274 def _canskipupload(repo):
275 275 # Skip if this hasn't been passed to reposetup()
276 276 if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
277 277 return True
278 278
279 279 # if remotestore is a null store, upload is a no-op and can be skipped
280 280 return isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
281 281
282 282 def candownload(repo):
283 283 # Skip if this hasn't been passed to reposetup()
284 284 if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
285 285 return False
286 286
287 287 # if remotestore is a null store, downloads will lead to nothing
288 288 return not isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
289 289
290 290 def uploadblobsfromrevs(repo, revs):
291 291 '''upload lfs blobs introduced by revs
292 292
293 293 Note: also used by other extensions, e.g. infinitepush. Avoid renaming.
294 294 '''
295 295 if _canskipupload(repo):
296 296 return
297 297 pointers = extractpointers(repo, revs)
298 298 uploadblobs(repo, pointers)
299 299
300 300 def prepush(pushop):
301 301 """Prepush hook.
302 302
303 303 Read through the revisions to push, looking for filelog entries that can be
304 304 deserialized into metadata so that we can block the push on their upload to
305 305 the remote blobstore.
306 306 """
307 307 return uploadblobsfromrevs(pushop.repo, pushop.outgoing.missing)
308 308
309 309 def push(orig, repo, remote, *args, **kwargs):
310 310 """bail on push if the extension isn't enabled on remote when needed, and
311 311 update the remote store based on the destination path."""
312 312 if 'lfs' in repo.requirements:
313 313 # If the remote peer is for a local repo, the requirement tests in the
314 314 # base class method enforce lfs support. Otherwise, some revisions in
315 315 # this repo use lfs, and the remote repo needs the extension loaded.
316 316 if not remote.local() and not remote.capable('lfs'):
317 317 # This is a copy of the message in exchange.push() when requirements
318 318 # are missing between local repos.
319 319 m = _("required features are not supported in the destination: %s")
320 320 raise error.Abort(m % 'lfs',
321 321 hint=_('enable the lfs extension on the server'))
322 322
323 323 # Repositories where this extension is disabled won't have the field.
324 324 # But if there's a requirement, then the extension must be loaded AND
325 325 # there may be blobs to push.
326 326 remotestore = repo.svfs.lfsremoteblobstore
327 327 try:
328 328 repo.svfs.lfsremoteblobstore = blobstore.remote(repo, remote.url())
329 329 return orig(repo, remote, *args, **kwargs)
330 330 finally:
331 331 repo.svfs.lfsremoteblobstore = remotestore
332 332 else:
333 333 return orig(repo, remote, *args, **kwargs)
334 334
335 335 def writenewbundle(orig, ui, repo, source, filename, bundletype, outgoing,
336 336 *args, **kwargs):
337 337 """upload LFS blobs added by outgoing revisions on 'hg bundle'"""
338 338 uploadblobsfromrevs(repo, outgoing.missing)
339 339 return orig(ui, repo, source, filename, bundletype, outgoing, *args,
340 340 **kwargs)
341 341
342 342 def extractpointers(repo, revs):
343 343 """return a list of lfs pointers added by given revs"""
344 344 repo.ui.debug('lfs: computing set of blobs to upload\n')
345 345 pointers = {}
346 for r in revs:
347 ctx = repo[r]
348 for p in pointersfromctx(ctx).values():
349 pointers[p.oid()] = p
350 return sorted(pointers.values())
346
347 progress = repo.ui.makeprogress(_('lfs search'), _('changesets'), len(revs))
348
349 try:
350 for r in revs:
351 ctx = repo[r]
352 for p in pointersfromctx(ctx).values():
353 pointers[p.oid()] = p
354 progress.increment()
355 return sorted(pointers.values())
356 finally:
357 progress.complete()
351 358
352 359 def pointerfromctx(ctx, f, removed=False):
353 360 """return a pointer for the named file from the given changectx, or None if
354 361 the file isn't LFS.
355 362
356 363 Optionally, the pointer for a file deleted from the context can be returned.
357 364 Since no such pointer is actually stored, and to distinguish from a non LFS
358 365 file, this pointer is represented by an empty dict.
359 366 """
360 367 _ctx = ctx
361 368 if f not in ctx:
362 369 if not removed:
363 370 return None
364 371 if f in ctx.p1():
365 372 _ctx = ctx.p1()
366 373 elif f in ctx.p2():
367 374 _ctx = ctx.p2()
368 375 else:
369 376 return None
370 377 fctx = _ctx[f]
371 378 if not _islfs(fctx.filelog(), fctx.filenode()):
372 379 return None
373 380 try:
374 381 p = pointer.deserialize(fctx.rawdata())
375 382 if ctx == _ctx:
376 383 return p
377 384 return {}
378 385 except pointer.InvalidPointer as ex:
379 386 raise error.Abort(_('lfs: corrupted pointer (%s@%s): %s\n')
380 387 % (f, short(_ctx.node()), ex))
381 388
382 389 def pointersfromctx(ctx, removed=False):
383 390 """return a dict {path: pointer} for given single changectx.
384 391
385 392 If ``removed`` == True and the LFS file was removed from ``ctx``, the value
386 393 stored for the path is an empty dict.
387 394 """
388 395 result = {}
389 396 for f in ctx.files():
390 397 p = pointerfromctx(ctx, f, removed=removed)
391 398 if p is not None:
392 399 result[f] = p
393 400 return result
394 401
395 402 def uploadblobs(repo, pointers):
396 403 """upload given pointers from local blobstore"""
397 404 if not pointers:
398 405 return
399 406
400 407 remoteblob = repo.svfs.lfsremoteblobstore
401 408 remoteblob.writebatch(pointers, repo.svfs.lfslocalblobstore)
402 409
403 410 def upgradefinishdatamigration(orig, ui, srcrepo, dstrepo, requirements):
404 411 orig(ui, srcrepo, dstrepo, requirements)
405 412
406 413 # Skip if this hasn't been passed to reposetup()
407 414 if (util.safehasattr(srcrepo.svfs, 'lfslocalblobstore') and
408 415 util.safehasattr(dstrepo.svfs, 'lfslocalblobstore')):
409 416 srclfsvfs = srcrepo.svfs.lfslocalblobstore.vfs
410 417 dstlfsvfs = dstrepo.svfs.lfslocalblobstore.vfs
411 418
412 419 for dirpath, dirs, files in srclfsvfs.walk():
413 420 for oid in files:
414 421 ui.write(_('copying lfs blob %s\n') % oid)
415 422 lfutil.link(srclfsvfs.join(oid), dstlfsvfs.join(oid))
416 423
417 424 def upgraderequirements(orig, repo):
418 425 reqs = orig(repo)
419 426 if 'lfs' in repo.requirements:
420 427 reqs.add('lfs')
421 428 return reqs
General Comments 0
You need to be logged in to leave comments. Login now