largefiles: avoid walking full manifest...
Martin von Zweigbergk
r41445:4a409c19 default
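
The hunk below is the substance of this change: _getchangedfiles() used to pull the full manifests of both parents and compare every entry against the changeset's own manifest, whereas the new code asks each parent manifest for its diff() against the current one, so only differing entries are visited. What follows is a minimal illustrative sketch, not Mercurial code: plain dicts stand in for manifest objects, and diff_manifests() plays the role of manifest.diff() (which in the real code returns a mapping keyed by the differing paths, and with tree manifests can skip unchanged subtrees entirely).

    # Illustrative sketch only: dicts stand in for manifest objects.
    def diff_manifests(old, new):
        """Paths whose entries differ between two manifest-like dicts."""
        return {p for p in set(old) | set(new) if old.get(p) != new.get(p)}

    def changed_files_old(mc, mp1, mp2):
        # previous approach: walk every entry of all three manifests
        files = (set(mp1) | set(mp2)) - set(mc)
        for f in mc:
            if mc[f] != mp1.get(f) or mc[f] != mp2.get(f):
                files.add(f)
        return files

    def changed_files_new(mc, parent_manifests):
        # new approach: only the entries that actually differ are visited
        files = set()
        for mp in parent_manifests:
            files |= diff_manifests(mp, mc)
        return files

    mc = {'a': 1, 'b': 2, 'c': 3}
    mp1 = {'a': 1, 'b': 1}
    mp2 = {'a': 1, 'b': 2, 'd': 4}
    assert changed_files_old(mc, mp1, mp2) == changed_files_new(mc, [mp1, mp2])

With flat dicts the two helpers agree; the payoff of the diff()-based form comes from tree manifests, where identical subtrees never have to be expanded.
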
@@ -1,608 +1,605 @@
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10 from __future__ import absolute_import
11 11
12 12 import errno
13 13 import hashlib
14 14 import os
15 15 import shutil
16 16
17 17 from mercurial.i18n import _
18 18
19 19 from mercurial import (
20 20 cmdutil,
21 21 context,
22 22 error,
23 23 exthelper,
24 24 hg,
25 25 lock,
26 26 match as matchmod,
27 27 node,
28 28 pycompat,
29 29 scmutil,
30 30 util,
31 31 )
32 32
33 33 from ..convert import (
34 34 convcmd,
35 35 filemap,
36 36 )
37 37
38 38 from . import (
39 39 lfutil,
40 40 storefactory
41 41 )
42 42
43 43 release = lock.release
44 44
45 45 # -- Commands ----------------------------------------------------------
46 46
47 47 eh = exthelper.exthelper()
48 48
49 49 @eh.command('lfconvert',
50 50 [('s', 'size', '',
51 51 _('minimum size (MB) for files to be converted as largefiles'), 'SIZE'),
52 52 ('', 'to-normal', False,
53 53 _('convert from a largefiles repo to a normal repo')),
54 54 ],
55 55 _('hg lfconvert SOURCE DEST [FILE ...]'),
56 56 norepo=True,
57 57 inferrepo=True)
58 58 def lfconvert(ui, src, dest, *pats, **opts):
59 59 '''convert a normal repository to a largefiles repository
60 60
61 61 Convert repository SOURCE to a new repository DEST, identical to
62 62 SOURCE except that certain files will be converted as largefiles:
63 63 specifically, any file that matches any PATTERN *or* whose size is
64 64 above the minimum size threshold is converted as a largefile. The
65 65 size used to determine whether or not to track a file as a
66 66 largefile is the size of the first version of the file. The
67 67 minimum size can be specified either with --size or in
68 68 configuration as ``largefiles.size``.
69 69
70 70 After running this command you will need to make sure that
71 71 largefiles is enabled anywhere you intend to push the new
72 72 repository.
73 73
74 74 Use --to-normal to convert largefiles back to normal files; after
75 75 this, the DEST repository can be used without largefiles at all.'''
76 76
77 77 opts = pycompat.byteskwargs(opts)
78 78 if opts['to_normal']:
79 79 tolfile = False
80 80 else:
81 81 tolfile = True
82 82 size = lfutil.getminsize(ui, True, opts.get('size'), default=None)
83 83
84 84 if not hg.islocal(src):
85 85 raise error.Abort(_('%s is not a local Mercurial repo') % src)
86 86 if not hg.islocal(dest):
87 87 raise error.Abort(_('%s is not a local Mercurial repo') % dest)
88 88
89 89 rsrc = hg.repository(ui, src)
90 90 ui.status(_('initializing destination %s\n') % dest)
91 91 rdst = hg.repository(ui, dest, create=True)
92 92
93 93 success = False
94 94 dstwlock = dstlock = None
95 95 try:
96 96 # Get a list of all changesets in the source. The easy way to do this
97 97 # is to simply walk the changelog, using changelog.nodesbetween().
98 98 # Take a look at mercurial/revlog.py:639 for more details.
99 99 # Use a generator instead of a list to decrease memory usage
100 100 ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
101 101 rsrc.heads())[0])
102 102 revmap = {node.nullid: node.nullid}
103 103 if tolfile:
104 104 # Lock destination to prevent modification while it is converted to.
105 105 # Don't need to lock src because we are just reading from its
106 106 # history which can't change.
107 107 dstwlock = rdst.wlock()
108 108 dstlock = rdst.lock()
109 109
110 110 lfiles = set()
111 111 normalfiles = set()
112 112 if not pats:
113 113 pats = ui.configlist(lfutil.longname, 'patterns')
114 114 if pats:
115 115 matcher = matchmod.match(rsrc.root, '', list(pats))
116 116 else:
117 117 matcher = None
118 118
119 119 lfiletohash = {}
120 120 with ui.makeprogress(_('converting revisions'),
121 121 unit=_('revisions'),
122 122 total=rsrc['tip'].rev()) as progress:
123 123 for ctx in ctxs:
124 124 progress.update(ctx.rev())
125 125 _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
126 126 lfiles, normalfiles, matcher, size, lfiletohash)
127 127
128 128 if rdst.wvfs.exists(lfutil.shortname):
129 129 rdst.wvfs.rmtree(lfutil.shortname)
130 130
131 131 for f in lfiletohash.keys():
132 132 if rdst.wvfs.isfile(f):
133 133 rdst.wvfs.unlink(f)
134 134 try:
135 135 rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
136 136 except OSError:
137 137 pass
138 138
139 139 # If there were any files converted to largefiles, add largefiles
140 140 # to the destination repository's requirements.
141 141 if lfiles:
142 142 rdst.requirements.add('largefiles')
143 143 rdst._writerequirements()
144 144 else:
145 145 class lfsource(filemap.filemap_source):
146 146 def __init__(self, ui, source):
147 147 super(lfsource, self).__init__(ui, source, None)
148 148 self.filemapper.rename[lfutil.shortname] = '.'
149 149
150 150 def getfile(self, name, rev):
151 151 realname, realrev = rev
152 152 f = super(lfsource, self).getfile(name, rev)
153 153
154 154 if (not realname.startswith(lfutil.shortnameslash)
155 155 or f[0] is None):
156 156 return f
157 157
158 158 # Substitute in the largefile data for the hash
159 159 hash = f[0].strip()
160 160 path = lfutil.findfile(rsrc, hash)
161 161
162 162 if path is None:
163 163 raise error.Abort(_("missing largefile for '%s' in %s")
164 164 % (realname, realrev))
165 165 return util.readfile(path), f[1]
166 166
167 167 class converter(convcmd.converter):
168 168 def __init__(self, ui, source, dest, revmapfile, opts):
169 169 src = lfsource(ui, source)
170 170
171 171 super(converter, self).__init__(ui, src, dest, revmapfile,
172 172 opts)
173 173
174 174 found, missing = downloadlfiles(ui, rsrc)
175 175 if missing != 0:
176 176 raise error.Abort(_("all largefiles must be present locally"))
177 177
178 178 orig = convcmd.converter
179 179 convcmd.converter = converter
180 180
181 181 try:
182 182 convcmd.convert(ui, src, dest, source_type='hg', dest_type='hg')
183 183 finally:
184 184 convcmd.converter = orig
185 185 success = True
186 186 finally:
187 187 if tolfile:
188 188 rdst.dirstate.clear()
189 189 release(dstlock, dstwlock)
190 190 if not success:
191 191 # we failed, remove the new directory
192 192 shutil.rmtree(rdst.root)
193 193
194 194 def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
195 195 matcher, size, lfiletohash):
196 196 # Convert src parents to dst parents
197 197 parents = _convertparents(ctx, revmap)
198 198
199 199 # Generate list of changed files
200 200 files = _getchangedfiles(ctx, parents)
201 201
202 202 dstfiles = []
203 203 for f in files:
204 204 if f not in lfiles and f not in normalfiles:
205 205 islfile = _islfile(f, ctx, matcher, size)
206 206 # If this file was renamed or copied then copy
207 207 # the largefile-ness of its predecessor
208 208 if f in ctx.manifest():
209 209 fctx = ctx.filectx(f)
210 210 renamed = fctx.renamed()
211 211 if renamed is None:
212 212 # the code below assumes renamed to be a boolean or a list
213 213 # and won't quite work with the value None
214 214 renamed = False
215 215 renamedlfile = renamed and renamed[0] in lfiles
216 216 islfile |= renamedlfile
217 217 if 'l' in fctx.flags():
218 218 if renamedlfile:
219 219 raise error.Abort(
220 220 _('renamed/copied largefile %s becomes symlink')
221 221 % f)
222 222 islfile = False
223 223 if islfile:
224 224 lfiles.add(f)
225 225 else:
226 226 normalfiles.add(f)
227 227
228 228 if f in lfiles:
229 229 fstandin = lfutil.standin(f)
230 230 dstfiles.append(fstandin)
231 231 # largefile in manifest if it has not been removed/renamed
232 232 if f in ctx.manifest():
233 233 fctx = ctx.filectx(f)
234 234 if 'l' in fctx.flags():
235 235 renamed = fctx.renamed()
236 236 if renamed and renamed[0] in lfiles:
237 237 raise error.Abort(_('largefile %s becomes symlink') % f)
238 238
239 239 # largefile was modified, update standins
240 240 m = hashlib.sha1('')
241 241 m.update(ctx[f].data())
242 242 hash = node.hex(m.digest())
243 243 if f not in lfiletohash or lfiletohash[f] != hash:
244 244 rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
245 245 executable = 'x' in ctx[f].flags()
246 246 lfutil.writestandin(rdst, fstandin, hash,
247 247 executable)
248 248 lfiletohash[f] = hash
249 249 else:
250 250 # normal file
251 251 dstfiles.append(f)
252 252
253 253 def getfilectx(repo, memctx, f):
254 254 srcfname = lfutil.splitstandin(f)
255 255 if srcfname is not None:
256 256 # if the file isn't in the manifest then it was removed
257 257 # or renamed, return None to indicate this
258 258 try:
259 259 fctx = ctx.filectx(srcfname)
260 260 except error.LookupError:
261 261 return None
262 262 renamed = fctx.renamed()
263 263 if renamed:
264 264 # standin is always a largefile because largefile-ness
265 265 # doesn't change after rename or copy
266 266 renamed = lfutil.standin(renamed[0])
267 267
268 268 return context.memfilectx(repo, memctx, f,
269 269 lfiletohash[srcfname] + '\n',
270 270 'l' in fctx.flags(), 'x' in fctx.flags(),
271 271 renamed)
272 272 else:
273 273 return _getnormalcontext(repo, ctx, f, revmap)
274 274
275 275 # Commit
276 276 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
277 277
278 278 def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
279 279 mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
280 280 getfilectx, ctx.user(), ctx.date(), ctx.extra())
281 281 ret = rdst.commitctx(mctx)
282 282 lfutil.copyalltostore(rdst, ret)
283 283 rdst.setparents(ret)
284 284 revmap[ctx.node()] = rdst.changelog.tip()
285 285
286 286 # Generate list of changed files
287 287 def _getchangedfiles(ctx, parents):
288 288 files = set(ctx.files())
289 289 if node.nullid not in parents:
290 290 mc = ctx.manifest()
291 mp1 = ctx.p1().manifest()
292 mp2 = ctx.p2().manifest()
293 files |= (set(mp1) | set(mp2)) - set(mc)
294 for f in mc:
295 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
296 files.add(f)
291 for pctx in ctx.parents():
292 for fn in pctx.manifest().diff(mc):
293 files.add(fn)
297 294 return files
298 295
299 296 # Convert src parents to dst parents
300 297 def _convertparents(ctx, revmap):
301 298 parents = []
302 299 for p in ctx.parents():
303 300 parents.append(revmap[p.node()])
304 301 while len(parents) < 2:
305 302 parents.append(node.nullid)
306 303 return parents
307 304
308 305 # Get memfilectx for a normal file
309 306 def _getnormalcontext(repo, ctx, f, revmap):
310 307 try:
311 308 fctx = ctx.filectx(f)
312 309 except error.LookupError:
313 310 return None
314 311 renamed = fctx.renamed()
315 312 if renamed:
316 313 renamed = renamed[0]
317 314
318 315 data = fctx.data()
319 316 if f == '.hgtags':
320 317 data = _converttags (repo.ui, revmap, data)
321 318 return context.memfilectx(repo, ctx, f, data, 'l' in fctx.flags(),
322 319 'x' in fctx.flags(), renamed)
323 320
324 321 # Remap tag data using a revision map
325 322 def _converttags(ui, revmap, data):
326 323 newdata = []
327 324 for line in data.splitlines():
328 325 try:
329 326 id, name = line.split(' ', 1)
330 327 except ValueError:
331 328 ui.warn(_('skipping incorrectly formatted tag %s\n')
332 329 % line)
333 330 continue
334 331 try:
335 332 newid = node.bin(id)
336 333 except TypeError:
337 334 ui.warn(_('skipping incorrectly formatted id %s\n')
338 335 % id)
339 336 continue
340 337 try:
341 338 newdata.append('%s %s\n' % (node.hex(revmap[newid]),
342 339 name))
343 340 except KeyError:
344 341 ui.warn(_('no mapping for id %s\n') % id)
345 342 continue
346 343 return ''.join(newdata)
347 344
348 345 def _islfile(file, ctx, matcher, size):
349 346 '''Return true if file should be considered a largefile, i.e.
350 347 matcher matches it or it is larger than size.'''
351 348 # never store special .hg* files as largefiles
352 349 if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
353 350 return False
354 351 if matcher and matcher(file):
355 352 return True
356 353 try:
357 354 return ctx.filectx(file).size() >= size * 1024 * 1024
358 355 except error.LookupError:
359 356 return False
360 357
361 358 def uploadlfiles(ui, rsrc, rdst, files):
362 359 '''upload largefiles to the central store'''
363 360
364 361 if not files:
365 362 return
366 363
367 364 store = storefactory.openstore(rsrc, rdst, put=True)
368 365
369 366 at = 0
370 367 ui.debug("sending statlfile command for %d largefiles\n" % len(files))
371 368 retval = store.exists(files)
372 369 files = [h for h in files if not retval[h]]
373 370 ui.debug("%d largefiles need to be uploaded\n" % len(files))
374 371
375 372 with ui.makeprogress(_('uploading largefiles'), unit=_('files'),
376 373 total=len(files)) as progress:
377 374 for hash in files:
378 375 progress.update(at)
379 376 source = lfutil.findfile(rsrc, hash)
380 377 if not source:
381 378 raise error.Abort(_('largefile %s missing from store'
382 379 ' (needs to be uploaded)') % hash)
383 380 # XXX check for errors here
384 381 store.put(source, hash)
385 382 at += 1
386 383
387 384 def verifylfiles(ui, repo, all=False, contents=False):
388 385 '''Verify that every largefile revision in the current changeset
389 386 exists in the central store. With --contents, also verify that
390 387 the contents of each local largefile file revision are correct (SHA-1 hash
391 388 matches the revision ID). With --all, check every changeset in
392 389 this repository.'''
393 390 if all:
394 391 revs = repo.revs('all()')
395 392 else:
396 393 revs = ['.']
397 394
398 395 store = storefactory.openstore(repo)
399 396 return store.verify(revs, contents=contents)
400 397
401 398 def cachelfiles(ui, repo, node, filelist=None):
402 399 '''cachelfiles ensures that all largefiles needed by the specified revision
403 400 are present in the repository's largefile cache.
404 401
405 402 returns a tuple (cached, missing). cached is the list of files downloaded
406 403 by this operation; missing is the list of files that were needed but could
407 404 not be found.'''
408 405 lfiles = lfutil.listlfiles(repo, node)
409 406 if filelist:
410 407 lfiles = set(lfiles) & set(filelist)
411 408 toget = []
412 409
413 410 ctx = repo[node]
414 411 for lfile in lfiles:
415 412 try:
416 413 expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
417 414 except IOError as err:
418 415 if err.errno == errno.ENOENT:
419 416 continue # node must be None and standin wasn't found in wctx
420 417 raise
421 418 if not lfutil.findfile(repo, expectedhash):
422 419 toget.append((lfile, expectedhash))
423 420
424 421 if toget:
425 422 store = storefactory.openstore(repo)
426 423 ret = store.get(toget)
427 424 return ret
428 425
429 426 return ([], [])
430 427
431 428 def downloadlfiles(ui, repo, rev=None):
432 429 match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
433 430 def prepare(ctx, fns):
434 431 pass
435 432 totalsuccess = 0
436 433 totalmissing = 0
437 434 if rev != []: # walkchangerevs on empty list would return all revs
438 435 for ctx in cmdutil.walkchangerevs(repo, match, {'rev' : rev},
439 436 prepare):
440 437 success, missing = cachelfiles(ui, repo, ctx.node())
441 438 totalsuccess += len(success)
442 439 totalmissing += len(missing)
443 440 ui.status(_("%d additional largefiles cached\n") % totalsuccess)
444 441 if totalmissing > 0:
445 442 ui.status(_("%d largefiles failed to download\n") % totalmissing)
446 443 return totalsuccess, totalmissing
447 444
448 445 def updatelfiles(ui, repo, filelist=None, printmessage=None,
449 446 normallookup=False):
450 447 '''Update largefiles according to standins in the working directory
451 448
452 449 If ``printmessage`` is other than ``None``, it means "print (or
453 450 ignore, for false) message forcibly".
454 451 '''
455 452 statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
456 453 with repo.wlock():
457 454 lfdirstate = lfutil.openlfdirstate(ui, repo)
458 455 lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
459 456
460 457 if filelist is not None:
461 458 filelist = set(filelist)
462 459 lfiles = [f for f in lfiles if f in filelist]
463 460
464 461 update = {}
465 462 dropped = set()
466 463 updated, removed = 0, 0
467 464 wvfs = repo.wvfs
468 465 wctx = repo[None]
469 466 for lfile in lfiles:
470 467 rellfile = lfile
471 468 rellfileorig = os.path.relpath(
472 469 scmutil.origpath(ui, repo, wvfs.join(rellfile)),
473 470 start=repo.root)
474 471 relstandin = lfutil.standin(lfile)
475 472 relstandinorig = os.path.relpath(
476 473 scmutil.origpath(ui, repo, wvfs.join(relstandin)),
477 474 start=repo.root)
478 475 if wvfs.exists(relstandin):
479 476 if (wvfs.exists(relstandinorig) and
480 477 wvfs.exists(rellfile)):
481 478 shutil.copyfile(wvfs.join(rellfile),
482 479 wvfs.join(rellfileorig))
483 480 wvfs.unlinkpath(relstandinorig)
484 481 expecthash = lfutil.readasstandin(wctx[relstandin])
485 482 if expecthash != '':
486 483 if lfile not in wctx: # not switched to normal file
487 484 if repo.dirstate[relstandin] != '?':
488 485 wvfs.unlinkpath(rellfile, ignoremissing=True)
489 486 else:
490 487 dropped.add(rellfile)
491 488
492 489 # use normallookup() to allocate an entry in largefiles
493 490 # dirstate to prevent lfilesrepo.status() from reporting
494 491 # missing files as removed.
495 492 lfdirstate.normallookup(lfile)
496 493 update[lfile] = expecthash
497 494 else:
498 495 # Remove lfiles for which the standin is deleted, unless the
499 496 # lfile is added to the repository again. This happens when a
500 497 # largefile is converted back to a normal file: the standin
501 498 # disappears, but a new (normal) file appears as the lfile.
502 499 if (wvfs.exists(rellfile) and
503 500 repo.dirstate.normalize(lfile) not in wctx):
504 501 wvfs.unlinkpath(rellfile)
505 502 removed += 1
506 503
507 504 # largefile processing might be slow and be interrupted - be prepared
508 505 lfdirstate.write()
509 506
510 507 if lfiles:
511 508 lfiles = [f for f in lfiles if f not in dropped]
512 509
513 510 for f in dropped:
514 511 repo.wvfs.unlinkpath(lfutil.standin(f))
515 512
516 513 # This needs to happen for dropped files, otherwise they stay in
517 514 # the M state.
518 515 lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)
519 516
520 517 statuswriter(_('getting changed largefiles\n'))
521 518 cachelfiles(ui, repo, None, lfiles)
522 519
523 520 for lfile in lfiles:
524 521 update1 = 0
525 522
526 523 expecthash = update.get(lfile)
527 524 if expecthash:
528 525 if not lfutil.copyfromcache(repo, expecthash, lfile):
529 526 # failed ... but already removed and set to normallookup
530 527 continue
531 528 # Synchronize largefile dirstate to the last modified
532 529 # time of the file
533 530 lfdirstate.normal(lfile)
534 531 update1 = 1
535 532
536 533 # copy the exec mode of largefile standin from the repository's
537 534 # dirstate to its state in the lfdirstate.
538 535 rellfile = lfile
539 536 relstandin = lfutil.standin(lfile)
540 537 if wvfs.exists(relstandin):
541 538 # exec is decided by the users permissions using mask 0o100
542 539 standinexec = wvfs.stat(relstandin).st_mode & 0o100
543 540 st = wvfs.stat(rellfile)
544 541 mode = st.st_mode
545 542 if standinexec != mode & 0o100:
546 543 # first remove all X bits, then shift all R bits to X
547 544 mode &= ~0o111
548 545 if standinexec:
549 546 mode |= (mode >> 2) & 0o111 & ~util.umask
550 547 wvfs.chmod(rellfile, mode)
551 548 update1 = 1
552 549
553 550 updated += update1
554 551
555 552 lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)
556 553
557 554 lfdirstate.write()
558 555 if lfiles:
559 556 statuswriter(_('%d largefiles updated, %d removed\n') % (updated,
560 557 removed))
561 558
562 559 @eh.command('lfpull',
563 560 [('r', 'rev', [], _('pull largefiles for these revisions'))
564 561 ] + cmdutil.remoteopts,
565 562 _('-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'))
566 563 def lfpull(ui, repo, source="default", **opts):
567 564 """pull largefiles for the specified revisions from the specified source
568 565
569 566 Pull largefiles that are referenced from local changesets but missing
570 567 locally, pulling from a remote repository to the local cache.
571 568
572 569 If SOURCE is omitted, the 'default' path will be used.
573 570 See :hg:`help urls` for more information.
574 571
575 572 .. container:: verbose
576 573
577 574 Some examples:
578 575
579 576 - pull largefiles for all branch heads::
580 577
581 578 hg lfpull -r "head() and not closed()"
582 579
583 580 - pull largefiles on the default branch::
584 581
585 582 hg lfpull -r "branch(default)"
586 583 """
587 584 repo.lfpullsource = source
588 585
589 586 revs = opts.get(r'rev', [])
590 587 if not revs:
591 588 raise error.Abort(_('no revisions specified'))
592 589 revs = scmutil.revrange(repo, revs)
593 590
594 591 numcached = 0
595 592 for rev in revs:
596 593 ui.note(_('pulling largefiles for revision %d\n') % rev)
597 594 (cached, missing) = cachelfiles(ui, repo, rev)
598 595 numcached += len(cached)
599 596 ui.status(_("%d largefiles cached\n") % numcached)
600 597
601 598 @eh.command('debuglfput',
602 599 [] + cmdutil.remoteopts,
603 600 _('FILE'))
604 601 def debuglfput(ui, repo, filepath, **kwargs):
605 602 hash = lfutil.hashfile(filepath)
606 603 storefactory.openstore(repo).put(filepath, hash)
607 604 ui.write('%s\n' % hash)
608 605 return 0
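
A side note on the unchanged conversion code above: for every converted largefile, _lfconvert_addchangeset() hashes the file contents with SHA-1 and records the hex digest (plus a trailing newline) in a small standin file that the destination repository tracks instead of the real content. The sketch below mirrors only that hashing step; write_standin() and its '.hglf/'-style layout are an illustrative assumption here, the real extension goes through lfutil.standin()/lfutil.writestandin().

    import hashlib
    import os

    def largefile_hash(data):
        """SHA-1 hex digest of a largefile's contents (what standins store)."""
        m = hashlib.sha1()
        m.update(data)
        return m.hexdigest()

    def write_standin(repo_root, filename, data):
        # Hypothetical helper: a standin is just a one-line file holding the
        # content hash, kept in a parallel directory tree.
        standin = os.path.join(repo_root, '.hglf', filename)
        os.makedirs(os.path.dirname(standin), exist_ok=True)
        with open(standin, 'w') as fp:
            fp.write(largefile_hash(data) + '\n')
        return standin
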