##// END OF EJS Templates
largefiles: use util.readfile in lfconvert
Bryan O'Sullivan -
r27774:8ceaaf63 default
parent child Browse files
Show More
@@ -1,552 +1,547
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10
11 11 import os, errno
12 12 import shutil
13 13
14 14 from mercurial import util, match as match_, hg, node, context, error, \
15 15 cmdutil, scmutil, commands
16 16 from mercurial.i18n import _
17 17 from mercurial.lock import release
18 18
19 19 from hgext.convert import convcmd
20 20 from hgext.convert import filemap
21 21
22 22 import lfutil
23 23 import basestore
24 24
# -- Commands ----------------------------------------------------------

# ``cmdtable`` is handed to cmdutil.command(); the resulting ``command``
# object is used below as the decorator on each command function.
cmdtable = dict()
command = cmdutil.command(cmdtable)
29 29
@command('lfconvert',
    [('s', 'size', '',
      _('minimum size (MB) for files to be converted as largefiles'), 'SIZE'),
    ('', 'to-normal', False,
     _('convert from a largefiles repo to a normal repo')),
    ],
    _('hg lfconvert SOURCE DEST [FILE ...]'),
    norepo=True,
    inferrepo=True)
def lfconvert(ui, src, dest, *pats, **opts):
    '''convert a normal repository to a largefiles repository

    Convert repository SOURCE to a new repository DEST, identical to
    SOURCE except that certain files will be converted as largefiles:
    specifically, any file that matches any PATTERN *or* whose size is
    above the minimum size threshold is converted as a largefile. The
    size used to determine whether or not to track a file as a
    largefile is the size of the first version of the file. The
    minimum size can be specified either with --size or in
    configuration as ``largefiles.size``.

    After running this command you will need to make sure that
    largefiles is enabled anywhere you intend to push the new
    repository.

    Use --to-normal to convert largefiles back to normal files; after
    this, the DEST repository can be used without largefiles at all.'''

    # tolfile is True when converting normal -> largefiles, False for the
    # reverse (--to-normal) direction. ``size`` is only needed (and only
    # defined) for the normal -> largefiles direction.
    tolfile = not opts['to_normal']
    if tolfile:
        size = lfutil.getminsize(ui, True, opts.get('size'), default=None)

    # Both endpoints must be local; src is checked first so that its error
    # wins when both are remote.
    for path in (src, dest):
        if not hg.islocal(path):
            raise error.Abort(_('%s is not a local Mercurial repo') % path)

    rsrc = hg.repository(ui, src)
    ui.status(_('initializing destination %s\n') % dest)
    rdst = hg.repository(ui, dest, create=True)

    # ``success`` stays False until the whole conversion has completed, so
    # the outer ``finally`` knows whether to delete the half-built DEST.
    success = False
    dstwlock = dstlock = None
    try:
        # Get a list of all changesets in the source. The easy way to do this
        # is to simply walk the changelog, using changelog.nodesbetween().
        # Take a look at mercurial/revlog.py:639 for more details.
        # Use a generator instead of a list to decrease memory usage
        ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
            rsrc.heads())[0])
        # Maps source changeset nodes to the nodes committed in DEST.
        revmap = {node.nullid: node.nullid}
        if tolfile:
            # Lock destination to prevent modification while it is converted to.
            # Don't need to lock src because we are just reading from its
            # history which can't change.
            dstwlock = rdst.wlock()
            dstlock = rdst.lock()

            lfiles = set()
            normalfiles = set()
            # Patterns given on the command line take precedence over the
            # configured ones.
            if not pats:
                pats = ui.configlist(lfutil.longname, 'patterns', default=[])
            if pats:
                matcher = match_.match(rsrc.root, '', list(pats))
            else:
                matcher = None

            lfiletohash = {}
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=rsrc['tip'].rev())
                _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
                    lfiles, normalfiles, matcher, size, lfiletohash)
            ui.progress(_('converting revisions'), None)

            # Remove the standin directory from DEST's working directory.
            standindir = rdst.wjoin(lfutil.shortname)
            if os.path.exists(standindir):
                shutil.rmtree(standindir)

            # Remove the largefiles that were written into DEST's working
            # directory during conversion, then prune emptied directories.
            for f in lfiletohash:
                fullpath = rdst.wjoin(f)
                if os.path.isfile(fullpath):
                    os.unlink(fullpath)
                try:
                    os.removedirs(os.path.dirname(fullpath))
                except OSError:
                    # Deliberately best-effort: a directory that cannot be
                    # removed is simply left in place.
                    pass

            # If there were any files converted to largefiles, add largefiles
            # to the destination repository's requirements.
            if lfiles:
                rdst.requirements.add('largefiles')
                rdst._writerequirements()
        else:
            class lfsource(filemap.filemap_source):
                '''Convert source that replaces standin contents with the
                data of the largefile they name.'''

                def __init__(self, ui, source):
                    super(lfsource, self).__init__(ui, source, None)
                    # Rename everything under the standin directory to the
                    # repository root.
                    self.filemapper.rename[lfutil.shortname] = '.'

                def getfile(self, name, rev):
                    realname, realrev = rev
                    f = super(lfsource, self).getfile(name, rev)

                    # Files outside the standin directory, and entries whose
                    # first element is None, are passed through unchanged.
                    if (not realname.startswith(lfutil.shortnameslash)
                            or f[0] is None):
                        return f

                    # Substitute in the largefile data for the hash
                    hash = f[0].strip()
                    path = lfutil.findfile(rsrc, hash)

                    if path is None:
                        raise error.Abort(_("missing largefile for '%s' in %s")
                                          % (realname, realrev))
                    return util.readfile(path), f[1]

            class converter(convcmd.converter):
                '''convcmd converter that wraps its source in lfsource.'''

                def __init__(self, ui, source, dest, revmapfile, opts):
                    src = lfsource(ui, source)

                    super(converter, self).__init__(ui, src, dest, revmapfile,
                                                    opts)

            # Every largefile must be available locally before converting.
            found, missing = downloadlfiles(ui, rsrc)
            if missing != 0:
                raise error.Abort(_("all largefiles must be present locally"))

            # Temporarily swap in our converter class; always restore the
            # original, even when the conversion fails.
            orig = convcmd.converter
            convcmd.converter = converter

            try:
                convcmd.convert(ui, src, dest)
            finally:
                convcmd.converter = orig
        success = True
    finally:
        if tolfile:
            rdst.dirstate.clear()
            release(dstlock, dstwlock)
        if not success:
            # we failed, remove the new directory
            shutil.rmtree(rdst.root)
def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
        matcher, size, lfiletohash):
    '''Convert source changeset ``ctx`` and commit the result to ``rdst``.

    Each changed file that has not been seen before is classified as a
    largefile or a normal file and added to ``lfiles`` or ``normalfiles``
    (both sets are updated in place). For largefiles the standin name is
    committed instead of the file; when the content hash differs from the one
    recorded in ``lfiletohash`` the file and its standin are written into the
    working directory of ``rdst`` and ``lfiletohash`` is updated.

    ``revmap`` maps source nodes to destination nodes; it is read to convert
    the parents and updated by the commit. ``matcher`` and ``size`` are
    passed on to _islfile(). ``rsrc`` is not used by this function.

    Raises error.Abort when a file copied or renamed from a largefile is a
    symlink.
    '''
    # Convert src parents to dst parents
    parents = _convertparents(ctx, revmap)

    # Generate list of changed files
    files = _getchangedfiles(ctx, parents)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            # First time this name is seen: decide its kind once.
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the largefile-ness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.renamed()
                # Coerce to a real bool so the result does not depend on
                # which falsy value renamed() uses for "not renamed".
                renamedlfile = bool(renamed) and renamed[0] in lfiles
                islfile = islfile or renamedlfile
                if 'l' in fctx.flags():
                    if renamedlfile:
                        raise error.Abort(
                            _('renamed/copied largefile %s becomes symlink')
                            % f)
                    # symlinks are never converted to largefiles
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            dstfiles.append(lfutil.standin(f))
            # largefile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if 'l' in fctx.flags():
                    renamed = fctx.renamed()
                    if renamed and renamed[0] in lfiles:
                        raise error.Abort(_('largefile %s becomes symlink') % f)

                # largefile was modified, update standins
                # Read the file content once; it is needed both for hashing
                # and for writing the file out.
                lfctx = ctx[f]
                data = lfctx.data()
                m = util.sha1('')
                m.update(data)
                lfhash = m.hexdigest()
                if f not in lfiletohash or lfiletohash[f] != lfhash:
                    flags = lfctx.flags()
                    rdst.wwrite(f, data, flags)
                    executable = 'x' in flags
                    lfutil.writestandin(rdst, lfutil.standin(f), lfhash,
                        executable)
                    lfiletohash[f] = lfhash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        '''Return the memfilectx to commit for ``f``, or None when the file
        is not present in ``ctx``.'''
        if lfutil.isstandin(f):
            # if the file isn't in the manifest then it was removed
            # or renamed, return None to indicate this
            srcfname = lfutil.splitstandin(f)
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                return None
            renamed = fctx.renamed()
            if renamed:
                # standin is always a largefile because largefile-ness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed[0])

            # The standin's content is the recorded hash plus a newline.
            return context.memfilectx(repo, f, lfiletohash[srcfname] + '\n',
                                      'l' in fctx.flags(), 'x' in fctx.flags(),
                                      renamed)
        else:
            return _getnormalcontext(repo, ctx, f, revmap)

    # Commit
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
255 250
def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
    '''Commit ``dstfiles`` to ``rdst`` using the metadata of ``ctx``.

    The description, user, date and extra of ``ctx`` are reused for the new
    changeset. After committing, lfutil.copyalltostore() is called for the
    new node, the working directory parent of ``rdst`` is set to it, and
    ``revmap`` gains an entry from ``ctx.node()`` to the destination tip.
    '''
    description = ctx.description()
    user = ctx.user()
    date = ctx.date()
    extra = ctx.extra()
    newctx = context.memctx(rdst, parents, description, dstfiles,
                            getfilectx, user, date, extra)
    newnode = rdst.commitctx(newctx)
    lfutil.copyalltostore(rdst, newnode)
    rdst.setparents(newnode)
    revmap[ctx.node()] = rdst.changelog.tip()
263 258
def _getchangedfiles(ctx, parents):
    '''Return the set of files changed by ``ctx``.

    This starts from ``ctx.files()``. When neither entry of ``parents`` is
    the null id, the manifests are compared as well: files present in either
    parent manifest but absent from ctx's manifest are added, as are files
    whose manifest entry differs from that of at least one parent.
    '''
    changed = set(ctx.files())
    if node.nullid in parents:
        return changed

    manifest = ctx.manifest()
    p1manifest = ctx.parents()[0].manifest()
    p2manifest = ctx.parents()[1].manifest()

    # in a parent but no longer in this changeset
    changed.update(set(p1manifest).union(p2manifest).difference(manifest))

    # entry does not match both parents
    for fname in manifest:
        entry = manifest[fname]
        sameinp1 = entry == p1manifest.get(fname, None)
        if not (sameinp1 and entry == p2manifest.get(fname, None)):
            changed.add(fname)
    return changed
276 271
277 272 # Convert src parents to dst parents
278 273 def _convertparents(ctx, revmap):
279 274 parents = []
280 275 for p in ctx.parents():
281 276 parents.append(revmap[p.node()])
282 277 while len(parents) < 2:
283 278 parents.append(node.nullid)
284 279 return parents
285 280
def _getnormalcontext(repo, ctx, f, revmap):
    '''Return a memfilectx for normal file ``f`` as it is in ``ctx``.

    Returns None when looking up ``f`` in ``ctx`` raises error.LookupError.
    The content of ``.hgtags`` is passed through _converttags() with
    ``revmap`` before being stored.
    '''
    try:
        srcfctx = ctx.filectx(f)
    except error.LookupError:
        return None

    copysource = srcfctx.renamed()
    if copysource:
        # keep only the first element of the rename information
        copysource = copysource[0]

    content = srcfctx.data()
    if f == '.hgtags':
        content = _converttags(repo.ui, revmap, content)

    islink = 'l' in srcfctx.flags()
    isexec = 'x' in srcfctx.flags()
    return context.memfilectx(repo, f, content, islink, isexec, copysource)
301 296
def _converttags(ui, revmap, data):
    '''Remap the node ids in tag data through ``revmap``.

    Each line of ``data`` is expected to be ``<hex id> <name>``. Lines
    without a space, ids that node.bin() rejects with TypeError, and ids
    missing from ``revmap`` are skipped with a warning on ``ui``. Returns
    the remaining lines, each newline-terminated, joined into one string.
    '''
    remapped = []
    for line in data.splitlines():
        hexid, sep, name = line.partition(' ')
        if not sep:
            ui.warn(_('skipping incorrectly formatted tag %s\n')
                    % line)
            continue

        try:
            binid = node.bin(hexid)
        except TypeError:
            ui.warn(_('skipping incorrectly formatted id %s\n')
                    % hexid)
            continue

        try:
            newhex = node.hex(revmap[binid])
        except KeyError:
            ui.warn(_('no mapping for id %s\n') % hexid)
            continue
        remapped.append('%s %s\n' % (newhex, name))
    return ''.join(remapped)
325 320
326 321 def _islfile(file, ctx, matcher, size):
327 322 '''Return true if file should be considered a largefile, i.e.
328 323 matcher matches it or it is larger than size.'''
329 324 # never store special .hg* files as largefiles
330 325 if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
331 326 return False
332 327 if matcher and matcher(file):
333 328 return True
334 329 try:
335 330 return ctx.filectx(file).size() >= size * 1024 * 1024
336 331 except error.LookupError:
337 332 return False
338 333
def uploadlfiles(ui, rsrc, rdst, files):
    '''Upload the largefiles named by the hashes in ``files`` to the store.

    Hashes the store reports as already existing are skipped. Raises
    error.Abort when a largefile that needs uploading cannot be found
    through lfutil.findfile(). Does nothing when ``files`` is empty.'''

    if not files:
        return

    store = basestore._openstore(rsrc, rdst, put=True)

    ui.debug("sending statlfile command for %d largefiles\n" % len(files))
    present = store.exists(files)
    files = [h for h in files if not present[h]]
    ui.debug("%d largefiles need to be uploaded\n" % len(files))

    for at, hash in enumerate(files):
        ui.progress(_('uploading largefiles'), at, unit='largefile',
                    total=len(files))
        source = lfutil.findfile(rsrc, hash)
        if not source:
            raise error.Abort(_('largefile %s missing from store'
                                ' (needs to be uploaded)') % hash)
        # XXX check for errors here
        store.put(source, hash)
    ui.progress(_('uploading largefiles'), None)
364 359
def verifylfiles(ui, repo, all=False, contents=False):
    '''Verify that every largefile revision in the current changeset
    exists in the central store. With ``contents``, the store is also asked
    to verify the contents of each local largefile revision. With ``all``,
    every changeset in this repository is checked instead of only the
    current one.

    Returns whatever the store's verify() returns.'''
    revs = repo.revs('all()') if all else ['.']
    return basestore._openstore(repo).verify(revs, contents=contents)
378 373
def cachelfiles(ui, repo, node, filelist=None):
    '''Make sure the largefiles needed by revision ``node`` are available
    through lfutil.findfile(), fetching absent ones from the store.

    When ``filelist`` is given and non-empty, only largefiles named in it
    are considered.

    Returns the store's get() result for the largefiles that had to be
    fetched, unpacked by callers as a tuple (cached, missing): cached is
    the list of files downloaded by this operation; missing is the list of
    files that were needed but could not be found. Returns ([], []) when
    nothing had to be fetched.'''
    candidates = lfutil.listlfiles(repo, node)
    if filelist:
        candidates = set(candidates) & set(filelist)

    needed = []
    for lfile in candidates:
        try:
            expectedhash = repo[node][lfutil.standin(lfile)].data().strip()
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            # node must be None and standin wasn't found in wctx
            continue
        if not lfutil.findfile(repo, expectedhash):
            needed.append((lfile, expectedhash))

    if not needed:
        return ([], [])

    store = basestore._openstore(repo)
    return store.get(needed)
407 402
def downloadlfiles(ui, repo, rev=None):
    '''Cache the largefiles of the changesets selected by ``rev``.

    The changesets are walked with a matcher built on the standin
    directory, and cachelfiles() is called for each one. Nothing is walked
    when ``rev`` is an empty list. Reports the totals on ``ui`` and returns
    them as (number cached, number missing).'''
    standinmatch = scmutil.match(repo[None],
                                 [repo.wjoin(lfutil.shortname)], {})

    def noprepare(ctx, fns):
        # walkchangerevs takes a prepare callback; nothing to do here
        pass

    ncached = 0
    nmissing = 0
    if rev != []: # walkchangerevs on empty list would return all revs
        walkopts = {'rev' : rev}
        for ctx in cmdutil.walkchangerevs(repo, standinmatch, walkopts,
                                          noprepare):
            success, missing = cachelfiles(ui, repo, ctx.node())
            ncached += len(success)
            nmissing += len(missing)
    ui.status(_("%d additional largefiles cached\n") % ncached)
    if nmissing > 0:
        ui.status(_("%d largefiles failed to download\n") % nmissing)
    return ncached, nmissing
425 420
def updatelfiles(ui, repo, filelist=None, printmessage=None,
                 normallookup=False):
    '''Bring the largefiles in the working directory in line with their
    standins.

    ``filelist``, when not None, restricts the operation to the largefiles
    it names. ``printmessage`` is handed to lfutil.getstatuswriter(): a
    value other than ``None`` means "print (or ignore, for false) message
    forcibly". ``normallookup`` is passed on to lfutil.synclfdirstate().

    The whole operation runs with the repository's wlock held.
    '''
    statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
    wlock = repo.wlock()
    try:
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)

        if filelist is not None:
            wanted = set(filelist)
            lfiles = [f for f in lfiles if f in wanted]

        # largefile name -> hash read from its standin, for files to refresh
        pending = {}
        updated = 0
        removed = 0
        for lfile in lfiles:
            lfpath = repo.wjoin(lfile)
            lfbackup = scmutil.origpath(ui, repo, lfpath)
            standinpath = repo.wjoin(lfutil.standin(lfile))
            standinbackup = scmutil.origpath(ui, repo, standinpath)

            if not os.path.exists(standinpath):
                # Remove lfiles for which the standin is deleted, unless the
                # lfile is added to the repository again. This happens when a
                # largefile is converted back to a normal file: the standin
                # disappears, but a new (normal) file appears as the lfile.
                if (os.path.exists(lfpath) and
                    repo.dirstate.normalize(lfile) not in repo[None]):
                    util.unlinkpath(lfpath)
                    removed += 1
                continue

            # A backup of the standin exists: back up the largefile itself
            # and drop the standin's backup.
            if os.path.exists(standinbackup) and os.path.exists(lfpath):
                shutil.copyfile(lfpath, lfbackup)
                util.unlinkpath(standinbackup)

            expecthash = lfutil.readstandin(repo, lfile)
            if expecthash == '':
                continue
            if lfile not in repo[None]: # not switched to normal file
                util.unlinkpath(lfpath, ignoremissing=True)
            # use normallookup() to allocate an entry in largefiles
            # dirstate to prevent lfilesrepo.status() from reporting
            # missing files as removed.
            lfdirstate.normallookup(lfile)
            pending[lfile] = expecthash

        # largefile processing might be slow and be interrupted - be prepared
        lfdirstate.write()

        if lfiles:
            statuswriter(_('getting changed largefiles\n'))
            cachelfiles(ui, repo, None, lfiles)

        for lfile in lfiles:
            # 1 when this largefile's content or mode was changed, else 0
            changed = 0

            expecthash = pending.get(lfile)
            if expecthash:
                if not lfutil.copyfromcache(repo, expecthash, lfile):
                    # failed ... but already removed and set to normallookup
                    continue
                # Synchronize largefile dirstate to the last modified
                # time of the file
                lfdirstate.normal(lfile)
                changed = 1

            # copy the state of largefile standin from the repository's
            # dirstate to its state in the lfdirstate.
            lfpath = repo.wjoin(lfile)
            standinpath = repo.wjoin(lfutil.standin(lfile))
            if os.path.exists(standinpath):
                standinmode = os.stat(standinpath).st_mode
                if standinmode != os.stat(lfpath).st_mode:
                    os.chmod(lfpath, standinmode)
                    changed = 1

            updated += changed

            lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)

        lfdirstate.write()
        if lfiles:
            statuswriter(_('%d largefiles updated, %d removed\n') % (updated,
                removed))
    finally:
        wlock.release()
514 509
@command('lfpull',
    [('r', 'rev', [], _('pull largefiles for these revisions'))
    ] + commands.remoteopts,
    _('-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'))
def lfpull(ui, repo, source="default", **opts):
    """pull largefiles for the specified revisions from the specified source

    Pull largefiles that are referenced from local changesets but missing
    locally, pulling from a remote repository to the local cache.

    If SOURCE is omitted, the 'default' path will be used.
    See :hg:`help urls` for more information.

    .. container:: verbose

      Some examples:

      - pull largefiles for all branch heads::

          hg lfpull -r "head() and not closed()"

      - pull largefiles on the default branch::

          hg lfpull -r "branch(default)"
    """
    # Record the requested source on the repo object before caching.
    repo.lfpullsource = source

    revspecs = opts.get('rev', [])
    if not revspecs:
        raise error.Abort(_('no revisions specified'))

    total = 0
    for rev in scmutil.revrange(repo, revspecs):
        ui.note(_('pulling largefiles for revision %s\n') % rev)
        cached, missing = cachelfiles(ui, repo, rev)
        total += len(cached)
    ui.status(_("%d largefiles cached\n") % total)
General Comments 0
You need to be logged in to leave comments. Login now