largefiles: don't store whole file in memory for 'cat'
Mads Kiilerich
r18973:5f9019e6 default
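
The change below is confined to catlfile(): instead of slurping the whole largefile with fpin.read(), the data is copied block by block via lfutil.blockstream(fpin), so 'hg cat' of a largefile no longer holds the entire file in memory. A minimal sketch of that chunked-copy pattern follows; the helper name copychunks and the 128 KiB block size are illustrative assumptions, not the actual lfutil.blockstream implementation.

    def copychunks(fpin, fpout, blocksize=128 * 1024):
        '''Copy fpin to fpout in fixed-size blocks instead of one big read.'''
        # illustrative sketch only; names and block size are assumptions
        while True:
            chunk = fpin.read(blocksize)   # read at most blocksize bytes
            if not chunk:                  # empty string signals EOF
                break
            fpout.write(chunk)

catlfile() achieves the same effect by iterating over lfutil.blockstream(fpin) and writing each chunk to fpout, as shown in the diff.
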
@@ -1,564 +1,565 @@
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10
11 11 import os, errno
12 12 import shutil
13 13
14 14 from mercurial import util, match as match_, hg, node, context, error, \
15 15 cmdutil, scmutil, commands
16 16 from mercurial.i18n import _
17 17 from mercurial.lock import release
18 18
19 19 import lfutil
20 20 import basestore
21 21
22 22 # -- Commands ----------------------------------------------------------
23 23
24 24 def lfconvert(ui, src, dest, *pats, **opts):
25 25 '''convert a normal repository to a largefiles repository
26 26
27 27 Convert repository SOURCE to a new repository DEST, identical to
28 28 SOURCE except that certain files will be converted as largefiles:
29 29 specifically, any file that matches any PATTERN *or* whose size is
30 30 above the minimum size threshold is converted as a largefile. The
31 31 size used to determine whether or not to track a file as a
32 32 largefile is the size of the first version of the file. The
33 33 minimum size can be specified either with --size or in
34 34 configuration as ``largefiles.size``.
35 35
36 36 After running this command you will need to make sure that
37 37 largefiles is enabled anywhere you intend to push the new
38 38 repository.
39 39
40 40 Use --to-normal to convert largefiles back to normal files; after
41 41 this, the DEST repository can be used without largefiles at all.'''
42 42
43 43 if opts['to_normal']:
44 44 tolfile = False
45 45 else:
46 46 tolfile = True
47 47 size = lfutil.getminsize(ui, True, opts.get('size'), default=None)
48 48
49 49 if not hg.islocal(src):
50 50 raise util.Abort(_('%s is not a local Mercurial repo') % src)
51 51 if not hg.islocal(dest):
52 52 raise util.Abort(_('%s is not a local Mercurial repo') % dest)
53 53
54 54 rsrc = hg.repository(ui, src)
55 55 ui.status(_('initializing destination %s\n') % dest)
56 56 rdst = hg.repository(ui, dest, create=True)
57 57
58 58 success = False
59 59 dstwlock = dstlock = None
60 60 try:
61 61 # Lock destination to prevent modification while it is converted to.
62 62 # Don't need to lock src because we are just reading from its history
63 63 # which can't change.
64 64 dstwlock = rdst.wlock()
65 65 dstlock = rdst.lock()
66 66
67 67 # Get a list of all changesets in the source. The easy way to do this
68 68 # is to simply walk the changelog, using changelog.nodesbetween().
69 69 # Take a look at mercurial/revlog.py:639 for more details.
70 70 # Use a generator instead of a list to decrease memory usage
71 71 ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
72 72 rsrc.heads())[0])
73 73 revmap = {node.nullid: node.nullid}
74 74 if tolfile:
75 75 lfiles = set()
76 76 normalfiles = set()
77 77 if not pats:
78 78 pats = ui.configlist(lfutil.longname, 'patterns', default=[])
79 79 if pats:
80 80 matcher = match_.match(rsrc.root, '', list(pats))
81 81 else:
82 82 matcher = None
83 83
84 84 lfiletohash = {}
85 85 for ctx in ctxs:
86 86 ui.progress(_('converting revisions'), ctx.rev(),
87 87 unit=_('revision'), total=rsrc['tip'].rev())
88 88 _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
89 89 lfiles, normalfiles, matcher, size, lfiletohash)
90 90 ui.progress(_('converting revisions'), None)
91 91
92 92 if os.path.exists(rdst.wjoin(lfutil.shortname)):
93 93 shutil.rmtree(rdst.wjoin(lfutil.shortname))
94 94
95 95 for f in lfiletohash.keys():
96 96 if os.path.isfile(rdst.wjoin(f)):
97 97 os.unlink(rdst.wjoin(f))
98 98 try:
99 99 os.removedirs(os.path.dirname(rdst.wjoin(f)))
100 100 except OSError:
101 101 pass
102 102
103 103 # If there were any files converted to largefiles, add largefiles
104 104 # to the destination repository's requirements.
105 105 if lfiles:
106 106 rdst.requirements.add('largefiles')
107 107 rdst._writerequirements()
108 108 else:
109 109 for ctx in ctxs:
110 110 ui.progress(_('converting revisions'), ctx.rev(),
111 111 unit=_('revision'), total=rsrc['tip'].rev())
112 112 _addchangeset(ui, rsrc, rdst, ctx, revmap)
113 113
114 114 ui.progress(_('converting revisions'), None)
115 115 success = True
116 116 finally:
117 117 rdst.dirstate.clear()
118 118 release(dstlock, dstwlock)
119 119 if not success:
120 120 # we failed, remove the new directory
121 121 shutil.rmtree(rdst.root)
122 122
123 123 def _addchangeset(ui, rsrc, rdst, ctx, revmap):
124 124 # Convert src parents to dst parents
125 125 parents = _convertparents(ctx, revmap)
126 126
127 127 # Generate list of changed files
128 128 files = _getchangedfiles(ctx, parents)
129 129
130 130 def getfilectx(repo, memctx, f):
131 131 if lfutil.standin(f) in files:
132 132 # if the file isn't in the manifest then it was removed
133 133 # or renamed, raise IOError to indicate this
134 134 try:
135 135 fctx = ctx.filectx(lfutil.standin(f))
136 136 except error.LookupError:
137 137 raise IOError
138 138 renamed = fctx.renamed()
139 139 if renamed:
140 140 renamed = lfutil.splitstandin(renamed[0])
141 141
142 142 hash = fctx.data().strip()
143 143 path = lfutil.findfile(rsrc, hash)
144 144
145 145 # If one file is missing, likely all files from this rev are
146 146 if path is None:
147 147 cachelfiles(ui, rsrc, ctx.node())
148 148 path = lfutil.findfile(rsrc, hash)
149 149
150 150 if path is None:
151 151 raise util.Abort(
152 152 _("missing largefile \'%s\' from revision %s")
153 153 % (f, node.hex(ctx.node())))
154 154
155 155 data = ''
156 156 fd = None
157 157 try:
158 158 fd = open(path, 'rb')
159 159 data = fd.read()
160 160 finally:
161 161 if fd:
162 162 fd.close()
163 163 return context.memfilectx(f, data, 'l' in fctx.flags(),
164 164 'x' in fctx.flags(), renamed)
165 165 else:
166 166 return _getnormalcontext(repo.ui, ctx, f, revmap)
167 167
168 168 dstfiles = []
169 169 for file in files:
170 170 if lfutil.isstandin(file):
171 171 dstfiles.append(lfutil.splitstandin(file))
172 172 else:
173 173 dstfiles.append(file)
174 174 # Commit
175 175 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
176 176
177 177 def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
178 178 matcher, size, lfiletohash):
179 179 # Convert src parents to dst parents
180 180 parents = _convertparents(ctx, revmap)
181 181
182 182 # Generate list of changed files
183 183 files = _getchangedfiles(ctx, parents)
184 184
185 185 dstfiles = []
186 186 for f in files:
187 187 if f not in lfiles and f not in normalfiles:
188 188 islfile = _islfile(f, ctx, matcher, size)
189 189 # If this file was renamed or copied then copy
190 190 # the largefile-ness of its predecessor
191 191 if f in ctx.manifest():
192 192 fctx = ctx.filectx(f)
193 193 renamed = fctx.renamed()
194 194 renamedlfile = renamed and renamed[0] in lfiles
195 195 islfile |= renamedlfile
196 196 if 'l' in fctx.flags():
197 197 if renamedlfile:
198 198 raise util.Abort(
199 199 _('renamed/copied largefile %s becomes symlink')
200 200 % f)
201 201 islfile = False
202 202 if islfile:
203 203 lfiles.add(f)
204 204 else:
205 205 normalfiles.add(f)
206 206
207 207 if f in lfiles:
208 208 dstfiles.append(lfutil.standin(f))
209 209 # largefile in manifest if it has not been removed/renamed
210 210 if f in ctx.manifest():
211 211 fctx = ctx.filectx(f)
212 212 if 'l' in fctx.flags():
213 213 renamed = fctx.renamed()
214 214 if renamed and renamed[0] in lfiles:
215 215 raise util.Abort(_('largefile %s becomes symlink') % f)
216 216
217 217 # largefile was modified, update standins
218 218 fullpath = rdst.wjoin(f)
219 219 util.makedirs(os.path.dirname(fullpath))
220 220 m = util.sha1('')
221 221 m.update(ctx[f].data())
222 222 hash = m.hexdigest()
223 223 if f not in lfiletohash or lfiletohash[f] != hash:
224 224 try:
225 225 fd = open(fullpath, 'wb')
226 226 fd.write(ctx[f].data())
227 227 finally:
228 228 if fd:
229 229 fd.close()
230 230 executable = 'x' in ctx[f].flags()
231 231 os.chmod(fullpath, lfutil.getmode(executable))
232 232 lfutil.writestandin(rdst, lfutil.standin(f), hash,
233 233 executable)
234 234 lfiletohash[f] = hash
235 235 else:
236 236 # normal file
237 237 dstfiles.append(f)
238 238
239 239 def getfilectx(repo, memctx, f):
240 240 if lfutil.isstandin(f):
241 241 # if the file isn't in the manifest then it was removed
242 242 # or renamed, raise IOError to indicate this
243 243 srcfname = lfutil.splitstandin(f)
244 244 try:
245 245 fctx = ctx.filectx(srcfname)
246 246 except error.LookupError:
247 247 raise IOError
248 248 renamed = fctx.renamed()
249 249 if renamed:
250 250 # standin is always a largefile because largefile-ness
251 251 # doesn't change after rename or copy
252 252 renamed = lfutil.standin(renamed[0])
253 253
254 254 return context.memfilectx(f, lfiletohash[srcfname] + '\n', 'l' in
255 255 fctx.flags(), 'x' in fctx.flags(), renamed)
256 256 else:
257 257 return _getnormalcontext(repo.ui, ctx, f, revmap)
258 258
259 259 # Commit
260 260 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
261 261
262 262 def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
263 263 mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
264 264 getfilectx, ctx.user(), ctx.date(), ctx.extra())
265 265 ret = rdst.commitctx(mctx)
266 266 rdst.setparents(ret)
267 267 revmap[ctx.node()] = rdst.changelog.tip()
268 268
269 269 # Generate list of changed files
270 270 def _getchangedfiles(ctx, parents):
271 271 files = set(ctx.files())
272 272 if node.nullid not in parents:
273 273 mc = ctx.manifest()
274 274 mp1 = ctx.parents()[0].manifest()
275 275 mp2 = ctx.parents()[1].manifest()
276 276 files |= (set(mp1) | set(mp2)) - set(mc)
277 277 for f in mc:
278 278 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
279 279 files.add(f)
280 280 return files
281 281
282 282 # Convert src parents to dst parents
283 283 def _convertparents(ctx, revmap):
284 284 parents = []
285 285 for p in ctx.parents():
286 286 parents.append(revmap[p.node()])
287 287 while len(parents) < 2:
288 288 parents.append(node.nullid)
289 289 return parents
290 290
291 291 # Get memfilectx for a normal file
292 292 def _getnormalcontext(ui, ctx, f, revmap):
293 293 try:
294 294 fctx = ctx.filectx(f)
295 295 except error.LookupError:
296 296 raise IOError
297 297 renamed = fctx.renamed()
298 298 if renamed:
299 299 renamed = renamed[0]
300 300
301 301 data = fctx.data()
302 302 if f == '.hgtags':
303 303 data = _converttags (ui, revmap, data)
304 304 return context.memfilectx(f, data, 'l' in fctx.flags(),
305 305 'x' in fctx.flags(), renamed)
306 306
307 307 # Remap tag data using a revision map
308 308 def _converttags(ui, revmap, data):
309 309 newdata = []
310 310 for line in data.splitlines():
311 311 try:
312 312 id, name = line.split(' ', 1)
313 313 except ValueError:
314 314 ui.warn(_('skipping incorrectly formatted tag %s\n'
315 315 % line))
316 316 continue
317 317 try:
318 318 newid = node.bin(id)
319 319 except TypeError:
320 320 ui.warn(_('skipping incorrectly formatted id %s\n'
321 321 % id))
322 322 continue
323 323 try:
324 324 newdata.append('%s %s\n' % (node.hex(revmap[newid]),
325 325 name))
326 326 except KeyError:
327 327 ui.warn(_('no mapping for id %s\n') % id)
328 328 continue
329 329 return ''.join(newdata)
330 330
331 331 def _islfile(file, ctx, matcher, size):
332 332 '''Return true if file should be considered a largefile, i.e.
333 333 matcher matches it or it is larger than size.'''
334 334 # never store special .hg* files as largefiles
335 335 if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
336 336 return False
337 337 if matcher and matcher(file):
338 338 return True
339 339 try:
340 340 return ctx.filectx(file).size() >= size * 1024 * 1024
341 341 except error.LookupError:
342 342 return False
343 343
344 344 def uploadlfiles(ui, rsrc, rdst, files):
345 345 '''upload largefiles to the central store'''
346 346
347 347 if not files:
348 348 return
349 349
350 350 store = basestore._openstore(rsrc, rdst, put=True)
351 351
352 352 at = 0
353 353 ui.debug("sending statlfile command for %d largefiles\n" % len(files))
354 354 retval = store.exists(files)
355 355 files = filter(lambda h: not retval[h], files)
356 356 ui.debug("%d largefiles need to be uploaded\n" % len(files))
357 357
358 358 for hash in files:
359 359 ui.progress(_('uploading largefiles'), at, unit='largefile',
360 360 total=len(files))
361 361 source = lfutil.findfile(rsrc, hash)
362 362 if not source:
363 363 raise util.Abort(_('largefile %s missing from store'
364 364 ' (needs to be uploaded)') % hash)
365 365 # XXX check for errors here
366 366 store.put(source, hash)
367 367 at += 1
368 368 ui.progress(_('uploading largefiles'), None)
369 369
370 370 def verifylfiles(ui, repo, all=False, contents=False):
371 371 '''Verify that every largefile revision in the current changeset
372 372 exists in the central store. With --contents, also verify that
373 373 the contents of each local largefile file revision are correct (SHA-1 hash
374 374 matches the revision ID). With --all, check every changeset in
375 375 this repository.'''
376 376 if all:
377 377 # Pass a list to the function rather than an iterator because we know a
378 378 # list will work.
379 379 revs = range(len(repo))
380 380 else:
381 381 revs = ['.']
382 382
383 383 store = basestore._openstore(repo)
384 384 return store.verify(revs, contents=contents)
385 385
386 386 def debugdirstate(ui, repo):
387 387 '''Show basic information for the largefiles dirstate'''
388 388 lfdirstate = lfutil.openlfdirstate(ui, repo)
389 389 for file_, ent in sorted(lfdirstate._map.iteritems()):
390 390 mode = '%3o' % (ent[1] & 0777 & ~util.umask)
391 391 ui.write("%c %s %10d %s\n" % (ent[0], mode, ent[2], file_))
392 392
393 393 def cachelfiles(ui, repo, node, filelist=None):
394 394 '''cachelfiles ensures that all largefiles needed by the specified revision
395 395 are present in the repository's largefile cache.
396 396
397 397 returns a tuple (cached, missing). cached is the list of files downloaded
398 398 by this operation; missing is the list of files that were needed but could
399 399 not be found.'''
400 400 lfiles = lfutil.listlfiles(repo, node)
401 401 if filelist:
402 402 lfiles = set(lfiles) & set(filelist)
403 403 toget = []
404 404
405 405 for lfile in lfiles:
406 406 try:
407 407 expectedhash = repo[node][lfutil.standin(lfile)].data().strip()
408 408 except IOError, err:
409 409 if err.errno == errno.ENOENT:
410 410 continue # node must be None and standin wasn't found in wctx
411 411 raise
412 412 if not lfutil.findfile(repo, expectedhash):
413 413 toget.append((lfile, expectedhash))
414 414
415 415 if toget:
416 416 store = basestore._openstore(repo)
417 417 ret = store.get(toget)
418 418 return ret
419 419
420 420 return ([], [])
421 421
422 422 def downloadlfiles(ui, repo, rev=None):
423 423 matchfn = scmutil.match(repo[None],
424 424 [repo.wjoin(lfutil.shortname)], {})
425 425 def prepare(ctx, fns):
426 426 pass
427 427 totalsuccess = 0
428 428 totalmissing = 0
429 429 if rev != []: # walkchangerevs on empty list would return all revs
430 430 for ctx in cmdutil.walkchangerevs(repo, matchfn, {'rev' : rev},
431 431 prepare):
432 432 success, missing = cachelfiles(ui, repo, ctx.node())
433 433 totalsuccess += len(success)
434 434 totalmissing += len(missing)
435 435 ui.status(_("%d additional largefiles cached\n") % totalsuccess)
436 436 if totalmissing > 0:
437 437 ui.status(_("%d largefiles failed to download\n") % totalmissing)
438 438 return totalsuccess, totalmissing
439 439
440 440 def updatelfiles(ui, repo, filelist=None, printmessage=True):
441 441 wlock = repo.wlock()
442 442 try:
443 443 lfdirstate = lfutil.openlfdirstate(ui, repo)
444 444 lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
445 445
446 446 if filelist is not None:
447 447 lfiles = [f for f in lfiles if f in filelist]
448 448
449 449 printed = False
450 450 if printmessage and lfiles:
451 451 ui.status(_('getting changed largefiles\n'))
452 452 printed = True
453 453 cachelfiles(ui, repo, None, lfiles)
454 454
455 455 updated, removed = 0, 0
456 456 for f in lfiles:
457 457 i = _updatelfile(repo, lfdirstate, f)
458 458 if i:
459 459 if i > 0:
460 460 updated += i
461 461 else:
462 462 removed -= i
463 463 if printmessage and (removed or updated) and not printed:
464 464 ui.status(_('getting changed largefiles\n'))
465 465 printed = True
466 466
467 467 lfdirstate.write()
468 468 if printed and printmessage:
469 469 ui.status(_('%d largefiles updated, %d removed\n') % (updated,
470 470 removed))
471 471 finally:
472 472 wlock.release()
473 473
474 474 def _updatelfile(repo, lfdirstate, lfile):
475 475 '''updates a single largefile and copies the state of its standin from
476 476 the repository's dirstate to its state in the lfdirstate.
477 477
478 478 returns 1 if the file was modified, -1 if the file was removed, 0 if the
479 479 file was unchanged, and None if the needed largefile was missing from the
480 480 cache.'''
481 481 ret = 0
482 482 abslfile = repo.wjoin(lfile)
483 483 absstandin = repo.wjoin(lfutil.standin(lfile))
484 484 if os.path.exists(absstandin):
485 485 if os.path.exists(absstandin + '.orig') and os.path.exists(abslfile):
486 486 shutil.copyfile(abslfile, abslfile + '.orig')
487 487 expecthash = lfutil.readstandin(repo, lfile)
488 488 if (expecthash != '' and
489 489 (not os.path.exists(abslfile) or
490 490 expecthash != lfutil.hashfile(abslfile))):
491 491 if not lfutil.copyfromcache(repo, expecthash, lfile):
492 492 # use normallookup() to allocate entry in largefiles dirstate,
493 493 # because lack of it misleads lfilesrepo.status() into
494 494 # recognition that such cache missing files are REMOVED.
495 495 if lfile not in repo[None]: # not switched to normal file
496 496 util.unlinkpath(abslfile, ignoremissing=True)
497 497 lfdirstate.normallookup(lfile)
498 498 return None # don't try to set the mode
499 499 else:
500 500 # Synchronize largefile dirstate to the last modified time of
501 501 # the file
502 502 lfdirstate.normal(lfile)
503 503 ret = 1
504 504 mode = os.stat(absstandin).st_mode
505 505 if mode != os.stat(abslfile).st_mode:
506 506 os.chmod(abslfile, mode)
507 507 ret = 1
508 508 else:
509 509 # Remove lfiles for which the standin is deleted, unless the
510 510 # lfile is added to the repository again. This happens when a
511 511 # largefile is converted back to a normal file: the standin
512 512 # disappears, but a new (normal) file appears as the lfile.
513 513 if os.path.exists(abslfile) and lfile not in repo[None]:
514 514 util.unlinkpath(abslfile)
515 515 ret = -1
516 516 state = repo.dirstate[lfutil.standin(lfile)]
517 517 if state == 'n':
518 518 # When rebasing, we need to synchronize the standin and the largefile,
519 519 # because otherwise the largefile will get reverted. But for commit's
520 520 # sake, we have to mark the file as unclean.
521 521 if getattr(repo, "_isrebasing", False):
522 522 lfdirstate.normallookup(lfile)
523 523 else:
524 524 lfdirstate.normal(lfile)
525 525 elif state == 'r':
526 526 lfdirstate.remove(lfile)
527 527 elif state == 'a':
528 528 lfdirstate.add(lfile)
529 529 elif state == '?':
530 530 lfdirstate.drop(lfile)
531 531 return ret
532 532
533 533 def catlfile(repo, lfile, rev, filename):
534 534 hash = lfutil.readstandin(repo, lfile, rev)
535 535 if not lfutil.inusercache(repo.ui, hash):
536 536 store = basestore._openstore(repo)
537 537 success, missing = store.get([(lfile, hash)])
538 538 if len(success) != 1:
539 539 raise util.Abort(
540 540 _('largefile %s is not in cache and could not be downloaded')
541 541 % lfile)
542 542 path = lfutil.usercachepath(repo.ui, hash)
543 543 fpout = cmdutil.makefileobj(repo, filename)
544 544 fpin = open(path, "rb")
545 fpout.write(fpin.read())
545 for chunk in lfutil.blockstream(fpin):
546 fpout.write(chunk)
546 547 fpout.close()
547 548 fpin.close()
548 549 return 0
549 550
550 551 # -- hg commands declarations ------------------------------------------------
551 552
552 553 cmdtable = {
553 554 'lfconvert': (lfconvert,
554 555 [('s', 'size', '',
555 556 _('minimum size (MB) for files to be converted '
556 557 'as largefiles'),
557 558 'SIZE'),
558 559 ('', 'to-normal', False,
559 560 _('convert from a largefiles repo to a normal repo')),
560 561 ],
561 562 _('hg lfconvert SOURCE DEST [FILE ...]')),
562 563 }
563 564
564 565 commands.inferrepo += " lfconvert"
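
As the lfconvert docstring above explains, the --size / largefiles.size threshold is given in megabytes and is compared against the size of the first version of each file; _islfile() performs that comparison in bytes. A small illustrative sketch of the check (the function name and example values below are made up for illustration, mirroring ctx.filectx(file).size() >= size * 1024 * 1024):

    def crossesthreshold(filesize, sizemb):
        '''Return True when a file of filesize bytes meets the sizemb (MB) limit.'''
        # mirrors the comparison in _islfile; illustrative helper only
        return filesize >= sizemb * 1024 * 1024

    # e.g. with --size 2, a 3 MiB first revision is converted as a largefile:
    # crossesthreshold(3 * 1024 * 1024, 2) -> True
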