##// END OF EJS Templates
largefiles: optimize performance when updating (issue3440)...
Na'Tosha Bard -
r16700:28001e8a default
parent child Browse files
Show More
@@ -1,535 +1,537 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10
11 11 import os
12 12 import shutil
13 13
14 14 from mercurial import util, match as match_, hg, node, context, error, \
15 15 cmdutil, scmutil
16 16 from mercurial.i18n import _
17 17
18 18 import lfutil
19 19 import basestore
20 20
21 21 # -- Commands ----------------------------------------------------------
22 22
def lfconvert(ui, src, dest, *pats, **opts):
    '''convert a normal repository to a largefiles repository

    Convert repository SOURCE to a new repository DEST, identical to
    SOURCE except that certain files will be converted as largefiles:
    specifically, any file that matches any PATTERN *or* whose size is
    above the minimum size threshold is converted as a largefile. The
    size used to determine whether or not to track a file as a
    largefile is the size of the first version of the file. The
    minimum size can be specified either with --size or in
    configuration as ``largefiles.size``.

    After running this command you will need to make sure that
    largefiles is enabled anywhere you intend to push the new
    repository.

    Use --to-normal to convert largefiles back to normal files; after
    this, the DEST repository can be used without largefiles at all.'''

    if opts['to_normal']:
        tolfile = False
    else:
        tolfile = True
        # size is only consulted when converting *to* largefiles
        size = lfutil.getminsize(ui, True, opts.get('size'), default=None)

    if not hg.islocal(src):
        raise util.Abort(_('%s is not a local Mercurial repo') % src)
    if not hg.islocal(dest):
        raise util.Abort(_('%s is not a local Mercurial repo') % dest)

    rsrc = hg.repository(ui, src)
    ui.status(_('initializing destination %s\n') % dest)
    rdst = hg.repository(ui, dest, create=True)

    success = False
    # initialize before the try block so the finally clause cannot hit a
    # NameError (and mask the original exception) if rdst.lock() fails
    dstlock = None
    try:
        # Lock destination to prevent modification while it is converted to.
        # Don't need to lock src because we are just reading from its history
        # which can't change.
        dstlock = rdst.lock()

        # Get a list of all changesets in the source. The easy way to do this
        # is to simply walk the changelog, using changelog.nodesbetween().
        # Take a look at mercurial/revlog.py:639 for more details.
        # Use a generator instead of a list to decrease memory usage
        ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
            rsrc.heads())[0])
        revmap = {node.nullid: node.nullid}
        if tolfile:
            lfiles = set()
            normalfiles = set()
            if not pats:
                pats = ui.configlist(lfutil.longname, 'patterns', default=[])
            if pats:
                matcher = match_.match(rsrc.root, '', list(pats))
            else:
                matcher = None

            lfiletohash = {}
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=rsrc['tip'].rev())
                _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
                    lfiles, normalfiles, matcher, size, lfiletohash)
            ui.progress(_('converting revisions'), None)

            # remove the largefiles store and any working-copy largefile
            # leftovers created while writing standins
            if os.path.exists(rdst.wjoin(lfutil.shortname)):
                shutil.rmtree(rdst.wjoin(lfutil.shortname))

            for f in lfiletohash.keys():
                if os.path.isfile(rdst.wjoin(f)):
                    os.unlink(rdst.wjoin(f))
                try:
                    os.removedirs(os.path.dirname(rdst.wjoin(f)))
                except OSError:
                    pass

            # If there were any files converted to largefiles, add largefiles
            # to the destination repository's requirements.
            if lfiles:
                rdst.requirements.add('largefiles')
                rdst._writerequirements()
        else:
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=rsrc['tip'].rev())
                _addchangeset(ui, rsrc, rdst, ctx, revmap)

            ui.progress(_('converting revisions'), None)
        success = True
    finally:
        if not success:
            # we failed, remove the new directory
            shutil.rmtree(rdst.root)
        if dstlock:
            dstlock.release()
118 118
def _addchangeset(ui, rsrc, rdst, ctx, revmap):
    '''Replay one changeset from rsrc into rdst, converting standins back
    into real largefile contents (the --to-normal direction).'''
    # map the source parents to their already-converted dst nodes
    parents = _convertparents(ctx, revmap)

    # everything touched by this changeset, including merge-only changes
    files = _getchangedfiles(ctx, parents)

    def getfilectx(repo, memctx, f):
        # normal files pass straight through
        if lfutil.standin(f) not in files:
            return _getnormalcontext(repo.ui, ctx, f, revmap)
        # if the file isn't in the manifest then it was removed
        # or renamed, raise IOError to indicate this
        try:
            fctx = ctx.filectx(lfutil.standin(f))
        except error.LookupError:
            raise IOError
        renamed = fctx.renamed()
        if renamed:
            renamed = lfutil.splitstandin(renamed[0])

        # the standin holds the hash naming the cached largefile contents
        hash = fctx.data().strip()
        path = lfutil.findfile(rsrc, hash)
        ### TODO: What if the file is not cached?
        data = ''
        fd = None
        try:
            fd = open(path, 'rb')
            data = fd.read()
        finally:
            if fd:
                fd.close()
        return context.memfilectx(f, data, 'l' in fctx.flags(),
                                  'x' in fctx.flags(), renamed)

    # commit under the original (un-standin-ified) filenames
    dstfiles = []
    for fname in files:
        if lfutil.isstandin(fname):
            dstfiles.append(lfutil.splitstandin(fname))
        else:
            dstfiles.append(fname)
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
162 162
def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
        matcher, size, lfiletohash):
    '''Replay one changeset from rsrc into rdst, converting qualifying
    files to largefiles and writing their standins.

    lfiles, normalfiles and lfiletohash are shared accumulators mutated
    in place across successive calls for a whole conversion run.'''
    # Convert src parents to dst parents
    parents = _convertparents(ctx, revmap)

    # Generate list of changed files
    files = _getchangedfiles(ctx, parents)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the lfileness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.renamed()
                renamedlfile = renamed and renamed[0] in lfiles
                islfile |= renamedlfile
                if 'l' in fctx.flags():
                    # symlinks can never be largefiles
                    if renamedlfile:
                        raise util.Abort(
                            _('renamed/copied largefile %s becomes symlink')
                            % f)
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            dstfiles.append(lfutil.standin(f))
            # largefile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if 'l' in fctx.flags():
                    renamed = fctx.renamed()
                    if renamed and renamed[0] in lfiles:
                        raise util.Abort(_('largefile %s becomes symlink') % f)

                # largefile was modified, update standins
                fullpath = rdst.wjoin(f)
                util.makedirs(os.path.dirname(fullpath))
                m = util.sha1('')
                m.update(ctx[f].data())
                hash = m.hexdigest()
                if f not in lfiletohash or lfiletohash[f] != hash:
                    # initialize fd before the try block: otherwise a
                    # failing open() makes the finally clause raise
                    # NameError, masking the real error
                    fd = None
                    try:
                        fd = open(fullpath, 'wb')
                        fd.write(ctx[f].data())
                    finally:
                        if fd:
                            fd.close()
                    executable = 'x' in ctx[f].flags()
                    os.chmod(fullpath, lfutil.getmode(executable))
                    lfutil.writestandin(rdst, lfutil.standin(f), hash,
                        executable)
                    lfiletohash[f] = hash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        if lfutil.isstandin(f):
            # if the file isn't in the manifest then it was removed
            # or renamed, raise IOError to indicate this
            srcfname = lfutil.splitstandin(f)
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                raise IOError
            renamed = fctx.renamed()
            if renamed:
                # standin is always a largefile because largefile-ness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed[0])

            # the standin's content is just the hash plus a newline
            return context.memfilectx(f, lfiletohash[srcfname] + '\n', 'l' in
                fctx.flags(), 'x' in fctx.flags(), renamed)
        else:
            return _getnormalcontext(repo.ui, ctx, f, revmap)

    # Commit
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
247 247
def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
    '''Commit dstfiles into rdst as a converted copy of ctx (same user,
    date, description and extras) and record the new node in revmap.'''
    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
    newnode = rdst.commitctx(mctx)
    rdst.setparents(newnode)
    # remember src node -> dst node so children can find their parents
    revmap[ctx.node()] = rdst.changelog.tip()
254 254
def _getchangedfiles(ctx, parents):
    '''Return the set of files changed by ctx.  For merges, also include
    files that differ between the merge result and either parent.'''
    files = set(ctx.files())
    if node.nullid in parents:
        return files
    mc = ctx.manifest()
    mp1 = ctx.parents()[0].manifest()
    mp2 = ctx.parents()[1].manifest()
    # files present in a parent but absent from the merge result
    files |= (set(mp1) | set(mp2)) - set(mc)
    # files whose merged version differs from either parent's version
    for f in mc:
        if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
            files.add(f)
    return files
267 267
def _convertparents(ctx, revmap):
    '''Map ctx's parent nodes through revmap, padding with nullid so the
    result always has exactly two entries.'''
    parents = [revmap[p.node()] for p in ctx.parents()]
    while len(parents) < 2:
        parents.append(node.nullid)
    return parents
276 276
def _getnormalcontext(ui, ctx, f, revmap):
    '''Build a memfilectx for a normal (non-largefile) file, rewriting
    .hgtags contents through revmap on the way.'''
    try:
        fctx = ctx.filectx(f)
    except error.LookupError:
        # removed or renamed in this changeset
        raise IOError
    renamed = fctx.renamed()
    if renamed:
        renamed = renamed[0]

    data = fctx.data()
    if f == '.hgtags':
        # tag lines reference src nodes; remap them to dst nodes
        data = _converttags(ui, revmap, data)
    islink = 'l' in fctx.flags()
    isexec = 'x' in fctx.flags()
    return context.memfilectx(f, data, islink, isexec, renamed)
292 292
# Remap tag data using a revision map
def _converttags(ui, revmap, data):
    '''Rewrite .hgtags data, mapping each tagged node through revmap.

    Malformed lines, unparsable ids and ids missing from revmap are
    skipped with a warning.'''
    newdata = []
    for line in data.splitlines():
        try:
            id, name = line.split(' ', 1)
        except ValueError:
            # interpolate *after* translation: putting % inside _() looks
            # up the already-expanded string, which never matches the
            # translation catalog
            ui.warn(_('skipping incorrectly formatted tag %s\n') % line)
            continue
        try:
            newid = node.bin(id)
        except TypeError:
            ui.warn(_('skipping incorrectly formatted id %s\n') % id)
            continue
        try:
            newdata.append('%s %s\n' % (node.hex(revmap[newid]),
                name))
        except KeyError:
            ui.warn(_('no mapping for id %s\n') % id)
            continue
    return ''.join(newdata)
316 316
def _islfile(file, ctx, matcher, size):
    '''Return true if file should be considered a largefile, i.e.
    matcher matches it or it is larger than size.'''
    # never store special .hg* files as largefiles
    if file in ('.hgtags', '.hgignore', '.hgsigs'):
        return False
    if matcher and matcher(file):
        return True
    # size is a threshold in megabytes; a file missing from this
    # changeset cannot qualify
    try:
        return ctx.filectx(file).size() >= size * 1024 * 1024
    except error.LookupError:
        return False
329 329
def uploadlfiles(ui, rsrc, rdst, files):
    '''upload largefiles to the central store'''

    if not files:
        return

    store = basestore._openstore(rsrc, rdst, put=True)

    # only upload hashes the store does not already have
    files = filter(lambda h: not store.exists(h), files)
    for at, hash in enumerate(files):
        ui.progress(_('uploading largefiles'), at, unit='largefile',
                    total=len(files))
        source = lfutil.findfile(rsrc, hash)
        if not source:
            raise util.Abort(_('largefile %s missing from store'
                               ' (needs to be uploaded)') % hash)
        # XXX check for errors here
        store.put(source, hash)
    ui.progress(_('uploading largefiles'), None)
351 351
def verifylfiles(ui, repo, all=False, contents=False):
    '''Verify that every big file revision in the current changeset
    exists in the central store.  With --contents, also verify that
    the contents of each big file revision are correct (SHA-1 hash
    matches the revision ID). With --all, check every changeset in
    this repository.'''
    if all:
        # hand the store a concrete list rather than an iterator,
        # which we know it can consume
        revs = range(len(repo))
    else:
        revs = ['.']

    return basestore._openstore(repo).verify(revs, contents=contents)
367 367
def cachelfiles(ui, repo, node, filelist=None):
    '''cachelfiles ensures that all largefiles needed by the specified revision
    are present in the repository's largefile cache.

    returns a tuple (cached, missing). cached is the list of files downloaded
    by this operation; missing is the list of files that were needed but could
    not be found.'''
    lfiles = lfutil.listlfiles(repo, node)
    if filelist:
        # restrict work to the requested subset of largefiles
        lfiles = set(lfiles) & set(filelist)

    toget = []
    for lfile in lfiles:
        # If we are mid-merge, then we have to trust the standin that is in
        # the working copy to have the correct hashvalue. This is because the
        # original hg.merge() already updated the standin as part of the
        # normal merge process -- we just have to update the largefile to
        # match.
        if (getattr(repo, "_ismerging", False) and
            os.path.exists(repo.wjoin(lfutil.standin(lfile)))):
            expectedhash = lfutil.readstandin(repo, lfile)
        else:
            expectedhash = repo[node][lfutil.standin(lfile)].data().strip()

        # if it exists and its hash matches, it might have been locally
        # modified before updating and the user chose 'local'. in this case,
        # it will not be in any store, so don't look for it.
        abslfile = repo.wjoin(lfile)
        uptodate = (os.path.exists(abslfile) and
                    expectedhash == lfutil.hashfile(abslfile))
        if not uptodate and not lfutil.findfile(repo, expectedhash):
            toget.append((lfile, expectedhash))

    if not toget:
        return ([], [])
    store = basestore._openstore(repo)
    return store.get(toget)
403 405
def downloadlfiles(ui, repo, rev=None):
    '''Cache every largefile referenced by the revisions selected by rev
    and report how many were fetched and how many could not be found.'''
    matchfn = scmutil.match(repo[None],
                            [repo.wjoin(lfutil.shortname)], {})
    # walkchangerevs requires a prepare callback; we need no preparation
    def prepare(ctx, fns):
        pass
    totalsuccess = 0
    totalmissing = 0
    walk = cmdutil.walkchangerevs(repo, matchfn, {'rev' : rev}, prepare)
    for ctx in walk:
        success, missing = cachelfiles(ui, repo, ctx.node())
        totalsuccess += len(success)
        totalmissing += len(missing)
    ui.status(_("%d additional largefiles cached\n") % totalsuccess)
    if totalmissing > 0:
        ui.status(_("%d largefiles failed to download\n") % totalmissing)
    return totalsuccess, totalmissing
420 422
def updatelfiles(ui, repo, filelist=None, printmessage=True):
    '''Sync working-copy largefiles with their standins, optionally
    restricted to filelist; report progress when printmessage is set.'''
    wlock = repo.wlock()
    try:
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)

        if filelist is not None:
            lfiles = [f for f in lfiles if f in filelist]

        printed = False
        if printmessage and lfiles:
            ui.status(_('getting changed largefiles\n'))
            printed = True
        # make sure everything we are about to update is cached locally
        cachelfiles(ui, repo, '.', lfiles)

        updated, removed = 0, 0
        results = [_updatelfile(repo, lfdirstate, f) for f in lfiles]
        for res in results:
            # _updatelfile returns 1 (modified), -1 (removed), 0
            # (unchanged) or None (missing from cache); None and 0
            # contribute to neither counter
            if res:
                if res > 0:
                    updated += res
                else:
                    removed -= res
        if printmessage and (removed or updated) and not printed:
            ui.status(_('getting changed largefiles\n'))
            printed = True

        lfdirstate.write()
        if printed and printmessage:
            ui.status(_('%d largefiles updated, %d removed\n') % (updated,
                removed))
    finally:
        wlock.release()
452 454
def _updatelfile(repo, lfdirstate, lfile):
    '''updates a single largefile and copies the state of its standin from
    the repository's dirstate to its state in the lfdirstate.

    returns 1 if the file was modified, -1 if the file was removed, 0 if the
    file was unchanged, and None if the needed largefile was missing from the
    cache.'''
    ret = 0
    abslfile = repo.wjoin(lfile)
    absstandin = repo.wjoin(lfutil.standin(lfile))
    if os.path.exists(absstandin):
        # a .orig backup of the standin means an update is in progress;
        # mirror it with a .orig backup of the largefile itself
        if os.path.exists(absstandin+'.orig'):
            shutil.copyfile(abslfile, abslfile+'.orig')
        expecthash = lfutil.readstandin(repo, lfile)
        # fetch from the cache only when the working-copy largefile is
        # absent or its hash no longer matches the standin
        if (expecthash != '' and
            (not os.path.exists(abslfile) or
             expecthash != lfutil.hashfile(abslfile))):
            if not lfutil.copyfromcache(repo, expecthash, lfile):
                # use normallookup() to allocate entry in largefiles dirstate,
                # because lack of it misleads lfilesrepo.status() into
                # recognition that such cache missing files are REMOVED.
                lfdirstate.normallookup(lfile)
                return None # don't try to set the mode
            ret = 1
        # propagate the standin's permission bits (e.g. executable flag)
        # to the largefile
        mode = os.stat(absstandin).st_mode
        if mode != os.stat(abslfile).st_mode:
            os.chmod(abslfile, mode)
            ret = 1
    else:
        # Remove lfiles for which the standin is deleted, unless the
        # lfile is added to the repository again. This happens when a
        # largefile is converted back to a normal file: the standin
        # disappears, but a new (normal) file appears as the lfile.
        if os.path.exists(abslfile) and lfile not in repo[None]:
            util.unlinkpath(abslfile)
            ret = -1
    # mirror the standin's dirstate status into the largefiles dirstate
    state = repo.dirstate[lfutil.standin(lfile)]
    if state == 'n':
        # When rebasing, we need to synchronize the standin and the largefile,
        # because otherwise the largefile will get reverted. But for commit's
        # sake, we have to mark the file as unclean.
        if getattr(repo, "_isrebasing", False):
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
    return ret
505 507
def catlfile(repo, lfile, rev, filename):
    '''Write the contents of largefile lfile at rev to filename (or
    stdout), fetching it into the user cache first if necessary.

    Returns 0 on success; aborts if the largefile cannot be obtained.'''
    hash = lfutil.readstandin(repo, lfile, rev)
    if not lfutil.inusercache(repo.ui, hash):
        store = basestore._openstore(repo)
        success, missing = store.get([(lfile, hash)])
        if len(success) != 1:
            raise util.Abort(
                _('largefile %s is not in cache and could not be downloaded')
                % lfile)
    path = lfutil.usercachepath(repo.ui, hash)
    fpout = cmdutil.makefileobj(repo, filename)
    # close both file objects even if the copy fails partway
    try:
        fpin = open(path, "rb")
        try:
            fpout.write(fpin.read())
        finally:
            fpin.close()
    finally:
        fpout.close()
    return 0
522 524
523 525 # -- hg commands declarations ------------------------------------------------
524 526
# Table consumed by the Mercurial command dispatcher: maps command name
# to (function, option list, synopsis).  Each option entry is
# (short flag, long name, default, help text[, value label]).
cmdtable = {
    'lfconvert': (lfconvert,
                  [('s', 'size', '',
                    _('minimum size (MB) for files to be converted '
                      'as largefiles'),
                    'SIZE'),
                   ('', 'to-normal', False,
                    _('convert from a largefiles repo to a normal repo')),
                   ],
                  _('hg lfconvert SOURCE DEST [FILE ...]')),
    }
General Comments 0
You need to be logged in to leave comments. Login now