##// END OF EJS Templates
largefiles: use wlock for lfconvert (issue3444)...
Mads Kiilerich -
r16717:1eede2ea stable
parent child Browse files
Show More
@@ -1,517 +1,521
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10
11 11 import os
12 12 import shutil
13 13
14 14 from mercurial import util, match as match_, hg, node, context, error, cmdutil
15 15 from mercurial.i18n import _
16 from mercurial.lock import release
16 17
17 18 import lfutil
18 19 import basestore
19 20
20 21 # -- Commands ----------------------------------------------------------
21 22
def lfconvert(ui, src, dest, *pats, **opts):
    '''convert a normal repository to a largefiles repository

    Convert repository SOURCE to a new repository DEST, identical to
    SOURCE except that certain files will be converted as largefiles:
    specifically, any file that matches any PATTERN *or* whose size is
    above the minimum size threshold is converted as a largefile. The
    size used to determine whether or not to track a file as a
    largefile is the size of the first version of the file. The
    minimum size can be specified either with --size or in
    configuration as ``largefiles.size``.

    After running this command you will need to make sure that
    largefiles is enabled anywhere you intend to push the new
    repository.

    Use --to-normal to convert largefiles back to normal files; after
    this, the DEST repository can be used without largefiles at all.'''

    if opts['to_normal']:
        tolfile = False
    else:
        tolfile = True
        # size threshold (in MB) only matters when converting TO largefiles
        size = lfutil.getminsize(ui, True, opts.get('size'), default=None)

    if not hg.islocal(src):
        raise util.Abort(_('%s is not a local Mercurial repo') % src)
    if not hg.islocal(dest):
        raise util.Abort(_('%s is not a local Mercurial repo') % dest)

    rsrc = hg.repository(ui, src)
    ui.status(_('initializing destination %s\n') % dest)
    # dest is newly created here; on failure it is removed again below.
    rdst = hg.repository(ui, dest, create=True)

    success = False
    # Pre-set to None so the finally clause can release unconditionally
    # even if acquiring either lock fails.
    dstwlock = dstlock = None
    try:
        # Lock destination to prevent modification while it is converted to.
        # Don't need to lock src because we are just reading from its history
        # which can't change.
        # wlock is acquired before lock, matching Mercurial's usual
        # lock-ordering convention (issue3444).
        dstwlock = rdst.wlock()
        dstlock = rdst.lock()

        # Get a list of all changesets in the source.  The easy way to do this
        # is to simply walk the changelog, using changelog.nodesbetween().
        # Take a look at mercurial/revlog.py:639 for more details.
        # Use a generator instead of a list to decrease memory usage
        ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
            rsrc.heads())[0])
        # revmap maps source nodes to the corresponding destination nodes
        revmap = {node.nullid: node.nullid}
        if tolfile:
            lfiles = set()
            normalfiles = set()
            if not pats:
                pats = ui.configlist(lfutil.longname, 'patterns', default=[])
            if pats:
                matcher = match_.match(rsrc.root, '', list(pats))
            else:
                matcher = None

            lfiletohash = {}
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=rsrc['tip'].rev())
                _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
                    lfiles, normalfiles, matcher, size, lfiletohash)
            ui.progress(_('converting revisions'), None)

            # Conversion wrote largefiles and standin files straight into
            # the destination working directory; remove that scratch state.
            if os.path.exists(rdst.wjoin(lfutil.shortname)):
                shutil.rmtree(rdst.wjoin(lfutil.shortname))

            for f in lfiletohash.keys():
                if os.path.isfile(rdst.wjoin(f)):
                    os.unlink(rdst.wjoin(f))
                try:
                    os.removedirs(os.path.dirname(rdst.wjoin(f)))
                except OSError:
                    # directory not empty (or already gone) -- leave it
                    pass

            # If there were any files converted to largefiles, add largefiles
            # to the destination repository's requirements.
            if lfiles:
                rdst.requirements.add('largefiles')
                rdst._writerequirements()
        else:
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=rsrc['tip'].rev())
                _addchangeset(ui, rsrc, rdst, ctx, revmap)

            ui.progress(_('converting revisions'), None)
        success = True
    finally:
        # The conversion dirtied rdst's dirstate without going through
        # normal update paths; clear it before releasing the locks.
        rdst.dirstate.clear()
        release(dstlock, dstwlock)
        if not success:
            # we failed, remove the new directory
            shutil.rmtree(rdst.root)
117 121
def _addchangeset(ui, rsrc, rdst, ctx, revmap):
    '''Replay changeset ctx from rsrc into rdst, turning standins back
    into real files (the --to-normal direction of lfconvert).'''
    # Map the source changeset's parents onto the destination repo.
    parents = _convertparents(ctx, revmap)

    # Files that need to be (re)committed for this changeset.
    files = _getchangedfiles(ctx, parents)

    def getfilectx(repo, memctx, f):
        standin = lfutil.standin(f)
        if standin not in files:
            # plain file -- handled by the shared helper
            return _getnormalcontext(repo.ui, ctx, f, revmap)
        # A standin missing from the manifest means the file was removed
        # or renamed; raising IOError signals that to memctx.
        try:
            fctx = ctx.filectx(standin)
        except error.LookupError:
            raise IOError()
        renamed = fctx.renamed()
        if renamed:
            renamed = lfutil.splitstandin(renamed[0])

        hash = fctx.data().strip()
        path = lfutil.findfile(rsrc, hash)
        ### TODO: What if the file is not cached?
        data = ''
        fd = None
        try:
            fd = open(path, 'rb')
            data = fd.read()
        finally:
            if fd:
                fd.close()
        flags = fctx.flags()
        return context.memfilectx(f, data, 'l' in flags, 'x' in flags,
                                  renamed)

    dstfiles = []
    for f in files:
        if lfutil.isstandin(f):
            dstfiles.append(lfutil.splitstandin(f))
        else:
            dstfiles.append(f)
    # Commit the converted changeset into the destination.
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
161 165
def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
        matcher, size, lfiletohash):
    '''Replay changeset ctx from rsrc into rdst, converting files that
    qualify (per matcher/size) into largefiles with standins.

    lfiles/normalfiles accumulate the classification of every file seen
    so far across the whole conversion; lfiletohash caches the hash last
    written into each largefile's standin; revmap maps source nodes to
    destination nodes.'''
    # Convert src parents to dst parents
    parents = _convertparents(ctx, revmap)

    # Generate list of changed files
    files = _getchangedfiles(ctx, parents)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the largefile-ness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.renamed()
                renamedlfile = renamed and renamed[0] in lfiles
                islfile |= renamedlfile
                if 'l' in fctx.flags():
                    if renamedlfile:
                        raise util.Abort(
                            _('renamed/copied largefile %s becomes symlink')
                            % f)
                    # symlinks are never tracked as largefiles
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            dstfiles.append(lfutil.standin(f))
            # largefile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if 'l' in fctx.flags():
                    renamed = fctx.renamed()
                    if renamed and renamed[0] in lfiles:
                        raise util.Abort(_('largefile %s becomes symlink') % f)

                # largefile was modified, update standins
                fullpath = rdst.wjoin(f)
                util.makedirs(os.path.dirname(fullpath))
                m = util.sha1('')
                m.update(ctx[f].data())
                hash = m.hexdigest()
                if f not in lfiletohash or lfiletohash[f] != hash:
                    # BUGFIX: initialize fd before the try block; otherwise,
                    # if open() itself raises, the finally clause would hit
                    # a NameError on fd and mask the real error.
                    fd = None
                    try:
                        fd = open(fullpath, 'wb')
                        fd.write(ctx[f].data())
                    finally:
                        if fd:
                            fd.close()
                    executable = 'x' in ctx[f].flags()
                    os.chmod(fullpath, lfutil.getmode(executable))
                    lfutil.writestandin(rdst, lfutil.standin(f), hash,
                        executable)
                    lfiletohash[f] = hash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        if lfutil.isstandin(f):
            # if the file isn't in the manifest then it was removed
            # or renamed, raise IOError to indicate this
            srcfname = lfutil.splitstandin(f)
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                raise IOError()
            renamed = fctx.renamed()
            if renamed:
                # standin is always a largefile because largefile-ness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed[0])

            # the standin's content is the largefile's hash plus newline
            return context.memfilectx(f, lfiletohash[srcfname] + '\n', 'l' in
                fctx.flags(), 'x' in fctx.flags(), renamed)
        else:
            return _getnormalcontext(repo.ui, ctx, f, revmap)

    # Commit
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
246 250
def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
    '''Commit a memory changeset mirroring ctx into rdst and record the
    source-to-destination node mapping in revmap.'''
    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
    newnode = rdst.commitctx(mctx)
    rdst.setparents(newnode)
    revmap[ctx.node()] = rdst.changelog.tip()
253 257
def _getchangedfiles(ctx, parents):
    '''Return the set of files to convert for ctx: its own changed files
    plus, for merges, every file that differs from either parent.'''
    files = set(ctx.files())
    if node.nullid in parents:
        return files
    mc = ctx.manifest()
    mp1 = ctx.parents()[0].manifest()
    mp2 = ctx.parents()[1].manifest()
    # files present in a parent but dropped from the merge result
    files |= (set(mp1) | set(mp2)) - set(mc)
    # files whose merged content differs from either parent's version
    for f in mc:
        if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
            files.add(f)
    return files
266 270
def _convertparents(ctx, revmap):
    '''Map ctx's parent nodes through revmap, padding the result with
    nullid so exactly two parents are always returned.'''
    parents = [revmap[p.node()] for p in ctx.parents()]
    while len(parents) < 2:
        parents.append(node.nullid)
    return parents
275 279
def _getnormalcontext(ui, ctx, f, revmap):
    '''Build a memfilectx for a normal (non-largefile) file f of ctx,
    remapping .hgtags contents through revmap.'''
    try:
        fctx = ctx.filectx(f)
    except error.LookupError:
        # f was removed or renamed in ctx; memctx expects IOError here.
        raise IOError()
    renamed = fctx.renamed()
    if renamed:
        renamed = renamed[0]

    data = fctx.data()
    if f == '.hgtags':
        # tag file references changeset ids -- rewrite them
        data = _converttags(ui, revmap, data)
    flags = fctx.flags()
    return context.memfilectx(f, data, 'l' in flags, 'x' in flags, renamed)
291 295
def _converttags(ui, revmap, data):
    '''Rewrite .hgtags data, mapping each tagged node through revmap.

    Malformed lines and ids without a mapping are dropped with a warning
    on ui.'''
    newdata = []
    for line in data.splitlines():
        try:
            id, name = line.split(' ', 1)
        except ValueError:
            # BUGFIX: interpolate OUTSIDE _() -- the original formatted the
            # string before the gettext lookup, so the catalog key never
            # matched and these messages could never be translated.
            ui.warn(_('skipping incorrectly formatted tag %s\n') % line)
            continue
        try:
            newid = node.bin(id)
        except TypeError:
            ui.warn(_('skipping incorrectly formatted id %s\n') % id)
            continue
        try:
            newdata.append('%s %s\n' % (node.hex(revmap[newid]),
                name))
        except KeyError:
            ui.warn(_('no mapping for id %s\n') % id)
            continue
    return ''.join(newdata)
315 319
def _islfile(file, ctx, matcher, size):
    '''Return true if file should be considered a largefile, i.e.
    matcher matches it or it is larger than size (in MB).'''
    # never store special .hg* files as largefiles
    if file in ('.hgtags', '.hgignore', '.hgsigs'):
        return False
    if matcher and matcher(file):
        return True
    try:
        return ctx.filectx(file).size() >= size * 1024 * 1024
    except error.LookupError:
        # file not in this changeset's manifest
        return False
328 332
def uploadlfiles(ui, rsrc, rdst, files):
    '''upload largefiles to the central store'''

    if not files:
        return

    store = basestore._openstore(rsrc, rdst, put=True)

    # only upload hashes the store does not already hold
    missing = [h for h in files if not store.exists(h)]
    for at, hash in enumerate(missing):
        ui.progress(_('uploading largefiles'), at, unit='largefile',
                    total=len(missing))
        source = lfutil.findfile(rsrc, hash)
        if not source:
            raise util.Abort(_('largefile %s missing from store'
                               ' (needs to be uploaded)') % hash)
        # XXX check for errors here
        store.put(source, hash)
    ui.progress(_('uploading largefiles'), None)
350 354
def verifylfiles(ui, repo, all=False, contents=False):
    '''Verify that every big file revision in the current changeset
    exists in the central store. With --contents, also verify that
    the contents of each big file revision are correct (SHA-1 hash
    matches the revision ID). With --all, check every changeset in
    this repository.'''
    if all:
        # store.verify is known to take a list, so pass one rather than
        # an iterator.
        revs = range(len(repo))
    else:
        revs = ['.']

    return basestore._openstore(repo).verify(revs, contents=contents)
366 370
def cachelfiles(ui, repo, node):
    '''cachelfiles ensures that all largefiles needed by the specified revision
    are present in the repository's largefile cache.

    returns a tuple (cached, missing). cached is the list of files downloaded
    by this operation; missing is the list of files that were needed but could
    not be found.'''
    toget = []
    for lfile in lfutil.listlfiles(repo, node):
        # If we are mid-merge, then we have to trust the standin that is in
        # the working copy to have the correct hashvalue. This is because
        # the original hg.merge() already updated the standin as part of
        # the normal merge process -- we just have to update the largefile
        # to match.
        standinpath = repo.wjoin(lfutil.standin(lfile))
        if getattr(repo, "_ismerging", False) and os.path.exists(standinpath):
            expectedhash = lfutil.readstandin(repo, lfile)
        else:
            expectedhash = repo[node][lfutil.standin(lfile)].data().strip()

        # if it exists and its hash matches, it might have been locally
        # modified before updating and the user chose 'local'. in this case,
        # it will not be in any store, so don't look for it.
        abslfile = repo.wjoin(lfile)
        if ((not os.path.exists(abslfile) or
             expectedhash != lfutil.hashfile(abslfile)) and
            not lfutil.findfile(repo, expectedhash)):
            toget.append((lfile, expectedhash))

    if toget:
        store = basestore._openstore(repo)
        return store.get(toget)

    return ([], [])
402 406
def updatelfiles(ui, repo, filelist=None, printmessage=True):
    '''Synchronize working-copy largefiles with their standins, limited
    to filelist when given; report counts on ui unless silenced.'''
    wlock = repo.wlock()
    try:
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)

        if filelist is not None:
            lfiles = [f for f in lfiles if f in filelist]

        printed = False
        if printmessage and lfiles:
            ui.status(_('getting changed largefiles\n'))
            printed = True
        cachelfiles(ui, repo, '.')

        updated = 0
        removed = 0
        for f in lfiles:
            change = _updatelfile(repo, lfdirstate, f)
            # change is 1 (updated), -1 (removed), 0 (unchanged), or
            # None (missing from cache); only the first two are counted.
            if change:
                if change > 0:
                    updated += change
                else:
                    removed -= change
            # announce lazily once the first file actually changes
            if printmessage and (removed or updated) and not printed:
                ui.status(_('getting changed largefiles\n'))
                printed = True

        lfdirstate.write()
        if printed and printmessage:
            ui.status(_('%d largefiles updated, %d removed\n') % (updated,
                removed))
    finally:
        wlock.release()
434 438
def _updatelfile(repo, lfdirstate, lfile):
    '''updates a single largefile and copies the state of its standin from
    the repository's dirstate to its state in the lfdirstate.

    returns 1 if the file was modified, -1 if the file was removed, 0 if the
    file was unchanged, and None if the needed largefile was missing from the
    cache.'''
    ret = 0
    abslfile = repo.wjoin(lfile)
    absstandin = repo.wjoin(lfutil.standin(lfile))
    if os.path.exists(absstandin):
        if os.path.exists(absstandin+'.orig'):
            # a .orig standin exists -- presumably from a conflicted
            # update/merge; mirror the current largefile content as
            # .orig as well (TODO confirm against callers)
            shutil.copyfile(abslfile, abslfile+'.orig')
        expecthash = lfutil.readstandin(repo, lfile)
        # refresh the largefile only when the standin carries a hash and
        # the working-copy file is absent or stale
        if (expecthash != '' and
            (not os.path.exists(abslfile) or
             expecthash != lfutil.hashfile(abslfile))):
            if not lfutil.copyfromcache(repo, expecthash, lfile):
                # use normallookup() to allocate entry in largefiles dirstate,
                # because lack of it misleads lfilesrepo.status() into
                # recognition that such cache missing files are REMOVED.
                lfdirstate.normallookup(lfile)
                return None # don't try to set the mode
            ret = 1
        # propagate the standin's permission bits to the largefile
        mode = os.stat(absstandin).st_mode
        if mode != os.stat(abslfile).st_mode:
            os.chmod(abslfile, mode)
            ret = 1
    else:
        # Remove lfiles for which the standin is deleted, unless the
        # lfile is added to the repository again. This happens when a
        # largefile is converted back to a normal file: the standin
        # disappears, but a new (normal) file appears as the lfile.
        if os.path.exists(abslfile) and lfile not in repo[None]:
            util.unlinkpath(abslfile)
            ret = -1
    # mirror the standin's dirstate status into the largefiles dirstate
    state = repo.dirstate[lfutil.standin(lfile)]
    if state == 'n':
        # When rebasing, we need to synchronize the standin and the largefile,
        # because otherwise the largefile will get reverted. But for commit's
        # sake, we have to mark the file as unclean.
        if getattr(repo, "_isrebasing", False):
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
    return ret
487 491
def catlfile(repo, lfile, rev, filename):
    '''Write the contents of largefile lfile as of rev to filename,
    fetching it into the user cache first if necessary.

    Returns 0 on success; raises util.Abort if the largefile cannot be
    found in any store.'''
    hash = lfutil.readstandin(repo, lfile, rev)
    if not lfutil.inusercache(repo.ui, hash):
        store = basestore._openstore(repo)
        success, missing = store.get([(lfile, hash)])
        if len(success) != 1:
            raise util.Abort(
                _('largefile %s is not in cache and could not be downloaded')
                % lfile)
    path = lfutil.usercachepath(repo.ui, hash)
    fpout = cmdutil.makefileobj(repo, filename)
    # BUGFIX: close both file objects even if read/write raises; the
    # original leaked them on any I/O error.
    try:
        fpin = open(path, "rb")
        try:
            fpout.write(fpin.read())
        finally:
            fpin.close()
    finally:
        fpout.close()
    return 0
504 508
# -- hg commands declarations ------------------------------------------------

# command table consumed by Mercurial's extension loader:
# name -> (function, options list, synopsis string)
cmdtable = {
    'lfconvert': (lfconvert,
                  [('s', 'size', '',
                    _('minimum size (MB) for files to be converted '
                      'as largefiles'),
                    'SIZE'),
                   ('', 'to-normal', False,
                    _('convert from a largefiles repo to a normal repo')),
                   ],
                  _('hg lfconvert SOURCE DEST [FILE ...]')),
    }
General Comments 0
You need to be logged in to leave comments. Login now