relink.py
213 lines
| 6.6 KiB
| text/x-python
|
PythonLexer
/ hgext / relink.py
Jesse Glick
|
r9729 | # Mercurial extension to provide 'hg relink' command | ||
# | ||||
# Copyright (C) 2007 Brendan Cully <brendan@kublai.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Jesse Glick
|
r9729 | |||
"""recreates hardlinks between repository clones""" | ||||
Matt Harbison
|
r52756 | from __future__ import annotations | ||
timeless
|
r28380 | import os | ||
import stat | ||||
Yuya Nishihara
|
r29205 | from mercurial.i18n import _ | ||
timeless
|
r28380 | from mercurial import ( | ||
error, | ||||
hg, | ||||
Yuya Nishihara
|
r32337 | registrar, | ||
timeless
|
r28380 | util, | ||
) | ||||
r47704 | from mercurial.utils import ( | |||
stringutil, | ||||
urlutil, | ||||
) | ||||
Jesse Glick
|
r9729 | |||
Gregory Szorc
|
# Command table handed to registrar.command(); the resulting ``command``
# decorator is what declares the 'relink' command further down.
cmdtable = {}
command = registrar.command(cmdtable)

# Extension authors, please note: the value 'ships-with-hg-core' is reserved
# for extensions that are distributed as part of Mercurial itself. An
# out-of-tree extension should instead list the Mercurial version(s) it has
# been tested against, or simply not define this attribute at all.
testedwith = b'ships-with-hg-core'
Augie Fackler
|
r16743 | |||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
@command(
    b'relink', [], _(b'[ORIGIN]'), helpcategory=command.CATEGORY_MAINTENANCE
)
def relink(ui, repo, origin=None, **opts):
    """recreate hardlinks between two repositories

    When repositories are cloned locally, their data files will be
    hardlinked so that they only use the space of a single repository.

    Unfortunately, subsequent pulls into either repository will break
    hardlinks for any files touched by the new changesets, even if
    both repositories end up pulling the same changes.

    Similarly, passing --rev to "hg clone" will fail to use any
    hardlinks, falling back to a complete copy of the source
    repository.

    This command lets you recreate those hardlinks and reclaim that
    wasted space.

    This repository will be relinked to share space with ORIGIN, which
    must be on the same local disk. If ORIGIN is omitted, looks for
    "default-relink", then "default", in [paths].

    Do not attempt any read operations on this repository while the
    command is running. (Both repositories will be locked against
    writes.)
    """
    # Both helpers are needed later on (samedevice below, samefile while
    # pruning); bail out early when the platform layer lacks either one.
    have_link_helpers = hasattr(util, 'samefile') and hasattr(
        util, 'samedevice'
    )
    if not have_link_helpers:
        raise error.Abort(_(b'hardlinks are not supported on this system'))

    # Prefer the dedicated "default-relink" path when no ORIGIN was given.
    if origin is None and b'default-relink' in ui.paths:
        origin = b'default-relink'
    path = urlutil.get_unique_pull_path_obj(b'relink', ui, origin)
    src = hg.repository(repo.baseui, path.loc)

    src_store = src.store.path
    dst_store = repo.store.path
    ui.status(_(b'relinking %s to %s\n') % (src_store, dst_store))

    if repo.root == src.root:
        # Origin and this repository are one and the same.
        ui.status(_(b'there is nothing to relink\n'))
        return

    if not util.samedevice(src_store, dst_store):
        # No point in continuing
        raise error.Abort(_(b'source and destination are on different devices'))

    # Hold both repository locks for the whole collect/prune/relink sequence.
    with repo.lock(), src.lock():
        candidates = sorted(collect(src, ui))
        targets = prune(candidates, src_store, dst_store, ui)
        do_relink(src_store, dst_store, targets, ui)
Jesse Glick
|
r9729 | |||
Augie Fackler
|
r43346 | |||
Jesse Glick
|
def collect(src, ui):
    """Walk the store of ``src`` and list its revlog files.

    Returns a list of ``(path relative to the store, os.stat result)``
    tuples, one per regular file whose name ends in ``.d`` or ``.i``.
    Progress and a summary line are reported through ``ui``.
    """
    seplen = len(os.path.sep)
    live = len(src[b'tip'].manifest())
    # Your average repository has some files which were deleted before
    # the tip revision. We account for that by assuming that there are
    # 3 tracked files for every 2 live files as of the tip version of
    # the repository.
    #
    # mozilla-central as of 2010-06-10 had a ratio of just over 7:5.
    total = live * 3 // 2
    storepath = src.store.path
    progress = ui.makeprogress(_(b'collecting'), unit=_(b'files'), total=total)
    ui.status(
        _(b"tip has %d files, estimated total number of files: %d\n")
        % (live, total)
    )

    candidates = []
    # Everything past the store path plus one separator is the relative part.
    prefixlen = len(storepath) + seplen
    for dirpath, dirnames, filenames in os.walk(storepath):
        # Sorting in place makes os.walk descend in a stable order.
        dirnames.sort()
        relpath = dirpath[prefixlen:]
        for filename in sorted(filenames):
            suffix = filename[-2:]
            if suffix != b'.d' and suffix != b'.i':
                continue
            st = os.stat(os.path.join(dirpath, filename))
            if stat.S_ISREG(st.st_mode):
                candidates.append((os.path.join(relpath, filename), st))
                # The running position is simply how many we have kept.
                progress.update(len(candidates), item=filename)

    progress.complete()
    ui.status(_(b'collected %d candidate storage files\n') % len(candidates))
    return candidates
Augie Fackler
|
r43346 | |||
Siddharth Agarwal
|
def prune(candidates, src, dst, ui):
    """Filter ``candidates`` down to files that look relinkable.

    ``candidates`` holds ``(relative path, source stat)`` pairs; ``src`` and
    ``dst`` are the directories those relative paths are joined onto. A
    candidate is kept when its counterpart under ``dst`` can be stat'ed, is
    not already the same file as the source, and has the same size.

    Returns a list of ``(relative path, size)`` tuples. Raises error.Abort
    if a source/destination pair lives on different devices.
    """

    def linkfilter(srcfile, dstfile, srcstat):
        # Returns the source stat when the pair is worth comparing byte by
        # byte later on, False otherwise.
        try:
            dststat = os.stat(dstfile)
        except OSError:
            # Destination doesn't have this file?
            return False
        if util.samefile(srcfile, dstfile):
            return False
        if not util.samedevice(srcfile, dstfile):
            # No point in continuing
            raise error.Abort(
                _(b'source and destination are on different devices')
            )
        if srcstat.st_size != dststat.st_size:
            return False
        return srcstat

    targets = []
    progress = ui.makeprogress(
        _(b'pruning'), unit=_(b'files'), total=len(candidates)
    )
    for pos, (fn, st) in enumerate(candidates, 1):
        kept = linkfilter(os.path.join(src, fn), os.path.join(dst, fn), st)
        if not kept:
            ui.debug(b'not linkable: %s\n' % fn)
            continue
        targets.append((fn, kept.st_size))
        # Progress only advances on candidates that were kept.
        progress.update(pos, item=fn)

    progress.complete()
    ui.status(
        _(b'pruned down to %d probably relinkable files\n') % len(targets)
    )
    return targets
Augie Fackler
|
r43346 | |||
Jesse Glick
|
def do_relink(src, dst, files, ui):
    """Hardlink files under ``dst`` to their identical twins under ``src``.

    ``files`` is a sequence of ``(relative path, size)`` pairs (its length
    is used as the progress total). For each pair the two files are compared
    byte by byte; only when they are identical is the destination replaced
    by a hardlink to the source. ``size`` is added to the reclaimed-bytes
    total reported at the end.

    An OSError raised while swapping a file in is reported through
    ``ui.warn`` and the loop moves on to the next file. Errors raised while
    opening or reading the files for comparison are not caught here.
    """
    CHUNKLEN = 65536

    def samecontent(a, b):
        # Binary mode, so that read() works correctly, especially on Windows.
        # The context manager guarantees both handles are closed even when
        # open() of the second file or a read() fails, and before any
        # rename of the destination is attempted.
        with open(a, 'rb') as afp, open(b, 'rb') as bfp:
            while True:
                achunk = afp.read(CHUNKLEN)
                bchunk = bfp.read(CHUNKLEN)
                if achunk != bchunk:
                    # Covers differing bytes as well as one file ending
                    # before the other.
                    return False
                if not achunk:
                    # Both files hit EOF together.
                    return True

    def relinkfile(src, dst):
        # Move the destination aside first so it can be restored if the
        # link cannot be created.
        bak = dst + b'.bak'
        os.rename(dst, bak)
        try:
            util.oslink(src, dst)
        except OSError:
            os.rename(bak, dst)
            raise
        os.remove(bak)

    relinked = 0
    savedbytes = 0
    progress = ui.makeprogress(
        _(b'relinking'), unit=_(b'files'), total=len(files)
    )
    for pos, (f, sz) in enumerate(files, 1):
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        if not samecontent(source, tgt):
            ui.debug(b'not linkable: %s\n' % f)
            continue
        try:
            relinkfile(source, tgt)
            progress.update(pos, item=f)
            relinked += 1
            savedbytes += sz
        except OSError as inst:
            ui.warn(b'%s: %s\n' % (tgt, stringutil.forcebytestr(inst)))

    progress.complete()

    ui.status(
        _(b'relinked %d files (%s reclaimed)\n')
        % (relinked, util.bytecount(savedbytes))
    )