lfutil.py
427 lines
| 14.3 KiB
| text/x-python
|
PythonLexer
various
|
r15168 | # Copyright 2009-2010 Gregory P. Ward | ||
# Copyright 2009-2010 Intelerad Medical Systems Incorporated | ||||
# Copyright 2010-2011 Fog Creek Software | ||||
# Copyright 2010-2011 Unity Technologies | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
'''largefiles utility code: must not import other modules in this package.''' | ||||
import os | ||||
import errno | ||||
Benjamin Pollack
|
r15320 | import platform | ||
various
|
r15168 | import shutil | ||
import stat | ||||
Na'Tosha Bard
|
r15226 | from mercurial import dirstate, httpconnection, match as match_, util, scmutil | ||
various
|
r15168 | from mercurial.i18n import _ | ||
# Name of the directory (relative to the repo root) that holds the
# standin files; see standin() and getstandinmatcher().
shortname = '.hglf'
# shortname followed by '/', i.e. the path prefix shared by all standins.
shortnameslash = shortname + '/'
# Name of the config section read by this module and of the directory
# joined onto the repo path by storepath() and openlfdirstate().
longname = 'largefiles'
# -- Private worker functions ------------------------------------------ | ||||
Greg Ward
|
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum largefile size to use.

    opt takes precedence when it is a true value.  Otherwise, if
    assumelfiles is set, the 'minsize' setting of the largefiles config
    section is consulted (falling back to default).  A true value is
    converted to float; a false value other than None is returned
    unchanged.

    Raises util.Abort if the value is not a number or if no value is
    available at all.'''
    minsize = opt
    if assumelfiles and not minsize:
        minsize = ui.config(longname, 'minsize', default=default)
    if minsize is None:
        raise util.Abort(_('minimum size for largefiles must be specified'))
    if not minsize:
        # false but not None (e.g. 0 or ''): handed back untouched
        return minsize
    try:
        return float(minsize)
    except ValueError:
        raise util.Abort(_('largefiles: size must be number (not %s)\n')
                         % minsize)
various
|
def link(src, dest):
    '''Make dest refer to the same content as src.

    A hardlink is attempted first (util.oslink).  If that raises
    OSError, the content of src is copied to dest through an atomic
    temporary file and the permission bits of src are applied to dest.'''
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        dst = util.atomictempfile(dest)
        srcfile = open(src, 'rb')
        try:
            for chunk in util.filechunkiter(srcfile):
                dst.write(chunk)
        finally:
            # release the source handle explicitly instead of relying
            # on garbage collection, even if the copy fails half-way
            srcfile.close()
        dst.close()
        os.chmod(dest, os.stat(src).st_mode)
Benjamin Pollack
|
def usercachepath(ui, hash):
    '''Return the location of the largefile with the given hash inside
    the per-user cache.

    The 'usercache' path of the largefiles config section is used when
    set.  Otherwise a platform default is derived from the environment:
    LOCALAPPDATA (or APPDATA) when os.name is 'nt', HOME/Library/Caches
    on Darwin, and XDG_CACHE_HOME (or HOME/.cache) on other posix
    systems.  A false value is returned when the required environment
    variable is not set.

    Raises util.Abort for any other operating system.'''
    configured = ui.configpath(longname, 'usercache', None)
    if configured:
        return os.path.join(configured, hash)

    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA', os.getenv('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname, hash)
        return configured

    if platform.system() == 'Darwin':
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname, hash)
        return configured

    if os.name == 'posix':
        xdgcache = os.getenv('XDG_CACHE_HOME')
        if xdgcache:
            return os.path.join(xdgcache, longname, hash)
        home = os.getenv('HOME')
        if home:
            return os.path.join(home, '.cache', longname, hash)
        # nothing usable in the environment: hand back the false value
        return xdgcache

    raise util.Abort(_('unknown operating system: %s\n') % os.name)
Benjamin Pollack
|
def inusercache(ui, hash):
    '''Tell whether the largefile with the given hash exists in the user
    cache.  When no cache path can be determined, the false value from
    usercachepath() is returned as is.'''
    cached = usercachepath(ui, hash)
    if not cached:
        return cached
    return os.path.exists(cached)
various
|
r15168 | |||
def findfile(repo, hash):
    '''Return the store path of the largefile with the given hash, or
    None if it is in neither the store nor the user cache.

    A file found only in the user cache is first linked into the
    store (see link()).'''
    if instore(repo, hash):
        repo.ui.note(_('found %s in store\n') % hash)
        return storepath(repo, hash)
    if not inusercache(repo.ui, hash):
        return None
    repo.ui.note(_('found %s in system cache\n') % hash)
    target = storepath(repo, hash)
    util.makedirs(os.path.dirname(target))
    link(usercachepath(repo.ui, hash), target)
    return target
various
|
r15168 | |||
Na'Tosha Bard
|
class largefilesdirstate(dirstate.dirstate):
    '''dirstate variant used for tracking largefiles.

    Every filename-taking method below normalizes its argument with
    unixpath() before delegating to the base class.'''
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self, f=None):
        # Nothing is ever ignored by this dirstate.
        # NOTE(review): the base dirstate is believed to call
        # self._ignore(path); the former zero-argument signature would
        # raise TypeError there.  f is optional so that zero-argument
        # calls keep working -- confirm against mercurial.dirstate.
        return False
various
|
r15168 | |||
Matt Harbison
|
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.

    When create is true and that dirstate file does not exist yet, the
    directory is created and the dirstate is populated from the
    standins known to repo.dirstate.
    '''
    lfstoredir = repo.join(longname)
    opener = scmutil.opener(lfstoredir)
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                     repo.dirstate._validate)

    # If the largefiles dirstate does not exist, populate and create
    # it.  This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not os.path.exists(os.path.join(lfstoredir, 'dirstate')):
        util.makedirs(lfstoredir)
        matcher = getstandinmatcher(repo)
        for standin in repo.dirstate.walk(matcher, [], False, False):
            lfile = splitstandin(standin)
            hash = readstandin(repo, lfile)
            # start out as "needs lookup"; upgraded to normal below only
            # if the working copy content matches the standin's hash
            lfdirstate.normallookup(lfile)
            try:
                if hash == hashfile(repo.wjoin(lfile)):
                    lfdirstate.normal(lfile)
            except OSError, err:
                # a missing largefile is tolerated; anything else is not
                if err.errno != errno.ENOENT:
                    raise
    return lfdirstate
Na'Tosha Bard
|
def lfdirstatestatus(lfdirstate, repo, rev):
    '''Return the status of all largefiles as the tuple (modified,
    added, removed, missing, unknown, ignored, clean).

    Files that lfdirstate reports as unsure are resolved by comparing
    the hash stored in their standin at rev with the hash of the file
    in the working directory; files found clean are also marked normal
    in lfdirstate.'''
    matchall = match_.always(repo.root, repo.getcwd())
    (unsure, modified, added, removed, missing, unknown, ignored,
     clean) = lfdirstate.status(matchall, [], False, False, False)
    for lfile in unsure:
        try:
            fctx = repo[rev][standin(lfile)]
        except LookupError:
            # no standin at rev
            fctx = None
        if fctx and fctx.data().strip() == hashfile(repo.wjoin(lfile)):
            clean.append(lfile)
            lfdirstate.normal(lfile)
        else:
            modified.append(lfile)
    return (modified, added, removed, missing, unknown, ignored, clean)
def listlfiles(repo, rev=None, matcher=None):
    '''Return the list of largefiles in the specified changeset, or in
    the working copy when rev is None.  matcher defaults to a matcher
    for the whole standin directory.'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is None and repo.dirstate[f] == '?':
            continue
        lfiles.append(splitstandin(f))
    return lfiles
Benjamin Pollack
|
def instore(repo, hash):
    '''Tell whether the largefile with the given hash exists in the
    store of repo.'''
    location = storepath(repo, hash)
    return os.path.exists(location)
various
|
r15168 | |||
Benjamin Pollack
|
def storepath(repo, hash):
    '''Return the path, as built by repo.join(), at which the largefile
    with the given hash is stored.'''
    relpath = os.path.join(longname, hash)
    return repo.join(relpath)
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository.  Return true on success or false if the
    file was not found in either cache (which should not happen: this
    is meant to be called only after ensuring that the needed largefile
    exists in the cache).'''
    cached = findfile(repo, hash)
    if cached is None:
        return False
    target = repo.wjoin(filename)
    util.makedirs(os.path.dirname(target))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    shutil.copy(cached, target)
    return True
Benjamin Pollack
|
def copytostore(repo, rev, file, uploaded=False):
    '''Copy the working-directory largefile to the store, under the hash
    recorded in its standin at rev, unless the store already has it.

    uploaded is accepted but not used by this function.'''
    hash = readstandin(repo, file, rev)
    if not instore(repo, hash):
        copytostoreabsolute(repo, repo.wjoin(file), hash)
various
|
r15168 | |||
Dan Villiom Podlaski Christiansen
|
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    for filename in ctx.files():
        if not isstandin(filename):
            continue
        # skip standins that are listed in files() but absent from
        # the manifest of this revision
        if filename not in ctx.manifest():
            continue
        copytostore(repo, ctx.node(), splitstandin(filename))
Benjamin Pollack
|
def copytostoreabsolute(repo, file, hash):
    '''Put the content of file into the store of repo under hash.

    If the user cache already holds that hash it is linked into the
    store.  Otherwise, unless repo has a true _isconverting attribute,
    file is copied into the store through an atomic temporary file and
    then linked into the user cache.'''
    target = storepath(repo, hash)
    util.makedirs(os.path.dirname(target))
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), target)
    elif not getattr(repo, "_isconverting", False):
        dst = util.atomictempfile(target,
                                  createmode=repo.store.createmode)
        srcfile = open(file, 'rb')
        try:
            for chunk in util.filechunkiter(srcfile):
                dst.write(chunk)
        finally:
            # release the source handle explicitly instead of relying
            # on garbage collection, even if the copy fails half-way
            srcfile.close()
        dst.close()
        linktousercache(repo, hash)
various
|
r15168 | |||
Benjamin Pollack
|
def linktousercache(repo, hash):
    '''Link the stored largefile with the given hash into the user
    cache.  Does nothing when no user cache path is available.'''
    cachepath = usercachepath(repo.ui, hash)
    if not cachepath:
        return
    util.makedirs(os.path.dirname(cachepath))
    link(storepath(repo, hash), cachepath)
various
|
r15168 | |||
def getstandinmatcher(repo, pats=None, opts=None):
    '''Return a match object that applies pats to the standin directory

    pats is an optional sequence of patterns; each is joined onto the
    standin directory.  Without patterns the standin directory itself
    is matched.  opts is passed on to scmutil.match() and defaults to
    an empty dict.'''
    # None defaults avoid sharing one mutable list/dict between calls
    if opts is None:
        opts = {}
    standindir = repo.wjoin(shortname)
    if pats:
        pats = [os.path.join(standindir, pat) for pat in pats]
    else:
        # no patterns: relative to repo root
        pats = [standindir]
    # no warnings about missing files or directories
    match = scmutil.match(repo[None], pats, opts)
    match.bad = lambda f, msg: None
    return match
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher.  Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    # keep the original match function before it is replaced below
    standinmatchfn = smatcher.matchfn

    def composedmatchfn(f):
        accepted = standinmatchfn(f)
        if not accepted:
            return accepted
        return rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composedmatchfn
    return smatcher
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows.  Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    slashed = util.pconvert(filename)
    return shortnameslash + slashed
various
|
r15168 | |||
def isstandin(filename):
    '''Return true if filename is a big file standin, i.e. if it lies
    below the standin directory.  filename must be in Mercurial's
    internal form (slash-separated).'''
    prefix = shortnameslash
    return filename.startswith(prefix)
various
|
r15168 | |||
def splitstandin(filename):
    '''Return the name of the largefile that the standin filename
    stands for, or None if filename is not below the standin
    directory.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    parts = util.pconvert(filename).split('/', 1)
    if len(parts) != 2 or parts[0] != shortname:
        return None
    return parts[1]
def updatestandin(repo, standin):
    '''Rewrite standin from the current content hash and executable
    bits of its largefile in the working directory.  Does nothing if
    that largefile does not exist.'''
    lfile = repo.wjoin(splitstandin(standin))
    if not os.path.exists(lfile):
        return
    writestandin(repo, standin, hashfile(lfile), getexecutable(lfile))
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    fctx = repo[node][standin(filename)]
    return fctx.data().strip()
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    target = repo.wjoin(standin)
    writehash(hash, target, executable)
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way.  Close outfile
    when done and return the binary hash.'''
    sha = util.sha1('')
    for block in instream:
        sha.update(block)
        outfile.write(block)

    # Blecch: closing a file that somebody else opened is rude and
    # wrong.  But it's so darn convenient and practical!  After all,
    # outfile was opened just to copy and hash.
    outfile.close()

    return sha.digest()
def hashrepofile(repo, file):
    '''Return hashfile() of file, resolved with repo.wjoin().'''
    path = repo.wjoin(file)
    return hashfile(path)
def hashfile(file):
    '''Return the hex SHA-1 digest of the content of file, or '' if the
    file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        for data in blockstream(fd):
            hasher.update(data)
    finally:
        # blockstream() closes fd only after a complete read; make sure
        # the descriptor is also released when reading or hashing fails
        fd.close()
    return hasher.hexdigest()
class limitreader(object):
    '''File-like wrapper that hands out at most limit bytes of f.'''
    def __init__(self, f, limit):
        # underlying file-like object; only its read() is used
        self.f = f
        # number of bytes that may still be requested from f
        self.limit = limit

    def read(self, length):
        '''Read up to length bytes, never requesting more than the
        remaining limit.  Return '' once the limit is used up.'''
        if self.limit == 0:
            return ''
        length = min(length, self.limit)
        self.limit -= length
        return self.f.read(length)

    def close(self):
        '''Do nothing; the wrapped file is left open.'''
        pass
def blockstream(infile, blocksize=128 * 1024):
    """Generator that yields blocks of data from infile and closes infile."""
    data = infile.read(blocksize)
    while data:
        yield data
        data = infile.read(blocksize)
    # same blecch as copyandhash() above
    infile.close()
def writehash(hash, filename, executable):
    '''Write hash plus a trailing newline to filename, creating parent
    directories as needed, and set the file mode given by
    getmode(executable).'''
    parent = os.path.dirname(filename)
    util.makedirs(parent)
    content = hash + '\n'
    util.writefile(filename, content)
    os.chmod(filename, getmode(executable))
various
|
r15168 | |||
def getexecutable(filename):
    '''Return a true value only if filename is executable by user, group
    and others alike; a false value otherwise.'''
    mode = os.stat(filename).st_mode
    for bit in (stat.S_IXUSR, stat.S_IXGRP):
        if not mode & bit:
            return 0
    return mode & stat.S_IXOTH
various
|
r15168 | |||
def getmode(executable):
    '''Return the permission bits for a file: rwxr-xr-x if executable is
    true, rw-r--r-- otherwise.'''
    # rw-r--r--
    mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
    if executable:
        # add x for user, group and others: rwxr-xr-x
        mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return mode
def urljoin(first, second, *arg):
    '''Join two or more URL pieces with exactly one '/' inserted at each
    junction: a '/' is appended to the left side if missing and one
    leading '/' is dropped from the right side.'''
    url = first
    for piece in (second,) + arg:
        if not url.endswith('/'):
            url += '/'
        if piece.startswith('/'):
            piece = piece[1:]
        url += piece
    return url
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    digest = util.sha1()
    for block in util.filechunkiter(data):
        digest.update(block)
    return digest.hexdigest()
def httpsendfile(ui, filename):
    '''Return an httpconnection.httpsendfile object for filename, opened
    in binary read mode.'''
    sender = httpconnection.httpsendfile(ui, filename, 'rb')
    return sender
various
|
r15168 | |||
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    normalized = os.path.normpath(path)
    return util.pconvert(normalized)
various
|
r15168 | |||
def islfilesrepo(repo):
    '''Return true if repo contains largefiles: either it has the
    'largefiles' requirement and a store data file whose name contains
    the standin prefix, or its largefiles dirstate is non-empty.'''
    if 'largefiles' in repo.requirements:
        for f in repo.store.datafiles():
            if shortnameslash in f[0]:
                return True
    # do not create the largefiles dirstate just to answer the question
    return util.any(openlfdirstate(repo.ui, repo, False))
various
|
r15168 | |||
Matt Mackall
|
class storeprotonotcapable(Exception):
    '''Exception that carries a collection of store types.'''
    def __init__(self, storetypes):
        # kept as given; Exception.__init__ is not invoked here
        self.storetypes = storetypes
Na'Tosha Bard
|
r16103 | |||
def getcurrentheads(repo):
    '''Return the concatenated branch heads of every branch found in
    repo.branchmap().'''
    heads = []
    for branch in repo.branchmap():
        heads = heads + repo.branchheads(branch)
    return heads
Na'Tosha Bard
|
r16120 | |||
def getstandinsstate(repo):
    '''Return a list of (largefile name, hash) pairs for every standin
    found by walking repo.dirstate.  The hash is None when reading the
    standin raises IOError.'''
    state = []
    for sfile in repo.dirstate.walk(getstandinmatcher(repo), [], False,
                                    False):
        lfile = splitstandin(sfile)
        try:
            hash = readstandin(repo, lfile)
        except IOError:
            hash = None
        state.append((lfile, hash))
    return state
Na'Tosha Bard
|
r16245 | |||
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the names of the largefiles whose standin state differs
    between oldstandins and newstandins.

    Both arguments are iterables of hashable tuples whose first item is
    the file name (as built by getstandinsstate()).  Each name appears
    once in the result; the order of the result is unspecified.'''
    changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
    filelist = []
    # names already emitted; a set keeps the duplicate check O(1)
    # instead of rescanning filelist for every entry
    seen = set()
    for f in changedstandins:
        name = f[0]
        if name not in seen:
            seen.add(name)
            filelist.append(name)
    return filelist