# HG changeset patch # User Augie Fackler # Date 2013-11-06 23:19:04 # Node ID f962870712da97d317656ccd9b1a93148fa8251a # Parent 175c6fd8cacc46ec69079e62bdcf611c779c67ea pathutil: tease out a new library to break an import cycle from canonpath use diff --git a/hgext/keyword.py b/hgext/keyword.py --- a/hgext/keyword.py +++ b/hgext/keyword.py @@ -84,7 +84,7 @@ like CVS' $Log$, are not supported. A ke from mercurial import commands, context, cmdutil, dispatch, filelog, extensions from mercurial import localrepo, match, patch, templatefilters, templater, util -from mercurial import scmutil +from mercurial import scmutil, pathutil from mercurial.hgweb import webcommands from mercurial.i18n import _ import os, re, shutil, tempfile @@ -673,7 +673,7 @@ def reposetup(ui, repo): expansion. ''' source = repo.dirstate.copied(dest) if 'l' in wctx.flags(source): - source = scmutil.canonpath(repo.root, cwd, + source = pathutil.canonpath(repo.root, cwd, os.path.realpath(source)) return kwt.match(source) diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py --- a/hgext/largefiles/overrides.py +++ b/hgext/largefiles/overrides.py @@ -12,7 +12,7 @@ import os import copy from mercurial import hg, commands, util, cmdutil, scmutil, match as match_, \ - node, archival, error, merge, discovery + node, archival, error, merge, discovery, pathutil from mercurial.i18n import _ from mercurial.node import hex from hgext import rebase @@ -469,7 +469,7 @@ def overridecopy(orig, ui, repo, pats, o return orig(ui, repo, pats, opts, rename) def makestandin(relpath): - path = scmutil.canonpath(repo.root, repo.getcwd(), relpath) + path = pathutil.canonpath(repo.root, repo.getcwd(), relpath) return os.path.join(repo.wjoin(lfutil.standin(path))) fullpats = scmutil.expandpats(pats) diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py --- a/mercurial/cmdutil.py +++ b/mercurial/cmdutil.py @@ -10,7 +10,7 @@ from i18n import _ import os, sys, errno, re, tempfile import util, scmutil, templater, patch, error, templatekw, revlog, copies import match as matchmod -import subrepo, context, repair, graphmod, revset, phases, obsolete +import subrepo, context, repair, graphmod, revset, phases, obsolete, pathutil import changelog import bookmarks import lock as lockmod @@ -274,7 +274,7 @@ def copy(ui, repo, pats, opts, rename=Fa # relsrc: ossep # otarget: ossep def copyfile(abssrc, relsrc, otarget, exact): - abstarget = scmutil.canonpath(repo.root, cwd, otarget) + abstarget = pathutil.canonpath(repo.root, cwd, otarget) if '/' in abstarget: # We cannot normalize abstarget itself, this would prevent # case only renames, like a => A. @@ -367,7 +367,7 @@ def copy(ui, repo, pats, opts, rename=Fa # return: function that takes hgsep and returns ossep def targetpathfn(pat, dest, srcs): if os.path.isdir(pat): - abspfx = scmutil.canonpath(repo.root, cwd, pat) + abspfx = pathutil.canonpath(repo.root, cwd, pat) abspfx = util.localpath(abspfx) if destdirexists: striplen = len(os.path.split(abspfx)[0]) @@ -393,7 +393,7 @@ def copy(ui, repo, pats, opts, rename=Fa res = lambda p: os.path.join(dest, os.path.basename(util.localpath(p))) else: - abspfx = scmutil.canonpath(repo.root, cwd, pat) + abspfx = pathutil.canonpath(repo.root, cwd, pat) if len(abspfx) < len(srcs[0][0]): # A directory. Either the target path contains the last # component of the source path or it does not. @@ -2065,7 +2065,7 @@ def revert(ui, repo, ctx, parents, *pats fc = ctx[f] repo.wwrite(f, fc.data(), fc.flags()) - audit_path = scmutil.pathauditor(repo.root) + audit_path = pathutil.pathauditor(repo.root) for f in remove[0]: if repo.dirstate[f] == 'a': repo.dirstate.drop(f) diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py --- a/mercurial/dirstate.py +++ b/mercurial/dirstate.py @@ -8,7 +8,7 @@ import errno from node import nullid from i18n import _ -import scmutil, util, ignore, osutil, parsers, encoding +import scmutil, util, ignore, osutil, parsers, encoding, pathutil import os, stat, errno, gc propertycache = util.propertycache @@ -736,7 +736,7 @@ class dirstate(object): # unknown == True means we walked the full directory tree above. # So if a file is not seen it was either a) not matching matchfn # b) ignored, c) missing, or d) under a symlink directory. - audit_path = scmutil.pathauditor(self._root) + audit_path = pathutil.pathauditor(self._root) for nf in iter(visit): # Report ignored items in the dmap as long as they are not diff --git a/mercurial/hgweb/webutil.py b/mercurial/hgweb/webutil.py --- a/mercurial/hgweb/webutil.py +++ b/mercurial/hgweb/webutil.py @@ -7,7 +7,7 @@ # GNU General Public License version 2 or any later version. import os, copy -from mercurial import match, patch, scmutil, error, ui, util +from mercurial import match, patch, error, ui, util, pathutil from mercurial.i18n import _ from mercurial.node import hex, nullid from common import ErrorResponse @@ -196,7 +196,7 @@ def showbookmark(repo, tmpl, t1, node=nu def cleanpath(repo, path): path = path.lstrip('/') - return scmutil.canonpath(repo.root, '', path) + return pathutil.canonpath(repo.root, '', path) def changeidctx (repo, changeid): try: diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -15,7 +15,7 @@ import merge as mergemod import tags as tagsmod from lock import release import weakref, errno, os, time, inspect -import branchmap +import branchmap, pathutil propertycache = util.propertycache filecache = scmutil.filecache @@ -166,7 +166,7 @@ class localrepository(object): self.root = self.wvfs.base self.path = self.wvfs.join(".hg") self.origroot = path - self.auditor = scmutil.pathauditor(self.root, self._checknested) + self.auditor = pathutil.pathauditor(self.root, self._checknested) self.vfs = scmutil.vfs(self.path) self.opener = self.vfs self.baseui = baseui diff --git a/mercurial/match.py b/mercurial/match.py --- a/mercurial/match.py +++ b/mercurial/match.py @@ -6,7 +6,7 @@ # GNU General Public License version 2 or any later version. import re -import scmutil, util, fileset +import util, fileset, pathutil from i18n import _ def _rematcher(pat): @@ -317,7 +317,7 @@ def _normalize(names, default, root, cwd pats = [] for kind, name in [_patsplit(p, default) for p in names]: if kind in ('glob', 'relpath'): - name = scmutil.canonpath(root, cwd, name, auditor) + name = pathutil.canonpath(root, cwd, name, auditor) elif kind in ('relglob', 'path'): name = util.normpath(name) elif kind in ('listfile', 'listfile0'): diff --git a/mercurial/pathutil.py b/mercurial/pathutil.py new file mode 100644 --- /dev/null +++ b/mercurial/pathutil.py @@ -0,0 +1,144 @@ +import os, errno, stat + +import util +from i18n import _ + +class pathauditor(object): + '''ensure that a filesystem path contains no banned components. + the following properties of a path are checked: + + - ends with a directory separator + - under top-level .hg + - starts at the root of a windows drive + - contains ".." + - traverses a symlink (e.g. a/symlink_here/b) + - inside a nested repository (a callback can be used to approve + some nested repositories, e.g., subrepositories) + ''' + + def __init__(self, root, callback=None): + self.audited = set() + self.auditeddir = set() + self.root = root + self.callback = callback + if os.path.lexists(root) and not util.checkcase(root): + self.normcase = util.normcase + else: + self.normcase = lambda x: x + + def __call__(self, path): + '''Check the relative path. + path may contain a pattern (e.g. foodir/**.txt)''' + + path = util.localpath(path) + normpath = self.normcase(path) + if normpath in self.audited: + return + # AIX ignores "/" at end of path, others raise EISDIR. + if util.endswithsep(path): + raise util.Abort(_("path ends in directory separator: %s") % path) + parts = util.splitpath(path) + if (os.path.splitdrive(path)[0] + or parts[0].lower() in ('.hg', '.hg.', '') + or os.pardir in parts): + raise util.Abort(_("path contains illegal component: %s") % path) + if '.hg' in path.lower(): + lparts = [p.lower() for p in parts] + for p in '.hg', '.hg.': + if p in lparts[1:]: + pos = lparts.index(p) + base = os.path.join(*parts[:pos]) + raise util.Abort(_("path '%s' is inside nested repo %r") + % (path, base)) + + normparts = util.splitpath(normpath) + assert len(parts) == len(normparts) + + parts.pop() + normparts.pop() + prefixes = [] + while parts: + prefix = os.sep.join(parts) + normprefix = os.sep.join(normparts) + if normprefix in self.auditeddir: + break + curpath = os.path.join(self.root, prefix) + try: + st = os.lstat(curpath) + except OSError, err: + # EINVAL can be raised as invalid path syntax under win32. + # They must be ignored for patterns can be checked too. + if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL): + raise + else: + if stat.S_ISLNK(st.st_mode): + raise util.Abort( + _('path %r traverses symbolic link %r') + % (path, prefix)) + elif (stat.S_ISDIR(st.st_mode) and + os.path.isdir(os.path.join(curpath, '.hg'))): + if not self.callback or not self.callback(curpath): + raise util.Abort(_("path '%s' is inside nested " + "repo %r") + % (path, prefix)) + prefixes.append(normprefix) + parts.pop() + normparts.pop() + + self.audited.add(normpath) + # only add prefixes to the cache after checking everything: we don't + # want to add "foo/bar/baz" before checking if there's a "foo/.hg" + self.auditeddir.update(prefixes) + + def check(self, path): + try: + self(path) + return True + except (OSError, util.Abort): + return False + +def canonpath(root, cwd, myname, auditor=None): + '''return the canonical path of myname, given cwd and root''' + if util.endswithsep(root): + rootsep = root + else: + rootsep = root + os.sep + name = myname + if not os.path.isabs(name): + name = os.path.join(root, cwd, name) + name = os.path.normpath(name) + if auditor is None: + auditor = pathauditor(root) + if name != rootsep and name.startswith(rootsep): + name = name[len(rootsep):] + auditor(name) + return util.pconvert(name) + elif name == root: + return '' + else: + # Determine whether `name' is in the hierarchy at or beneath `root', + # by iterating name=dirname(name) until that causes no change (can't + # check name == '/', because that doesn't work on windows). The list + # `rel' holds the reversed list of components making up the relative + # file name we want. + rel = [] + while True: + try: + s = util.samefile(name, root) + except OSError: + s = False + if s: + if not rel: + # name was actually the same as root (maybe a symlink) + return '' + rel.reverse() + name = os.path.join(*rel) + auditor(name) + return util.pconvert(name) + dirname, basename = util.split(name) + rel.append(basename) + if dirname == name: + break + name = dirname + + raise util.Abort(_("%s not under root '%s'") % (myname, root)) diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py --- a/mercurial/scmutil.py +++ b/mercurial/scmutil.py @@ -8,8 +8,9 @@ from i18n import _ from mercurial.node import nullrev import util, error, osutil, revset, similar, encoding, phases, parsers +import pathutil import match as matchmod -import os, errno, re, stat, glob +import os, errno, re, glob if os.name == 'nt': import scmwindows as scmplatform @@ -108,100 +109,6 @@ class casecollisionauditor(object): self._loweredfiles.add(fl) self._newfiles.add(f) -class pathauditor(object): - '''ensure that a filesystem path contains no banned components. - the following properties of a path are checked: - - - ends with a directory separator - - under top-level .hg - - starts at the root of a windows drive - - contains ".." - - traverses a symlink (e.g. a/symlink_here/b) - - inside a nested repository (a callback can be used to approve - some nested repositories, e.g., subrepositories) - ''' - - def __init__(self, root, callback=None): - self.audited = set() - self.auditeddir = set() - self.root = root - self.callback = callback - if os.path.lexists(root) and not util.checkcase(root): - self.normcase = util.normcase - else: - self.normcase = lambda x: x - - def __call__(self, path): - '''Check the relative path. - path may contain a pattern (e.g. foodir/**.txt)''' - - path = util.localpath(path) - normpath = self.normcase(path) - if normpath in self.audited: - return - # AIX ignores "/" at end of path, others raise EISDIR. - if util.endswithsep(path): - raise util.Abort(_("path ends in directory separator: %s") % path) - parts = util.splitpath(path) - if (os.path.splitdrive(path)[0] - or parts[0].lower() in ('.hg', '.hg.', '') - or os.pardir in parts): - raise util.Abort(_("path contains illegal component: %s") % path) - if '.hg' in path.lower(): - lparts = [p.lower() for p in parts] - for p in '.hg', '.hg.': - if p in lparts[1:]: - pos = lparts.index(p) - base = os.path.join(*parts[:pos]) - raise util.Abort(_("path '%s' is inside nested repo %r") - % (path, base)) - - normparts = util.splitpath(normpath) - assert len(parts) == len(normparts) - - parts.pop() - normparts.pop() - prefixes = [] - while parts: - prefix = os.sep.join(parts) - normprefix = os.sep.join(normparts) - if normprefix in self.auditeddir: - break - curpath = os.path.join(self.root, prefix) - try: - st = os.lstat(curpath) - except OSError, err: - # EINVAL can be raised as invalid path syntax under win32. - # They must be ignored for patterns can be checked too. - if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL): - raise - else: - if stat.S_ISLNK(st.st_mode): - raise util.Abort( - _('path %r traverses symbolic link %r') - % (path, prefix)) - elif (stat.S_ISDIR(st.st_mode) and - os.path.isdir(os.path.join(curpath, '.hg'))): - if not self.callback or not self.callback(curpath): - raise util.Abort(_("path '%s' is inside nested " - "repo %r") - % (path, prefix)) - prefixes.append(normprefix) - parts.pop() - normparts.pop() - - self.audited.add(normpath) - # only add prefixes to the cache after checking everything: we don't - # want to add "foo/bar/baz" before checking if there's a "foo/.hg" - self.auditeddir.update(prefixes) - - def check(self, path): - try: - self(path) - return True - except (OSError, util.Abort): - return False - class abstractvfs(object): """Abstract base class; cannot be instantiated""" @@ -310,7 +217,7 @@ class vfs(abstractvfs): def _setmustaudit(self, onoff): self._audit = onoff if onoff: - self.audit = pathauditor(self.base) + self.audit = pathutil.pathauditor(self.base) else: self.audit = util.always @@ -445,52 +352,6 @@ class readonlyvfs(abstractvfs, auditvfs) return self.vfs(path, mode, *args, **kw) -def canonpath(root, cwd, myname, auditor=None): - '''return the canonical path of myname, given cwd and root''' - if util.endswithsep(root): - rootsep = root - else: - rootsep = root + os.sep - name = myname - if not os.path.isabs(name): - name = os.path.join(root, cwd, name) - name = os.path.normpath(name) - if auditor is None: - auditor = pathauditor(root) - if name != rootsep and name.startswith(rootsep): - name = name[len(rootsep):] - auditor(name) - return util.pconvert(name) - elif name == root: - return '' - else: - # Determine whether `name' is in the hierarchy at or beneath `root', - # by iterating name=dirname(name) until that causes no change (can't - # check name == '/', because that doesn't work on windows). The list - # `rel' holds the reversed list of components making up the relative - # file name we want. - rel = [] - while True: - try: - s = util.samefile(name, root) - except OSError: - s = False - if s: - if not rel: - # name was actually the same as root (maybe a symlink) - return '' - rel.reverse() - name = os.path.join(*rel) - auditor(name) - return util.pconvert(name) - dirname, basename = util.split(name) - rel.append(basename) - if dirname == name: - break - name = dirname - - raise util.Abort(_("%s not under root '%s'") % (myname, root)) - def walkrepos(path, followsym=False, seen_dirs=None, recurse=False): '''yield every hg repository under path, always recursively. The recurse flag will only control recursion into repo working dirs''' @@ -768,7 +629,7 @@ def _interestingfiles(repo, matcher): This is different from dirstate.status because it doesn't care about whether files are modified or clean.''' added, unknown, deleted, removed = [], [], [], [] - audit_path = pathauditor(repo.root) + audit_path = pathutil.pathauditor(repo.root) ctx = repo[None] dirstate = repo.dirstate diff --git a/mercurial/subrepo.py b/mercurial/subrepo.py --- a/mercurial/subrepo.py +++ b/mercurial/subrepo.py @@ -9,7 +9,8 @@ import errno, os, re, shutil, posixpath, import xml.dom.minidom import stat, subprocess, tarfile from i18n import _ -import config, scmutil, util, node, error, cmdutil, bookmarks, match as matchmod +import config, util, node, error, cmdutil, bookmarks, match as matchmod +import pathutil hg = None propertycache = util.propertycache @@ -332,7 +333,7 @@ def subrepo(ctx, path): import hg as h hg = h - scmutil.pathauditor(ctx._repo.root)(path) + pathutil.pathauditor(ctx._repo.root)(path) state = ctx.substate[path] if state[2] not in types: raise util.Abort(_('unknown subrepo type %s') % state[2])