upstream/mercurial-mirror Commit - r3607:f4c9bb4a

issue352: disallow '\n' and '\r' in filenames (dirstate and manifest)

Benoit Boissinot -

r3607:f4c9bb4a default

parent child

tests/test-issue352

0 created 755 +21 0

@@ -0,0 +1,21 b''
	1	#!/bin/bash
	2	# http://www.selenic.com/mercurial/bts/issue352
	3
	4	hg init foo
	5	cd foo
	6
	7	A=`echo -e -n 'he\rllo'`
	8
	9	echo foo > "hell
	10	o"
	11	echo foo > "$A"
	12	hg add
	13	hg ci -A -m m
	14	rm "$A"
	15	ls
	16	hg add
	17	# BUG ? we don't walk on filenames with '\n' (regexp related) ?
	18	hg debugwalk
	19	hg ci -A -m m
	20
	21	exit 0

tests/test-issue352.out

0 created 644 +7 0

@@ -0,0 +1,7 b''
	1	adding he llo
	2	abort: '\n' and '\r' disallowed in filenames
	3	adding he llo
	4	abort: '\n' and '\r' disallowed in filenames
	5	hell
	6	o
	7	nothing changed

mercurial/dirstate.py

0 +6 -2

             """
             dirstate.py - working directory tracking for mercurial
             Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             This software may be used and distributed according to the terms
             of the GNU General Public License, incorporated herein by reference.
             """
             from node import *
             from i18n import gettext as _
             from demandload import *
             demandload(globals(), "struct os time bisect stat strutil util re errno")
             class dirstate(object):
                 format = ">cllll"
                 def __init__(self, opener, ui, root):
                     self.opener = opener
                     self.root = root
                     self.dirty = 0
                     self.ui = ui
                     self.map = None
                     self.pl = None
                     self.dirs = None
                     self.copymap = {}
                     self.ignorefunc = None
                 def wjoin(self, f):
                     '''return f joined onto the repository root.'''
                     root = self.root
                     return os.path.join(root, f)
                 def getcwd(self):
                     '''return the current working directory relative to the root.

                     the result is '' when the process sits in the root itself.'''
                     cwd = os.getcwd()
                     if cwd == self.root: return ''
                     # self.root already ends with a path separator when it is a
                     # filesystem root such as '/'; only skip one more character
                     # for the separator when self.root does not include it
                     striplen = len(self.root)
                     if not self.root.endswith(os.sep):
                         striplen += 1
                     return cwd[striplen:]
                 def hgignore(self):
                     '''return the contents of .hgignore files as a list of patterns.
                     the files parsed for patterns include:
                     .hgignore in the repository root
                     any additional files specified in the [ui] section of ~/.hgrc
                     trailing white space is dropped.
                     the escape character is backslash.
                     comments start with #.
                     empty lines are skipped.
                     lines can be of the following formats:
                     syntax: regexp # defaults following lines to non-rooted regexps
                     syntax: glob   # defaults following lines to non-rooted globs
                     re:pattern     # non-rooted regular expression
                     glob:pattern   # non-rooted glob
                     pattern        # pattern of the current default type'''
                     syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
                     def parselines(fp):
                         for line in fp:
                             escape = False
                             for i in xrange(len(line)):
                                 if escape: escape = False
                                 elif line[i] == '\\': escape = True
                                 elif line[i] == '#': break
                             line = line[:i].rstrip()
                             if line: yield line
                     repoignore = self.wjoin('.hgignore')
                     files = [repoignore]
                     files.extend(self.ui.hgignorefiles())
                     pats = {}
                     for f in files:
                         try:
                             pats[f] = []
                             fp = open(f)
                             syntax = 'relre:'
                             for line in parselines(fp):
                                 if line.startswith('syntax:'):
                                     s = line[7:].strip()
                                     try:
                                         syntax = syntaxes[s]
                                     except KeyError:
                                         self.ui.warn(_("%s: ignoring invalid "
                                                        "syntax '%s'\n") % (f, s))
                                     continue
                                 pat = syntax + line
                                 for s in syntaxes.values():
                                     if line.startswith(s):
                                         pat = line
                                         break
                                 pats[f].append(pat)
                         except IOError, inst:
                             if f != repoignore:
                                 self.ui.warn(_("skipping unreadable ignore file"
                                                " '%s': %s\n") % (f, inst.strerror))
                     return pats
                 def ignore(self, fn):
                     '''tell whether fn is ignored.

                     default match function used by dirstate and localrepository.
                     the matcher is built on first use from the patterns returned
                     by hgignore() and kept in self.ignorefunc afterwards.'''
                     if self.ignorefunc:
                         return self.ignorefunc(fn)
                     perfile = self.hgignore()
                     combined = []
                     for patlist in perfile.values():
                         combined.extend(patlist)
                     if not combined:
                         self.ignorefunc = util.never
                         return self.ignorefunc(fn)
                     try:
                         files, self.ignorefunc, anypats = (
                             util.matcher(self.root, inc=combined, src='.hgignore'))
                     except util.Abort:
                         # Re-raise an exception where the src is the right file
                         for f, patlist in perfile.items():
                             files, self.ignorefunc, anypats = (
                                 util.matcher(self.root, inc=patlist, src=f))
                     return self.ignorefunc(fn)
                 def __del__(self):
                     if self.dirty:
                         self.write()
                 def __getitem__(self, key):
                     try:
                         return self.map[key]
                     except TypeError:
                         self.lazyread()
                         return self[key]
                 def __contains__(self, key):
                     self.lazyread()
                     return key in self.map
                 def parents(self):
                     '''return the recorded parents (self.pl), loading the
                     dirstate first if needed.'''
                     self.lazyread()
                     current = self.pl
                     return current
                 def markdirty(self):
                     '''flag the dirstate as needing to be written.'''
                     if self.dirty:
                         return
                     self.dirty = 1
                 def setparents(self, p1, p2=nullid):
                     '''record (p1, p2) as the parents and mark the dirstate dirty.'''
                     # load first so that a later read cannot overwrite self.pl
                     self.lazyread()
                     self.markdirty()
                     self.pl = (p1, p2)
                 def state(self, key):
                     '''return the state character recorded for key, or "?" when
                     the dirstate has no entry for it.'''
                     try:
                         entry = self[key]
                     except KeyError:
                         return "?"
                     return entry[0]
                 def lazyread(self):
                     '''load the dirstate unless it has been loaded already.'''
                     if self.map is not None:
                         return
                     self.read()
                 def parse(self, st):
                     self.pl = [st[:20], st[20: 40]]
                     # deref fields so they will be local in loop
                     map = self.map
                     copymap = self.copymap
                     format = self.format
                     unpack = struct.unpack
                     pos = 40
                     e_size = struct.calcsize(format)
                     while pos < len(st):
                         newpos = pos + e_size
                         e = unpack(format, st[pos:newpos])
                         l = e[4]
                         pos = newpos
                         newpos = pos + l
                         f = st[pos:newpos]
                         if '\0' in f:
                             f, c = f.split('\0')
                             copymap[f] = c
                         map[f] = e[:4]
                         pos = newpos
                 def read(self):
                     self.map = {}
                     self.pl = [nullid, nullid]
                     try:
                         st = self.opener("dirstate").read()
                         if st:
                             self.parse(st)
                     except IOError, err:
                         if err.errno != errno.ENOENT: raise
                 def copy(self, source, dest):
                     '''record that dest was copied from source and mark the
                     dirstate dirty.'''
                     self.lazyread()
                     self.markdirty()
                     copies = self.copymap
                     copies[dest] = source
                 def copied(self, file):
                     '''return the copy source recorded for file, or None.'''
                     return self.copymap.get(file)
                 def copies(self):
                     '''return the copy map itself (destination -> source), not
                     a copy of it.'''
                     recorded = self.copymap
                     return recorded
                 def initdirs(self):
                     '''build the directory counts in self.dirs from self.map,
                     unless they have been built already.'''
                     if self.dirs is not None:
                         return
                     self.dirs = {}
                     for name in self.map:
                         self.updatedirs(name, 1)
                 def updatedirs(self, path, delta):
                     '''add delta to the count kept in self.dirs for every prefix
                     of path that ends just before a position reported by
                     strutil.findall(path, '/').

                     does nothing while self.dirs has not been built.'''
                     counts = self.dirs
                     if counts is None:
                         return
                     for slash in strutil.findall(path, '/'):
                         parent = path[:slash]
                         counts[parent] = counts.get(parent, 0) + delta
-                def checkshadows(self, files):
+                def checkinterfering(self, files):
                     '''abort if any name in files cannot be added.

                     raises util.Abort when a name is already counted as a
                     directory in self.dirs, when one of its path prefixes is a
                     file in the dirstate, or when it contains a newline or
                     carriage return.'''
                     def prefixes(f):
                         # yield f cut at every position reported by
                         # strutil.rfindall(f, '/')
                         for c in strutil.rfindall(f, '/'):
                             yield f[:c]
                     self.lazyread()
                     self.initdirs()
                     # prefixes already checked for an earlier name
                     seendirs = {}
                     for f in files:
+                        # shadows
                         if self.dirs.get(f):
                             raise util.Abort(_('directory named %r already in dirstate') %
                                              f)
                         for d in prefixes(f):
                             # stop at the first prefix verified before
                             if d in seendirs:
                                 break
                             if d in self.map:
                                 raise util.Abort(_('file named %r already in dirstate') %
                                                  d)
                             seendirs[d] = True
+                        # disallowed
+                        if '\r' in f or '\n' in f:
+                            raise util.Abort(_("'\\n' and '\\r' disallowed in filenames"))
                 def update(self, files, state, **kw):
                     '''record every name in files as being in the given state.

                     current states:
                     n  normal
                     m  needs merging
                     r  marked for removal
                     a  marked for addition

                     for every state except "r" the file is lstat()ed in the
                     working directory; the st_size and st_mtime keyword
                     arguments override the values taken from that stat result.
                     any copy record for the file is dropped.'''
                     if not files: return
                     self.lazyread()
                     self.markdirty()
                     if state == "a":
                         # additions are validated before anything is recorded
                         self.initdirs()
-                        self.checkshadows(files)
+                        self.checkinterfering(files)
                     for f in files:
                         if state == "r":
                             self.map[f] = ('r', 0, 0, 0)
                             self.updatedirs(f, -1)
                         else:
                             if state == "a":
                                 self.updatedirs(f, 1)
                             s = os.lstat(self.wjoin(f))
                             st_size = kw.get('st_size', s.st_size)
                             st_mtime = kw.get('st_mtime', s.st_mtime)
                             self.map[f] = (state, s.st_mode, st_size, st_mtime)
                         if self.copymap.has_key(f):
                             del self.copymap[f]
                 def forget(self, files):
                     '''drop every name in files from the dirstate, warning about
                     the ones that have no entry.'''
                     if not files: return
                     self.lazyread()
                     self.markdirty()
                     self.initdirs()
                     for name in files:
                         try:
                             del self.map[name]
                             self.updatedirs(name, -1)
                         except KeyError:
                             self.ui.warn(_("not in dirstate: %s!\n") % name)
                 def clear(self):
                     '''forget all entries, copies and directory counts and mark
                     the dirstate dirty.'''
                     self.map, self.copymap = {}, {}
                     # None makes initdirs() rebuild the counts on next use
                     self.dirs = None
                     self.markdirty()
                 def rebuild(self, parent, files):
                     self.clear()
                     umask = os.umask(0)
                     os.umask(umask)
                     for f in files:
                         if files.execf(f):
                             self.map[f] = ('n', ~umask, -1, 0)
                         else:
                             self.map[f] = ('n', ~umask & 0666, -1, 0)
                     self.pl = (parent, nullid)
                     self.markdirty()
                 def write(self):
                     if not self.dirty:
                         return
                     st = self.opener("dirstate", "w", atomic=True)
                     st.write("".join(self.pl))
                     for f, e in self.map.items():
                         c = self.copied(f)
                         if c:
                             f = f + "\0" + c
                         e = struct.pack(self.format, e[0], e[1], e[2], e[3], len(f))
                         st.write(e + f)
                     self.dirty = 0
                 def filterfiles(self, files):
                     ret = {}
                     unknown = []
                     for x in files:
                         if x == '.':
                             return self.map.copy()
                         if x not in self.map:
                             unknown.append(x)
                         else:
                             ret[x] = self.map[x]
                     if not unknown:
                         return ret
                     b = self.map.keys()
                     b.sort()
                     blen = len(b)
                     for x in unknown:
                         bs = bisect.bisect(b, "%s%s" % (x, '/'))
                         while bs < blen:
                             s = b[bs]
                             if len(s) > len(x) and s.startswith(x):
                                 ret[s] = self.map[s]
                             else:
                                 break
                             bs += 1
                     return ret
                 def supported_type(self, f, st, verbose=False):
                     '''tell whether the stat result st describes a regular file.

                     for anything else return False and, when verbose is set,
                     warn about f and the kind of file that was found.'''
                     mode = st.st_mode
                     if stat.S_ISREG(mode):
                         return True
                     if not verbose:
                         return False
                     kind = 'unknown'
                     if stat.S_ISCHR(mode): kind = _('character device')
                     elif stat.S_ISBLK(mode): kind = _('block device')
                     elif stat.S_ISFIFO(mode): kind = _('fifo')
                     elif stat.S_ISLNK(mode): kind = _('symbolic link')
                     elif stat.S_ISSOCK(mode): kind = _('socket')
                     elif stat.S_ISDIR(mode): kind = _('directory')
                     where = util.pathto(self.getcwd(), f)
                     self.ui.warn(_('%s: unsupported file type (type is %s)\n') % (
                         where,
                         kind))
                     return False
                 def walk(self, files=None, match=util.always, badmatch=None):
                     '''like statwalk(), but yield (src, filename) pairs without
                     the stat result.'''
                     results = self.statwalk(files, match, badmatch=badmatch)
                     for kind, name, statinfo in results:
                         yield kind, name
                 def statwalk(self, files=None, match=util.always, ignored=False,
                              badmatch=None):
                     '''
                     walk recursively through the directory tree, finding all files
                     matched by the match function

                     files are the names to start from, joined onto the root; when
                     empty the whole root is walked.  unless ignored is true, names
                     that are not in the dirstate and are matched by self.ignore()
                     are skipped.  badmatch, if given, is consulted for names that
                     do not exist and have no dirstate entry at or below them.

                     results are yielded in a tuple (src, filename, st), where src
                     is one of:
                     'f' the file was found in the directory tree
                     'm' the file was only in the dirstate and not in the tree,
                         or it is in the dirstate but what was found in the tree
                         is a directory or an unsupported file type
                     'b' file was not found and matched badmatch

                     and st is the stat result if the file was found in the
                     directory, None otherwise.
                     '''
                     self.lazyread()
                     # walk all files by default
                     # dc holds the dirstate entries relevant to this walk
                     if not files:
                         files = [self.root]
                         dc = self.map.copy()
                     else:
                         files = util.unique(files)
                         dc = self.filterfiles(files)
                     # match, but drop ignored names that are not in dc
                     def imatch(file_):
                         if file_ not in dc and self.ignore(file_):
                             return False
                         return match(file_)
                     if ignored: imatch = match
                     # self.root may end with a path separator when self.root == '/'
                     common_prefix_len = len(self.root)
                     if not self.root.endswith('/'):
                         common_prefix_len += 1
                     # recursion free walker, faster than os.walk.
                     def findfiles(s):
                         # directories still to be listed
                         work = [s]
                         while work:
                             top = work.pop()
                             names = os.listdir(top)
                             names.sort()
                             # nd is the top of the repository dir tree
                             nd = util.normpath(top[common_prefix_len:])
                             if nd == '.':
                                 nd = ''
                             else:
                                 # do not recurse into a repo contained in this
                                 # one. use bisect to find .hg directory so speed
                                 # is good on big directory.
                                 hg = bisect.bisect_left(names, '.hg')
                                 if hg < len(names) and names[hg] == '.hg':
                                     if os.path.isdir(os.path.join(top, '.hg')):
                                         continue
                             for f in names:
                                 np = util.pconvert(os.path.join(nd, f))
                                 if seen(np):
                                     continue
                                 p = os.path.join(top, f)
                                 # don't trip over symlinks
                                 st = os.lstat(p)
                                 if stat.S_ISDIR(st.st_mode):
                                     # the directory itself is matched with a
                                     # trailing '/' before it is queued
                                     ds = util.pconvert(os.path.join(nd, f +'/'))
                                     if imatch(ds):
                                         work.append(p)
                                     # a dirstate entry now occupied by a directory
                                     if imatch(np) and np in dc:
                                         yield 'm', np, st
                                 elif imatch(np):
                                     if self.supported_type(np, st):
                                         yield 'f', np, st
                                     elif np in dc:
                                         yield 'm', np, st
                     # names already reported; '.hg' is never reported
                     known = {'.hg': 1}
                     def seen(fn):
                         # true if fn was recorded before; otherwise record it
                         # and return None
                         if fn in known: return True
                         known[fn] = 1
                     # step one, find all files that match our criteria
                     files.sort()
                     for ff in files:
                         nf = util.normpath(ff)
                         f = self.wjoin(ff)
                         try:
                             st = os.lstat(f)
                         except OSError, inst:
                             # not on disk: stay silent if the dirstate has an
                             # entry for it or below it (step two reports those)
                             found = False
                             for fn in dc:
                                 if nf == fn or (fn.startswith(nf) and fn[len(nf)] == '/'):
                                     found = True
                                     break
                             if not found:
                                 if inst.errno != errno.ENOENT or not badmatch:
                                     self.ui.warn('%s: %s\n' % (
                                         util.pathto(self.getcwd(), ff),
                                         inst.strerror))
                                 elif badmatch and badmatch(ff) and imatch(nf):
                                     yield 'b', ff, None
                             continue
                         if stat.S_ISDIR(st.st_mode):
                             # report the content of a directory sorted by name
                             cmp1 = (lambda x, y: cmp(x[1], y[1]))
                             sorted_ = [ x for x in findfiles(f) ]
                             sorted_.sort(cmp1)
                             for e in sorted_:
                                 yield e
                         else:
                             # explicitly named files use match, not imatch, so
                             # the ignore rules do not apply to them
                             if not seen(nf) and match(nf):
                                 if self.supported_type(ff, st, verbose=True):
                                     yield 'f', nf, st
                                 elif ff in dc:
                                     yield 'm', nf, st
                     # step two run through anything left in the dc hash and yield
                     # if we haven't already seen it
                     ks = dc.keys()
                     ks.sort()
                     for k in ks:
                         if not seen(k) and imatch(k):
                             yield 'm', k, None
                 def status(self, files=None, match=util.always, list_ignored=False,
                            list_clean=False):
                     '''classify the names produced by statwalk(files, match).

                     return the tuple of lists (lookup, modified, added, removed,
                     deleted, unknown, ignored, clean):

                     lookup    state "n", size and exec bit unchanged (or size
                               recorded as negative) but mtime differs from the
                               recorded time
                     modified  state "n" with a different size or exec bit, or
                               state "m"
                     added     state "a"
                     removed   state "r"
                     deleted   state "n" or "m" with no supported file on disk
                     unknown   no dirstate entry
                     ignored   no dirstate entry and matched by self.ignore();
                               only filled when list_ignored is true
                     clean     state "n" with nothing changed; only filled when
                               list_clean is true'''
                     lookup, modified, added, unknown, ignored = [], [], [], [], []
                     removed, deleted, clean = [], [], []
                     for src, fn, st in self.statwalk(files, match, ignored=list_ignored):
                         try:
                             type_, mode, size, time = self[fn]
                         except KeyError:
                             # not tracked at all
                             if list_ignored and self.ignore(fn):
                                 ignored.append(fn)
                             else:
                                 unknown.append(fn)
                             continue
                         if src == 'm':
                             # assume the file is gone until a stat proves otherwise
                             nonexistent = True
                             if not st:
                                 try:
                                     st = os.lstat(self.wjoin(fn))
                                 except OSError, inst:
                                     if inst.errno != errno.ENOENT:
                                         raise
                                     st = None
                                 # We need to re-check that it is a valid file
                                 if st and self.supported_type(fn, st):
                                     nonexistent = False
                             # XXX: what to do with file no longer present in the fs
                             # who are not removed in the dirstate ?
                             if nonexistent and type_ in "nm":
                                 deleted.append(fn)
                                 continue
                         # check the common case first
                         if type_ == 'n':
                             if not st:
                                 st = os.lstat(self.wjoin(fn))
                             # 0100 is the owner execute bit of the mode
                             if size >= 0 and (size != st.st_size
                                               or (mode ^ st.st_mode) & 0100):
                                 modified.append(fn)
                             elif time != int(st.st_mtime):
                                 lookup.append(fn)
                             elif list_clean:
                                 clean.append(fn)
                         elif type_ == 'm':
                             modified.append(fn)
                         elif type_ == 'a':
                             added.append(fn)
                         elif type_ == 'r':
                             removed.append(fn)
                     return (lookup, modified, added, removed, deleted, unknown, ignored,
                             clean)

mercurial/manifest.py

0 +9 0

             # manifest.py - manifest revision class for mercurial
             #
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms
             # of the GNU General Public License, incorporated herein by reference.
             from revlog import *
             from i18n import gettext as _
             from demandload import *
             demandload(globals(), "array bisect struct")
             demandload(globals(), "mdiff")
             class manifestdict(dict):
                 '''dict of manifest entries that keeps a flag string per file
                 alongside, in self._flags.'''
                 def __init__(self, mapping=None, flags=None):
                     if mapping is None:
                         mapping = {}
                     if flags is None:
                         flags = {}
                     dict.__init__(self, mapping)
                     self._flags = flags
                 def flags(self, f):
                     '''return the flag string of f, "" when none is recorded.'''
                     recorded = self._flags
                     return recorded.get(f, "")
                 def execf(self, f):
                     "test for executable in manifest flags"
                     flagstr = self.flags(f)
                     return "x" in flagstr
                 def linkf(self, f):
                     "test for symlink in manifest flags"
                     flagstr = self.flags(f)
                     return "l" in flagstr
                 def rawset(self, f, entry):
                     '''store f from entry: 40 hex digits converted with bin(),
                     followed by the flags and one trailing character that is
                     dropped.'''
                     self[f] = bin(entry[:40])
                     flagstr = entry[40:-1]
                     if flagstr:
                         self._flags[f] = flagstr
                 def set(self, f, execf=False, linkf=False):
                     '''set the flags of f to "l", "x" or ""; linkf wins over
                     execf.'''
                     if linkf:
                         flagstr = "l"
                     elif execf:
                         flagstr = "x"
                     else:
                         flagstr = ""
                     self._flags[f] = flagstr
                 def copy(self):
                     '''return a manifestdict with copies of entries and flags.'''
                     entries = dict.copy(self)
                     flagmap = dict.copy(self._flags)
                     return manifestdict(entries, flagmap)
             class manifest(revlog):
                 def __init__(self, opener, defversion=REVLOGV0):
                     '''set up the revlog stored in "00manifest.i" and
                     "00manifest.d", with empty caches.'''
                     # filled by read() and add()
                     self.listcache = None
                     self.mapcache = None
                     revlog.__init__(self, opener, "00manifest.i", "00manifest.d",
                                     defversion)
                 def parselines(self, lines):
                     '''yield every line of the text lines, split on NUL; line
                     ends are kept in the last piece.'''
                     for entry in lines.splitlines(True):
                         yield entry.split('\0')
                 def readdelta(self, node):
                     '''return a manifestdict built from the lines of
                     mdiff.patchtext() applied to the delta of node.'''
                     patch = mdiff.patchtext(self.delta(node))
                     changes = manifestdict()
                     for name, rest in self.parselines(patch):
                         changes.rawset(name, rest)
                     return changes
                 def read(self, node):
                     '''return the manifestdict of node.

                     the result is remembered in self.mapcache and the raw text
                     in self.listcache; asking for the same node again returns
                     the remembered manifestdict.'''
                     if node == nullid: return manifestdict() # don't upset local cache
                     cached = self.mapcache
                     if cached and cached[0] == node:
                         return cached[1]
                     text = self.revision(node)
                     self.listcache = array.array('c', text)
                     parsed = manifestdict()
                     for name, rest in self.parselines(text):
                         parsed.rawset(name, rest)
                     self.mapcache = (node, parsed)
                     return parsed
                 def _search(self, m, s, lo=0, hi=None):
                     '''return a tuple (start, end) that says where to find s within m.
                     If the string is found m[start:end] are the line containing
                     that string.  If start == end the string was not found and
                     they indicate the proper sorted insertion point.  This was
                     taken from bisect_left, and modified to find line start/end as
                     it goes along.
                     m should be a buffer or a string
                     s is a string'''
                     def advance(i, c):
                         while i < lenm and m[i] != c:
                             i += 1
                         return i
                     lenm = len(m)
                     if not hi:
                         hi = lenm
                     while lo < hi:
                         mid = (lo + hi) // 2
                         start = mid
                         while start > 0 and m[start-1] != '\n':
                             start -= 1
                         end = advance(start, '\0')
                         if m[start:end] < s:
                             # we know that after the null there are 40 bytes of sha1
                             # this translates to the bisect lo = mid + 1
                             lo = advance(end + 40, '\n') + 1
                         else:
                             # this translates to the bisect hi = mid
                             hi = start
                     end = advance(lo, '\0')
                     found = m[lo:end]
                     if cmp(s, found) == 0:
                         # we know that after the null there are 40 bytes of sha1
                         end = advance(end + 40, '\n')
                         return (lo, end+1)
                     else:
                         return (lo, lo)
                 def find(self, node, f):
                     '''look up entry for a single file efficiently.
                     return (node, flags) pair if found.

                     flags is the flag string of the entry ("" when it has none),
                     whether the answer comes from the cached manifest or from
                     searching the revision text.  when the file is missing the
                     text search returns (None, None), while the cached path
                     returns (None, "").'''
                     if self.mapcache and node == self.mapcache[0]:
                         return self.mapcache[1].get(f), self.mapcache[1].flags(f)
                     text = self.revision(node)
                     start, end = self._search(text, f)
                     if start == end:
                         return None, None
                     l = text[start:end]
                     f, n = l.split('\0')
                     # n is 40 hex digits, then the flags, then the newline;
                     # return the flags themselves so that "l" is not lost and
                     # both paths agree on the type of the second item
                     return bin(n[:40]), n[40:-1]
                 def add(self, map, transaction, link, p1=None, p2=None,
                         changed=None):
                     '''Store the manifest described by map as a new revision.

                     map is indexed by filename to obtain a node (rendered with
                     hex()) and provides flags(f) for the per-file flag suffix.
                     transaction, link, p1 and p2 are passed through to
                     self.addrevision unchanged.

                     changed, when given, is a pair of filename lists: changed[0]
                     holds entries to add or rewrite, changed[1] holds entries to
                     remove.  It is only used when self.listcache is set and
                     self.mapcache[0] equals p1; otherwise the whole text is
                     rebuilt from map.

                     Raises RevlogError if a filename being written contains a
                     newline or carriage return, and AssertionError if an entry
                     listed for removal is not present in the cached text.

                     Updates self.listcache and self.mapcache, and returns the
                     node returned by self.addrevision.'''
                     # apply the changes collected during the bisect loop to our addlist
                     # return a delta suitable for addrevision
                     # x is a list of [start, end, replacement text] entries
                     def addlistdelta(addlist, x):
                         # start from the bottom up
                         # so changes to the offsets don't mess things up.
                         i = len(x)
                         while i > 0:
                             i -= 1
                             start = x[i][0]
                             end = x[i][1]
                             if x[i][2]:
                                 addlist[start:end] = array.array('c', x[i][2])
                             else:
                                 del addlist[start:end]
                         # each hunk is packed as (start, end, length) followed by its text
                         return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2] \
                                         for d in x ])
                     # each manifest entry is written as one "\n"-terminated line, so
                     # line-break characters in a filename are rejected up front
+                    def checkforbidden(f):
+                        if '\n' in f or '\r' in f:
+                            raise RevlogError(_("'\\n' and '\\r' disallowed in filenames"))
                     # if we're using the listcache, make sure it is valid and
                     # parented by the same node we're diffing against
                     if not changed or not self.listcache or not p1 or \
                            self.mapcache[0] != p1:
                         # slow path: rebuild the full text from every file in map
                         files = map.keys()
                         files.sort()
+                        for f in files:
+                            checkforbidden(f)
                         # if this is changed to support newlines in filenames,
                         # be sure to check the templates/ dir again (especially *-raw.tmpl)
                         text = ["%s\000%s%s\n" % (f, hex(map[f]), map.flags(f)) for f in files]
                         self.listcache = array.array('c', "".join(text))
                         cachedelta = None
                     else:
                         # fast path: patch only the changed entries into the cached text
                         addlist = self.listcache
                         # only names being written (changed[0]) are validated here;
                         # names in changed[1] are removals and produce no new line
+                        for f in changed[0]:
+                            checkforbidden(f)
                         # combine the changed lists into one list for sorting
                         # (second element: 0 = add or rewrite, 1 = remove)
                         work = [[x, 0] for x in changed[0]]
                         work[len(work):] = [[x, 1] for x in changed[1]]
                         work.sort()
                         delta = []
                         # dstart/dend/dline describe the hunk currently being accumulated
                         dstart = None
                         dend = None
                         dline = [""]
                         start = 0
                         # zero copy representation of addlist as a buffer
                         addbuf = buffer(addlist)
                         # start with a readonly loop that finds the offset of
                         # each line and creates the deltas
                         for w in work:
                             f = w[0]
                             # start:end will either span the existing line for f, or
                             # be empty (start == end) when f is not present; the
                             # previous start is passed in, presumably as the point
                             # to resume searching from -- confirm against _search
                             start, end = self._search(addbuf, f, start)
                             if w[1] == 0:
                                 l = "%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
                             else:
                                 l = ""
                             if start == end and w[1] == 1:
                                 # item we want to delete was not found, error out
                                 raise AssertionError(
                                         _("failed to remove %s from manifest") % f)
                             # this span begins inside or right at the end of the
                             # pending hunk: grow that hunk instead of starting a new one
                             if dstart != None and dstart <= start and dend >= start:
                                 if dend < end:
                                     dend = end
                                 if l:
                                     dline.append(l)
                             else:
                                 # flush the pending hunk (if any) and begin a new one
                                 if dstart != None:
                                     delta.append([dstart, dend, "".join(dline)])
                                 dstart = start
                                 dend = end
                                 dline = [l]
                         # flush the last pending hunk
                         if dstart != None:
                             delta.append([dstart, dend, "".join(dline)])
                         # apply the delta to the addlist, and get a delta for addrevision
                         cachedelta = addlistdelta(addlist, delta)
                         # the delta is only valid if we've been processing the tip revision
                         if self.mapcache[0] != self.tip():
                             cachedelta = None
                         self.listcache = addlist
                     n = self.addrevision(buffer(self.listcache), transaction, link, p1,  \
                                          p2, cachedelta)
                     # remember the map that corresponds to the revision just stored
                     self.mapcache = (n, map)
                     return n

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages