upstream/mercurial-mirror Commit - r3607:f4c9bb4a

issue352: disallow '\n' and '\r' in filenames (dirstate and manifest)

Benoit Boissinot -

r3607:f4c9bb4a default

parent child

tests/test-issue352

0 created 755 +21 0

			@@ -0,0 +1,21
		1	#!/bin/bash
		2	# http://www.selenic.com/mercurial/bts/issue352
		3
		4	hg init foo
		5	cd foo
		6
		7	A=`echo -e -n 'he\rllo'`
		8
		9	echo foo > "hell
		10	o"
		11	echo foo > "$A"
		12	hg add
		13	hg ci -A -m m
		14	rm "$A"
		15	ls
		16	hg add
		17	# BUG? we don't walk filenames containing '\n' (regexp related?)
		18	hg debugwalk
		19	hg ci -A -m m
		20
		21	exit 0

tests/test-issue352.out

0 created 644 +7 0

			@@ -0,0 +1,7
		1	adding he llo
		2	abort: '\n' and '\r' disallowed in filenames
		3	adding he llo
		4	abort: '\n' and '\r' disallowed in filenames
		5	hell
		6	o
		7	nothing changed

mercurial/dirstate.py

0 +6 -2

              """
              dirstate.py - working directory tracking for mercurial
              Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
              This software may be used and distributed according to the terms
              of the GNU General Public License, incorporated herein by reference.
              """
              from node import *
              from i18n import gettext as _
              from demandload import *
              demandload(globals(), "struct os time bisect stat strutil util re errno")
              class dirstate(object):
                  format = ">cllll"
                  def __init__(self, opener, ui, root):
                      self.opener = opener
                      self.root = root
                      self.dirty = 0
                      self.ui = ui
                      self.map = None
                      self.pl = None
                      self.dirs = None
                      self.copymap = {}
                      self.ignorefunc = None
                  def wjoin(self, f):
                      return os.path.join(self.root, f)
                  def getcwd(self):
                      cwd = os.getcwd()
                      if cwd == self.root: return ''
                      return cwd[len(self.root) + 1:]
                  def hgignore(self):
                      '''return the contents of .hgignore files as a list of patterns.
                      the files parsed for patterns include:
                      .hgignore in the repository root
                      any additional files specified in the [ui] section of ~/.hgrc
                      trailing white space is dropped.
                      the escape character is backslash.
                      comments start with #.
                      empty lines are skipped.
                      lines can be of the following formats:
                      syntax: regexp # defaults following lines to non-rooted regexps
                      syntax: glob   # defaults following lines to non-rooted globs
                      re:pattern     # non-rooted regular expression
                      glob:pattern   # non-rooted glob
                      pattern        # pattern of the current default type'''
                      syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
                      def parselines(fp):
                          for line in fp:
                              escape = False
                              for i in xrange(len(line)):
                                  if escape: escape = False
                                  elif line[i] == '\\': escape = True
                                  elif line[i] == '#': break
                              line = line[:i].rstrip()
                              if line: yield line
                      repoignore = self.wjoin('.hgignore')
                      files = [repoignore]
                      files.extend(self.ui.hgignorefiles())
                      pats = {}
                      for f in files:
                          try:
                              pats[f] = []
                              fp = open(f)
                              syntax = 'relre:'
                              for line in parselines(fp):
                                  if line.startswith('syntax:'):
                                      s = line[7:].strip()
                                      try:
                                          syntax = syntaxes[s]
                                      except KeyError:
                                          self.ui.warn(_("%s: ignoring invalid "
                                                         "syntax '%s'\n") % (f, s))
                                      continue
                                  pat = syntax + line
                                  for s in syntaxes.values():
                                      if line.startswith(s):
                                          pat = line
                                          break
                                  pats[f].append(pat)
                          except IOError, inst:
                              if f != repoignore:
                                  self.ui.warn(_("skipping unreadable ignore file"
                                                 " '%s': %s\n") % (f, inst.strerror))
                      return pats
                  def ignore(self, fn):
                      '''default match function used by dirstate and
                      localrepository.  this honours the repository .hgignore file
                      and any other files specified in the [ui] section of .hgrc.
                      the matcher is built on first use and kept in self.ignorefunc.'''
                      if not self.ignorefunc:
                          perfile = self.hgignore()
                          allpats = []
                          for patlist in perfile.values():
                              allpats.extend(patlist)
                          if not allpats:
                              self.ignorefunc = util.never
                          else:
                              try:
                                  files, self.ignorefunc, anypats = (
                                      util.matcher(self.root, inc=allpats, src='.hgignore'))
                              except util.Abort:
                                  # Re-raise an exception where the src is the right file
                                  for f, patlist in perfile.items():
                                      files, self.ignorefunc, anypats = (
                                          util.matcher(self.root, inc=patlist, src=f))
                      return self.ignorefunc(fn)
                  def __del__(self):
                      if self.dirty:
                          self.write()
                  def __getitem__(self, key):
                      """Return the dirstate entry stored for key.
                      Raises KeyError when key has no entry.
                      """
                      try:
                          return self.map[key]
                      except TypeError:
                          # self.map is still None when nothing has been read yet:
                          # load the dirstate, then retry the lookup
                          self.lazyread()
                          return self[key]
                  def __contains__(self, key):
                      self.lazyread()
                      return key in self.map
                  def parents(self):
                      self.lazyread()
                      return self.pl
                  def markdirty(self):
                      if not self.dirty:
                          self.dirty = 1
                  def setparents(self, p1, p2=nullid):
                      """Record (p1, p2) as the parents and mark the dirstate dirty."""
                      self.lazyread()
                      self.markdirty()
                      newparents = (p1, p2)
                      self.pl = newparents
                  def state(self, key):
                      try:
                          return self[key][0]
                      except KeyError:
                          return "?"
                  def lazyread(self):
                      if self.map is None:
                          self.read()
                  def parse(self, st):
                      """Fill self.pl, self.map and self.copymap from dirstate data st.
                      The first 40 bytes hold the two parents, 20 bytes each.  Every
                      following entry is a header packed with self.format followed by
                      a name whose length is the last header field; a NUL byte in the
                      name separates the file name from its copy source.
                      """
                      self.pl = [st[:20], st[20: 40]]
                      # deref fields so they will be local in loop
                      map = self.map
                      copymap = self.copymap
                      format = self.format
                      unpack = struct.unpack
                      pos = 40
                      e_size = struct.calcsize(format)
                      while pos < len(st):
                          # fixed-size header first
                          newpos = pos + e_size
                          e = unpack(format, st[pos:newpos])
                          # last header field is the length of the name that follows
                          l = e[4]
                          pos = newpos
                          newpos = pos + l
                          f = st[pos:newpos]
                          if '\0' in f:
                              # name carries a copy source after the NUL
                              f, c = f.split('\0')
                              copymap[f] = c
                          # keep the first four header fields as the entry
                          map[f] = e[:4]
                          pos = newpos
                  def read(self):
                      self.map = {}
                      self.pl = [nullid, nullid]
                      try:
                          st = self.opener("dirstate").read()
                          if st:
                              self.parse(st)
                      except IOError, err:
                          if err.errno != errno.ENOENT: raise
                  def copy(self, source, dest):
                      self.lazyread()
                      self.markdirty()
                      self.copymap[dest] = source
                  def copied(self, file):
                      return self.copymap.get(file, None)
                  def copies(self):
                      return self.copymap
                  def initdirs(self):
                      if self.dirs is None:
                          self.dirs = {}
                          for f in self.map:
                              self.updatedirs(f, 1)
                  def updatedirs(self, path, delta):
                      if self.dirs is not None:
                          for c in strutil.findall(path, '/'):
                              pc = path[:c]
                              self.dirs.setdefault(pc, 0)
                              self.dirs[pc] += delta
-                 def checkshadows(self, files):
+                 def checkinterfering(self, files):
                      """Abort if any name in files cannot be added to the dirstate.
                      A name is rejected when it equals a directory that already
                      holds tracked files, when one of its parent directories is
                      itself a tracked file, or when it contains a carriage return
                      or a newline.  Raises util.Abort on the first offending name.
                      """
                      def prefixes(f):
                          # directory prefixes of f, at the positions reported by
                          # strutil.rfindall
                          for c in strutil.rfindall(f, '/'):
                              yield f[:c]
                      self.lazyread()
                      self.initdirs()
                      # directory prefixes already verified during this call
                      seendirs = {}
                      for f in files:
+                         # shadows
                          if self.dirs.get(f):
                              raise util.Abort(_('directory named %r already in dirstate') %
                                               f)
                          for d in prefixes(f):
                              # presumably prefixes come longest first, so a seen
                              # prefix means the shorter ones were checked too
                              if d in seendirs:
                                  break
                              if d in self.map:
                                  raise util.Abort(_('file named %r already in dirstate') %
                                                   d)
                              seendirs[d] = True
+                         # disallowed
+                         if '\r' in f or '\n' in f:
+                             raise util.Abort(_("'\\n' and '\\r' disallowed in filenames"))
                  def update(self, files, state, **kw):
                      '''record state for every name in files.
                      current states:
                      n  normal
                      m  needs merging
                      r  marked for removal
                      a  marked for addition
                      keyword arguments st_size and st_mtime, when given, replace the
                      size and mtime taken from lstat for every state except "r".
                      any copy record for an updated file is dropped.'''
                      if not files: return
                      self.lazyread()
                      self.markdirty()
                      if state == "a":
                          # additions are validated before anything is recorded
                          self.initdirs()
-                         self.checkshadows(files)
+                         self.checkinterfering(files)
                      for f in files:
                          if state == "r":
                              # removed entries carry no stat data
                              self.map[f] = ('r', 0, 0, 0)
                              self.updatedirs(f, -1)
                          else:
                              if state == "a":
                                  self.updatedirs(f, 1)
                              s = os.lstat(self.wjoin(f))
                              st_size = kw.get('st_size', s.st_size)
                              st_mtime = kw.get('st_mtime', s.st_mtime)
                              self.map[f] = (state, s.st_mode, st_size, st_mtime)
                          if self.copymap.has_key(f):
                              del self.copymap[f]
                  def forget(self, files):
                      if not files: return
                      self.lazyread()
                      self.markdirty()
                      self.initdirs()
                      for f in files:
                          try:
                              del self.map[f]
                              self.updatedirs(f, -1)
                          except KeyError:
                              self.ui.warn(_("not in dirstate: %s!\n") % f)
                              pass
                  def clear(self):
                      self.map = {}
                      self.copymap = {}
                      self.dirs = None
                      self.markdirty()
                  def rebuild(self, parent, files):
                      self.clear()
                      umask = os.umask(0)
                      os.umask(umask)
                      for f in files:
                          if files.execf(f):
                              self.map[f] = ('n', ~umask, -1, 0)
                          else:
                              self.map[f] = ('n', ~umask & 0666, -1, 0)
                      self.pl = (parent, nullid)
                      self.markdirty()
                  def write(self):
                      """Write parents and all entries to the "dirstate" file if dirty.
                      Each entry is a header packed with self.format followed by the
                      name, with a NUL byte and the copy source appended for copies.
                      """
                      if not self.dirty:
                          return
                      out = self.opener("dirstate", "w", atomic=True)
                      out.write("".join(self.pl))
                      pack = struct.pack
                      for f, e in self.map.items():
                          name = f
                          source = self.copied(f)
                          if source:
                              name = f + "\0" + source
                          header = pack(self.format, e[0], e[1], e[2], e[3], len(name))
                          out.write(header + name)
                      self.dirty = 0
                  def filterfiles(self, files):
                      ret = {}
                      unknown = []
                      for x in files:
                          if x == '.':
                              return self.map.copy()
                          if x not in self.map:
                              unknown.append(x)
                          else:
                              ret[x] = self.map[x]
                      if not unknown:
                          return ret
                      b = self.map.keys()
                      b.sort()
                      blen = len(b)
                      for x in unknown:
                          bs = bisect.bisect(b, "%s%s" % (x, '/'))
                          while bs < blen:
                              s = b[bs]
                              if len(s) > len(x) and s.startswith(x):
                                  ret[s] = self.map[s]
                              else:
                                  break
                              bs += 1
                      return ret
                  def supported_type(self, f, st, verbose=False):
                      """Return True if st describes a regular file, False otherwise.
                      With verbose set, a warning naming the file type is issued for
                      anything that is not a regular file.
                      """
                      mode = st.st_mode
                      if stat.S_ISREG(mode):
                          return True
                      if not verbose:
                          return False
                      kind = 'unknown'
                      if stat.S_ISCHR(mode): kind = _('character device')
                      elif stat.S_ISBLK(mode): kind = _('block device')
                      elif stat.S_ISFIFO(mode): kind = _('fifo')
                      elif stat.S_ISLNK(mode): kind = _('symbolic link')
                      elif stat.S_ISSOCK(mode): kind = _('socket')
                      elif stat.S_ISDIR(mode): kind = _('directory')
                      self.ui.warn(_('%s: unsupported file type (type is %s)\n') % (
                          util.pathto(self.getcwd(), f),
                          kind))
                      return False
                  def walk(self, files=None, match=util.always, badmatch=None):
                      """Yield (src, filename) pairs from statwalk, without the stat."""
                      for entry in self.statwalk(files, match, badmatch=badmatch):
                          # entry is (src, filename, st); drop st
                          yield entry[0], entry[1]
                  def statwalk(self, files=None, match=util.always, ignored=False,
                               badmatch=None):
                      '''
                      walk recursively through the directory tree, finding all files
                      matched by the match function
                      results are yielded in a tuple (src, filename, st), where src
                      is one of:
                      'f' the file was found in the directory tree
                      'm' the file was only in the dirstate and not in the tree
                      'b' file was not found and matched badmatch
                      and st is the stat result if the file was found in the directory.
                      files defaults to the whole repository.  unless ignored is set,
                      names that are not in the dirstate and match the ignore rules
                      are skipped.  badmatch, when given, is called with names that
                      could not be found.
                      '''
                      self.lazyread()
                      # walk all files by default
                      if not files:
                          files = [self.root]
                          dc = self.map.copy()
                      else:
                          files = util.unique(files)
                          dc = self.filterfiles(files)
                      # match wrapper that also applies the ignore rules to names
                      # the dirstate does not know about
                      def imatch(file_):
                          if file_ not in dc and self.ignore(file_):
                              return False
                          return match(file_)
                      if ignored: imatch = match
                      # self.root may end with a path separator when self.root == '/'
                      common_prefix_len = len(self.root)
                      if not self.root.endswith('/'):
                          common_prefix_len += 1
                      # recursion free walker, faster than os.walk.
                      def findfiles(s):
                          work = [s]
                          while work:
                              top = work.pop()
                              names = os.listdir(top)
                              names.sort()
                              # nd is the top of the repository dir tree
                              nd = util.normpath(top[common_prefix_len:])
                              if nd == '.':
                                  nd = ''
                              else:
                                  # do not recurse into a repo contained in this
                                  # one. use bisect to find .hg directory so speed
                                  # is good on big directory.
                                  hg = bisect.bisect_left(names, '.hg')
                                  if hg < len(names) and names[hg] == '.hg':
                                      if os.path.isdir(os.path.join(top, '.hg')):
                                          continue
                              for f in names:
                                  np = util.pconvert(os.path.join(nd, f))
                                  if seen(np):
                                      continue
                                  p = os.path.join(top, f)
                                  # don't trip over symlinks
                                  st = os.lstat(p)
                                  if stat.S_ISDIR(st.st_mode):
                                      # queue the directory for a later visit
                                      ds = util.pconvert(os.path.join(nd, f +'/'))
                                      if imatch(ds):
                                          work.append(p)
                                      # a directory standing where the dirstate
                                      # tracks a file
                                      if imatch(np) and np in dc:
                                          yield 'm', np, st
                                  elif imatch(np):
                                      if self.supported_type(np, st):
                                          yield 'f', np, st
                                      elif np in dc:
                                          yield 'm', np, st
                      # names already handled; '.hg' is pre-seeded so it is skipped
                      known = {'.hg': 1}
                      def seen(fn):
                          # returns True for a repeat, None (after recording) otherwise
                          if fn in known: return True
                          known[fn] = 1
                      # step one, find all files that match our criteria
                      files.sort()
                      for ff in files:
                          nf = util.normpath(ff)
                          f = self.wjoin(ff)
                          try:
                              st = os.lstat(f)
                          except OSError, inst:
                              # not on disk: stay quiet if the dirstate knows the
                              # name or something below it
                              found = False
                              for fn in dc:
                                  if nf == fn or (fn.startswith(nf) and fn[len(nf)] == '/'):
                                      found = True
                                      break
                              if not found:
                                  if inst.errno != errno.ENOENT or not badmatch:
                                      self.ui.warn('%s: %s\n' % (
                                          util.pathto(self.getcwd(), ff),
                                          inst.strerror))
                                  elif badmatch and badmatch(ff) and imatch(nf):
                                      yield 'b', ff, None
                              continue
                          if stat.S_ISDIR(st.st_mode):
                              # yield the directory's results ordered by file name
                              cmp1 = (lambda x, y: cmp(x[1], y[1]))
                              sorted_ = [ x for x in findfiles(f) ]
                              sorted_.sort(cmp1)
                              for e in sorted_:
                                  yield e
                          else:
                              # names given explicitly are checked with match only,
                              # so the ignore rules do not apply to them
                              if not seen(nf) and match(nf):
                                  if self.supported_type(ff, st, verbose=True):
                                      yield 'f', nf, st
                                  elif ff in dc:
                                      yield 'm', nf, st
                      # step two run through anything left in the dc hash and yield
                      # if we haven't already seen it
                      ks = dc.keys()
                      ks.sort()
                      for k in ks:
                          if not seen(k) and imatch(k):
                              yield 'm', k, None
                  def status(self, files=None, match=util.always, list_ignored=False,
                             list_clean=False):
                      """Classify the files reported by statwalk.
                      Returns the tuple of lists (lookup, modified, added, removed,
                      deleted, unknown, ignored, clean).  ignored is only filled when
                      list_ignored is set and clean only when list_clean is set.
                      lookup holds 'n' entries whose recorded mtime differs from the
                      one on disk while size and exec bit match.
                      """
                      lookup, modified, added, unknown, ignored = [], [], [], [], []
                      removed, deleted, clean = [], [], []
                      for src, fn, st in self.statwalk(files, match, ignored=list_ignored):
                          try:
                              type_, mode, size, time = self[fn]
                          except KeyError:
                              # not tracked: either ignored or unknown
                              if list_ignored and self.ignore(fn):
                                  ignored.append(fn)
                              else:
                                  unknown.append(fn)
                              continue
                          if src == 'm':
                              # statwalk did not find a usable file for this entry
                              nonexistent = True
                              if not st:
                                  try:
                                      st = os.lstat(self.wjoin(fn))
                                  except OSError, inst:
                                      if inst.errno != errno.ENOENT:
                                          raise
                                      st = None
                                  # We need to re-check that it is a valid file
                                  if st and self.supported_type(fn, st):
                                      nonexistent = False
                              # XXX: what to do with file no longer present in the fs
                              # who are not removed in the dirstate ?
                              if nonexistent and type_ in "nm":
                                  deleted.append(fn)
                                  continue
                          # check the common case first
                          if type_ == 'n':
                              if not st:
                                  st = os.lstat(self.wjoin(fn))
                              # a negative recorded size skips the size and exec
                              # bit comparison
                              if size >= 0 and (size != st.st_size
                                                or (mode ^ st.st_mode) & 0100):
                                  modified.append(fn)
                              elif time != int(st.st_mtime):
                                  lookup.append(fn)
                              elif list_clean:
                                  clean.append(fn)
                          elif type_ == 'm':
                              modified.append(fn)
                          elif type_ == 'a':
                              added.append(fn)
                          elif type_ == 'r':
                              removed.append(fn)
                      return (lookup, modified, added, removed, deleted, unknown, ignored,
                              clean)

mercurial/manifest.py

0 +9 0

              # manifest.py - manifest revision class for mercurial
              #
              # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms
              # of the GNU General Public License, incorporated herein by reference.
              from revlog import *
              from i18n import gettext as _
              from demandload import *
              demandload(globals(), "array bisect struct")
              demandload(globals(), "mdiff")
              class manifestdict(dict):
                  """dict of manifest entries with a separate table of per-file flags."""
                  def __init__(self, mapping=None, flags=None):
                      "start from mapping and flags, both empty when omitted"
                      if mapping is None:
                          mapping = {}
                      if flags is None:
                          flags = {}
                      dict.__init__(self, mapping)
                      self._flags = flags
                  def flags(self, f):
                      "return the flag string recorded for f, '' when there is none"
                      recorded = self._flags
                      return recorded.get(f, "")
                  def execf(self, f):
                      "test for executable in manifest flags"
                      return "x" in self.flags(f)
                  def linkf(self, f):
                      "test for symlink in manifest flags"
                      return "l" in self.flags(f)
                  def rawset(self, f, entry):
                      "store f from a raw entry: 40 hex digits, flags, one last char"
                      node, fl = entry[:40], entry[40:-1]
                      self[f] = bin(node)
                      if fl:
                          self._flags[f] = fl
                  def set(self, f, execf=False, linkf=False):
                      "set the flag of f: 'l' wins over 'x', '' when neither is asked"
                      if linkf:
                          flag = "l"
                      elif execf:
                          flag = "x"
                      else:
                          flag = ""
                      self._flags[f] = flag
                  def copy(self):
                      "return a manifestdict holding copies of the entries and flags"
                      entries = dict.copy(self)
                      flags = dict.copy(self._flags)
                      return manifestdict(entries, flags)
              class manifest(revlog):
                  def __init__(self, opener, defversion=REVLOGV0):
                      """Set up the revlog over 00manifest.i/00manifest.d, caches empty."""
                      self.listcache = None
                      self.mapcache = None
                      revlog.__init__(self, opener, "00manifest.i", "00manifest.d",
                                      defversion)
                  def parselines(self, lines):
                      """Yield each line of lines, line ending kept, split on NUL bytes."""
                      for entry in lines.splitlines(True):
                          yield entry.split('\0')
                  def readdelta(self, node):
                      """Return a manifestdict built from the patch text of node's delta."""
                      patched = mdiff.patchtext(self.delta(node))
                      changed = manifestdict()
                      for f, n in self.parselines(patched):
                          changed.rawset(f, n)
                      return changed
                  def read(self, node):
                      """Return the manifestdict for node, using and refreshing the caches.
                      nullid gives a fresh empty manifestdict and leaves the caches
                      untouched.
                      """
                      if node == nullid: return manifestdict() # don't upset local cache
                      cached = self.mapcache
                      if cached and cached[0] == node:
                          return cached[1]
                      text = self.revision(node)
                      self.listcache = array.array('c', text)
                      mapping = manifestdict()
                      for f, n in self.parselines(text):
                          mapping.rawset(f, n)
                      self.mapcache = (node, mapping)
                      return mapping
                  def _search(self, m, s, lo=0, hi=None):
                      '''return a tuple (start, end) that says where to find s within m.
                      If the string is found m[start:end] are the line containing
                      that string.  If start == end the string was not found and
                      they indicate the proper sorted insertion point.  This was
                      taken from bisect_left, and modified to find line start/end as
                      it goes along.
                      m should be a buffer or a string
                      s is a string'''
                      def advance(i, c):
                          # index of the first c at or after i, or len(m) if none
                          while i < lenm and m[i] != c:
                              i += 1
                          return i
                      lenm = len(m)
                      # a hi of 0 is treated like None and searches to the end
                      if not hi:
                          hi = lenm
                      while lo < hi:
                          mid = (lo + hi) // 2
                          # back up to the start of the line containing mid
                          start = mid
                          while start > 0 and m[start-1] != '\n':
                              start -= 1
                          # the file name runs up to the NUL
                          end = advance(start, '\0')
                          if m[start:end] < s:
                              # we know that after the null there are 40 bytes of sha1
                              # this translates to the bisect lo = mid + 1
                              lo = advance(end + 40, '\n') + 1
                          else:
                              # this translates to the bisect hi = mid
                              hi = start
                      # lo is now a line start; compare the name found there with s
                      end = advance(lo, '\0')
                      found = m[lo:end]
                      if cmp(s, found) == 0:
                          # we know that after the null there are 40 bytes of sha1
                          end = advance(end + 40, '\n')
                          return (lo, end+1)
                      else:
                          return (lo, lo)
                  def find(self, node, f):
                      '''look up entry for a single file efficiently.
                      return (node, flag) pair if found, (None, None) if not.'''
                      cached = self.mapcache
                      if cached and node == cached[0]:
                          mapping = cached[1]
                          return mapping.get(f), mapping.flags(f)
                      text = self.revision(node)
                      start, end = self._search(text, f)
                      if start == end:
                          return None, None
                      name, rest = text[start:end].split('\0')
                      # NOTE(review): this path reports the flag as a boolean
                      # ("is it exactly 'x'") while the cached path above returns the
                      # flag string - confirm what callers expect
                      return bin(rest[:40]), rest[40:-1] == 'x'
                  def add(self, map, transaction, link, p1=None, p2=None,
                          changed=None):
                      # build the manifest text for map, store it through
                      # self.addrevision() and return the node that call gives back.
                      #
                      # map:      mapping indexed by file name; must also provide
                      #           keys() and flags(f)
                      # transaction, link, p1, p2: handed to self.addrevision() as is
                      #           (p1 is also compared with the node of the cached map)
                      # changed:  optional pair of file lists; changed[0] holds the
                      #           files whose line is (re)written, changed[1] the files
                      #           whose line is removed
                      #
                      # raises RevlogError when a file name that gets written contains
                      # '\n' or '\r', and AssertionError when a file listed for removal
                      # has no line in the cached text.
                      #
                      # side effects: self.listcache and self.mapcache are replaced.

                      # apply the changes collected during the bisect loop to our addlist
                      # return a delta suitable for addrevision
                      # x is a list of [start, end, content] entries ordered by offset
                      def addlistdelta(addlist, x):
                          # start from the bottom up
                          # so changes to the offsets don't mess things up.
                          i = len(x)
                          while i > 0:
                              i -= 1
                              start = x[i][0]
                              end = x[i][1]
                              if x[i][2]:
                                  addlist[start:end] = array.array('c', x[i][2])
                              else:
                                  # empty content: the range is simply dropped
                                  del addlist[start:end]
                          # each entry is serialized as three big-endian longs
                          # (start, end, content length) followed by the content
                          return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2] \
                                          for d in x ])
                      # refuse file names containing '\n' or '\r'; '\n' is the entry
                      # terminator in the manifest text built below
+                     def checkforbidden(f):
+                         if '\n' in f or '\r' in f:
+                             raise RevlogError(_("'\\n' and '\\r' disallowed in filenames"))
                      # if we're using the listcache, make sure it is valid and
                      # parented by the same node we're diffing against
                      # NOTE(review): self.mapcache[0] is read without a None check;
                      # presumably mapcache is always set once listcache is -- confirm
                      if not changed or not self.listcache or not p1 or \
                             self.mapcache[0] != p1:
                          # full rebuild: every file of map gets a line, in sorted order
                          files = map.keys()
                          files.sort()
+                         for f in files:
+                             checkforbidden(f)
                          # if this is changed to support newlines in filenames,
                          # be sure to check the templates/ dir again (especially *-raw.tmpl)
                          text = ["%s\000%s%s\n" % (f, hex(map[f]), map.flags(f)) for f in files]
                          self.listcache = array.array('c', "".join(text))
                          # no delta is passed to addrevision for a rebuilt text
                          cachedelta = None
                      else:
                          # incremental update: patch the cached text in place
                          addlist = self.listcache
                          # only names that get written are checked; changed[1] holds
                          # removals
+                         for f in changed[0]:
+                             checkforbidden(f)
                          # combine the changed lists into one list for sorting
                          # the second member tells the kind: 0 = write, 1 = remove
                          work = [[x, 0] for x in changed[0]]
                          work[len(work):] = [[x, 1] for x in changed[1]]
                          work.sort()
                          delta = []
                          # dstart/dend/dline describe the delta chunk being assembled;
                          # dstart stays None until the first work item has been seen
                          dstart = None
                          dend = None
                          dline = [""]
                          start = 0
                          # zero copy representation of addlist as a buffer
                          addbuf = buffer(addlist)
                          # start with a readonly loop that finds the offset of
                          # each line and creates the deltas
                          for w in work:
                              f = w[0]
                              # start == end means f has no line yet and start is the
                              # insert point; otherwise start:end covers the line of f.
                              # the previous start is passed on as the lower search bound
                              start, end = self._search(addbuf, f, start)
                              if w[1] == 0:
                                  l = "%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
                              else:
                                  l = ""
                              if start == end and w[1] == 1:
                                  # item we want to delete was not found, error out
                                  raise AssertionError(
                                          _("failed to remove %s from manifest") % f)
                              # this range begins inside or right at the end of the
                              # pending chunk: grow the chunk instead of opening a new one
                              if dstart != None and dstart <= start and dend >= start:
                                  if dend < end:
                                      dend = end
                                  if l:
                                      dline.append(l)
                              else:
                                  # flush the pending chunk (if any) and start a new one
                                  if dstart != None:
                                      delta.append([dstart, dend, "".join(dline)])
                                  dstart = start
                                  dend = end
                                  dline = [l]
                          # flush the last chunk
                          if dstart != None:
                              delta.append([dstart, dend, "".join(dline)])
                          # apply the delta to the addlist, and get a delta for addrevision
                          cachedelta = addlistdelta(addlist, delta)
                          # the delta is only valid if we've been processing the tip revision
                          if self.mapcache[0] != self.tip():
                              cachedelta = None
                          self.listcache = addlist
                      n = self.addrevision(buffer(self.listcache), transaction, link, p1,  \
                                           p2, cachedelta)
                      # remember the map together with its new node for later lookups
                      self.mapcache = (n, map)
                      return n

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages