upstream/mercurial-mirror Commit - r2486:3ea8111e

simplify filterfiles when filtering based on a directory...

Benoit Boissinot -

r2486:3ea8111e default

parent child

mercurial/dirstate.py

0 +1 -5

              """
              dirstate.py - working directory tracking for mercurial
              Copyright 2005 Matt Mackall <mpm@selenic.com>
              This software may be used and distributed according to the terms
              of the GNU General Public License, incorporated herein by reference.
              """
              from node import *
              from i18n import gettext as _
              from demandload import *
              demandload(globals(), "struct os time bisect stat util re errno")
              class dirstate(object):
                  format = ">cllll"
                  def __init__(self, opener, ui, root):
                      self.opener = opener
                      self.root = root
                      self.dirty = 0
                      self.ui = ui
                      self.map = None
                      self.pl = None
                      self.copies = {}
                      self.ignorefunc = None
                      self.blockignore = False
                  def wjoin(self, f):
                      return os.path.join(self.root, f)
                  def getcwd(self):
                      cwd = os.getcwd()
                      if cwd == self.root: return ''
                      return cwd[len(self.root) + 1:]
                  def hgignore(self):
                      '''return the contents of .hgignore files as a list of patterns.
                      the files parsed for patterns include:
                      .hgignore in the repository root
                      any additional files specified in the [ui] section of ~/.hgrc
                      trailing white space is dropped.
                      the escape character is backslash.
                      comments start with #.
                      empty lines are skipped.
                      lines can be of the following formats:
                      syntax: regexp # defaults following lines to non-rooted regexps
                      syntax: glob   # defaults following lines to non-rooted globs
                      re:pattern     # non-rooted regular expression
                      glob:pattern   # non-rooted glob
                      pattern        # pattern of the current default type'''
                      syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
                      def parselines(fp):
                          for line in fp:
                              escape = False
                              for i in xrange(len(line)):
                                  if escape: escape = False
                                  elif line[i] == '\\': escape = True
                                  elif line[i] == '#': break
                              line = line[:i].rstrip()
                              if line: yield line
                      repoignore = self.wjoin('.hgignore')
                      files = [repoignore]
                      files.extend(self.ui.hgignorefiles())
                      pats = {}
                      for f in files:
                          try:
                              pats[f] = []
                              fp = open(f)
                              syntax = 'relre:'
                              for line in parselines(fp):
                                  if line.startswith('syntax:'):
                                      s = line[7:].strip()
                                      try:
                                          syntax = syntaxes[s]
                                      except KeyError:
                                          self.ui.warn(_("%s: ignoring invalid "
                                                         "syntax '%s'\n") % (f, s))
                                      continue
                                  pat = syntax + line
                                  for s in syntaxes.values():
                                      if line.startswith(s):
                                          pat = line
                                          break
                                  pats[f].append(pat)
                          except IOError, inst:
                              if f != repoignore:
                                  self.ui.warn(_("skipping unreadable ignore file"
                                                 " '%s': %s\n") % (f, inst.strerror))
                      return pats
                  def ignore(self, fn):
                      '''default match function used by dirstate and
                      localrepository.  this honours the repository .hgignore file
                      and any other files specified in the [ui] section of .hgrc.'''
                      if self.blockignore:
                          return False
                      if not self.ignorefunc:
                          ignore = self.hgignore()
                          allpats = []
                          [allpats.extend(patlist) for patlist in ignore.values()]
                          if allpats:
                              try:
                                  files, self.ignorefunc, anypats = (
                                      util.matcher(self.root, inc=allpats, src='.hgignore'))
                              except util.Abort:
                                  # Re-raise an exception where the src is the right file
                                  for f, patlist in ignore.items():
                                      files, self.ignorefunc, anypats = (
                                          util.matcher(self.root, inc=patlist, src=f))
                          else:
                              self.ignorefunc = util.never
                      return self.ignorefunc(fn)
                  def __del__(self):
                      if self.dirty:
                          self.write()
                  def __getitem__(self, key):
                      try:
                          return self.map[key]
                      except TypeError:
                          self.lazyread()
                          return self[key]
                  def __contains__(self, key):
                      self.lazyread()
                      return key in self.map
                  def parents(self):
                      self.lazyread()
                      return self.pl
                  def markdirty(self):
                      if not self.dirty:
                          self.dirty = 1
                  def setparents(self, p1, p2=nullid):
                      self.lazyread()
                      self.markdirty()
                      self.pl = p1, p2
                  def state(self, key):
                      try:
                          return self[key][0]
                      except KeyError:
                          return "?"
                  def lazyread(self):
                      if self.map is None:
                          self.read()
                  def parse(self, st):
                      self.pl = [st[:20], st[20: 40]]
                      # deref fields so they will be local in loop
                      map = self.map
                      copies = self.copies
                      format = self.format
                      unpack = struct.unpack
                      pos = 40
                      e_size = struct.calcsize(format)
                      while pos < len(st):
                          newpos = pos + e_size
                          e = unpack(format, st[pos:newpos])
                          l = e[4]
                          pos = newpos
                          newpos = pos + l
                          f = st[pos:newpos]
                          if '\0' in f:
                              f, c = f.split('\0')
                              copies[f] = c
                          map[f] = e[:4]
                          pos = newpos
                  def read(self):
                      self.map = {}
                      self.pl = [nullid, nullid]
                      try:
                          st = self.opener("dirstate").read()
                          if st:
                              self.parse(st)
                      except IOError, err:
                          if err.errno != errno.ENOENT: raise
                  def copy(self, source, dest):
                      self.lazyread()
                      self.markdirty()
                      self.copies[dest] = source
                  def copied(self, file):
                      return self.copies.get(file, None)
                  def update(self, files, state, **kw):
                      ''' current states:
                      n  normal
                      m  needs merging
                      r  marked for removal
                      a  marked for addition'''
                      if not files: return
                      self.lazyread()
                      self.markdirty()
                      for f in files:
                          if state == "r":
                              self.map[f] = ('r', 0, 0, 0)
                          else:
                              s = os.lstat(self.wjoin(f))
                              st_size = kw.get('st_size', s.st_size)
                              st_mtime = kw.get('st_mtime', s.st_mtime)
                              self.map[f] = (state, s.st_mode, st_size, st_mtime)
                          if self.copies.has_key(f):
                              del self.copies[f]
                  def forget(self, files):
                      if not files: return
                      self.lazyread()
                      self.markdirty()
                      for f in files:
                          try:
                              del self.map[f]
                          except KeyError:
                              self.ui.warn(_("not in dirstate: %s!\n") % f)
                              pass
                  def clear(self):
                      self.map = {}
                      self.copies = {}
                      self.markdirty()
                  def rebuild(self, parent, files):
                      self.clear()
                      umask = os.umask(0)
                      os.umask(umask)
                      for f, mode in files:
                          if mode:
                              self.map[f] = ('n', ~umask, -1, 0)
                          else:
                              self.map[f] = ('n', ~umask & 0666, -1, 0)
                      self.pl = (parent, nullid)
                      self.markdirty()
                  def write(self):
                      if not self.dirty:
                          return
                      st = self.opener("dirstate", "w", atomic=True)
                      st.write("".join(self.pl))
                      for f, e in self.map.items():
                          c = self.copied(f)
                          if c:
                              f = f + "\0" + c
                          e = struct.pack(self.format, e[0], e[1], e[2], e[3], len(f))
                          st.write(e + f)
                      self.dirty = 0
                  def filterfiles(self, files):
                      ret = {}
                      unknown = []
                      for x in files:
                          if x == '.':
                              return self.map.copy()
                          if x not in self.map:
                              unknown.append(x)
                          else:
                              ret[x] = self.map[x]
                      if not unknown:
                          return ret
                      b = self.map.keys()
                      b.sort()
                      blen = len(b)
                      for x in unknown:
-                         bs = bisect.bisect(b, x)
-                         if bs != 0 and  b[bs-1] == x:
-                             ret[x] = self.map[x]
-                             continue
+                         bs = bisect.bisect(b, "%s%s" % (x, '/'))
                          while bs < blen:
                              s = b[bs]
                              if len(s) > len(x) and s.startswith(x):
-                                 if s[len(x)] == '/':
-                                     ret[s] = self.map[s]
                              else:
                                  break
                              bs += 1
                      return ret
                  def supported_type(self, f, st, verbose=False):
                      if stat.S_ISREG(st.st_mode):
                          return True
                      if verbose:
                          kind = 'unknown'
                          if stat.S_ISCHR(st.st_mode): kind = _('character device')
                          elif stat.S_ISBLK(st.st_mode): kind = _('block device')
                          elif stat.S_ISFIFO(st.st_mode): kind = _('fifo')
                          elif stat.S_ISLNK(st.st_mode): kind = _('symbolic link')
                          elif stat.S_ISSOCK(st.st_mode): kind = _('socket')
                          elif stat.S_ISDIR(st.st_mode): kind = _('directory')
                          self.ui.warn(_('%s: unsupported file type (type is %s)\n') % (
                              util.pathto(self.getcwd(), f),
                              kind))
                      return False
                  def statwalk(self, files=None, match=util.always, dc=None, ignored=False,
                               badmatch=None):
                      self.lazyread()
                      # walk all files by default
                      if not files:
                          files = [self.root]
                          if not dc:
                              dc = self.map.copy()
                      elif not dc:
                          dc = self.filterfiles(files)
                      def statmatch(file_, stat):
                          file_ = util.pconvert(file_)
                          if not ignored and file_ not in dc and self.ignore(file_):
                              return False
                          return match(file_)
                      return self.walkhelper(files=files, statmatch=statmatch, dc=dc,
                                             badmatch=badmatch)
                  def walk(self, files=None, match=util.always, dc=None, badmatch=None):
                      # filter out the stat
                      for src, f, st in self.statwalk(files, match, dc, badmatch=badmatch):
                          yield src, f
                  # walk recursively through the directory tree, finding all files
                  # matched by the statmatch function
                  #
                  # results are yielded in a tuple (src, filename, st), where src
                  # is one of:
                  # 'f' the file was found in the directory tree
                  # 'm' the file was only in the dirstate and not in the tree
                  # and st is the stat result if the file was found in the directory.
                  #
                  # dc is an optional arg for the current dirstate.  dc is not modified
                  # directly by this function, but might be modified by your statmatch call.
                  #
                  def walkhelper(self, files, statmatch, dc, badmatch=None):
                      # recursion free walker, faster than os.walk.
                      def findfiles(s):
                          work = [s]
                          while work:
                              top = work.pop()
                              names = os.listdir(top)
                              names.sort()
                              # nd is the top of the repository dir tree
                              nd = util.normpath(top[len(self.root) + 1:])
                              if nd == '.':
                                  nd = ''
                              else:
                                  # do not recurse into a repo contained in this
                                  # one. use bisect to find .hg directory so speed
                                  # is good on big directory.
                                  hg = bisect.bisect_left(names, '.hg')
                                  if hg < len(names) and names[hg] == '.hg':
                                      if os.path.isdir(os.path.join(top, '.hg')):
                                          continue
                              for f in names:
                                  np = util.pconvert(os.path.join(nd, f))
                                  if seen(np):
                                      continue
                                  p = os.path.join(top, f)
                                  # don't trip over symlinks
                                  st = os.lstat(p)
                                  if stat.S_ISDIR(st.st_mode):
                                      ds = os.path.join(nd, f +'/')
                                      if statmatch(ds, st):
                                          work.append(p)
                                      if statmatch(np, st) and np in dc:
                                          yield 'm', np, st
                                  elif statmatch(np, st):
                                      if self.supported_type(np, st):
                                          yield 'f', np, st
                                      elif np in dc:
                                          yield 'm', np, st
                      known = {'.hg': 1}
                      def seen(fn):
                          if fn in known: return True
                          known[fn] = 1
                      # step one, find all files that match our criteria
                      files.sort()
                      for ff in util.unique(files):
                          f = self.wjoin(ff)
                          try:
                              st = os.lstat(f)
                          except OSError, inst:
                              nf = util.normpath(ff)
                              found = False
                              for fn in dc:
                                  if nf == fn or (fn.startswith(nf) and fn[len(nf)] == '/'):
                                      found = True
                                      break
                              if not found:
                                  if inst.errno != errno.ENOENT or not badmatch:
                                      self.ui.warn('%s: %s\n' % (
                                          util.pathto(self.getcwd(), ff),
                                          inst.strerror))
                                  elif badmatch and badmatch(ff) and statmatch(ff, None):
                                      yield 'b', ff, None
                              continue
                          if stat.S_ISDIR(st.st_mode):
                              cmp1 = (lambda x, y: cmp(x[1], y[1]))
                              sorted_ = [ x for x in findfiles(f) ]
                              sorted_.sort(cmp1)
                              for e in sorted_:
                                  yield e
                          else:
                              ff = util.normpath(ff)
                              if seen(ff):
                                  continue
                              self.blockignore = True
                              if statmatch(ff, st):
                                  if self.supported_type(ff, st, verbose=True):
                                      yield 'f', ff, st
                                  elif ff in dc:
                                      yield 'm', ff, st
                              self.blockignore = False
                      # step two run through anything left in the dc hash and yield
                      # if we haven't already seen it
                      ks = dc.keys()
                      ks.sort()
                      for k in ks:
                          if not seen(k) and (statmatch(k, None)):
                              yield 'm', k, None
                  def changes(self, files=None, match=util.always, show_ignored=None):
                      lookup, modified, added, unknown, ignored = [], [], [], [], []
                      removed, deleted = [], []
                      for src, fn, st in self.statwalk(files, match, ignored=show_ignored):
                          try:
                              type_, mode, size, time = self[fn]
                          except KeyError:
                              if show_ignored and self.ignore(fn):
                                  ignored.append(fn)
                              else:
                                  unknown.append(fn)
                              continue
                          if src == 'm':
                              nonexistent = True
                              if not st:
                                  try:
                                      st = os.lstat(self.wjoin(fn))
                                  except OSError, inst:
                                      if inst.errno != errno.ENOENT:
                                          raise
                                      st = None
                                  # We need to re-check that it is a valid file
                                  if st and self.supported_type(fn, st):
                                      nonexistent = False
                              # XXX: what to do with file no longer present in the fs
                              # who are not removed in the dirstate ?
                              if nonexistent and type_ in "nm":
                                  deleted.append(fn)
                                  continue
                          # check the common case first
                          if type_ == 'n':
                              if not st:
                                  st = os.lstat(self.wjoin(fn))
                              if size >= 0 and (size != st.st_size
                                                or (mode ^ st.st_mode) & 0100):
                                  modified.append(fn)
                              elif time != st.st_mtime:
                                  lookup.append(fn)
                          elif type_ == 'm':
                              modified.append(fn)
                          elif type_ == 'a':
                              added.append(fn)
                          elif type_ == 'r':
                              removed.append(fn)
                      return (lookup, modified, added, removed, deleted, unknown, ignored)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages