upstream/mercurial-mirror Commit - r10577:d5bd1bef

store: only add new entries to the fncache file...

Adrian Buehlmann -

r10577:d5bd1bef stable

parent child

mercurial/store.py

0 +3 -3

              # store.py - repository store handling for Mercurial
              #
              # Copyright 2008 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from i18n import _
              import osutil, util
              import os, stat
              _sha = util.sha1
              # This avoids a collision between a file named foo and a dir named
              # foo.i or foo.d
              def encodedir(path):
                  '''escape directory names ending in .hg, .i or .d below data/
                  Paths that do not start with 'data/' are returned unchanged.
                  '''
                  if path.startswith('data/'):
                      # '.hg/' must be escaped first: the two later replacements
                      # themselves produce '.hg/' and must not be escaped again
                      for suffix in ('.hg/', '.i/', '.d/'):
                          path = path.replace(suffix, suffix[:-1] + '.hg/')
                  return path
              def decodedir(path):
                  '''undo encodedir: strip the '.hg' escape from directory names
                  Paths that do not start with 'data/' are returned unchanged.
                  '''
                  if path.startswith('data/'):
                      # reverse order of encodedir, so '.hg.hg/' is handled last
                      for ext in ('.d', '.i', '.hg'):
                          path = path.replace(ext + '.hg/', ext + '/')
                  return path
              def _buildencodefun():
                  '''build the reversible filename encoder and its decoder
                  Returns an (encode, decode) pair of functions. Encoding maps
                  every character through a table: control characters, characters
                  from 126 up and the Windows reserved characters become '~xx'
                  (two hex digits), uppercase letters and '_' become '_' followed
                  by the lowercase character, everything else is kept.
                  '''
                  escape = '_'
                  reserved = [ord(c) for c in '\\:*?"<>|']
                  # identity mapping first; the loops below overwrite special cases
                  cmap = {}
                  for code in range(127):
                      cmap[chr(code)] = chr(code)
                  for code in list(range(32)) + list(range(126, 256)) + reserved:
                      cmap[chr(code)] = "~%02x" % code
                  for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
                      cmap[chr(code)] = escape + chr(code).lower()
                  # inverse table, keyed by the 1 to 3 character encoded tokens
                  dmap = dict([(token, char) for char, token in cmap.items()])
                  def decode(s):
                      '''yield the decoded characters of s (KeyError if undecodable)'''
                      pos = 0
                      end = len(s)
                      while pos < end:
                          # try the shortest token first
                          for width in (1, 2, 3):
                              token = s[pos:pos + width]
                              if token in dmap:
                                  yield dmap[token]
                                  pos += width
                                  break
                          else:
                              raise KeyError
                  def encodename(s):
                      return "".join([cmap[c] for c in encodedir(s)])
                  def decodename(s):
                      return decodedir("".join(list(decode(s))))
                  return encodename, decodename
              encodefilename, decodefilename = _buildencodefun()
              def _build_lower_encodefun():
                  '''build the (non-reversible) lowercasing filename encoder
                  The returned function maps control characters, characters from
                  126 up and the Windows reserved characters to '~xx' (two hex
                  digits) and folds uppercase letters to lowercase.
                  '''
                  reserved = [ord(c) for c in '\\:*?"<>|']
                  # identity mapping first; the loops below overwrite special cases
                  cmap = {}
                  for code in range(127):
                      cmap[chr(code)] = chr(code)
                  for code in list(range(32)) + list(range(126, 256)) + reserved:
                      cmap[chr(code)] = "~%02x" % code
                  for code in range(ord("A"), ord("Z") + 1):
                      cmap[chr(code)] = chr(code).lower()
                  def encode(s):
                      return "".join([cmap[c] for c in s])
                  return encode
              lowerencode = _build_lower_encodefun()
              # device names that Windows does not accept as file names
              _windows_reserved_filenames = (
                  'con prn aux nul '
                  'com1 com2 com3 com4 com5 com6 com7 com8 com9 '
                  'lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9').split()
              def auxencode(path):
                  '''mask path components that Windows cannot handle
                  A component whose part before the first '.' is a reserved device
                  name gets its third character replaced by '~xx', and a trailing
                  period or space of any component is replaced by '~xx' as well
                  (xx being the two digit hex code of the character).
                  '''
                  def encodepart(part):
                      if not part:
                          # empty components (e.g. from '//') are kept as they are
                          return part
                      stem = part.split('.')[0]
                      if stem and stem in _windows_reserved_filenames:
                          # encode third letter ('aux' -> 'au~78')
                          part = part[:2] + "~%02x" % ord(part[2]) + part[3:]
                      last = part[-1]
                      if last in '. ':
                          # encode last period or space ('foo...' -> 'foo..~2e')
                          part = part[:-1] + "~%02x" % ord(last)
                      return part
                  return '/'.join([encodepart(p) for p in path.split('/')])
              # longest encoded path that is still stored with the default encoding
              MAX_PATH_LEN_IN_HGSTORE = 120
              # number of characters kept from each directory level when hashing
              DIR_PREFIX_LEN = 8
              # upper bound for the joined shortened directory levels
              _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
              def hybridencode(path):
                  '''encode a store path, hashing it if it would get too long
                  Only paths starting with 'data/' are encoded; any other path is
                  returned unchanged.
                  Default encoding (reversible):
                  The path is passed through encodedir, encodefilename and
                  auxencode: uppercase letters 'X' become '_x', reserved or illegal
                  characters become '~xx' (two digit hex code), and components that
                  are Windows reserved filenames have their third character masked
                  ('aux' -> 'au~78').
                  Hashed encoding (not reversible):
                  Used when the default-encoded path is longer than
                  MAX_PATH_LEN_IN_HGSTORE. The result starts with 'dh/' instead of
                  'data/', followed by up to DIR_PREFIX_LEN characters of each
                  directory level of the lowerencoded path (as many levels as fit
                  into _MAX_SHORTENED_DIRS_LEN), then a filler, then the sha digest
                  of the full path, then the extension (e.g. '.i' or '.d').
                  The filler is the start of the lowerencoded basename, taking as
                  many characters as fit until the result is
                  MAX_PATH_LEN_IN_HGSTORE characters long (or the basename is used
                  up).
                  '''
                  if not path.startswith('data/'):
                      return path
                  # escape directories ending with .i and .d
                  path = encodedir(path)
                  tail = path[len('data/'):]
                  default = 'data/' + auxencode(encodefilename(tail))
                  if len(default) <= MAX_PATH_LEN_IN_HGSTORE:
                      return default
                  # too long for the reversible form: build the hashed form
                  digest = _sha(path).hexdigest()
                  lowered = auxencode(lowerencode(tail))
                  ext = os.path.splitext(lowered)[1]
                  components = lowered.split('/')
                  basename = components.pop()
                  shortdirs = []
                  for comp in components:
                      short = comp[:DIR_PREFIX_LEN]
                      if short[-1] in '. ':
                          # Windows can't access dirs ending in period or space
                          short = short[:-1] + '_'
                      candidate = '/'.join(shortdirs) + '/' + short
                      if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
                          break
                      shortdirs.append(short)
                  prefix = 'dh/'
                  if shortdirs:
                      prefix += '/'.join(shortdirs) + '/'
                  # use whatever room is left for the beginning of the basename
                  room = MAX_PATH_LEN_IN_HGSTORE - len(prefix + digest + ext)
                  filler = ''
                  if room > 0:
                      filler = basename[:room]
                  return prefix + filler + digest + ext
              def _calcmode(path):
                  '''return the mode to create files below path with, or None
                  None is returned when path cannot be stat'ed, or when its
                  permission bits already equal what the umask yields (no explicit
                  chmod is needed then).
                  '''
                  # all user/group/other permission bits (octal 777)
                  permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
                  try:
                      # files in .hg/ will be created using this mode
                      mode = os.stat(path).st_mode
                  except OSError:
                      return None
                  if (permbits & mode) == (permbits & ~util.umask):
                      # avoid some useless chmods
                      return None
                  return mode
              _data = 'data 00manifest.d 00manifest.i 00changelog.d  00changelog.i'
              class basicstore(object):
                  '''base class for local repository stores'''
                  def __init__(self, path, opener, pathjoiner):
                      self.path = path
                      self.pathjoiner = pathjoiner
                      self.createmode = _calcmode(path)
                      rawopener = opener(path)
                      rawopener.createmode = self.createmode
                      def diropener(f, *args, **kw):
                          # only directory names need escaping in this store
                          return rawopener(encodedir(f), *args, **kw)
                      self.opener = diropener
                  def join(self, f):
                      '''return the path of store file f below self.path'''
                      return self.pathjoiner(self.path, encodedir(f))
                  def _walk(self, relpath, recurse):
                      '''return a sorted list of (unencoded, encoded, size)
                      Lists the regular files ending in .d or .i below relpath,
                      descending into subdirectories only if recurse is true.
                      '''
                      root = self.pathjoiner(self.path, relpath)
                      if not os.path.isdir(root):
                          return []
                      # reported names are relative to the store path
                      prefixlen = len(self.path) + len(os.sep)
                      found = []
                      pending = [root]
                      while pending:
                          current = pending.pop()
                          for name, kind, st in osutil.listdir(current, stat=True):
                              full = self.pathjoiner(current, name)
                              if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                                  encoded = util.pconvert(full[prefixlen:])
                                  found.append((decodedir(encoded), encoded,
                                                st.st_size))
                              elif kind == stat.S_IFDIR and recurse:
                                  pending.append(full)
                      found.sort()
                      return found
                  def datafiles(self):
                      '''return the (unencoded, encoded, size) entries below data/'''
                      return self._walk('data', True)
                  def walk(self):
                      '''yields (unencoded, encoded, size)'''
                      # yield data files first
                      for entry in self.datafiles():
                          yield entry
                      # yield manifest before changelog
                      for entry in self._walk('', False)[::-1]:
                          yield entry
                  def copylist(self):
                      '''return the names to copy: 'requires' plus the _data names'''
                      names = ['requires']
                      names.extend(_data.split())
                      return names
              class encodedstore(basicstore):
                  '''store below <path>/store using encodefilename for file names'''
                  def __init__(self, path, opener, pathjoiner):
                      self.pathjoiner = pathjoiner
                      self.path = pathjoiner(path, 'store')
                      self.createmode = _calcmode(self.path)
                      rawopener = opener(self.path)
                      rawopener.createmode = self.createmode
                      def encodingopener(f, *args, **kw):
                          return rawopener(encodefilename(f), *args, **kw)
                      self.opener = encodingopener
                  def datafiles(self):
                      '''yields (unencoded, encoded, size) for the files below data/
                      unencoded is None for names that decodefilename rejects.
                      '''
                      for unencoded, encoded, size in self._walk('data', True):
                          try:
                              unencoded = decodefilename(unencoded)
                          except KeyError:
                              unencoded = None
                          yield unencoded, encoded, size
                  def join(self, f):
                      '''return the path of store file f below self.path'''
                      return self.pathjoiner(self.path, encodefilename(f))
                  def copylist(self):
                      '''return the names to copy, relative to the repository dir'''
                      names = ['requires', '00changelog.i']
                      for f in _data.split():
                          names.append(self.pathjoiner('store', f))
                      return names
              class fncache(object):
                  '''set-like view of the names listed in the store's fncache file
                  The file is read lazily on first use. It holds one name per line;
                  names are passed through encodedir when written and through
                  decodedir when read.
                  '''
                  # the filename used to be partially encoded
                  # hence the encodedir/decodedir dance
                  def __init__(self, opener):
                      self.opener = opener
                      # None until the fncache file has been read (see _load)
                      self.entries = None
                  def _load(self):
                      '''fill the entries from the fncache file
                      A missing file yields an empty set. Raises util.Abort on a
                      malformed line; self.entries is left untouched in that case.
                      '''
                      entries = set()
                      try:
                          fp = self.opener('fncache', mode='rb')
                      except IOError:
                          # skip nonexistent file
                          self.entries = entries
                          return
                      try:
                          for n, line in enumerate(fp):
                              if (len(line) < 2) or (line[-1] != '\n'):
                                  t = _('invalid entry in fncache, line %s') % (n + 1)
                                  raise util.Abort(t)
                              entries.add(decodedir(line[:-1]))
                      finally:
                          # also release the handle when aborting
                          fp.close()
                      self.entries = entries
                  def rewrite(self, files):
                      '''replace the fncache file content with the names in files'''
                      entries = set()
                      fp = self.opener('fncache', mode='wb')
                      try:
                          for p in files:
                              fp.write(encodedir(p) + '\n')
                              # collected while writing, so that one-shot
                              # iterables work as well
                              entries.add(p)
                      finally:
                          fp.close()
                      self.entries = entries
                  def add(self, fn):
                      '''append fn to the fncache file unless it is already listed'''
                      if self.entries is None:
                          self._load()
                      if fn not in self.entries:
                          fp = self.opener('fncache', 'ab')
                          try:
                              fp.write(encodedir(fn) + '\n')
                          finally:
                              fp.close()
                          self.entries.add(fn)
                  def __contains__(self, fn):
                      if self.entries is None:
                          self._load()
                      return fn in self.entries
                  def __iter__(self):
                      if self.entries is None:
                          self._load()
                      return iter(self.entries)
              class fncachestore(basicstore):
                  '''store below <path>/store using hybridencode for file names
                  Names of files below data/ that are opened for writing are
                  recorded in the fncache file.
                  '''
                  def __init__(self, path, opener, pathjoiner):
                      self.pathjoiner = pathjoiner
                      self.path = self.pathjoiner(path, 'store')
                      self.createmode = _calcmode(self.path)
                      op = opener(self.path)
                      op.createmode = self.createmode
                      fnc = fncache(op)
                      self.fncache = fnc
                      def fncacheopener(path, mode='r', *args, **kw):
                          # fncache.add itself skips names it already knows, so
                          # no membership test is needed here
                          if mode not in ('r', 'rb') and path.startswith('data/'):
                              fnc.add(path)
                          return op(hybridencode(path), mode, *args, **kw)
                      self.opener = fncacheopener
                  def join(self, f):
                      '''return the path of store file f below self.path'''
                      return self.pathjoiner(self.path, hybridencode(f))
                  def datafiles(self):
                      '''yields (unencoded, encoded, size) for the fncache entries
                      Entries whose file cannot be stat'ed are skipped; if there
                      were any, the fncache file is rewritten without them once
                      the iteration has run to completion.
                      '''
                      rewrite = False
                      existing = []
                      pjoin = self.pathjoiner
                      spath = self.path
                      for f in self.fncache:
                          ef = hybridencode(f)
                          try:
                              st = os.stat(pjoin(spath, ef))
                              yield f, ef, st.st_size
                              existing.append(f)
                          except OSError:
                              # nonexistent entry
                              rewrite = True
                      if rewrite:
                          # rewrite fncache to remove nonexistent entries
                          # (may be caused by rollback / strip)
                          self.fncache.rewrite(existing)
                  def copylist(self):
                      '''return the names to copy, relative to the repository dir'''
                      d = _data + ' dh fncache'
                      return (['requires', '00changelog.i'] +
                              [self.pathjoiner('store', f) for f in d.split()])
              def store(requirements, path, opener, pathjoiner=None):
                  '''return the store object matching the repository requirements
                  'store' together with 'fncache' selects fncachestore, 'store'
                  alone selects encodedstore, anything else basicstore. pathjoiner
                  defaults to os.path.join.
                  '''
                  if not pathjoiner:
                      pathjoiner = os.path.join
                  if 'store' not in requirements:
                      storecls = basicstore
                  elif 'fncache' in requirements:
                      storecls = fncachestore
                  else:
                      storecls = encodedstore
                  return storecls(path, opener, pathjoiner)

tests/test-convert

0 +6 0

              #!/bin/sh
              # Test script for "hg convert"; everything it prints is compared with
              # the expected output in tests/test-convert.out.
              # enable the convert extension and set convert's hg.saverev to False
              cat >> $HGRCPATH <<EOF
              [extensions]
              convert=
              [convert]
              hg.saverev=False
              EOF
              hg help convert
              # build source repository "a" with five commits:
              # add a, copy a to b, remove a, rename b to a, modify a
              hg init a
              cd a
              echo a > a
              hg ci -d'0 0' -Ama
              hg cp a b
              hg ci -d'1 0' -mb
              hg rm a
              hg ci -d'2 0' -mc
              hg mv b a
              hg ci -d'3 0' -md
              echo a >> a
              hg ci -d'4 0' -me
              cd ..
              # convert without naming a destination (a-hg is assumed); the grep
              # drops the message about missing subversion python bindings
              hg convert a 2>&1 | grep -v 'subversion python bindings could not be loaded'
              # pull the source into the converted repository
              hg --cwd a-hg pull ../a
              # destination is an existing regular file
              touch bogusfile
              echo % should fail
              hg convert a bogusfile
              # destination is a directory without any permissions
              mkdir bogusdir
              chmod 000 bogusdir
              echo % should fail
              hg convert a bogusdir
              echo % should succeed
              chmod 700 bogusdir
              hg convert a bogusdir
              echo % test pre and post conversion actions
              # restrict the conversion to file b and keep only the 'run hg' lines
              # of the debug output
              echo 'include b' > filemap
              hg convert --debug --filemap filemap a partialb | \
                  grep 'run hg'
              echo % converting empty dir should fail "nicely"
              mkdir emptydir
              # override $PATH to ensure p4 not visible; use $PYTHON in case we're
              # running from a devel copy, not a temp installation
              PATH=$BINDIR $PYTHON $BINDIR/hg convert emptydir 2>&1 | sed 's,file://.*/emptydir,.../emptydir,g'
              echo % convert with imaginary source type
              hg convert --source-type foo a a-foo
              echo % convert with imaginary sink type
              hg convert --dest-type foo a a-foo
+             echo
+             echo % "testing: convert must not produce duplicate entries in fncache"
+             hg convert a b
+             echo % "contents of fncache file:"
+             cat b/.hg/store/fncache
              # presumably here so that the script's exit status does not depend on
              # the last test command
              true

tests/test-convert.out

0 +14 0

              hg convert [OPTION]... SOURCE [DEST [REVMAP]]
              convert a foreign SCM repository to a Mercurial one.
                  Accepted source formats [identifiers]:
                  - Mercurial [hg]
                  - CVS [cvs]
                  - Darcs [darcs]
                  - git [git]
                  - Subversion [svn]
                  - Monotone [mtn]
                  - GNU Arch [gnuarch]
                  - Bazaar [bzr]
                  - Perforce [p4]
                  Accepted destination formats [identifiers]:
                  - Mercurial [hg]
                  - Subversion [svn] (history on branches is not preserved)
                  If no revision is given, all revisions will be converted. Otherwise,
                  convert will only import up to the named revision (given in a format
                  understood by the source).
                  If no destination directory name is specified, it defaults to the basename
                  of the source with '-hg' appended. If the destination repository doesn't
                  exist, it will be created.
                  By default, all sources except Mercurial will use --branchsort. Mercurial
                  uses --sourcesort to preserve original revision numbers order. Sort modes
                  have the following effects:
                  --branchsort  convert from parent to child revision when possible, which
                                means branches are usually converted one after the other. It
                                generates more compact repositories.
                  --datesort    sort revisions by date. Converted repositories have good-
                                looking changelogs but are often an order of magnitude
                                larger than the same ones generated by --branchsort.
                  --sourcesort  try to preserve source revisions order, only supported by
                                Mercurial sources.
                  If <REVMAP> isn't given, it will be put in a default location
                  (<dest>/.hg/shamap by default). The <REVMAP> is a simple text file that
                  maps each source commit ID to the destination ID for that revision, like
                  so:
                    <source ID> <destination ID>
                  If the file doesn't exist, it's automatically created. It's updated on
                  each commit copied, so convert-repo can be interrupted and can be run
                  repeatedly to copy new commits.
                  The [username mapping] file is a simple text file that maps each source
                  commit author to a destination commit author. It is handy for source SCMs
                  that use unix logins to identify authors (eg: CVS). One line per author
                  mapping and the line format is: srcauthor=whatever string you want
                  The filemap is a file that allows filtering and remapping of files and
                  directories. Comment lines start with '#'. Each line can contain one of
                  the following directives:
                    include path/to/file
                    exclude path/to/file
                    rename from/file to/file
                  The 'include' directive causes a file, or all files under a directory, to
                  be included in the destination repository, and the exclusion of all other
                  files and directories not explicitly included. The 'exclude' directive
                  causes files or directories to be omitted. The 'rename' directive renames
                  a file or directory. To rename from a subdirectory into the root of the
                  repository, use '.' as the path to rename to.
                  The splicemap is a file that allows insertion of synthetic history,
                  letting you specify the parents of a revision. This is useful if you want
                  to e.g. give a Subversion merge two parents, or graft two disconnected
                  series of history together. Each entry contains a key, followed by a
                  space, followed by one or two comma-separated values. The key is the
                  revision ID in the source revision control system whose parents should be
                  modified (same format as a key in .hg/shamap). The values are the revision
                  IDs (in either the source or destination revision control system) that
                  should be used as the new parents for that node. For example, if you have
                  merged "release-1.0" into "trunk", then you should specify the revision on
                  "trunk" as the first parent and the one on the "release-1.0" branch as the
                  second.
                  The branchmap is a file that allows you to rename a branch when it is
                  being brought in from whatever external repository. When used in
                  conjunction with a splicemap, it allows for a powerful combination to help
                  fix even the most badly mismanaged repositories and turn them into nicely
                  structured Mercurial repositories. The branchmap contains lines of the
                  form "original_branch_name new_branch_name". "original_branch_name" is the
                  name of the branch in the source repository, and "new_branch_name" is the
                  name of the branch is the destination repository. This can be used to (for
                  instance) move code in one repository from "default" to a named branch.
                  Mercurial Source
                  ----------------
                  --config convert.hg.ignoreerrors=False    (boolean)
                      ignore integrity errors when reading. Use it to fix Mercurial
                      repositories with missing revlogs, by converting from and to
                      Mercurial.
                  --config convert.hg.saverev=False         (boolean)
                      store original revision ID in changeset (forces target IDs to change)
                  --config convert.hg.startrev=0            (hg revision identifier)
                      convert start revision and its descendants
                  CVS Source
                  ----------
                  CVS source will use a sandbox (i.e. a checked-out copy) from CVS to
                  indicate the starting point of what will be converted. Direct access to
                  the repository files is not needed, unless of course the repository is
                  :local:. The conversion uses the top level directory in the sandbox to
                  find the CVS repository, and then uses CVS rlog commands to find files to
                  convert. This means that unless a filemap is given, all files under the
                  starting directory will be converted, and that any directory
                  reorganization in the CVS sandbox is ignored.
                  The options shown are the defaults.
                  --config convert.cvsps.cache=True         (boolean)
                      Set to False to disable remote log caching, for testing and debugging
                      purposes.
                  --config convert.cvsps.fuzz=60            (integer)
                      Specify the maximum time (in seconds) that is allowed between commits
                      with identical user and log message in a single changeset. When very
                      large files were checked in as part of a changeset then the default
                      may not be long enough.
                  --config convert.cvsps.mergeto='{{mergetobranch ([-\w]+)}}'
                      Specify a regular expression to which commit log messages are matched.
                      If a match occurs, then the conversion process will insert a dummy
                      revision merging the branch on which this log message occurs to the
                      branch indicated in the regex.
                  --config convert.cvsps.mergefrom='{{mergefrombranch ([-\w]+)}}'
                      Specify a regular expression to which commit log messages are matched.
                      If a match occurs, then the conversion process will add the most
                      recent revision on the branch indicated in the regex as the second
                      parent of the changeset.
                  --config hook.cvslog
                      Specify a Python function to be called at the end of gathering the CVS
                      log. The function is passed a list with the log entries, and can
                      modify the entries in-place, or add or delete them.
                  --config hook.cvschangesets
                      Specify a Python function to be called after the changesets are
                      calculated from the the CVS log. The function is passed a list with
                      the changeset entries, and can modify the changesets in-place, or add
                      or delete them.
                  An additional "debugcvsps" Mercurial command allows the builtin changeset
                  merging code to be run without doing a conversion. Its parameters and
                  output are similar to that of cvsps 2.1. Please see the command help for
                  more details.
                  Subversion Source
                  -----------------
                  Subversion source detects classical trunk/branches/tags layouts. By
                  default, the supplied "svn://repo/path/" source URL is converted as a
                  single branch. If "svn://repo/path/trunk" exists it replaces the default
                  branch. If "svn://repo/path/branches" exists, its subdirectories are
                  listed as possible branches. If "svn://repo/path/tags" exists, it is
                  looked for tags referencing converted branches. Default "trunk",
                  "branches" and "tags" values can be overridden with following options. Set
                  them to paths relative to the source URL, or leave them blank to disable
                  auto detection.
                  --config convert.svn.branches=branches    (directory name)
                      specify the directory containing branches
                  --config convert.svn.tags=tags            (directory name)
                      specify the directory containing tags
                  --config convert.svn.trunk=trunk          (directory name)
                      specify the name of the trunk branch
                  Source history can be retrieved starting at a specific revision, instead
                  of being integrally converted. Only single branch conversions are
                  supported.
                  --config convert.svn.startrev=0           (svn revision number)
                      specify start Subversion revision.
                  Perforce Source
                  ---------------
                  The Perforce (P4) importer can be given a p4 depot path or a client
                  specification as source. It will convert all files in the source to a flat
                  Mercurial repository, ignoring labels, branches and integrations. Note
                  that when a depot path is given you then usually should specify a target
                  directory, because otherwise the target may be named ...-hg.
                  It is possible to limit the amount of source history to be converted by
                  specifying an initial Perforce revision.
                  --config convert.p4.startrev=0            (perforce changelist number)
                      specify initial Perforce revision.
                  Mercurial Destination
                  ---------------------
                  --config convert.hg.clonebranches=False   (boolean)
                      dispatch source branches in separate clones.
                  --config convert.hg.tagsbranch=default    (branch name)
                      tag revisions branch name
                  --config convert.hg.usebranchnames=True   (boolean)
                      preserve branch names
              options:
               -A --authors      username mapping filename
               -d --dest-type    destination repository type
                  --filemap      remap file names using contents of file
               -r --rev          import up to target revision REV
               -s --source-type  source repository type
                  --splicemap    splice synthesized history into place
                  --branchmap    change branch names while converting
                  --branchsort   try to sort changesets by branches
                  --datesort     try to sort changesets by date
                  --sourcesort   preserve source changesets order
              use "hg -v help convert" to show global options
              adding a
              assuming destination a-hg
              initializing destination a-hg repository
              scanning source...
              sorting...
              converting...
 a
 b
 c
 d
 e
              pulling from ../a
              searching for changes
              no changes found
              % should fail
              initializing destination bogusfile repository
              abort: cannot create new bundle repository
              % should fail
              abort: Permission denied: bogusdir
              % should succeed
              initializing destination bogusdir repository
              scanning source...
              sorting...
              converting...
 a
 b
 c
 d
 e
              % test pre and post conversion actions
              run hg source pre-conversion action
              run hg sink pre-conversion action
              run hg sink post-conversion action
              run hg source post-conversion action
              % converting empty dir should fail nicely
              assuming destination emptydir-hg
              initializing destination emptydir-hg repository
              emptydir does not look like a CVS checkout
              emptydir does not look like a Git repo
              emptydir does not look like a Subversion repo
              emptydir is not a local Mercurial repo
              emptydir does not look like a darcs repo
              emptydir does not look like a monotone repo
              emptydir does not look like a GNU Arch repo
              emptydir does not look like a Bazaar repo
              cannot find required "p4" tool
              abort: emptydir: missing or unsupported repository
              % convert with imaginary source type
              initializing destination a-foo repository
              abort: foo: invalid source repository type
              % convert with imaginary sink type
              abort: foo: invalid destination repository type
+             % testing: convert must not produce duplicate entries in fncache
+             initializing destination b repository
+             scanning source...
+             sorting...
+             converting...
+a
+b
+c
+d
+e
+             % contents of fncache file:
+             data/a.i
+             data/b.i

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No reviewers

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages