store: only add new entries to the fncache file...
Adrian Buehlmann
r10577:d5bd1bef stable
@@ -1,333 +1,333 @@
# store.py - repository store handling for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from i18n import _
import osutil, util
import os, stat

_sha = util.sha1

# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def encodedir(path):
    if not path.startswith('data/'):
        return path
    return (path
            .replace(".hg/", ".hg.hg/")
            .replace(".i/", ".i.hg/")
            .replace(".d/", ".d.hg/"))

def decodedir(path):
    if not path.startswith('data/'):
        return path
    return (path
            .replace(".d.hg/", ".d/")
            .replace(".i.hg/", ".i/")
            .replace(".hg.hg/", ".hg/"))

def _buildencodefun():
    e = '_'
    win_reserved = [ord(x) for x in '\\:*?"<>|']
    cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
    for x in (range(32) + range(126, 256) + win_reserved):
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
        cmap[chr(x)] = e + chr(x).lower()
    dmap = {}
    for k, v in cmap.iteritems():
        dmap[v] = k
    def decode(s):
        i = 0
        while i < len(s):
            for l in xrange(1, 4):
                try:
                    yield dmap[s[i:i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError
    return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
            lambda s: decodedir("".join(list(decode(s)))))

encodefilename, decodefilename = _buildencodefun()
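
# Editor's note (not part of the changeset): a hypothetical illustration of
# the reversible filename encoding built above. Uppercase letters become
# '_' + lowercase, and decodefilename reverses the mapping:
#
#   >>> encodefilename('data/Foo/BAR.i')
#   'data/_foo/_b_a_r.i'
#   >>> decodefilename('data/_foo/_b_a_r.i')
#   'data/Foo/BAR.i'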

def _build_lower_encodefun():
    win_reserved = [ord(x) for x in '\\:*?"<>|']
    cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
    for x in (range(32) + range(126, 256) + win_reserved):
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z")+1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])

lowerencode = _build_lower_encodefun()

_windows_reserved_filenames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    res = []
    for n in path.split('/'):
        if n:
            base = n.split('.')[0]
            if base and (base in _windows_reserved_filenames):
                # encode third letter ('aux' -> 'au~78')
                ec = "~%02x" % ord(n[2])
                n = n[0:2] + ec + n[3:]
            if n[-1] in '. ':
                # encode last period or space ('foo...' -> 'foo..~2e')
                n = n[:-1] + "~%02x" % ord(n[-1])
            res.append(n)
    return '/'.join(res)
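
# Editor's note (not part of the changeset): hypothetical auxencode examples,
# following the comments above. A component whose base name is a reserved
# Windows name gets its third character hex-encoded, and a trailing period
# or space is hex-encoded too:
#
#   >>> auxencode('aux.i')
#   'au~78.i'
#   >>> auxencode('foo...')
#   'foo..~2e'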

MAX_PATH_LEN_IN_HGSTORE = 120
DIR_PREFIX_LEN = 8
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
def hybridencode(path):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to DIR_PREFIX_LEN characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _MAX_SHORTENED_DIRS_LEN.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
    of the basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    if not path.startswith('data/'):
        return path
    # escape directories ending with .i and .d
    path = encodedir(path)
    ndpath = path[len('data/'):]
    res = 'data/' + auxencode(encodefilename(ndpath))
    if len(res) > MAX_PATH_LEN_IN_HGSTORE:
        digest = _sha(path).hexdigest()
        aep = auxencode(lowerencode(ndpath))
        _root, ext = os.path.splitext(aep)
        parts = aep.split('/')
        basename = parts[-1]
        sdirs = []
        for p in parts[:-1]:
            d = p[:DIR_PREFIX_LEN]
            if d[-1] in '. ':
                # Windows can't access dirs ending in period or space
                d = d[:-1] + '_'
            t = '/'.join(sdirs) + '/' + d
            if len(t) > _MAX_SHORTENED_DIRS_LEN:
                break
            sdirs.append(d)
        dirs = '/'.join(sdirs)
        if len(dirs) > 0:
            dirs += '/'
        res = 'dh/' + dirs + digest + ext
        space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
        if space_left > 0:
            filler = basename[:space_left]
            res = 'dh/' + dirs + filler + digest + ext
    return res
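
# Editor's note (not part of the changeset): a hypothetical sketch of
# hybridencode behaviour. Short paths keep the reversible encoding; paths
# whose encoded form exceeds MAX_PATH_LEN_IN_HGSTORE fall back to the hashed
# 'dh/' form, which stays within the limit:
#
#   >>> hybridencode('data/Some/Nested/File.i')
#   'data/_some/_nested/_file.i'
#   >>> p = 'data/' + 'x' * 200 + '.i'
#   >>> hybridencode(p).startswith('dh/')
#   True
#   >>> len(hybridencode(p)) <= MAX_PATH_LEN_IN_HGSTORE
#   True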

def _calcmode(path):
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
        # avoid some useless chmods
        if (0777 & ~util.umask) == (0777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode

_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = _calcmode(path)
        op = opener(self.path)
        op.createmode = self.createmode
        self.opener = lambda f, *args, **kw: op(encodedir(f), *args, **kw)

    def join(self, f):
        return self.pathjoiner(self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''yields (unencoded, encoded, size)'''
        path = self.pathjoiner(self.path, relpath)
        striplen = len(self.path) + len(os.sep)
        l = []
        if os.path.isdir(path):
            visit = [path]
            while visit:
                p = visit.pop()
                for f, kind, st in osutil.listdir(p, stat=True):
                    fp = self.pathjoiner(p, f)
                    if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), n, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        return sorted(l)

    def datafiles(self):
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for x in self.datafiles():
            yield x
        # yield manifest before changelog
        for x in reversed(self._walk('', False)):
            yield x

    def copylist(self):
        return ['requires'] + _data.split()

class encodedstore(basicstore):
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        op = opener(self.path)
        op.createmode = self.createmode
        self.opener = lambda f, *args, **kw: op(encodefilename(f), *args, **kw)

    def datafiles(self):
        for a, b, size in self._walk('data', True):
            try:
                a = decodefilename(a)
            except KeyError:
                a = None
            yield a, b, size

    def join(self, f):
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        return (['requires', '00changelog.i'] +
                [self.pathjoiner('store', f) for f in _data.split()])

class fncache(object):
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file'''
        self.entries = set()
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            return
        for n, line in enumerate(fp):
            if (len(line) < 2) or (line[-1] != '\n'):
                t = _('invalid entry in fncache, line %s') % (n + 1)
                raise util.Abort(t)
            self.entries.add(decodedir(line[:-1]))
        fp.close()

    def rewrite(self, files):
        fp = self.opener('fncache', mode='wb')
        for p in files:
            fp.write(encodedir(p) + '\n')
        fp.close()
        self.entries = set(files)

    def add(self, fn):
        if self.entries is None:
            self._load()
-        self.opener('fncache', 'ab').write(encodedir(fn) + '\n')
+        if fn not in self.entries:
+            self.opener('fncache', 'ab').write(encodedir(fn) + '\n')
+            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)

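# Editor's note (not part of the changeset): the change above is the point of
# this commit. The old add() appended to the fncache file without updating
# self.entries, and the "is it already known?" test lived in fncacheopener
# below, so writing the same new file several times in one process (as
# 'hg convert' does, one revision at a time) appended duplicate lines. With
# the membership check and the entries.add() call, add() is idempotent:
#
#   fnc.add('data/a.i')   # appends one line and records it in self.entries
#   fnc.add('data/a.i')   # already known, nothing is written
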
class fncachestore(basicstore):
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        op = opener(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc

        def fncacheopener(path, mode='r', *args, **kw):
-            if (mode not in ('r', 'rb')
-                and path.startswith('data/')
-                and path not in fnc):
+            if mode not in ('r', 'rb') and path.startswith('data/'):
                fnc.add(path)
            return op(hybridencode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        rewrite = False
        existing = []
        pjoin = self.pathjoiner
        spath = self.path
        for f in self.fncache:
            ef = hybridencode(f)
            try:
                st = os.stat(pjoin(spath, ef))
                yield f, ef, st.st_size
                existing.append(f)
            except OSError:
                # nonexistent entry
                rewrite = True
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        d = _data + ' dh fncache'
        return (['requires', '00changelog.i'] +
                [self.pathjoiner('store', f) for f in d.split()])

def store(requirements, path, opener, pathjoiner=None):
    pathjoiner = pathjoiner or os.path.join
    if 'store' in requirements:
        if 'fncache' in requirements:
            return fncachestore(path, opener, pathjoiner)
        return encodedstore(path, opener, pathjoiner)
    return basicstore(path, opener, pathjoiner)
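
The store.py hunk ends with the store() factory above. As an editorial recap
(not part of the changeset), the repository requirements select the store
class; this standalone sketch mirrors that dispatch:

    # hypothetical illustration of the store() dispatch
    def _which_store(requirements):
        if 'store' in requirements:
            if 'fncache' in requirements:
                return 'fncachestore'
            return 'encodedstore'
        return 'basicstore'

    assert _which_store(set()) == 'basicstore'
    assert _which_store(set(['store'])) == 'encodedstore'
    assert _which_store(set(['store', 'fncache'])) == 'fncachestore'
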
@@ -1,59 +1,65 @@
#!/bin/sh

cat >> $HGRCPATH <<EOF
[extensions]
convert=
[convert]
hg.saverev=False
EOF

hg help convert

hg init a
cd a
echo a > a
hg ci -d'0 0' -Ama
hg cp a b
hg ci -d'1 0' -mb
hg rm a
hg ci -d'2 0' -mc
hg mv b a
hg ci -d'3 0' -md
echo a >> a
hg ci -d'4 0' -me

cd ..
hg convert a 2>&1 | grep -v 'subversion python bindings could not be loaded'
hg --cwd a-hg pull ../a

touch bogusfile
echo % should fail
hg convert a bogusfile

mkdir bogusdir
chmod 000 bogusdir

echo % should fail
hg convert a bogusdir

echo % should succeed
chmod 700 bogusdir
hg convert a bogusdir

echo % test pre and post conversion actions
echo 'include b' > filemap
hg convert --debug --filemap filemap a partialb | \
    grep 'run hg'

echo % converting empty dir should fail "nicely"
mkdir emptydir
# override $PATH to ensure p4 not visible; use $PYTHON in case we're
# running from a devel copy, not a temp installation
PATH=$BINDIR $PYTHON $BINDIR/hg convert emptydir 2>&1 | sed 's,file://.*/emptydir,.../emptydir,g'

echo % convert with imaginary source type
hg convert --source-type foo a a-foo
echo % convert with imaginary sink type
hg convert --dest-type foo a a-foo

+echo
+echo % "testing: convert must not produce duplicate entries in fncache"
+hg convert a b
+echo % "contents of fncache file:"
+cat b/.hg/store/fncache
+
true
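
The new test lines above only dump the fncache and rely on the expected
output that follows; a standalone duplicate check in the same spirit could
look like this sketch (the helper name and the 'b' repository path from the
test are the only assumptions):

    # hypothetical helper, not part of the changeset
    def duplicate_fncache_entries(repo_root):
        seen, dupes = set(), []
        for line in open(repo_root + '/.hg/store/fncache'):
            entry = line.rstrip('\n')
            if entry in seen:
                dupes.append(entry)
            seen.add(entry)
        return dupes

    # after this fix, duplicate_fncache_entries('b') should return []
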
@@ -1,275 +1,289 @@
hg convert [OPTION]... SOURCE [DEST [REVMAP]]

convert a foreign SCM repository to a Mercurial one.

Accepted source formats [identifiers]:

- Mercurial [hg]
- CVS [cvs]
- Darcs [darcs]
- git [git]
- Subversion [svn]
- Monotone [mtn]
- GNU Arch [gnuarch]
- Bazaar [bzr]
- Perforce [p4]

Accepted destination formats [identifiers]:

- Mercurial [hg]
- Subversion [svn] (history on branches is not preserved)

If no revision is given, all revisions will be converted. Otherwise,
convert will only import up to the named revision (given in a format
understood by the source).

If no destination directory name is specified, it defaults to the basename
of the source with '-hg' appended. If the destination repository doesn't
exist, it will be created.

By default, all sources except Mercurial will use --branchsort. Mercurial
uses --sourcesort to preserve original revision numbers order. Sort modes
have the following effects:

--branchsort convert from parent to child revision when possible, which
means branches are usually converted one after the other. It
generates more compact repositories.
--datesort sort revisions by date. Converted repositories have good-
looking changelogs but are often an order of magnitude
larger than the same ones generated by --branchsort.
--sourcesort try to preserve source revisions order, only supported by
Mercurial sources.

If <REVMAP> isn't given, it will be put in a default location
(<dest>/.hg/shamap by default). The <REVMAP> is a simple text file that
maps each source commit ID to the destination ID for that revision, like
so:

<source ID> <destination ID>

If the file doesn't exist, it's automatically created. It's updated on
each commit copied, so convert-repo can be interrupted and can be run
repeatedly to copy new commits.

The [username mapping] file is a simple text file that maps each source
commit author to a destination commit author. It is handy for source SCMs
that use unix logins to identify authors (eg: CVS). One line per author
mapping and the line format is: srcauthor=whatever string you want

The filemap is a file that allows filtering and remapping of files and
directories. Comment lines start with '#'. Each line can contain one of
the following directives:

include path/to/file

exclude path/to/file

rename from/file to/file

The 'include' directive causes a file, or all files under a directory, to
be included in the destination repository, and the exclusion of all other
files and directories not explicitly included. The 'exclude' directive
causes files or directories to be omitted. The 'rename' directive renames
a file or directory. To rename from a subdirectory into the root of the
repository, use '.' as the path to rename to.
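
For instance, a filemap combining the three directives might look like this
(the paths are made up for illustration):

    # keep only the library, drop its generated files, move it to the root
    include lib
    exclude lib/build
    rename lib .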

The splicemap is a file that allows insertion of synthetic history,
letting you specify the parents of a revision. This is useful if you want
to e.g. give a Subversion merge two parents, or graft two disconnected
series of history together. Each entry contains a key, followed by a
space, followed by one or two comma-separated values. The key is the
revision ID in the source revision control system whose parents should be
modified (same format as a key in .hg/shamap). The values are the revision
IDs (in either the source or destination revision control system) that
should be used as the new parents for that node. For example, if you have
merged "release-1.0" into "trunk", then you should specify the revision on
"trunk" as the first parent and the one on the "release-1.0" branch as the
second.
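
In other words, each splicemap line pairs a key with one or two parents,
using the same kind of IDs as .hg/shamap (the placeholders below are not
real hashes):

    <source revision ID> <new first parent>[,<new second parent>]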

The branchmap is a file that allows you to rename a branch when it is
being brought in from whatever external repository. When used in
conjunction with a splicemap, it allows for a powerful combination to help
fix even the most badly mismanaged repositories and turn them into nicely
structured Mercurial repositories. The branchmap contains lines of the
form "original_branch_name new_branch_name". "original_branch_name" is the
name of the branch in the source repository, and "new_branch_name" is the
name of the branch is the destination repository. This can be used to (for
instance) move code in one repository from "default" to a named branch.
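
A branchmap doing exactly that would contain a single line (the new branch
name is invented for illustration):

    default releases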

Mercurial Source
----------------

--config convert.hg.ignoreerrors=False (boolean)
ignore integrity errors when reading. Use it to fix Mercurial
repositories with missing revlogs, by converting from and to
Mercurial.
--config convert.hg.saverev=False (boolean)
store original revision ID in changeset (forces target IDs to change)
--config convert.hg.startrev=0 (hg revision identifier)
convert start revision and its descendants

CVS Source
----------

CVS source will use a sandbox (i.e. a checked-out copy) from CVS to
indicate the starting point of what will be converted. Direct access to
the repository files is not needed, unless of course the repository is
:local:. The conversion uses the top level directory in the sandbox to
find the CVS repository, and then uses CVS rlog commands to find files to
convert. This means that unless a filemap is given, all files under the
starting directory will be converted, and that any directory
reorganization in the CVS sandbox is ignored.

The options shown are the defaults.

--config convert.cvsps.cache=True (boolean)
Set to False to disable remote log caching, for testing and debugging
purposes.
--config convert.cvsps.fuzz=60 (integer)
Specify the maximum time (in seconds) that is allowed between commits
with identical user and log message in a single changeset. When very
large files were checked in as part of a changeset then the default
may not be long enough.
--config convert.cvsps.mergeto='{{mergetobranch ([-\w]+)}}'
Specify a regular expression to which commit log messages are matched.
If a match occurs, then the conversion process will insert a dummy
revision merging the branch on which this log message occurs to the
branch indicated in the regex.
--config convert.cvsps.mergefrom='{{mergefrombranch ([-\w]+)}}'
Specify a regular expression to which commit log messages are matched.
If a match occurs, then the conversion process will add the most
recent revision on the branch indicated in the regex as the second
parent of the changeset.
--config hook.cvslog
Specify a Python function to be called at the end of gathering the CVS
log. The function is passed a list with the log entries, and can
modify the entries in-place, or add or delete them.
--config hook.cvschangesets
Specify a Python function to be called after the changesets are
calculated from the the CVS log. The function is passed a list with
the changeset entries, and can modify the changesets in-place, or add
or delete them.
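
The only contract stated above is that the hook receives a list and may
modify it in place. A minimal sketch (the function name, the attribute it
inspects, and the filtering rule are assumptions, not documented behaviour):

    def cvschangesets_hook(changesets):
        # drop changesets we do not want converted; 'branch' is assumed
        # to be an attribute of the entries passed in
        changesets[:] = [cs for cs in changesets
                         if getattr(cs, 'branch', None) != 'SCRATCH']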

An additional "debugcvsps" Mercurial command allows the builtin changeset
merging code to be run without doing a conversion. Its parameters and
output are similar to that of cvsps 2.1. Please see the command help for
more details.

Subversion Source
-----------------

Subversion source detects classical trunk/branches/tags layouts. By
default, the supplied "svn://repo/path/" source URL is converted as a
single branch. If "svn://repo/path/trunk" exists it replaces the default
branch. If "svn://repo/path/branches" exists, its subdirectories are
listed as possible branches. If "svn://repo/path/tags" exists, it is
looked for tags referencing converted branches. Default "trunk",
"branches" and "tags" values can be overridden with following options. Set
them to paths relative to the source URL, or leave them blank to disable
auto detection.

--config convert.svn.branches=branches (directory name)
specify the directory containing branches
--config convert.svn.tags=tags (directory name)
specify the directory containing tags
--config convert.svn.trunk=trunk (directory name)
specify the name of the trunk branch

Source history can be retrieved starting at a specific revision, instead
of being integrally converted. Only single branch conversions are
supported.

--config convert.svn.startrev=0 (svn revision number)
specify start Subversion revision.

Perforce Source
---------------

The Perforce (P4) importer can be given a p4 depot path or a client
specification as source. It will convert all files in the source to a flat
Mercurial repository, ignoring labels, branches and integrations. Note
that when a depot path is given you then usually should specify a target
directory, because otherwise the target may be named ...-hg.

It is possible to limit the amount of source history to be converted by
specifying an initial Perforce revision.

--config convert.p4.startrev=0 (perforce changelist number)
specify initial Perforce revision.

Mercurial Destination
---------------------

--config convert.hg.clonebranches=False (boolean)
dispatch source branches in separate clones.
--config convert.hg.tagsbranch=default (branch name)
tag revisions branch name
--config convert.hg.usebranchnames=True (boolean)
preserve branch names

options:

-A --authors username mapping filename
-d --dest-type destination repository type
--filemap remap file names using contents of file
-r --rev import up to target revision REV
-s --source-type source repository type
--splicemap splice synthesized history into place
--branchmap change branch names while converting
--branchsort try to sort changesets by branches
--datesort try to sort changesets by date
--sourcesort preserve source changesets order

use "hg -v help convert" to show global options
adding a
assuming destination a-hg
initializing destination a-hg repository
scanning source...
sorting...
converting...
4 a
3 b
2 c
1 d
0 e
pulling from ../a
searching for changes
no changes found
% should fail
initializing destination bogusfile repository
abort: cannot create new bundle repository
% should fail
abort: Permission denied: bogusdir
% should succeed
initializing destination bogusdir repository
scanning source...
sorting...
converting...
4 a
3 b
2 c
1 d
0 e
% test pre and post conversion actions
run hg source pre-conversion action
run hg sink pre-conversion action
run hg sink post-conversion action
run hg source post-conversion action
% converting empty dir should fail nicely
assuming destination emptydir-hg
initializing destination emptydir-hg repository
emptydir does not look like a CVS checkout
emptydir does not look like a Git repo
emptydir does not look like a Subversion repo
emptydir is not a local Mercurial repo
emptydir does not look like a darcs repo
emptydir does not look like a monotone repo
emptydir does not look like a GNU Arch repo
emptydir does not look like a Bazaar repo
cannot find required "p4" tool
abort: emptydir: missing or unsupported repository
% convert with imaginary source type
initializing destination a-foo repository
abort: foo: invalid source repository type
% convert with imaginary sink type
abort: foo: invalid destination repository type
+
+% testing: convert must not produce duplicate entries in fncache
+initializing destination b repository
+scanning source...
+sorting...
+converting...
+4 a
+3 b
+2 c
+1 d
+0 e
+% contents of fncache file:
+data/a.i
+data/b.i