upstream/mercurial-mirror Commit - r33388:0823f098

convert: transcode CVS log messages by specified encoding (issue5597)...

FUJIWARA Katsunori -

r33388:0823f098 default

parent child

hgext/convert/__init__.py

0 +6 0

              # convert.py Foreign SCM converter
              #
              # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              '''import revisions from foreign VCS repositories into Mercurial'''
              from __future__ import absolute_import
              from mercurial.i18n import _
              from mercurial import (
                  registrar,
              )
              from . import (
                  convcmd,
                  cvsps,
                  subversion,
              )
              # Table handed to registrar.command(); the resulting ``command``
              # decorator is applied to every command function defined below.
              cmdtable = {}
              command = registrar.command(cmdtable)
              # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
              # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
              # be specifying the version(s) of Mercurial they are tested with, or
              # leave the attribute unspecified.
              testedwith = 'ships-with-hg-core'
              # The command implementations live in other modules (convcmd, cvsps,
              # subversion) to ease the demandload job; only thin wrappers are
              # defined here.
              # 'hg convert' command: declares the option table and the help text;
              # the work itself is done by convcmd.convert().
              @command('convert',
                  [('', 'authors', '',
                    _('username mapping filename (DEPRECATED) (use --authormap instead)'),
                    _('FILE')),
                  ('s', 'source-type', '', _('source repository type'), _('TYPE')),
                  ('d', 'dest-type', '', _('destination repository type'), _('TYPE')),
                  ('r', 'rev', [], _('import up to source revision REV'), _('REV')),
                  ('A', 'authormap', '', _('remap usernames using this file'), _('FILE')),
                  ('', 'filemap', '', _('remap file names using contents of file'),
                   _('FILE')),
                  ('', 'full', None,
                   _('apply filemap changes by converting all files again')),
                  ('', 'splicemap', '', _('splice synthesized history into place'),
                   _('FILE')),
                  ('', 'branchmap', '', _('change branch names while converting'),
                   _('FILE')),
                  ('', 'branchsort', None, _('try to sort changesets by branches')),
                  ('', 'datesort', None, _('try to sort changesets by date')),
                  ('', 'sourcesort', None, _('preserve source changesets order')),
                  ('', 'closesort', None, _('try to reorder closed revisions'))],
                 _('hg convert [OPTION]... SOURCE [DEST [REVMAP]]'),
                 norepo=True)
              def convert(ui, src, dest=None, revmapfile=None, **opts):
                  """convert a foreign SCM repository to a Mercurial one.
                  Accepted source formats [identifiers]:
                  - Mercurial [hg]
                  - CVS [cvs]
                  - Darcs [darcs]
                  - git [git]
                  - Subversion [svn]
                  - Monotone [mtn]
                  - GNU Arch [gnuarch]
                  - Bazaar [bzr]
                  - Perforce [p4]
                  Accepted destination formats [identifiers]:
                  - Mercurial [hg]
                  - Subversion [svn] (history on branches is not preserved)
                  If no revision is given, all revisions will be converted.
                  Otherwise, convert will only import up to the named revision
                  (given in a format understood by the source).
                  If no destination directory name is specified, it defaults to the
                  basename of the source with ``-hg`` appended. If the destination
                  repository doesn't exist, it will be created.
                  By default, all sources except Mercurial will use --branchsort.
                  Mercurial uses --sourcesort to preserve original revision numbers
                  order. Sort modes have the following effects:
                  --branchsort  convert from parent to child revision when possible,
                                which means branches are usually converted one after
                                the other. It generates more compact repositories.
                  --datesort    sort revisions by date. Converted repositories have
                                good-looking changelogs but are often an order of
                                magnitude larger than the same ones generated by
                                --branchsort.
                  --sourcesort  try to preserve source revisions order, only
                                supported by Mercurial sources.
                  --closesort   try to move closed revisions as close as possible
                                to parent branches, only supported by Mercurial
                                sources.
                  If ``REVMAP`` isn't given, it will be put in a default location
                  (``<dest>/.hg/shamap`` by default). The ``REVMAP`` is a simple
                  text file that maps each source commit ID to the destination ID
                  for that revision, like so::
                    <source ID> <destination ID>
                  If the file doesn't exist, it's automatically created. It's
                  updated on each commit copied, so :hg:`convert` can be interrupted
                  and can be run repeatedly to copy new commits.
                  The authormap is a simple text file that maps each source commit
                  author to a destination commit author. It is handy for source SCMs
                  that use unix logins to identify authors (e.g.: CVS). One line per
                  author mapping and the line format is::
                    source author = destination author
                  Empty lines and lines starting with a ``#`` are ignored.
                  The filemap is a file that allows filtering and remapping of files
                  and directories. Each line can contain one of the following
                  directives::
                    include path/to/file-or-dir
                    exclude path/to/file-or-dir
                    rename path/to/source path/to/destination
                  Comment lines start with ``#``. A specified path matches if it
                  equals the full relative name of a file or one of its parent
                  directories. The ``include`` or ``exclude`` directive with the
                  longest matching path applies, so line order does not matter.
                  The ``include`` directive causes a file, or all files under a
                  directory, to be included in the destination repository. The default
                  if there are no ``include`` statements is to include everything.
                  If there are any ``include`` statements, nothing else is included.
                  The ``exclude`` directive causes files or directories to
                  be omitted. The ``rename`` directive renames a file or directory if
                  it is converted. To rename from a subdirectory into the root of
                  the repository, use ``.`` as the path to rename to.
                  ``--full`` will make sure the converted changesets contain exactly
                  the right files with the right content. It will make a full
                  conversion of all files, not just the ones that have
                  changed. Files that already are correct will not be changed. This
                  can be used to apply filemap changes when converting
                  incrementally. This is currently only supported for Mercurial and
                  Subversion.
                  The splicemap is a file that allows insertion of synthetic
                  history, letting you specify the parents of a revision. This is
                  useful if you want to e.g. give a Subversion merge two parents, or
                  graft two disconnected series of history together. Each entry
                  contains a key, followed by a space, followed by one or two
                  comma-separated values::
                    key parent1, parent2
                  The key is the revision ID in the source
                  revision control system whose parents should be modified (same
                  format as a key in .hg/shamap). The values are the revision IDs
                  (in either the source or destination revision control system) that
                  should be used as the new parents for that node. For example, if
                  you have merged "release-1.0" into "trunk", then you should
                  specify the revision on "trunk" as the first parent and the one on
                  the "release-1.0" branch as the second.
                  The branchmap is a file that allows you to rename a branch when it is
                  being brought in from whatever external repository. When used in
                  conjunction with a splicemap, it allows for a powerful combination
                  to help fix even the most badly mismanaged repositories and turn them
                  into nicely structured Mercurial repositories. The branchmap contains
                  lines of the form::
                    original_branch_name new_branch_name
                  where "original_branch_name" is the name of the branch in the
                  source repository, and "new_branch_name" is the name of the branch
                  in the destination repository. No whitespace is allowed in the new
                  branch name. This can be used to (for instance) move code in one
                  repository from "default" to a named branch.
                  Mercurial Source
                  ################
                  The Mercurial source recognizes the following configuration
                  options, which you can set on the command line with ``--config``:
                  :convert.hg.ignoreerrors: ignore integrity errors when reading.
                      Use it to fix Mercurial repositories with missing revlogs, by
                      converting from and to Mercurial. Default is False.
                  :convert.hg.saverev: store original revision ID in changeset
                      (forces target IDs to change). It takes a boolean argument and
                      defaults to False.
                  :convert.hg.startrev: specify the initial Mercurial revision.
                      The default is 0.
                  :convert.hg.revs: revset specifying the source revisions to convert.
                  CVS Source
                  ##########
                  CVS source will use a sandbox (i.e. a checked-out copy) from CVS
                  to indicate the starting point of what will be converted. Direct
                  access to the repository files is not needed, unless of course the
                  repository is ``:local:``. The conversion uses the top level
                  directory in the sandbox to find the CVS repository, and then uses
                  CVS rlog commands to find files to convert. This means that unless
                  a filemap is given, all files under the starting directory will be
                  converted, and that any directory reorganization in the CVS
                  sandbox is ignored.
                  The following options can be used with ``--config``:
                  :convert.cvsps.cache: Set to False to disable remote log caching,
                      for testing and debugging purposes. Default is True.
                  :convert.cvsps.fuzz: Specify the maximum time (in seconds) that is
                      allowed between commits with identical user and log message in
                      a single changeset. When very large files were checked in as
                      part of a changeset then the default may not be long enough.
                      The default is 60.
+                 :convert.cvsps.logencoding: Specify encoding name to be used for
+                     transcoding CVS log messages. Multiple encoding names can be
+                     specified as a list (see :hg:`help config.Syntax`), but only
+                     the first acceptable encoding in the list is used per CVS log
+                     entries. This transcoding is executed before cvslog hook below.
                  :convert.cvsps.mergeto: Specify a regular expression to which
                      commit log messages are matched. If a match occurs, then the
                      conversion process will insert a dummy revision merging the
                      branch on which this log message occurs to the branch
                      indicated in the regex. Default is ``{{mergetobranch
                      ([-\\w]+)}}``
                  :convert.cvsps.mergefrom: Specify a regular expression to which
                      commit log messages are matched. If a match occurs, then the
                      conversion process will add the most recent revision on the
                      branch indicated in the regex as the second parent of the
                      changeset. Default is ``{{mergefrombranch ([-\\w]+)}}``
                  :convert.localtimezone: use local time (as determined by the TZ
                      environment variable) for changeset date/times. The default
                      is False (use UTC).
                  :hooks.cvslog: Specify a Python function to be called at the end of
                      gathering the CVS log. The function is passed a list with the
                      log entries, and can modify the entries in-place, or add or
                      delete them.
                  :hooks.cvschangesets: Specify a Python function to be called after
                      the changesets are calculated from the CVS log. The
                      function is passed a list with the changeset entries, and can
                      modify the changesets in-place, or add or delete them.
                  An additional "debugcvsps" Mercurial command allows the builtin
                  changeset merging code to be run without doing a conversion. Its
                  parameters and output are similar to that of cvsps 2.1. Please see
                  the command help for more details.
                  Subversion Source
                  #################
                  Subversion source detects classical trunk/branches/tags layouts.
                  By default, the supplied ``svn://repo/path/`` source URL is
                  converted as a single branch. If ``svn://repo/path/trunk`` exists
                  it replaces the default branch. If ``svn://repo/path/branches``
                  exists, its subdirectories are listed as possible branches. If
                  ``svn://repo/path/tags`` exists, it is looked for tags referencing
                  converted branches. Default ``trunk``, ``branches`` and ``tags``
                  values can be overridden with following options. Set them to paths
                  relative to the source URL, or leave them blank to disable auto
                  detection.
                  The following options can be set with ``--config``:
                  :convert.svn.branches: specify the directory containing branches.
                      The default is ``branches``.
                  :convert.svn.tags: specify the directory containing tags. The
                      default is ``tags``.
                  :convert.svn.trunk: specify the name of the trunk branch. The
                      default is ``trunk``.
                  :convert.localtimezone: use local time (as determined by the TZ
                      environment variable) for changeset date/times. The default
                      is False (use UTC).
                  Source history can be retrieved starting at a specific revision,
                  instead of being integrally converted. Only single branch
                  conversions are supported.
                  :convert.svn.startrev: specify start Subversion revision number.
                      The default is 0.
                  Git Source
                  ##########
                  The Git importer converts commits from all reachable branches (refs
                  in refs/heads) and remotes (refs in refs/remotes) to Mercurial.
                  Branches are converted to bookmarks with the same name, with the
                  leading 'refs/heads' stripped. Git submodules are converted to Git
                  subrepos in Mercurial.
                  The following options can be set with ``--config``:
                  :convert.git.similarity: specify how similar files modified in a
                      commit must be to be imported as renames or copies, as a
                      percentage between ``0`` (disabled) and ``100`` (files must be
                      identical). For example, ``90`` means that a delete/add pair will
                      be imported as a rename if more than 90% of the file hasn't
                      changed. The default is ``50``.
                  :convert.git.findcopiesharder: while detecting copies, look at all
                      files in the working copy instead of just changed ones. This
                      is very expensive for large projects, and is only effective when
                      ``convert.git.similarity`` is greater than 0. The default is False.
                  :convert.git.renamelimit: perform rename and copy detection up to this
                      many changed files in a commit. Increasing this will make rename
                      and copy detection more accurate but will significantly slow down
                      computation on large projects. The option is only relevant if
                      ``convert.git.similarity`` is greater than 0. The default is
                      ``400``.
                  :convert.git.committeractions: list of actions to take when processing
                      author and committer values.
                      Git commits have separate author (who wrote the commit) and committer
                      (who applied the commit) fields. Not all destinations support separate
                      author and committer fields (including Mercurial). This config option
                      controls what to do with these author and committer fields during
                      conversion.
                      A value of ``messagedifferent`` will append a ``committer: ...``
                      line to the commit message if the Git committer is different from the
                      author. The prefix of that line can be specified using the syntax
                      ``messagedifferent=<prefix>``. e.g. ``messagedifferent=git-committer:``.
                      When a prefix is specified, a space will always be inserted between the
                      prefix and the value.
                      ``messagealways`` behaves like ``messagedifferent`` except it will
                      always result in a ``committer: ...`` line being appended to the commit
                      message. This value is mutually exclusive with ``messagedifferent``.
                      ``dropcommitter`` will remove references to the committer. Only
                      references to the author will remain. Actions that add references
                      to the committer will have no effect when this is set.
                      ``replaceauthor`` will replace the value of the author field with
                      the committer. Other actions that add references to the committer
                      will still take effect when this is set.
                      The default is ``messagedifferent``.
                  :convert.git.extrakeys: list of extra keys from commit metadata to copy to
                      the destination. Some Git repositories store extra metadata in commits.
                      By default, this non-default metadata will be lost during conversion.
                      Setting this config option can retain that metadata. Some built-in
                      keys such as ``parent`` and ``branch`` are not allowed to be copied.
                  :convert.git.remoteprefix: remote refs are converted as bookmarks with
                      ``convert.git.remoteprefix`` as a prefix followed by a /. The default
                      is 'remote'.
                  :convert.git.saverev: whether to store the original Git commit ID in the
                      metadata of the destination commit. The default is True.
                  :convert.git.skipsubmodules: does not convert root level .gitmodules files
                      or files with 160000 mode indicating a submodule. Default is False.
                  Perforce Source
                  ###############
                  The Perforce (P4) importer can be given a p4 depot path or a
                  client specification as source. It will convert all files in the
                  source to a flat Mercurial repository, ignoring labels, branches
                  and integrations. Note that when a depot path is given you then
                  usually should specify a target directory, because otherwise the
                  target may be named ``...-hg``.
                  The following options can be set with ``--config``:
                  :convert.p4.encoding: specify the encoding to use when decoding standard
                      output of the Perforce command line tool. The default is default system
                      encoding.
                  :convert.p4.startrev: specify initial Perforce revision (a
                      Perforce changelist number).
                  Mercurial Destination
                  #####################
                  The Mercurial destination will recognize Mercurial subrepositories in the
                  destination directory, and update the .hgsubstate file automatically if the
                  destination subrepositories contain the <dest>/<sub>/.hg/shamap file.
                  Converting a repository with subrepositories requires converting a single
                  repository at a time, from the bottom up.
                  .. container:: verbose
                     An example showing how to convert a repository with subrepositories::
                       # so convert knows the type when it sees a non empty destination
                       $ hg init converted
                       $ hg convert orig/sub1 converted/sub1
                       $ hg convert orig/sub2 converted/sub2
                       $ hg convert orig converted
                  The following options are supported:
                  :convert.hg.clonebranches: dispatch source branches in separate
                      clones. The default is False.
                  :convert.hg.tagsbranch: branch name for tag revisions, defaults to
                      ``default``.
                  :convert.hg.usebranchnames: preserve branch names. The default is
                      True.
                  :convert.hg.sourcename: records the given string as a 'convert_source' extra
                      value on each commit made in the target repository. The default is None.
                  All Destinations
                  ################
                  All destination types accept the following options:
                  :convert.skiptags: does not convert tags from the source repo to the target
                      repo. The default is False.
                  """
                  # Forward the positional arguments and every parsed option unchanged
                  # to convcmd.convert() and hand back whatever it returns.
                  return convcmd.convert(ui, src, dest, revmapfile, **opts)
              # 'hg debugsvnlog' command: takes no options of its own and simply
              # forwards ui and any keyword options to subversion.debugsvnlog().
              @command('debugsvnlog', [], 'hg debugsvnlog', norepo=True)
              def debugsvnlog(ui, **opts):
                  return subversion.debugsvnlog(ui, **opts)
              # 'hg debugcvsps' command: declares the option table and help text here;
              # the implementation is cvsps.debugcvsps().
              @command('debugcvsps',
                  [
                  # Main options shared with cvsps-2.1
                  ('b', 'branches', [], _('only return changes on specified branches')),
                  ('p', 'prefix', '', _('prefix to remove from file names')),
                  ('r', 'revisions', [],
                   _('only return changes after or between specified tags')),
                  ('u', 'update-cache', None, _("update cvs log cache")),
                  ('x', 'new-cache', None, _("create new cvs log cache")),
                  ('z', 'fuzz', 60, _('set commit time fuzz in seconds')),
                  ('', 'root', '', _('specify cvsroot')),
                  # Options specific to builtin cvsps
                  ('', 'parents', '', _('show parent changesets')),
                  ('', 'ancestors', '', _('show current changeset in ancestor branches')),
                  # Options that are ignored for compatibility with cvsps-2.1
                  ('A', 'cvs-direct', None, _('ignored for compatibility')),
                  ],
                  _('hg debugcvsps [OPTION]... [PATH]...'),
                  norepo=True)
              def debugcvsps(ui, *args, **opts):
                  '''create changeset information from CVS
                  This command is intended as a debugging tool for the CVS to
                  Mercurial converter, and can be used as a direct replacement for
                  cvsps.
                  Hg debugcvsps reads the CVS rlog for current directory (or any
                  named directory) in the CVS repository, and converts the log to a
                  series of changesets based on matching commit log entries and
                  dates.'''
                  # Positional PATH arguments and all options are passed through as-is.
                  return cvsps.debugcvsps(ui, *args, **opts)
              def kwconverted(ctx, name):
                  """Return the conversion-source information selected by ``name``.
                  The 'convert_revision' extra of ``ctx`` ('' when absent) is returned
                  unchanged unless it starts with 'svn:' and ``name`` is one of
                  'svnrev', 'svnpath' or 'svnuuid'. In that case the matching element
                  of subversion.revsplit() is returned; 'svnrev' is converted with
                  str().
                  """
                  source = ctx.extra().get('convert_revision', '')
                  if not source.startswith('svn:'):
                      return source
                  # index of each keyword's value in the result of revsplit()
                  position = {'svnuuid': 0, 'svnpath': 1, 'svnrev': 2}.get(name)
                  if position is None:
                      return source
                  piece = subversion.revsplit(source)[position]
                  if name == 'svnrev':
                      return str(piece)
                  return piece
              # Decorator factory used below to register the 'svnrev', 'svnpath' and
              # 'svnuuid' template keywords.
              templatekeyword = registrar.templatekeyword()
              # Template keyword 'svnrev': the value is computed by kwconverted() from
              # the changeset's 'convert_revision' extra; repo and **args are not used.
              @templatekeyword('svnrev')
              def kwsvnrev(repo, ctx, **args):
                  """String. Converted subversion revision number."""
                  return kwconverted(ctx, 'svnrev')
              # Template keyword 'svnpath': the value is computed by kwconverted() from
              # the changeset's 'convert_revision' extra; repo and **args are not used.
              @templatekeyword('svnpath')
              def kwsvnpath(repo, ctx, **args):
                  """String. Converted subversion revision project path."""
                  return kwconverted(ctx, 'svnpath')
              # Template keyword 'svnuuid': the value is computed by kwconverted() from
              # the changeset's 'convert_revision' extra; repo and **args are not used.
              @templatekeyword('svnuuid')
              def kwsvnuuid(repo, ctx, **args):
                  """String. Converted subversion revision repository identifier."""
                  return kwconverted(ctx, 'svnuuid')
              # Tell hggettext to extract the docstrings of these template keyword
              # functions:
              i18nfunctions = [kwsvnrev, kwsvnpath, kwsvnuuid]

hgext/convert/cvsps.py

0 +30 0

              # Mercurial built-in replacement for cvsps.
              #
              # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from __future__ import absolute_import
              import os
              import re
              from mercurial.i18n import _
              from mercurial import (
                  encoding,
+                 error,
                  hook,
                  pycompat,
                  util,
              )
              # Module-level alias for the pickle implementation exposed by
              # mercurial.util.
              pickle = util.pickle
              class logentry(object):
                  '''Record describing one entry of the CVS log.
                  Every keyword argument given to the constructor becomes an attribute
                  of the instance. The attributes used are:
                      .author       - author name as CVS knows it
                      .branch       - name of branch this revision is on
                      .branches     - revision tuple of branches starting at this revision
                      .comment      - commit message
                      .commitid     - CVS commitid or None
                      .date         - the commit date as a (time, tz) tuple
                      .dead         - true if file revision is dead
                      .file         - Name of file
                      .lines        - a tuple (+lines, -lines) or None
                      .parent       - Previous revision of this entry
                      .rcs          - name of file as returned from CVS
                      .revision     - revision number as tuple
                      .tags         - list of tags on the file
                      .synthetic    - is this a synthetic "file ... added on ..." revision?
                                      (False unless passed explicitly)
                      .mergepoint   - the branch that has been merged from (if present in
                                      rlog output) or None
                      .branchpoints - the branches that start at the current entry or empty
                  '''
                  def __init__(self, **entries):
                      # start from the default, then let the caller's values override it
                      attrs = {'synthetic': False}
                      attrs.update(entries)
                      self.__dict__.update(attrs)
                  def __repr__(self):
                      # attributes are listed in sorted name order for a stable output
                      attrs = vars(self)
                      shown = ["%s=%r" % (key, attrs[key]) for key in sorted(attrs)]
                      return "%s(%s)" % (type(self).__name__, ", ".join(shown))
              class logerror(Exception):
                  '''Error raised while collecting the CVS log.
                  createlog() raises it, for example, when the current directory is
                  not a CVS sandbox.'''
                  pass
              def getrepopath(cvspath):
                  """Extract the repository path component of a CVS path.
                  >>> getrepopath('/foo/bar')
                  '/foo/bar'
                  >>> getrepopath('c:/foo/bar')
                  '/foo/bar'
                  >>> getrepopath(':pserver:10/foo/bar')
                  '/foo/bar'
                  >>> getrepopath(':pserver:10c:/foo/bar')
                  '/foo/bar'
                  >>> getrepopath(':pserver:/foo/bar')
                  '/foo/bar'
                  >>> getrepopath(':pserver:c:/foo/bar')
                  '/foo/bar'
                  >>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
                  '/foo/bar'
                  >>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
                  '/foo/bar'
                  >>> getrepopath('user@server/path/to/repository')
                  '/path/to/repository'
                  """
                  # According to the CVS manual, CVS paths are expressed like:
                  # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
                  #
                  # Only the text after the last ':' matters. Within it, the first '/'
                  # located at or after the '@' (if there is one) starts the repository
                  # path, and everything from that '/' on is returned.
                  tail = cvspath.rsplit(':', 1)[-1]
                  atposition = tail.find('@')
                  searchfrom = atposition if atposition != -1 else 0
                  return tail[tail.find('/', searchfrom):]
              def createlog(ui, directory=None, root="", rlog=True, cache=None):
                  '''Collect the CVS rlog

                  Runs "cvs rlog" (or "cvs log" when ``rlog`` is false), parses the
                  output with a line-based state machine and returns the result as a
                  list of logentry objects.

                  ui        - used for status/note/debug output and to read the
                              'convert.cvsps.logencoding' configuration
                  directory - directory handed to cvs; when None, the current directory
                              is treated as a CVS sandbox and the directory (and the
                              root, if CVS/Root is readable) are taken from the CVS/
                              administrative files
                  root      - CVS root; when empty, the CVSROOT environment variable is
                              used instead
                  rlog      - true to run "cvs rlog", false to run "cvs log"
                  cache     - when true, the resulting log is pickled below
                              ~/.hg.cvsps; when 'update', an existing cache is loaded
                              first and only entries after the last cached date are
                              requested from cvs

                  Returns the list of logentry objects (cached entries first when a
                  cache was loaded).

                  Raises logerror when the current directory is not a CVS sandbox, when
                  cvs reports an error, or when cached and new entries overlap in time.
                  Raises error.Abort when a log message cannot be transcoded with any of
                  the configured encodings, or when an encoding name is unknown.
                  '''
                  # Because we store many duplicate commit log messages, reusing strings
                  # saves a lot of memory and pickle storage space.
                  _scache = {}
                  def scache(s):
                      "return a shared version of a string"
                      return _scache.setdefault(s, s)
                  ui.status(_('collecting CVS rlog\n'))
                  log = []      # list of logentry objects containing the CVS state
                  # patterns to match in CVS (r)log output, by state of use
                  re_00 = re.compile('RCS file: (.+)$')
                  re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
                  re_02 = re.compile('cvs (r?log|server): (.+)\n$')
                  re_03 = re.compile("(Cannot access.+CVSROOT)|"
                                     "(can't create temporary directory.+)$")
                  re_10 = re.compile('Working file: (.+)$')
                  re_20 = re.compile('symbolic names:')
                  re_30 = re.compile('\t(.+): ([\\d.]+)$')
                  re_31 = re.compile('----------------------------$')
                  re_32 = re.compile('======================================='
                                     '======================================$')
                  re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
                  re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
                                     r'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
                                     r'(\s+commitid:\s+([^;]+);)?'
                                     r'(.*mergepoint:\s+([^;]+);)?')
                  re_70 = re.compile('branches: (.+);$')
                  file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')
                  prefix = ''   # leading path to strip of what we get from CVS
                  if directory is None:
                      # Current working directory
                      # Get the real directory in the repository
                      try:
                          # close the handle explicitly instead of relying on garbage
                          # collection
                          with open(os.path.join('CVS','Repository')) as fp:
                              prefix = fp.read().strip()
                          directory = prefix
                          if prefix == ".":
                              prefix = ""
                      except IOError:
                          raise logerror(_('not a CVS sandbox'))
                      if prefix and not prefix.endswith(pycompat.ossep):
                          prefix += pycompat.ossep
                      # Use the Root file in the sandbox, if it exists
                      try:
                          with open(os.path.join('CVS','Root')) as fp:
                              root = fp.read().strip()
                      except IOError:
                          pass
                  if not root:
                      root = encoding.environ.get('CVSROOT', '')
                  # read log cache if one exists
                  oldlog = []
                  date = None
                  if cache:
                      cachedir = os.path.expanduser('~/.hg.cvsps')
                      if not os.path.exists(cachedir):
                          os.mkdir(cachedir)
                      # The cvsps cache pickle needs a uniquified name, based on the
                      # repository location. The address may have all sort of nasties
                      # in it, slashes, colons and such. So here we take just the
                      # alphanumeric characters, concatenated in a way that does not
                      # mix up the various components, so that
                      #    :pserver:user@server:/path
                      # and
                      #    /pserver/user/server/path
                      # are mapped to different cache file names.
                      cachefile = root.split(":") + [directory, "cache"]
                      cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
                      cachefile = os.path.join(cachedir,
                                               '.'.join([s for s in cachefile if s]))
                  if cache == 'update':
                      try:
                          ui.note(_('reading cvs log cache %s\n') % cachefile)
                          # NOTE: unpickling can run code embedded in the file; the
                          # cache is only ever read from the user's own ~/.hg.cvsps.
                          # The file mode matches the one used for writing below.
                          with open(cachefile) as fp:
                              oldlog = pickle.load(fp)
                          for e in oldlog:
                              # entries pickled without these attributes cannot be
                              # reused, so the whole cache is discarded
                              if not (util.safehasattr(e, 'branchpoints') and
                                      util.safehasattr(e, 'commitid') and
                                      util.safehasattr(e, 'mergepoint')):
                                  ui.status(_('ignoring old cache\n'))
                                  oldlog = []
                                  break
                          ui.note(_('cache has %d log entries\n') % len(oldlog))
                      except Exception as e:
                          # reading the cache is best effort: fall back to a full log
                          ui.note(_('error reading cache: %r\n') % e)
                      if oldlog:
                          date = oldlog[-1].date    # last commit date as a (time,tz) tuple
                          date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
                  # build the CVS commandline
                  cmd = ['cvs', '-q']
                  if root:
                      cmd.append('-d%s' % root)
                      p = util.normpath(getrepopath(root))
                      if not p.endswith('/'):
                          p += '/'
                      if prefix:
                          # looks like normpath replaces "" by "."
                          prefix = p + util.normpath(prefix)
                      else:
                          prefix = p
                  cmd.append(['log', 'rlog'][rlog])
                  if date:
                      # no space between option and date string
                      cmd.append('-d>%s' % date)
                  cmd.append(directory)
                  # state machine begins here
                  tags = {}     # dictionary of revisions on current file with their tags
                  branchmap = {} # mapping between branch names and revision numbers
                  rcsmap = {}   # RCS path with '/Attic/' collapsed -> RCS path as reported
                  state = 0
                  store = False # set when a new record can be appended
                  cmd = [util.shellquote(arg) for arg in cmd]
                  ui.note(_("running %s\n") % (' '.join(cmd)))
                  ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
                  pfp = util.popen(' '.join(cmd))
                  # one line of lookahead is kept in 'peek' so that a '-----' line inside
                  # a commit message can be told apart from a real revision separator
                  peek = pfp.readline()
                  while True:
                      line = peek
                      if line == '':
                          break
                      peek = pfp.readline()
                      if line.endswith('\n'):
                          line = line[:-1]
                      #ui.debug('state=%d line=%r\n' % (state, line))
                      if state == 0:
                          # initial state, consume input until we see 'RCS file'
                          match = re_00.match(line)
                          if match:
                              rcs = match.group(1)
                              tags = {}
                              if rlog:
                                  # drop the trailing two characters of the RCS file
                                  # name and the repository prefix to get the file name
                                  filename = util.normpath(rcs[:-2])
                                  if filename.startswith(prefix):
                                      filename = filename[len(prefix):]
                                  if filename.startswith('/'):
                                      filename = filename[1:]
                                  if filename.startswith('Attic/'):
                                      filename = filename[6:]
                                  else:
                                      filename = filename.replace('/Attic/', '/')
                                  state = 2
                                  continue
                              state = 1
                              continue
                          match = re_01.match(line)
                          if match:
                              raise logerror(match.group(1))
                          match = re_02.match(line)
                          if match:
                              raise logerror(match.group(2))
                          if re_03.match(line):
                              raise logerror(line)
                      elif state == 1:
                          # expect 'Working file' (only when using log instead of rlog)
                          match = re_10.match(line)
                          assert match, _('RCS file must be followed by working file')
                          filename = util.normpath(match.group(1))
                          state = 2
                      elif state == 2:
                          # expect 'symbolic names'
                          if re_20.match(line):
                              branchmap = {}
                              state = 3
                      elif state == 3:
                          # read the symbolic names and store as tags
                          match = re_30.match(line)
                          if match:
                              rev = [int(x) for x in match.group(2).split('.')]
                              # Convert magic branch number to an odd-numbered one
                              revn = len(rev)
                              if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                                  rev = rev[:-2] + rev[-1:]
                              rev = tuple(rev)
                              if rev not in tags:
                                  tags[rev] = []
                              tags[rev].append(match.group(1))
                              branchmap[match.group(1)] = match.group(2)
                          elif re_31.match(line):
                              state = 5
                          elif re_32.match(line):
                              state = 0
                      elif state == 4:
                          # expecting '------' separator before first revision
                          # NOTE(review): nothing in this function sets state to 4, so
                          # this branch appears to be unreachable
                          if re_31.match(line):
                              state = 5
                          else:
                              assert not re_32.match(line), _('must have at least '
                                                              'some revisions')
                      elif state == 5:
                          # expecting revision number and possibly (ignored) lock indication
                          # we create the logentry here from values stored in states 0 to 4,
                          # as this state is re-entered for subsequent revisions of a file.
                          match = re_50.match(line)
                          assert match, _('expected revision number')
                          e = logentry(rcs=scache(rcs),
                                       file=scache(filename),
                                       revision=tuple([int(x) for x in
                                                       match.group(1).split('.')]),
                                       branches=[],
                                       parent=None,
                                       commitid=None,
                                       mergepoint=None,
                                       branchpoints=set())
                          state = 6
                      elif state == 6:
                          # expecting date, author, state, lines changed
                          match = re_60.match(line)
                          assert match, _('revision must be followed by date line')
                          d = match.group(1)
                          if d[2] == '/':
                              # Y2K
                              d = '19' + d
                          if len(d.split()) != 3:
                              # cvs log dates always in GMT
                              d = d + ' UTC'
                          e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S',
                                                      '%Y/%m/%d %H:%M:%S',
                                                      '%Y-%m-%d %H:%M:%S'])
                          e.author = scache(match.group(2))
                          e.dead = match.group(3).lower() == 'dead'
                          # groups 5 and 6 are the '+N' and '-N' line counts
                          if match.group(5):
                              if match.group(6):
                                  e.lines = (int(match.group(5)), int(match.group(6)))
                              else:
                                  e.lines = (int(match.group(5)), 0)
                          elif match.group(6):
                              e.lines = (0, int(match.group(6)))
                          else:
                              e.lines = None
                          if match.group(7): # cvs 1.12 commitid
                              e.commitid = match.group(8)
                          if match.group(9): # cvsnt mergepoint
                              myrev = match.group(10).split('.')
                              if len(myrev) == 2: # head
                                  e.mergepoint = 'HEAD'
                              else:
                                  # rebuild the magic branch number (x.y.0.z) so that it
                                  # can be looked up in branchmap
                                  myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
                                  branches = [b for b in branchmap if branchmap[b] == myrev]
                                  # report the branch number that failed to resolve;
                                  # e.mergepoint is still None at this point
                                  assert len(branches) == 1, ('unknown branch: %s'
                                                              % myrev)
                                  e.mergepoint = branches[0]
                          e.comment = []
                          state = 7
                      elif state == 7:
                          # read the revision numbers of branches that start at this revision
                          # or store the commit log message otherwise
                          m = re_70.match(line)
                          if m:
                              e.branches = [tuple([int(y) for y in x.strip().split('.')])
                                              for x in m.group(1).split(';')]
                              state = 8
                          elif re_31.match(line) and re_50.match(peek):
                              state = 5
                              store = True
                          elif re_32.match(line):
                              state = 0
                              store = True
                          else:
                              e.comment.append(line)
                      elif state == 8:
                          # store commit log message
                          if re_31.match(line):
                              # only a separator that is followed by a revision line
                              # ends the message; otherwise it is part of the message
                              cpeek = peek
                              if cpeek.endswith('\n'):
                                  cpeek = cpeek[:-1]
                              if re_50.match(cpeek):
                                  state = 5
                                  store = True
                              else:
                                  e.comment.append(line)
                          elif re_32.match(line):
                              state = 0
                              store = True
                          else:
                              e.comment.append(line)
                      # When a file is added on a branch B1, CVS creates a synthetic
                      # dead trunk revision 1.1 so that the branch has a root.
                      # Likewise, if you merge such a file to a later branch B2 (one
                      # that already existed when the file was added on B1), CVS
                      # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
                      # these revisions now, but mark them synthetic so
                      # createchangeset() can take care of them.
                      if (store and
                            e.dead and
                            e.revision[-1] == 1 and      # 1.1 or 1.1.x.1
                            len(e.comment) == 1 and
                            file_added_re.match(e.comment[0])):
                          ui.debug('found synthetic revision in %s: %r\n'
                                   % (e.rcs, e.comment[0]))
                          e.synthetic = True
                      if store:
                          # clean up the results and save in the log.
                          store = False
                          e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
                          e.comment = scache('\n'.join(e.comment))
                          revn = len(e.revision)
                          if revn > 3 and (revn % 2) == 0:
                              e.branch = tags.get(e.revision[:-1], [None])[0]
                          else:
                              e.branch = None
                          # find the branches starting from this revision
                          branchpoints = set()
                          for branch, revision in branchmap.iteritems():
                              revparts = tuple([int(i) for i in revision.split('.')])
                              if len(revparts) < 2: # bad tags
                                  continue
                              if revparts[-2] == 0 and revparts[-1] % 2 == 0:
                                  # normal branch
                                  if revparts[:-2] == e.revision:
                                      branchpoints.add(branch)
                              elif revparts == (1, 1, 1): # vendor branch
                                  if revparts in e.branches:
                                      branchpoints.add(branch)
                          e.branchpoints = branchpoints
                          log.append(e)
                          rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs
                          if len(log) % 100 == 0:
                              ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
                  log.sort(key=lambda x: (x.rcs, x.revision))
                  # find parent revisions of individual files
                  versions = {}
                  # seed with the cached entries so that new revisions can find their
                  # parent among them; cached RCS paths are aligned with the new ones
                  for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
                      rcs = e.rcs.replace('/Attic/', '/')
                      if rcs in rcsmap:
                          e.rcs = rcsmap[rcs]
                      branch = e.revision[:-1]
                      versions[(e.rcs, branch)] = e.revision
                  for e in log:
                      branch = e.revision[:-1]
                      p = versions.get((e.rcs, branch), None)
                      if p is None:
                          # first revision seen on this branch: the parent is the
                          # revision the branch number was derived from
                          p = e.revision[:-2]
                      e.parent = p
                      versions[(e.rcs, branch)] = e.revision
                  # update the log cache
                  if cache:
                      if log:
                          # join up the old and new logs
                          log.sort(key=lambda x: x.date)
                          if oldlog and oldlog[-1].date >= log[0].date:
                              raise logerror(_('log cache overlaps with new log entries,'
                                               ' re-run without cache.'))
                          log = oldlog + log
                          # write the new cachefile
                          ui.note(_('writing cvs log cache %s\n') % cachefile)
                          # close (and thereby flush) the cache file explicitly
                          with open(cachefile, 'w') as fp:
                              pickle.dump(log, fp)
                      else:
                          log = oldlog
                  ui.status(_('%d log entries\n') % len(log))
                  # transcode log messages to UTF-8, trying each configured encoding in
                  # order; nothing is done when no encoding is configured
                  encodings = ui.configlist('convert', 'cvsps.logencoding')
                  if encodings:
                      def revstr(r):
                          # this is needed, because logentry.revision is a tuple of "int"
                          # (e.g. (1, 2) for "1.2")
                          return '.'.join(pycompat.maplist(pycompat.bytestr, r))
                      for entry in log:
                          comment = entry.comment
                          for e in encodings:
                              try:
                                  entry.comment = comment.decode(e).encode('utf-8')
                                  if ui.debugflag:
                                      ui.debug("transcoding by %s: %s of %s\n" %
                                               (e, revstr(entry.revision), entry.file))
                                  break
                              except UnicodeDecodeError:
                                  pass # try next encoding
                              except LookupError as inst: # unknown encoding, maybe
                                  raise error.Abort(inst,
                                                    hint=_('check convert.cvsps.logencoding'
                                                           ' configuration'))
                          else:
                              # the loop ended without 'break': no encoding worked
                              raise error.Abort(_("no encoding can transcode"
                                                  " CVS log message for %s of %s")
                                                % (revstr(entry.revision), entry.file),
                                                hint=_('check convert.cvsps.logencoding'
                                                       ' configuration'))
                  # run the "cvslog" hook with the collected log entries
                  hook.hook(ui, None, "cvslog", True, log=log)
                  return log
              class changeset(object):
                  '''A group of CVS log entries treated as one changeset.

                  Instances are plain attribute bags: every keyword argument given to
                  the constructor becomes an attribute. The attributes used are:
                      .id        - integer identifying this changeset (list index)
                      .author    - author name as CVS knows it
                      .branch    - name of branch this changeset is on, or None
                      .comment   - commit message
                      .commitid  - CVS commitid or None
                      .date      - the commit date as a (time,tz) tuple
                      .entries   - list of logentry objects in this changeset
                      .parents   - list of one or two parent changesets
                      .tags      - list of tags on this changeset
                      .synthetic - from synthetic revision "file ... added on branch ..."
                      .mergepoint- the branch that has been merged from or None
                      .branchpoints- the branches that start at the current entry or empty
                  '''
                  def __init__(self, **entries):
                      # start from the defaults and let the keyword arguments win
                      attrs = {'id': None, 'synthetic': False}
                      attrs.update(entries)
                      vars(self).update(attrs)
                  def __repr__(self):
                      # attributes are listed in sorted order to keep the output stable
                      attrs = vars(self)
                      rendered = ["%s=%r" % (name, attrs[name]) for name in sorted(attrs)]
                      return "%s(%s)" % (type(self).__name__, ", ".join(rendered))
              def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
                  '''Convert log into changesets.'''
                  ui.status(_('creating changesets\n'))
                  # try to order commitids by date
                  mindate = {}
                  for e in log:
                      if e.commitid:
                          mindate[e.commitid] = min(e.date, mindate.get(e.commitid))
                  # Merge changesets
                  log.sort(key=lambda x: (mindate.get(x.commitid), x.commitid, x.comment,
                                          x.author, x.branch, x.date, x.branchpoints))
                  changesets = []
                  files = set()
                  c = None
                  for i, e in enumerate(log):
                      # Check if log entry belongs to the current changeset or not.
                      # Since CVS is file-centric, two different file revisions with
                      # different branchpoints should be treated as belonging to two
                      # different changesets (and the ordering is important and not
                      # honoured by cvsps at this point).
                      #
                      # Consider the following case:
                      # foo 1.1 branchpoints: [MYBRANCH]
                      # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
                      #
                      # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
                      # later version of foo may be in MYBRANCH2, so foo should be the
                      # first changeset and bar the next and MYBRANCH and MYBRANCH2
                      # should both start off of the bar changeset. No provisions are
                      # made to ensure that this is, in fact, what happens.
                      if not (c and e.branchpoints == c.branchpoints and
                              (# cvs commitids
                               (e.commitid is not None and e.commitid == c.commitid) or
                               (# no commitids, use fuzzy commit detection
                                (e.commitid is None or c.commitid is None) and
                                 e.comment == c.comment and
                                 e.author == c.author and
                                 e.branch == c.branch and
                                 ((c.date[0] + c.date[1]) <=
                                  (e.date[0] + e.date[1]) <=
                                  (c.date[0] + c.date[1]) + fuzz) and
                                 e.file not in files))):
                          c = changeset(comment=e.comment, author=e.author,
                                        branch=e.branch, date=e.date,
                                        entries=[], mergepoint=e.mergepoint,
                                        branchpoints=e.branchpoints, commitid=e.commitid)
                          changesets.append(c)
                          files = set()
                          if len(changesets) % 100 == 0:
                              t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
                              ui.status(util.ellipsis(t, 80) + '\n')
                      c.entries.append(e)
                      files.add(e.file)
                      c.date = e.date       # changeset date is date of latest commit in it
                  # Mark synthetic changesets
                  for c in changesets:
                      # Synthetic revisions always get their own changeset, because
                      # the log message includes the filename.  E.g. if you add file3
                      # and file4 on a branch, you get four log entries and three
                      # changesets:
                      #   "File file3 was added on branch ..." (synthetic, 1 entry)
                      #   "File file4 was added on branch ..." (synthetic, 1 entry)
                      #   "Add file3 and file4 to fix ..."     (real, 2 entries)
                      # Hence the check for 1 entry here.
                      c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
                  # Sort files in each changeset
                  def entitycompare(l, r):
                      'Mimic cvsps sorting order'
                      l = l.file.split('/')
                      r = r.file.split('/')
                      nl = len(l)
                      nr = len(r)
                      n = min(nl, nr)
                      for i in range(n):
                          if i + 1 == nl and nl < nr:
                              return -1
                          elif i + 1 == nr and nl > nr:
                              return +1
                          elif l[i] < r[i]:
                              return -1
                          elif l[i] > r[i]:
                              return +1
                      return 0
                  for c in changesets:
                      c.entries.sort(entitycompare)
                  # Sort changesets by date
                  odd = set()
                  def cscmp(l, r):
                      d = sum(l.date) - sum(r.date)
                      if d:
                          return d
                      # detect vendor branches and initial commits on a branch
                      le = {}
                      for e in l.entries:
                          le[e.rcs] = e.revision
                      re = {}
                      for e in r.entries:
                          re[e.rcs] = e.revision
                      d = 0
                      for e in l.entries:
                          if re.get(e.rcs, None) == e.parent:
                              assert not d
                              d = 1
                              break
                      for e in r.entries:
                          if le.get(e.rcs, None) == e.parent:
                              if d:
                                  odd.add((l, r))
                              d = -1
                              break
                      # By this point, the changesets are sufficiently compared that
                      # we don't really care about ordering. However, this leaves
                      # some race conditions in the tests, so we compare on the
                      # number of files modified, the files contained in each
                      # changeset, and the branchpoints in the change to ensure test
                      # output remains stable.
                      # recommended replacement for cmp from
                      # https://docs.python.org/3.0/whatsnew/3.0.html
                      c = lambda x, y: (x > y) - (x < y)
                      # Sort bigger changes first.
                      if not d:
                          d = c(len(l.entries), len(r.entries))
                      # Try sorting by filename in the change.
                      if not d:
                          d = c([e.file for e in l.entries], [e.file for e in r.entries])
                      # Try and put changes without a branch point before ones with
                      # a branch point.
                      if not d:
                          d = c(len(l.branchpoints), len(r.branchpoints))
                      return d
                  changesets.sort(cscmp)
                  # Collect tags
                  globaltags = {}
                  for c in changesets:
                      for e in c.entries:
                          for tag in e.tags:
                              # remember which is the latest changeset to have this tag
                              globaltags[tag] = c
                  for c in changesets:
                      tags = set()
                      for e in c.entries:
                          tags.update(e.tags)
                      # remember tags only if this is the latest changeset to have it
                      c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
                  # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
                  # by inserting dummy changesets with two parents, and handle
                  # {{mergefrombranch BRANCHNAME}} by setting two parents.
                  if mergeto is None:
                      mergeto = r'{{mergetobranch ([-\w]+)}}'
                  if mergeto:
                      mergeto = re.compile(mergeto)
                  if mergefrom is None:
                      mergefrom = r'{{mergefrombranch ([-\w]+)}}'
                  if mergefrom:
                      mergefrom = re.compile(mergefrom)
                  versions = {}    # changeset index where we saw any particular file version
                  branches = {}    # changeset index where we saw a branch
                  n = len(changesets)
                  i = 0
                  while i < n:
                      c = changesets[i]
                      for f in c.entries:
                          versions[(f.rcs, f.revision)] = i
                      p = None
                      if c.branch in branches:
                          p = branches[c.branch]
                      else:
                          # first changeset on a new branch
                          # the parent is a changeset with the branch in its
                          # branchpoints such that it is the latest possible
                          # commit without any intervening, unrelated commits.
                          for candidate in xrange(i):
                              if c.branch not in changesets[candidate].branchpoints:
                                  if p is not None:
                                      break
                                  continue
                              p = candidate
                      c.parents = []
                      if p is not None:
                          p = changesets[p]
                          # Ensure no changeset has a synthetic changeset as a parent.
                          while p.synthetic:
                              assert len(p.parents) <= 1, \
                                     _('synthetic changeset cannot have multiple parents')
                              if p.parents:
                                  p = p.parents[0]
                              else:
                                  p = None
                                  break
                          if p is not None:
                              c.parents.append(p)
                      if c.mergepoint:
                          if c.mergepoint == 'HEAD':
                              c.mergepoint = None
                          c.parents.append(changesets[branches[c.mergepoint]])
                      if mergefrom:
                          m = mergefrom.search(c.comment)
                          if m:
                              m = m.group(1)
                              if m == 'HEAD':
                                  m = None
                              try:
                                  candidate = changesets[branches[m]]
                              except KeyError:
                                  ui.warn(_("warning: CVS commit message references "
                                            "non-existent branch %r:\n%s\n")
                                          % (m, c.comment))
                              if m in branches and c.branch != m and not candidate.synthetic:
                                  c.parents.append(candidate)
                      if mergeto:
                          m = mergeto.search(c.comment)
                          if m:
                              if m.groups():
                                  m = m.group(1)
                                  if m == 'HEAD':
                                      m = None
                              else:
                                  m = None   # if no group found then merge to HEAD
                              if m in branches and c.branch != m:
                                  # insert empty changeset for merge
                                  cc = changeset(
                                      author=c.author, branch=m, date=c.date,
                                      comment='convert-repo: CVS merge from branch %s'
                                      % c.branch,
                                      entries=[], tags=[],
                                      parents=[changesets[branches[m]], c])
                                  changesets.insert(i + 1, cc)
                                  branches[m] = i + 1
                                  # adjust our loop counters now we have inserted a new entry
                                  n += 1
                                  i += 2
                                  continue
                      branches[c.branch] = i
                      i += 1
                  # Drop synthetic changesets (safe now that we have ensured no other
                  # changesets can have them as parents).
                  i = 0
                  while i < len(changesets):
                      if changesets[i].synthetic:
                          del changesets[i]
                      else:
                          i += 1
                  # Number changesets
                  for i, c in enumerate(changesets):
                      c.id = i + 1
                  if odd:
                      for l, r in odd:
                          if l.id is not None and r.id is not None:
                              ui.warn(_('changeset %d is both before and after %d\n')
                                      % (l.id, r.id))
                  ui.status(_('%d changeset entries\n') % len(changesets))
                  hook.hook(ui, None, "cvschangesets", True, changesets=changesets)
                  return changesets
              def debugcvsps(ui, *args, **opts):
                  '''Show the changesets computed from a CVS rlog, cvsps style.

                  The rlog is read for the current directory, or for each path
                  named in args, and grouped into changesets by matching commit
                  log entries and dates. Every changeset that passes the branch
                  and revision filters given in opts is written to ui.
                  '''
                  # new_cache takes precedence over update_cache
                  cache = None
                  if opts["new_cache"]:
                      cache = "write"
                  elif opts["update_cache"]:
                      cache = "update"
                  revisions = opts["revisions"]
                  try:
                      if not args:
                          log = createlog(ui, root=opts["root"], cache=cache)
                      else:
                          log = []
                          for path in args:
                              log.extend(createlog(ui, path, root=opts["root"],
                                                   cache=cache))
                  except logerror as err:
                      # report the failure through ui and stop without raising
                      ui.write("%r\n" % err)
                      return
                  changesets = createchangeset(ui, log, opts["fuzz"])
                  del log
                  skipping = len(revisions)  # truthy until the start revision is seen
                  lastid = {}      # branch -> id of the latest changeset seen on it
                  forkedfrom = {}  # branch -> (branch, id) of its first parent
                  def show(cs):
                      # Note: trailing spaces on several lines here are needed to
                      #       have bug-for-bug compatibility with cvsps.
                      ui.write('---------------------\n')
                      ui.write(('PatchSet %d \n' % cs.id))
                      stamp = util.datestr(cs.date, '%Y/%m/%d %H:%M:%S %1%2')
                      ui.write(('Date: %s\n' % stamp))
                      ui.write(('Author: %s\n' % cs.author))
                      ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
                      plural = 's' if len(cs.tags) > 1 else ''
                      taglist = ','.join(cs.tags) or '(none)'
                      ui.write(('Tag%s: %s \n' % (plural, taglist)))
                      if cs.branchpoints:
                          points = ', '.join(sorted(cs.branchpoints))
                          ui.write(('Branchpoints: %s \n') % points)
                      if opts["parents"] and cs.parents:
                          if len(cs.parents) > 1:
                              ids = ','.join([str(p.id) for p in cs.parents])
                              ui.write(('Parents: %s\n' % ids))
                          else:
                              ui.write(('Parent: %d\n' % cs.parents[0].id))
                      if opts["ancestors"]:
                          # follow the recorded branch ancestry until a falsy
                          # branch (displayed as HEAD) is reached
                          branch = cs.branch
                          chain = []
                          while branch:
                              branch, parentid = forkedfrom[branch]
                              chain.append('%s:%d:%d' % (branch or "HEAD", parentid,
                                                         lastid[branch]))
                          if chain:
                              ui.write(('Ancestors: %s\n' % (','.join(chain))))
                      ui.write(('Log:\n'))
                      ui.write('%s\n\n' % cs.comment)
                      ui.write(('Members: \n'))
                      for entry in cs.entries:
                          name = entry.file
                          # show file names relative to the configured prefix
                          if name.startswith(opts["prefix"]):
                              name = name[len(opts["prefix"]):]
                          oldrev = '.'.join([str(x) for x in entry.parent])
                          newrev = '.'.join([str(x) for x in entry.revision])
                          ui.write('\t%s:%s->%s%s \n' % (
                                  name, oldrev or 'INITIAL', newrev,
                                  ['', '(DEAD)'][entry.dead]))
                      ui.write('\n')
                  # Print changesets (optionally filtered)
                  for cs in changesets:
                      if opts["ancestors"]:
                          # remember where a branch came from the first time it
                          # shows up, and always track its newest changeset id
                          if cs.branch not in lastid and cs.parents:
                              parentid = cs.parents[0].id
                              if parentid:
                                  forkedfrom[cs.branch] = (
                                      changesets[parentid - 1].branch, parentid)
                          lastid[cs.branch] = cs.id
                      # limit by branches
                      wanted = opts["branches"]
                      if wanted and (cs.branch or 'HEAD') not in wanted:
                          continue
                      if not skipping:
                          show(cs)
                      # have we seen the start tag?
                      if revisions and skipping:
                          first = revisions[0]
                          if first == str(cs.id) or first in cs.tags:
                              skipping = False
                      # see if we reached the end tag
                      if len(revisions) > 1 and not skipping:
                          last = revisions[1]
                          if last == str(cs.id) or last in cs.tags:
                              break

tests/test-convert-cvs.t

0 +154 0

              #require cvs
                $ cvscall()
                > {
                >     cvs -f "$@"
                > }
                $ hgcat()
                > {
                >     hg --cwd src-hg cat -r tip "$1"
                > }
                $ echo "[extensions]" >> $HGRCPATH
                $ echo "convert = " >> $HGRCPATH
                $ cat > cvshooks.py <<EOF
                > def cvslog(ui,repo,hooktype,log):
                >     print "%s hook: %d entries"%(hooktype,len(log))
                >
                > def cvschangesets(ui,repo,hooktype,changesets):
                >     print "%s hook: %d changesets"%(hooktype,len(changesets))
                > EOF
                $ hookpath=`pwd`
                $ cat <<EOF >> $HGRCPATH
                > [hooks]
                > cvslog = python:$hookpath/cvshooks.py:cvslog
                > cvschangesets = python:$hookpath/cvshooks.py:cvschangesets
                > EOF
              create cvs repository
                $ mkdir cvsrepo
                $ cd cvsrepo
                $ CVSROOT=`pwd`
                $ export CVSROOT
                $ CVS_OPTIONS=-f
                $ export CVS_OPTIONS
                $ cd ..
                $ rmdir cvsrepo
                $ cvscall -q -d "$CVSROOT" init
              create source directory
                $ mkdir src-temp
                $ cd src-temp
                $ echo a > a
                $ mkdir b
                $ cd b
                $ echo c > c
                $ cd ..
              import source directory
                $ cvscall -q import -m import src INITIAL start
                N src/a
                N src/b/c
                No conflicts created by this import
                $ cd ..
              checkout source directory
                $ cvscall -q checkout src
                U src/a
                U src/b/c
              commit a new revision changing b/c
                $ cd src
                $ sleep 1
                $ echo c >> b/c
                $ cvscall -q commit -mci0 . | grep '<--'
                $TESTTMP/cvsrepo/src/b/c,v  <--  *c (glob)
                $ cd ..
              convert fresh repo and also check localtimezone option
              NOTE: This doesn't check all time zones -- it merely determines that
              the configuration option is taking effect.
              An arbitrary (U.S.) time zone is used here.  TZ=US/Hawaii is selected
              since it does not use DST (unlike other U.S. time zones) and is always
              a fixed difference from UTC.
                $ TZ=US/Hawaii hg convert --config convert.localtimezone=True src src-hg
                initializing destination src-hg repository
                connecting to $TESTTMP/cvsrepo
                scanning source...
                collecting CVS rlog
                5 log entries
                cvslog hook: 5 entries
                creating changesets
                3 changeset entries
                cvschangesets hook: 3 changesets
                sorting...
                converting...
                2 Initial revision
                1 ci0
                0 import
                updating tags
                $ hgcat a
                a
                $ hgcat b/c
                c
                c
              convert fresh repo with --filemap
                $ echo include b/c > filemap
                $ hg convert --filemap filemap src src-filemap
                initializing destination src-filemap repository
                connecting to $TESTTMP/cvsrepo
                scanning source...
                collecting CVS rlog
                5 log entries
                cvslog hook: 5 entries
                creating changesets
                3 changeset entries
                cvschangesets hook: 3 changesets
                sorting...
                converting...
                2 Initial revision
                1 ci0
                0 import
                filtering out empty revision
                repository tip rolled back to revision 1 (undo convert)
                updating tags
                $ hgcat b/c
                c
                c
                $ hg -R src-filemap log --template '{rev} {desc} files: {files}\n'
                2 update tags files: .hgtags
                1 ci0 files: b/c
                0 Initial revision files: b/c
              convert full repository (issue1649)
                $ cvscall -q -d "$CVSROOT" checkout -d srcfull "." | grep -v CVSROOT
                U srcfull/src/a
                U srcfull/src/b/c
                $ ls srcfull
                CVS
                CVSROOT
                src
                $ hg convert srcfull srcfull-hg \
                >     | grep -v 'log entries' | grep -v 'hook:' \
                >     | grep -v '^[0-3] .*' # filter instable changeset order
                initializing destination srcfull-hg repository
                connecting to $TESTTMP/cvsrepo
                scanning source...
                collecting CVS rlog
                creating changesets
 changeset entries
                sorting...
                converting...
                updating tags
                $ hg cat -r tip --cwd srcfull-hg src/a
                a
                $ hg cat -r tip --cwd srcfull-hg src/b/c
                c
                c
              commit new file revisions
                $ cd src
                $ echo a >> a
                $ echo c >> b/c
                $ cvscall -q commit -mci1 . | grep '<--'
                $TESTTMP/cvsrepo/src/a,v  <--  a
                $TESTTMP/cvsrepo/src/b/c,v  <--  *c (glob)
                $ cd ..
              convert again
                $ TZ=US/Hawaii hg convert --config convert.localtimezone=True src src-hg
                connecting to $TESTTMP/cvsrepo
                scanning source...
                collecting CVS rlog
                7 log entries
                cvslog hook: 7 entries
                creating changesets
                4 changeset entries
                cvschangesets hook: 4 changesets
                sorting...
                converting...
                0 ci1
                $ hgcat a
                a
                a
                $ hgcat b/c
                c
                c
                c
              convert again with --filemap
                $ hg convert --filemap filemap src src-filemap
                connecting to $TESTTMP/cvsrepo
                scanning source...
                collecting CVS rlog
                7 log entries
                cvslog hook: 7 entries
                creating changesets
                4 changeset entries
                cvschangesets hook: 4 changesets
                sorting...
                converting...
                0 ci1
                $ hgcat b/c
                c
                c
                c
                $ hg -R src-filemap log --template '{rev} {desc} files: {files}\n'
                3 ci1 files: b/c
                2 update tags files: .hgtags
                1 ci0 files: b/c
                0 Initial revision files: b/c
              commit branch
                $ cd src
                $ cvs -q update -r1.1 b/c
                U b/c
                $ cvs -q tag -b branch
                T a
                T b/c
                $ cvs -q update -r branch > /dev/null
                $ sleep 1
                $ echo d >> b/c
                $ cvs -q commit -mci2 . | grep '<--'
                $TESTTMP/cvsrepo/src/b/c,v  <--  *c (glob)
                $ cd ..
              convert again
                $ TZ=US/Hawaii hg convert --config convert.localtimezone=True src src-hg
                connecting to $TESTTMP/cvsrepo
                scanning source...
                collecting CVS rlog
                8 log entries
                cvslog hook: 8 entries
                creating changesets
                5 changeset entries
                cvschangesets hook: 5 changesets
                sorting...
                converting...
                0 ci2
                $ hgcat b/c
                c
                d
              convert again with --filemap
                $ TZ=US/Hawaii hg convert --config convert.localtimezone=True --filemap filemap src src-filemap
                connecting to $TESTTMP/cvsrepo
                scanning source...
                collecting CVS rlog
                8 log entries
                cvslog hook: 8 entries
                creating changesets
                5 changeset entries
                cvschangesets hook: 5 changesets
                sorting...
                converting...
                0 ci2
                $ hgcat b/c
                c
                d
                $ hg -R src-filemap log --template '{rev} {desc} files: {files}\n'
                4 ci2 files: b/c
                3 ci1 files: b/c
                2 update tags files: .hgtags
                1 ci0 files: b/c
                0 Initial revision files: b/c
              commit a new revision with funny log message
                $ cd src
                $ sleep 1
                $ echo e >> a
                $ cvscall -q commit -m'funny
                > ----------------------------
                > log message' . | grep '<--' |\
                >  sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
                checking in src/a,v
              commit new file revisions with some fuzz
                $ sleep 1
                $ echo f >> a
                $ cvscall -q commit -mfuzzy . | grep '<--'
                $TESTTMP/cvsrepo/src/a,v  <--  a
                $ sleep 4 # the two changes will be split if fuzz < 4
                $ echo g >> b/c
                $ cvscall -q commit -mfuzzy . | grep '<--'
                $TESTTMP/cvsrepo/src/b/c,v  <--  *c (glob)
                $ cd ..
              convert again
                $ TZ=US/Hawaii hg convert --config convert.cvsps.fuzz=2 --config convert.localtimezone=True src src-hg
                connecting to $TESTTMP/cvsrepo
                scanning source...
                collecting CVS rlog
                11 log entries
                cvslog hook: 11 entries
                creating changesets
                8 changeset entries
                cvschangesets hook: 8 changesets
                sorting...
                converting...
                2 funny
                1 fuzzy
                0 fuzzy
                $ hg -R src-hg log -G --template '{rev} ({branches}) {desc} date: {date|date} files: {files}\n'
                o  8 (branch) fuzzy date: * -1000 files: b/c (glob)
                |
                o  7 (branch) fuzzy date: * -1000 files: a (glob)
                |
                o  6 (branch) funny
                |  ----------------------------
                |  log message date: * -1000 files: a (glob)
                o  5 (branch) ci2 date: * -1000 files: b/c (glob)
                o  4 () ci1 date: * -1000 files: a b/c (glob)
                |
                o  3 () update tags date: * +0000 files: .hgtags (glob)
                |
                | o  2 (INITIAL) import date: * -1000 files: (glob)
                | |
                o |  1 () ci0 date: * -1000 files: b/c (glob)
                |/
                o  0 () Initial revision date: * -1000 files: a b/c (glob)
              testing debugcvsps
                $ cd src
                $ hg debugcvsps --fuzz=2 -x >/dev/null
              commit a new revision changing a and removing b/c
                $ cvscall -q update -A
                U a
                U b/c
                $ sleep 1
                $ echo h >> a
                $ cvscall -Q remove -f b/c
                $ cvscall -q commit -mci | grep '<--'
                $TESTTMP/cvsrepo/src/a,v  <--  a
                $TESTTMP/cvsrepo/src/b/c,v  <--  *c (glob)
              update and verify the cvsps cache
                $ hg debugcvsps --fuzz=2 -u
                collecting CVS rlog
                13 log entries
                cvslog hook: 13 entries
                creating changesets
                11 changeset entries
                cvschangesets hook: 11 changesets
                ---------------------
                PatchSet 1
                Date: * (glob)
                Author: * (glob)
                Branch: HEAD
                Tag: (none)
                Branchpoints: INITIAL
                Log:
                Initial revision
                Members:
                	a:INITIAL->1.1
                ---------------------
                PatchSet 2
                Date: * (glob)
                Author: * (glob)
                Branch: HEAD
                Tag: (none)
                Branchpoints: INITIAL, branch
                Log:
                Initial revision
                Members:
                	b/c:INITIAL->1.1
                ---------------------
                PatchSet 3
                Date: * (glob)
                Author: * (glob)
                Branch: INITIAL
                Tag: start
                Log:
                import
                Members:
                	a:1.1->1.1.1.1
                	b/c:1.1->1.1.1.1
                ---------------------
                PatchSet 4
                Date: * (glob)
                Author: * (glob)
                Branch: HEAD
                Tag: (none)
                Log:
                ci0
                Members:
                	b/c:1.1->1.2
                ---------------------
                PatchSet 5
                Date: * (glob)
                Author: * (glob)
                Branch: HEAD
                Tag: (none)
                Branchpoints: branch
                Log:
                ci1
                Members:
                	a:1.1->1.2
                ---------------------
                PatchSet 6
                Date: * (glob)
                Author: * (glob)
                Branch: HEAD
                Tag: (none)
                Log:
                ci1
                Members:
                	b/c:1.2->1.3
                ---------------------
                PatchSet 7
                Date: * (glob)
                Author: * (glob)
                Branch: branch
                Tag: (none)
                Log:
                ci2
                Members:
                	b/c:1.1->1.1.2.1
                ---------------------
                PatchSet 8
                Date: * (glob)
                Author: * (glob)
                Branch: branch
                Tag: (none)
                Log:
                funny
                ----------------------------
                log message
                Members:
                	a:1.2->1.2.2.1
                ---------------------
                PatchSet 9
                Date: * (glob)
                Author: * (glob)
                Branch: branch
                Tag: (none)
                Log:
                fuzzy
                Members:
                	a:1.2.2.1->1.2.2.2
                ---------------------
                PatchSet 10
                Date: * (glob)
                Author: * (glob)
                Branch: branch
                Tag: (none)
                Log:
                fuzzy
                Members:
                	b/c:1.1.2.1->1.1.2.2
                ---------------------
                PatchSet 11
                Date: * (glob)
                Author: * (glob)
                Branch: HEAD
                Tag: (none)
                Log:
                ci
                Members:
                	a:1.2->1.3
                	b/c:1.3->1.4(DEAD)
                $ cd ..
+             Test transcoding CVS log messages (issue5597)
+             =============================================
+             To portably emulate commit messages in multiple (non-ASCII) encodings,
+             this test scenario writes a CVS history file (*,v file) directly via
+             Python code.
+             Commit messages of versions 1.2 - 1.4 use U+3042 in the 3 encodings below.
+             |encoding  |byte sequence | decodable as:      |
+             |          |              | utf-8 euc-jp cp932 |
+             +----------+--------------+--------------------+
+             |utf-8     |\xe3\x81\x82  |  o      x     x    |
+             |euc-jp    |\xa4\xa2      |  x      o     o    |
+             |cp932     |\x82\xa0      |  x      x     o    |
+               $ mkdir -p cvsrepo/transcoding
+               $ python <<EOF
+               > fp = open('cvsrepo/transcoding/file,v', 'w')
+               > fp.write(('''
+               > head	1.4;
+               > access;
+               > symbols
+               > 	start:1.1.1.1 INITIAL:1.1.1;
+               > locks; strict;
+               > comment	@# @;
+               >
+               >
+               > 1.4
+               > date	2017.07.10.00.00.04;	author nobody;	state Exp;
+               > branches;
+               > next	1.3;
+               > commitid	10059635D016A510FFA;
+               >
+               > 1.3
+               > date	2017.07.10.00.00.03;	author nobody;	state Exp;
+               > branches;
+               > next	1.2;
+               > commitid	10059635CFF6A4FF34E;
+               >
+               > 1.2
+               > date	2017.07.10.00.00.02;	author nobody;	state Exp;
+               > branches;
+               > next	1.1;
+               > commitid	10059635CFD6A4D5095;
+               >
+               > 1.1
+               > date	2017.07.10.00.00.01;	author nobody;	state Exp;
+               > branches
+               > 	1.1.1.1;
+               > next	;
+               > commitid	10059635CFB6A4A3C33;
+               >
+               > 1.1.1.1
+               > date	2017.07.10.00.00.01;	author nobody;	state Exp;
+               > branches;
+               > next	;
+               > commitid	10059635CFB6A4A3C33;
+               >
+               >
+               > desc
+               > @@
+               >
+               >
+               > 1.4
+               > log
+               > @''' + u'\u3042'.encode('cp932') + ''' (cp932)
+               > @
+               > text
+               > @1
+               > 2
+               > 3
+               > 4
+               > @
+               >
+               >
+               > 1.3
+               > log
+               > @''' + u'\u3042'.encode('euc-jp') + ''' (euc-jp)
+               > @
+               > text
+               > @d4 1
+               > @
+               >
+               >
+               > 1.2
+               > log
+               > @''' + u'\u3042'.encode('utf-8') +  ''' (utf-8)
+               > @
+               > text
+               > @d3 1
+               > @
+               >
+               >
+               > 1.1
+               > log
+               > @Initial revision
+               > @
+               > text
+               > @d2 1
+               > @
+               >
+               >
+               > 1.1.1.1
+               > log
+               > @import
+               > @
+               > text
+               > @@
+               > ''').lstrip())
+               > EOF
+               $ cvscall -q checkout transcoding
+               U transcoding/file
+             Test converting in normal case
+             ------------------------------
+             (filtering by grep in order to check only the form of debug messages)
+               $ hg convert --config convert.cvsps.logencoding=utf-8,euc-jp,cp932 -q --debug transcoding transcoding-hg | grep 'transcoding by'
+               transcoding by utf-8: 1.1 of file
+               transcoding by utf-8: 1.1.1.1 of file
+               transcoding by utf-8: 1.2 of file
+               transcoding by euc-jp: 1.3 of file
+               transcoding by cp932: 1.4 of file
+               $ hg -R transcoding-hg --encoding utf-8 log -T "{rev}: {desc}\n"
+               5: update tags
+               4: import
+               3: \xe3\x81\x82 (cp932) (esc)
+               2: \xe3\x81\x82 (euc-jp) (esc)
+               1: \xe3\x81\x82 (utf-8) (esc)
+               0: Initial revision
+               $ rm -rf transcoding-hg
+             Test converting in error cases
+             ------------------------------
+             unknown encoding in convert.cvsps.logencoding
+               $ hg convert --config convert.cvsps.logencoding=foobar -q transcoding transcoding-hg
+               abort: unknown encoding: foobar
+               (check convert.cvsps.logencoding configuration)
+               [255]
+               $ rm -rf transcoding-hg
+             no acceptable encoding in convert.cvsps.logencoding
+               $ hg convert --config convert.cvsps.logencoding=utf-8,euc-jp -q transcoding transcoding-hg
+               abort: no encoding can transcode CVS log message for 1.4 of file
+               (check convert.cvsps.logencoding configuration)
+               [255]
+               $ rm -rf transcoding-hg

tests/test-convert.t

0 +6 0

                $ cat >> $HGRCPATH <<EOF
                > [extensions]
                > convert=
                > [convert]
                > hg.saverev=False
                > EOF
                $ hg help convert
                hg convert [OPTION]... SOURCE [DEST [REVMAP]]
                convert a foreign SCM repository to a Mercurial one.
                    Accepted source formats [identifiers]:
                    - Mercurial [hg]
                    - CVS [cvs]
                    - Darcs [darcs]
                    - git [git]
                    - Subversion [svn]
                    - Monotone [mtn]
                    - GNU Arch [gnuarch]
                    - Bazaar [bzr]
                    - Perforce [p4]
                    Accepted destination formats [identifiers]:
                    - Mercurial [hg]
                    - Subversion [svn] (history on branches is not preserved)
                    If no revision is given, all revisions will be converted. Otherwise,
                    convert will only import up to the named revision (given in a format
                    understood by the source).
                    If no destination directory name is specified, it defaults to the basename
                    of the source with "-hg" appended. If the destination repository doesn't
                    exist, it will be created.
                    By default, all sources except Mercurial will use --branchsort. Mercurial
                    uses --sourcesort to preserve original revision numbers order. Sort modes
                    have the following effects:
                    --branchsort  convert from parent to child revision when possible, which
                                  means branches are usually converted one after the other.
                                  It generates more compact repositories.
                    --datesort    sort revisions by date. Converted repositories have good-
                                  looking changelogs but are often an order of magnitude
                                  larger than the same ones generated by --branchsort.
                    --sourcesort  try to preserve source revisions order, only supported by
                                  Mercurial sources.
                    --closesort   try to move closed revisions as close as possible to parent
                                  branches, only supported by Mercurial sources.
                    If "REVMAP" isn't given, it will be put in a default location
                    ("<dest>/.hg/shamap" by default). The "REVMAP" is a simple text file that
                    maps each source commit ID to the destination ID for that revision, like
                    so:
                      <source ID> <destination ID>
                    If the file doesn't exist, it's automatically created. It's updated on
                    each commit copied, so 'hg convert' can be interrupted and can be run
                    repeatedly to copy new commits.
                    The authormap is a simple text file that maps each source commit author to
                    a destination commit author. It is handy for source SCMs that use unix
                    logins to identify authors (e.g.: CVS). One line per author mapping and
                    the line format is:
                      source author = destination author
                    Empty lines and lines starting with a "#" are ignored.
                    The filemap is a file that allows filtering and remapping of files and
                    directories. Each line can contain one of the following directives:
                      include path/to/file-or-dir
                      exclude path/to/file-or-dir
                      rename path/to/source path/to/destination
                    Comment lines start with "#". A specified path matches if it equals the
                    full relative name of a file or one of its parent directories. The
                    "include" or "exclude" directive with the longest matching path applies,
                    so line order does not matter.
                    The "include" directive causes a file, or all files under a directory, to
                    be included in the destination repository. The default if there are no
                    "include" statements is to include everything. If there are any "include"
                    statements, nothing else is included. The "exclude" directive causes files
                    or directories to be omitted. The "rename" directive renames a file or
                    directory if it is converted. To rename from a subdirectory into the root
                    of the repository, use "." as the path to rename to.
                    "--full" will make sure the converted changesets contain exactly the right
                    files with the right content. It will make a full conversion of all files,
                    not just the ones that have changed. Files that already are correct will
                    not be changed. This can be used to apply filemap changes when converting
                    incrementally. This is currently only supported for Mercurial and
                    Subversion.
                    The splicemap is a file that allows insertion of synthetic history,
                    letting you specify the parents of a revision. This is useful if you want
                    to e.g. give a Subversion merge two parents, or graft two disconnected
                    series of history together. Each entry contains a key, followed by a
                    space, followed by one or two comma-separated values:
                      key parent1, parent2
                    The key is the revision ID in the source revision control system whose
                    parents should be modified (same format as a key in .hg/shamap). The
                    values are the revision IDs (in either the source or destination revision
                    control system) that should be used as the new parents for that node. For
                    example, if you have merged "release-1.0" into "trunk", then you should
                    specify the revision on "trunk" as the first parent and the one on the
                    "release-1.0" branch as the second.
                    The branchmap is a file that allows you to rename a branch when it is
                    being brought in from whatever external repository. When used in
                    conjunction with a splicemap, it allows for a powerful combination to help
                    fix even the most badly mismanaged repositories and turn them into nicely
                    structured Mercurial repositories. The branchmap contains lines of the
                    form:
                      original_branch_name new_branch_name
                    where "original_branch_name" is the name of the branch in the source
                    repository, and "new_branch_name" is the name of the branch in the
                    destination repository. No whitespace is allowed in the new branch name.
                    This can be used to (for instance) move code in one repository from
                    "default" to a named branch.
                    Mercurial Source
                    ################
                    The Mercurial source recognizes the following configuration options, which
                    you can set on the command line with "--config":
                    convert.hg.ignoreerrors
                                  ignore integrity errors when reading. Use it to fix
                                  Mercurial repositories with missing revlogs, by converting
                                  from and to Mercurial. Default is False.
                    convert.hg.saverev
                                  store original revision ID in changeset (forces target IDs
                                  to change). It takes a boolean argument and defaults to
                                  False.
                    convert.hg.startrev
                                  specify the initial Mercurial revision. The default is 0.
                    convert.hg.revs
                                  revset specifying the source revisions to convert.
                    CVS Source
                    ##########
                    CVS source will use a sandbox (i.e. a checked-out copy) from CVS to
                    indicate the starting point of what will be converted. Direct access to
                    the repository files is not needed, unless of course the repository is
                    ":local:". The conversion uses the top level directory in the sandbox to
                    find the CVS repository, and then uses CVS rlog commands to find files to
                    convert. This means that unless a filemap is given, all files under the
                    starting directory will be converted, and that any directory
                    reorganization in the CVS sandbox is ignored.
                    The following options can be used with "--config":
                    convert.cvsps.cache
                                  Set to False to disable remote log caching, for testing and
                                  debugging purposes. Default is True.
                    convert.cvsps.fuzz
                                  Specify the maximum time (in seconds) that is allowed
                                  between commits with identical user and log message in a
                                  single changeset. When very large files were checked in as
                                  part of a changeset then the default may not be long enough.
                                  The default is 60.
+                   convert.cvsps.logencoding
+                                 Specify encoding name to be used for transcoding CVS log
+                                 messages. Multiple encoding names can be specified as a list
+                                 (see 'hg help config.Syntax'), but only the first acceptable
+                                 encoding in the list is used per CVS log entry. This
+                                 transcoding is executed before the cvslog hook below.
                    convert.cvsps.mergeto
                                  Specify a regular expression to which commit log messages
                                  are matched. If a match occurs, then the conversion process
                                  will insert a dummy revision merging the branch on which
                                  this log message occurs to the branch indicated in the
                                  regex. Default is "{{mergetobranch ([-\w]+)}}"
                    convert.cvsps.mergefrom
                                  Specify a regular expression to which commit log messages
                                  are matched. If a match occurs, then the conversion process
                                  will add the most recent revision on the branch indicated in
                                  the regex as the second parent of the changeset. Default is
                                  "{{mergefrombranch ([-\w]+)}}"
                    convert.localtimezone
                                  use local time (as determined by the TZ environment
                                  variable) for changeset date/times. The default is False
                                  (use UTC).
                    hooks.cvslog  Specify a Python function to be called at the end of
                                  gathering the CVS log. The function is passed a list with
                                  the log entries, and can modify the entries in-place, or add
                                  or delete them.
                    hooks.cvschangesets
                                  Specify a Python function to be called after the changesets
                                  are calculated from the CVS log. The function is passed a
                                  list with the changeset entries, and can modify the
                                  changesets in-place, or add or delete them.
                    An additional "debugcvsps" Mercurial command allows the builtin changeset
                    merging code to be run without doing a conversion. Its parameters and
                    output are similar to that of cvsps 2.1. Please see the command help for
                    more details.
                    Subversion Source
                    #################
                    Subversion source detects classical trunk/branches/tags layouts. By
                    default, the supplied "svn://repo/path/" source URL is converted as a
                    single branch. If "svn://repo/path/trunk" exists it replaces the default
                    branch. If "svn://repo/path/branches" exists, its subdirectories are
                    listed as possible branches. If "svn://repo/path/tags" exists, it is
                    looked for tags referencing converted branches. Default "trunk",
                    "branches" and "tags" values can be overridden with the following options. Set
                    them to paths relative to the source URL, or leave them blank to disable
                    auto detection.
                    The following options can be set with "--config":
                    convert.svn.branches
                                  specify the directory containing branches. The default is
                                  "branches".
                    convert.svn.tags
                                  specify the directory containing tags. The default is
                                  "tags".
                    convert.svn.trunk
                                  specify the name of the trunk branch. The default is
                                  "trunk".
                    convert.localtimezone
                                  use local time (as determined by the TZ environment
                                  variable) for changeset date/times. The default is False
                                  (use UTC).
                    Source history can be retrieved starting at a specific revision, instead
                    of being integrally converted. Only single branch conversions are
                    supported.
                    convert.svn.startrev
                                  specify start Subversion revision number. The default is 0.
                    Git Source
                    ##########
                    The Git importer converts commits from all reachable branches (refs in
                    refs/heads) and remotes (refs in refs/remotes) to Mercurial. Branches are
                    converted to bookmarks with the same name, with the leading 'refs/heads'
                    stripped. Git submodules are converted to Git subrepos in Mercurial.
                    The following options can be set with "--config":
                    convert.git.similarity
                                  specify how similar files modified in a commit must be to be
                                  imported as renames or copies, as a percentage between "0"
                                  (disabled) and "100" (files must be identical). For example,
                                  "90" means that a delete/add pair will be imported as a
                                  rename if more than 90% of the file hasn't changed. The
                                  default is "50".
                    convert.git.findcopiesharder
                                  while detecting copies, look at all files in the working
                                  copy instead of just changed ones. This is very expensive
                                  for large projects, and is only effective when
                                  "convert.git.similarity" is greater than 0. The default is
                                  False.
                    convert.git.renamelimit
                                  perform rename and copy detection up to this many changed
                                  files in a commit. Increasing this will make rename and copy
                                  detection more accurate but will significantly slow down
                                  computation on large projects. The option is only relevant
                                  if "convert.git.similarity" is greater than 0. The default
                                  is "400".
                    convert.git.committeractions
                                  list of actions to take when processing author and committer
                                  values.
                        Git commits have separate author (who wrote the commit) and committer
                        (who applied the commit) fields. Not all destinations support separate
                        author and committer fields (including Mercurial). This config option
                        controls what to do with these author and committer fields during
                        conversion.
                        A value of "messagedifferent" will append a "committer: ..." line to
                        the commit message if the Git committer is different from the author.
                        The prefix of that line can be specified using the syntax
                        "messagedifferent=<prefix>". e.g. "messagedifferent=git-committer:".
                        When a prefix is specified, a space will always be inserted between
                        the prefix and the value.
                        "messagealways" behaves like "messagedifferent" except it will always
                        result in a "committer: ..." line being appended to the commit
                        message. This value is mutually exclusive with "messagedifferent".
                        "dropcommitter" will remove references to the committer. Only
                        references to the author will remain. Actions that add references to
                        the committer will have no effect when this is set.
                        "replaceauthor" will replace the value of the author field with the
                        committer. Other actions that add references to the committer will
                        still take effect when this is set.
                        The default is "messagedifferent".
                    convert.git.extrakeys
                                  list of extra keys from commit metadata to copy to the
                                  destination. Some Git repositories store extra metadata in
                                  commits. By default, this non-default metadata will be lost
                                  during conversion. Setting this config option can retain
                                  that metadata. Some built-in keys such as "parent" and
                                  "branch" are not allowed to be copied.
                    convert.git.remoteprefix
                                  remote refs are converted as bookmarks with
                                  "convert.git.remoteprefix" as a prefix followed by a /. The
                                  default is 'remote'.
                    convert.git.saverev
                                  whether to store the original Git commit ID in the metadata
                                  of the destination commit. The default is True.
                    convert.git.skipsubmodules
                                  does not convert root level .gitmodules files or files with
                                  mode indicating a submodule. Default is False.
                    Perforce Source
                    ###############
                    The Perforce (P4) importer can be given a p4 depot path or a client
                    specification as source. It will convert all files in the source to a flat
                    Mercurial repository, ignoring labels, branches and integrations. Note
                    that when a depot path is given you then usually should specify a target
                    directory, because otherwise the target may be named "...-hg".
                    The following options can be set with "--config":
                    convert.p4.encoding
                                  specify the encoding to use when decoding standard output of
                                  the Perforce command line tool. The default is default
                                  system encoding.
                    convert.p4.startrev
                                  specify initial Perforce revision (a Perforce changelist
                                  number).
                    Mercurial Destination
                    #####################
                    The Mercurial destination will recognize Mercurial subrepositories in the
                    destination directory, and update the .hgsubstate file automatically if
                    the destination subrepositories contain the <dest>/<sub>/.hg/shamap file.
                    Converting a repository with subrepositories requires converting a single
                    repository at a time, from the bottom up.
                    The following options are supported:
                    convert.hg.clonebranches
                                  dispatch source branches in separate clones. The default is
                                  False.
                    convert.hg.tagsbranch
                                  branch name for tag revisions, defaults to "default".
                    convert.hg.usebranchnames
                                  preserve branch names. The default is True.
                    convert.hg.sourcename
                                  records the given string as a 'convert_source' extra value
                                  on each commit made in the target repository. The default is
                                  None.
                    All Destinations
                    ################
                    All destination types accept the following options:
                    convert.skiptags
                                  does not convert tags from the source repo to the target
                                  repo. The default is False.
                options ([+] can be repeated):
                 -s --source-type TYPE source repository type
                 -d --dest-type TYPE   destination repository type
                 -r --rev REV [+]      import up to source revision REV
                 -A --authormap FILE   remap usernames using this file
                    --filemap FILE     remap file names using contents of file
                    --full             apply filemap changes by converting all files again
                    --splicemap FILE   splice synthesized history into place
                    --branchmap FILE   change branch names while converting
                    --branchsort       try to sort changesets by branches
                    --datesort         try to sort changesets by date
                    --sourcesort       preserve source changesets order
                    --closesort        try to reorder closed revisions
                (some details hidden, use --verbose to show complete help)
                $ hg init a
                $ cd a
                $ echo a > a
                $ hg ci -d'0 0' -Ama
                adding a
                $ hg cp a b
                $ hg ci -d'1 0' -mb
                $ hg rm a
                $ hg ci -d'2 0' -mc
                $ hg mv b a
                $ hg ci -d'3 0' -md
                $ echo a >> a
                $ hg ci -d'4 0' -me
                $ cd ..
                $ hg convert a 2>&1 | grep -v 'subversion python bindings could not be loaded'
                assuming destination a-hg
                initializing destination a-hg repository
                scanning source...
                sorting...
                converting...
 a
 b
 c
 d
 e
                $ hg --cwd a-hg pull ../a
                pulling from ../a
                searching for changes
                no changes found
              conversion to existing file should fail
                $ touch bogusfile
                $ hg convert a bogusfile
                initializing destination bogusfile repository
                abort: cannot create new bundle repository
                [255]
              #if unix-permissions no-root
              conversion to dir without permissions should fail
                $ mkdir bogusdir
                $ chmod 000 bogusdir
                $ hg convert a bogusdir
                abort: Permission denied: 'bogusdir'
                [255]
              user permissions should succeed
                $ chmod 700 bogusdir
                $ hg convert a bogusdir
                initializing destination bogusdir repository
                scanning source...
                sorting...
                converting...
 a
 b
 c
 d
 e
              #endif
              test pre and post conversion actions
                $ echo 'include b' > filemap
                $ hg convert --debug --filemap filemap a partialb | \
                >     grep 'run hg'
                run hg source pre-conversion action
                run hg sink pre-conversion action
                run hg sink post-conversion action
                run hg source post-conversion action
              converting empty dir should fail "nicely"
                $ mkdir emptydir
              override $PATH to ensure p4 not visible; use $PYTHON in case we're
              running from a devel copy, not a temp installation
                $ PATH="$BINDIR" $PYTHON "$BINDIR"/hg convert emptydir
                assuming destination emptydir-hg
                initializing destination emptydir-hg repository
                emptydir does not look like a CVS checkout
                $TESTTMP/emptydir does not look like a Git repository (glob)
                emptydir does not look like a Subversion repository
                emptydir is not a local Mercurial repository
                emptydir does not look like a darcs repository
                emptydir does not look like a monotone repository
                emptydir does not look like a GNU Arch repository
                emptydir does not look like a Bazaar repository
                cannot find required "p4" tool
                abort: emptydir: missing or unsupported repository
                [255]
              convert with imaginary source type
                $ hg convert --source-type foo a a-foo
                initializing destination a-foo repository
                abort: foo: invalid source repository type
                [255]
              convert with imaginary sink type
                $ hg convert --dest-type foo a a-foo
                abort: foo: invalid destination repository type
                [255]
              testing: convert must not produce duplicate entries in fncache
                $ hg convert a b
                initializing destination b repository
                scanning source...
                sorting...
                converting...
 a
 b
 c
 d
 e
              contents of fncache file:
                $ cat b/.hg/store/fncache | sort
                data/a.i
                data/b.i
              test bogus URL
                $ hg convert -q bzr+ssh://foobar@selenic.com/baz baz
                abort: bzr+ssh://foobar@selenic.com/baz: missing or unsupported repository
                [255]
              test revset converted() lookup
                $ hg --config convert.hg.saverev=True convert a c
                initializing destination c repository
                scanning source...
                sorting...
                converting...
 a
 b
 c
 d
 e
                $ echo f > c/f
                $ hg -R c ci -d'0 0' -Amf
                adding f
                created new head
                $ hg -R c log -r "converted(09d945a62ce6)"
                changeset:   1:98c3dd46a874
                user:        test
                date:        Thu Jan 01 00:00:01 1970 +0000
                summary:     b
                $ hg -R c log -r "converted()"
                changeset:   0:31ed57b2037c
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                summary:     a
                changeset:   1:98c3dd46a874
                user:        test
                date:        Thu Jan 01 00:00:01 1970 +0000
                summary:     b
                changeset:   2:3b9ca06ef716
                user:        test
                date:        Thu Jan 01 00:00:02 1970 +0000
                summary:     c
                changeset:   3:4e0debd37cf2
                user:        test
                date:        Thu Jan 01 00:00:03 1970 +0000
                summary:     d
                changeset:   4:9de3bc9349c5
                user:        test
                date:        Thu Jan 01 00:00:04 1970 +0000
                summary:     e
              test specifying a sourcename
                $ echo g > a/g
                $ hg -R a ci -d'0 0' -Amg
                adding g
                $ hg --config convert.hg.sourcename=mysource --config convert.hg.saverev=True convert a c
                scanning source...
                sorting...
                converting...
 g
                $ hg -R c log -r tip --template '{extras % "{extra}\n"}'
                branch=default
                convert_revision=a3bc6100aa8ec03e00aaf271f1f50046fb432072
                convert_source=mysource
                $ cat > branchmap.txt << EOF
                > old branch new_branch
                > EOF
                $ hg -R a branch -q 'old branch'
                $ echo gg > a/g
                $ hg -R a ci -m 'branch name with spaces'
                $ hg convert --branchmap branchmap.txt a d
                initializing destination d repository
                scanning source...
                sorting...
                converting...
 a
 b
 c
 d
 e
 g
 branch name with spaces
                $ hg -R a branches
                old branch                     6:a24a66ade009
                default                        5:a3bc6100aa8e (inactive)
                $ hg -R d branches
                new_branch                     6:64ed208b732b
                default                        5:a3bc6100aa8e (inactive)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages