upstream/mercurial-mirror Files · hgext/convert/cvsps.py

merge with crew-stable

Patrick Mezard - - Load All Authors

File last commit:

r7097:d4218edd default


                r7185:67ba7493

default

Download file

             cvsps.py
        
                    587 lines
            
             | 19.4 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / hgext / convert / cvsps.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Frank Kingswood
    
convert: cvsps.py - code to generate changesets from a CVS repository

              r6687
            
      #

      # Mercurial built-in replacement for cvsps.

      #

      # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>

      #

      # This software may be used and distributed according to the terms

      # of the GNU General Public License, incorporated herein by reference.

      import os

      import re

      import sys

      import cPickle as pickle

      from mercurial import util

      from mercurial.i18n import _

      def listsort(list, key):
          """Sort list in place, ordering the items by key(item).

          Helper to sort by key in Python 2.3, whose list.sort() does not
          accept a 'key' keyword argument. Returns None, like list.sort().
          """
          try:
              list.sort(key=key)
          except TypeError:
              # Reached when list.sort() rejects the 'key' keyword. A
              # TypeError raised while computing or comparing keys lands
              # here too; the retry below would then raise again.
              list.sort(lambda l, r: cmp(key(l), key(r)))

        Frank Kingswood
    
convert: cvsps.py - code to generate changesets from a CVS repository

              r6687
            
      class logentry(object):
          '''One file revision as read from the CVS log.

          Instances are plain attribute bags; the code in this module uses
          the following attributes:
              .author    - author name as CVS knows it
              .branch    - name of branch this revision is on
              .branches  - revision tuple of branches starting at this revision
              .comment   - commit message
              .date      - the commit date as a (time, tz) tuple
              .dead      - true if file revision is dead
              .file      - Name of file
              .lines     - a tuple (+lines, -lines) or None
              .parent    - Previous revision of this entry
              .rcs       - name of file as returned from CVS
              .revision  - revision number as tuple
              .tags      - list of tags on the file
          '''
          def __init__(self, **entries):
              # every keyword argument is stored as an instance attribute
              attrs = vars(self)
              for name, value in entries.items():
                  attrs[name] = value

      class logerror(Exception):
          '''Error raised by the CVS log code in this module.'''

        Patrick Mezard
    
convert: fix builtin cvsps under Windows...

              r7097
            
      def getrepopath(cvspath):
          """Return the repository path from a CVS path.

          >>> getrepopath('/foo/bar')
          '/foo/bar'
          >>> getrepopath('c:/foo/bar')
          'c:/foo/bar'
          >>> getrepopath(':pserver:10/foo/bar')
          '/foo/bar'
          >>> getrepopath(':pserver:10c:/foo/bar')
          '/foo/bar'
          >>> getrepopath(':pserver:/foo/bar')
          '/foo/bar'
          >>> getrepopath(':pserver:c:/foo/bar')
          'c:/foo/bar'
          >>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
          '/foo/bar'
          >>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
          'c:/foo/bar'
          """
          # The CVS manual spells a CVS path as:
          # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
          #
          # A Windows absolute path begins with a drive letter such as 'c:',
          # which collides with the ':' separators. The rule used here: a
          # drive letter is exactly one character, while every CVS component
          # in front of the repository path has at least two.
          pieces = cvspath.split(':')
          if len(pieces) < 2:
              # no colon at all: the input already is the repository path
              return cvspath
          path = pieces[-1]
          before = pieces[-2]
          if len(before) <= 1:
              # short enough to be a drive letter: glue it back on
              return before + ':' + path
          # A port number directly followed by a drive letter ('10c:') is
          # ambiguous; it is assumed never to happen, so the component is
          # taken as a CVS one and dropped. Leading port digits are removed.
          return path.lstrip('0123456789')

        Frank Kingswood
    
convert: cvsps.py - code to generate changesets from a CVS repository

              r6687
            
      def createlog(ui, directory=None, root="", rlog=True, cache=None):
          '''Collect the CVS rlog

          Runs 'cvs rlog' (or 'cvs log' when rlog is false) and parses its
          output into a list of logentry objects, which is returned.

          ui        - object whose status(), note() and debug() methods are
                      used to report progress
          directory - passed as the last argument to cvs; when None it is
                      read from CVS/Repository in the current directory, and
                      CVS/Root (if present) replaces root
          root      - CVS root; when empty the CVSROOT environment variable
                      is used
          rlog      - true to run 'cvs rlog', false to run 'cvs log'
          cache     - when true, the resulting log is pickled under
                      ~/.hg.cvsps; when equal to 'update', an existing cache
                      is loaded first and only entries newer than its last
                      entry are requested from cvs

          Raises logerror when directory is None and the current directory
          is not a CVS sandbox, or when the cached log overlaps the new
          entries. Raises Exception when cvs reports that it aborted, and
          AssertionError when the output does not have the expected layout.
          '''

          # Because we store many duplicate commit log messages, reusing strings
          # saves a lot of memory and pickle storage space.
          _scache = {}
          def scache(s):
              "return a shared version of a string"
              return _scache.setdefault(s, s)

          def readstripped(path):
              "return the stripped contents of path, closing the file afterwards"
              fp = file(path)
              try:
                  return fp.read().strip()
              finally:
                  fp.close()

          ui.status(_('collecting CVS rlog\n'))

          log = []      # list of logentry objects containing the CVS state

          # patterns to match in CVS (r)log output, by state of use
          re_00 = re.compile('RCS file: (.+)$')
          re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
          # NOTE(review): every line has its trailing newline removed before
          # matching, so this pattern (which requires one) cannot match.
          # Left unchanged so that cvs warnings do not start raising here;
          # confirm the intent before "fixing" it.
          re_02 = re.compile('cvs (r?log|server): (.+)\n$')
          re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
          re_10 = re.compile('Working file: (.+)$')
          re_20 = re.compile('symbolic names:')
          re_30 = re.compile('\t(.+): ([\\d.]+)$')
          re_31 = re.compile('----------------------------$')
          re_32 = re.compile('=============================================================================$')
          re_50 = re.compile(r'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
          re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
          re_70 = re.compile('branches: (.+);$')

          prefix = ''   # leading path to strip of what we get from CVS

          if directory is None:
              # Current working directory

              # Get the real directory in the repository
              try:
                  prefix = readstripped(os.path.join('CVS','Repository'))
                  if prefix == ".":
                      prefix = ""
                  directory = prefix
              except IOError:
                  raise logerror('Not a CVS sandbox')

              if prefix and not prefix.endswith(os.sep):
                  prefix += os.sep

              # Use the Root file in the sandbox, if it exists
              try:
                  root = readstripped(os.path.join('CVS','Root'))
              except IOError:
                  pass

          if not root:
              root = os.environ.get('CVSROOT', '')

          # read log cache if one exists
          oldlog = []
          date = None

          if cache:
              cachedir = os.path.expanduser('~/.hg.cvsps')
              if not os.path.exists(cachedir):
                  os.mkdir(cachedir)

              # The cvsps cache pickle needs a uniquified name, based on the
              # repository location. The address may have all sort of nasties
              # in it, slashes, colons and such. So here we take just the
              # alphanumerics, concatenated in a way that does not mix up the
              # various components, so that
              #    :pserver:user@server:/path
              # and
              #    /pserver/user/server/path
              # are mapped to different cache file names.
              cachefile = root.split(":") + [directory, "cache"]
              cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
              cachefile = os.path.join(cachedir,
                                       '.'.join([s for s in cachefile if s]))

          if cache == 'update':
              # NOTE: the cache is unpickled as-is; it is only ever read from
              # the ~/.hg.cvsps directory computed above.
              try:
                  ui.note(_('reading cvs log cache %s\n') % cachefile)
                  fp = file(cachefile)
                  try:
                      oldlog = pickle.load(fp)
                  finally:
                      fp.close()
                  ui.note(_('cache has %d log entries\n') % len(oldlog))
              except Exception:
                  # a broken or missing cache is only reported; the log is
                  # then collected from scratch. sys.exc_info() is used so
                  # the handler parses on every Python version.
                  e = sys.exc_info()[1]
                  ui.note(_('error reading cache: %r\n') % e)

              if oldlog:
                  date = oldlog[-1].date    # last commit date as a (time,tz) tuple
                  date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

          # build the CVS commandline
          cmd = ['cvs', '-q']
          if root:
              cmd.append('-d%s' % root)
              p = util.normpath(getrepopath(root))
              if not p.endswith('/'):
                  p += '/'
              prefix = p + util.normpath(prefix)
          cmd.append(['log', 'rlog'][rlog])
          if date:
              # no space between option and date string
              cmd.append('-d>%s' % date)
          cmd.append(directory)

          # state machine begins here
          tags = {}     # dictionary of revisions on current file with their tags
          state = 0
          store = False # set when a new record can be appended

          cmd = [util.shellquote(arg) for arg in cmd]
          ui.note(_("running %s\n") % (' '.join(cmd)))
          ui.debug(_("prefix=%r directory=%r root=%r\n") % (prefix, directory, root))

          for line in util.popen(' '.join(cmd)):
              if line.endswith('\n'):
                  line = line[:-1]
              #ui.debug('state=%d line=%r\n' % (state, line))

              if state == 0:
                  # initial state, consume input until we see 'RCS file'
                  match = re_00.match(line)
                  if match:
                      rcs = match.group(1)
                      tags = {}
                      if rlog:
                          # rlog prints no 'Working file' line, so the file
                          # name is derived from the RCS path: drop its last
                          # two characters, the prefix and any Attic part
                          filename = util.normpath(rcs[:-2])
                          if filename.startswith(prefix):
                              filename = filename[len(prefix):]
                          if filename.startswith('/'):
                              filename = filename[1:]
                          if filename.startswith('Attic/'):
                              filename = filename[6:]
                          else:
                              filename = filename.replace('/Attic/', '/')
                          state = 2
                          continue
                      state = 1
                      continue
                  match = re_01.match(line)
                  if match:
                      raise Exception(match.group(1))
                  match = re_02.match(line)
                  if match:
                      raise Exception(match.group(2))
                  if re_03.match(line):
                      raise Exception(line)

              elif state == 1:
                  # expect 'Working file' (only when using log instead of rlog)
                  match = re_10.match(line)
                  assert match, _('RCS file must be followed by working file')
                  filename = util.normpath(match.group(1))
                  state = 2

              elif state == 2:
                  # expect 'symbolic names'
                  if re_20.match(line):
                      state = 3

              elif state == 3:
                  # read the symbolic names and store as tags
                  match = re_30.match(line)
                  if match:
                      rev = [int(x) for x in match.group(2).split('.')]

                      # Convert magic branch number to an odd-numbered one
                      revn = len(rev)
                      if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                          rev = rev[:-2] + rev[-1:]
                      rev = tuple(rev)

                      if rev not in tags:
                          tags[rev] = []
                      tags[rev].append(match.group(1))

                  elif re_31.match(line):
                      state = 5
                  elif re_32.match(line):
                      state = 0

              elif state == 4:
                  # expecting '------' separator before first revision
                  # NOTE(review): no transition in this function sets state
                  # to 4 (state 3 goes straight to 5), so this branch is
                  # never taken as written.
                  if re_31.match(line):
                      state = 5
                  else:
                      assert not re_32.match(line), _('Must have at least some revisions')

              elif state == 5:
                  # expecting revision number and possibly (ignored) lock indication
                  # we create the logentry here from values stored in states 0 to 4,
                  # as this state is re-entered for subsequent revisions of a file.
                  match = re_50.match(line)
                  assert match, _('expected revision number')
                  e = logentry(rcs=scache(rcs), file=scache(filename),
                          revision=tuple([int(x) for x in match.group(1).split('.')]),
                          branches=[], parent=None)
                  state = 6

              elif state == 6:
                  # expecting date, author, state, lines changed
                  match = re_60.match(line)
                  assert match, _('revision must be followed by date line')
                  d = match.group(1)
                  if d[2] == '/':
                      # Y2K
                      d = '19' + d

                  if len(d.split()) != 3:
                      # cvs log dates always in GMT
                      d = d + ' UTC'
                  e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
                  e.author = scache(match.group(2))
                  e.dead = match.group(3).lower() == 'dead'

                  # groups 5 and 6 are the optional '+N' and '-N' line counts
                  if match.group(5):
                      if match.group(6):
                          e.lines = (int(match.group(5)), int(match.group(6)))
                      else:
                          e.lines = (int(match.group(5)), 0)
                  elif match.group(6):
                      e.lines = (0, int(match.group(6)))
                  else:
                      e.lines = None
                  e.comment = []
                  state = 7

              elif state == 7:
                  # read the revision numbers of branches that start at this revision
                  # or store the commit log message otherwise
                  m = re_70.match(line)
                  if m:
                      e.branches = [tuple([int(y) for y in x.strip().split('.')])
                                      for x in m.group(1).split(';')]
                      state = 8
                  elif re_31.match(line):
                      state = 5
                      store = True
                  elif re_32.match(line):
                      state = 0
                      store = True
                  else:
                      e.comment.append(line)

              elif state == 8:
                  # store commit log message
                  if re_31.match(line):
                      state = 5
                      store = True
                  elif re_32.match(line):
                      state = 0
                      store = True
                  else:
                      e.comment.append(line)

              if store:
                  # clean up the results and save in the log.
                  store = False
                  e.tags = util.sort([scache(x) for x in tags.get(e.revision, [])])
                  e.comment = scache('\n'.join(e.comment))

                  # revisions with an even number (> 3) of components get the
                  # first tag recorded for their branch number, if any
                  revn = len(e.revision)
                  if revn > 3 and (revn % 2) == 0:
                      e.branch = tags.get(e.revision[:-1], [None])[0]
                  else:
                      e.branch = None

                  log.append(e)

                  if len(log) % 100 == 0:
                      ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')

          listsort(log, key=lambda x:(x.rcs, x.revision))

          # find parent revisions of individual files
          versions = {}
          for e in log:
              branch = e.revision[:-1]
              p = versions.get((e.rcs, branch), None)
              if p is None:
                  # first revision seen on this branch: the parent is the
                  # revision the branch number hangs off
                  p = e.revision[:-2]
              e.parent = p
              versions[(e.rcs, branch)] = e.revision

          # update the log cache
          if cache:
              if log:
                  # join up the old and new logs
                  listsort(log, key=lambda x:x.date)

                  if oldlog and oldlog[-1].date >= log[0].date:
                      raise logerror('Log cache overlaps with new log entries,'
                                     ' re-run without cache.')

                  log = oldlog + log

                  # write the new cachefile
                  ui.note(_('writing cvs log cache %s\n') % cachefile)
                  fp = file(cachefile, 'w')
                  try:
                      pickle.dump(log, fp)
                  finally:
                      # close explicitly so the pickle is flushed to disk
                      fp.close()
              else:
                  log = oldlog

          ui.status(_('%d log entries\n') % len(log))

          return log

      class changeset(object):
          '''A group of log entries treated as one commit.

          Instances are plain attribute bags; the code in this module uses
          the following attributes:
              .author    - author name as CVS knows it
              .branch    - name of branch this changeset is on, or None
              .comment   - commit message
              .date      - the commit date as a (time,tz) tuple
              .entries   - list of logentry objects in this changeset
              .parents   - list of one or two parent changesets
              .tags      - list of tags on this changeset
          '''
          def __init__(self, **entries):
              # every keyword argument is stored as an instance attribute
              attrs = vars(self)
              for name, value in entries.items():
                  attrs[name] = value

      def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
          '''Convert log into changesets.

          ui        - object whose status() method is used to report progress
          log       - list of logentry objects; it is sorted in place
          fuzz      - an entry joins the current changeset only if its date
                      is no more than fuzz after the date of the entry added
                      last (presumably seconds - confirm against the values
                      stored in logentry.date)
          mergefrom - regular expression (string) searched for in commit
                      messages; group 1 names the branch merged from. None
                      selects '{{mergefrombranch ([-\\w]+)}}', any other
                      false value disables the feature
          mergeto   - same for the branch merged to; None selects
                      '{{mergetobranch ([-\\w]+)}}'

          Returns the list of changeset objects in sorted order, each with
          its .entries, .tags, .parents and .id (numbered from 1) filled in.
          '''

          ui.status(_('creating changesets\n'))

          # Merge changesets

          listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))

          changesets = []
          files = {}
          c = None
          for e in log:

              # Check if log entry belongs to the current changeset or not.
              if not (c and
                        e.comment == c.comment and
                        e.author == c.author and
                        e.branch == c.branch and
                        ((c.date[0] + c.date[1]) <=
                         (e.date[0] + e.date[1]) <=
                         (c.date[0] + c.date[1]) + fuzz) and
                        e.file not in files):
                  c = changeset(comment=e.comment, author=e.author,
                                branch=e.branch, date=e.date, entries=[])
                  changesets.append(c)
                  files = {}
                  if len(changesets) % 100 == 0:
                      t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
                      ui.status(util.ellipsis(t, 80) + '\n')

              c.entries.append(e)
              files[e.file] = True
              c.date = e.date       # changeset date is date of latest commit in it

          # Sort files in each changeset

          def pathcompare(l, r):
              'Mimic cvsps sorting order'
              l = l.split('/')
              r = r.split('/')
              nl = len(l)
              nr = len(r)
              n = min(nl, nr)
              for i in range(n):
                  # a path that runs out of components first sorts first
                  if i + 1 == nl and nl < nr:
                      return -1
                  elif i + 1 == nr and nl > nr:
                      return +1
                  elif l[i] < r[i]:
                      return -1
                  elif l[i] > r[i]:
                      return +1
              return 0

          def entitycompare(l, r):
              "compare two log entries by their file path"
              return pathcompare(l.file, r.file)

          for c in changesets:
              c.entries.sort(entitycompare)

          # Sort changesets by date

          def cscmp(l, r):
              "compare two changesets by date, then by file parentage"
              d = sum(l.date) - sum(r.date)
              if d:
                  return d

              # detect vendor branches and initial commits on a branch
              lrevs = {}
              for e in l.entries:
                  lrevs[e.rcs] = e.revision
              rrevs = {}
              for e in r.entries:
                  rrevs[e.rcs] = e.revision

              d = 0
              for e in l.entries:
                  # l holds a revision whose parent is in r: l comes after r
                  if rrevs.get(e.rcs, None) == e.parent:
                      assert not d
                      d = 1
                      break

              for e in r.entries:
                  # the assert fires if both changesets depend on each other
                  if lrevs.get(e.rcs, None) == e.parent:
                      assert not d
                      d = -1
                      break

              return d

          changesets.sort(cscmp)

          # Collect tags

          globaltags = {}
          for c in changesets:
              for e in c.entries:
                  for tag in e.tags:
                      # remember which is the latest changeset to have this tag
                      globaltags[tag] = c

          for c in changesets:
              tags = {}
              for e in c.entries:
                  for tag in e.tags:
                      tags[tag] = True
              # remember tags only if this is the latest changeset to have it
              c.tags = util.sort([tag for tag in tags if globaltags[tag] is c])

          # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
          # by inserting dummy changesets with two parents, and handle
          # {{mergefrombranch BRANCHNAME}} by setting two parents.

          if mergeto is None:
              mergeto = r'{{mergetobranch ([-\w]+)}}'
          if mergeto:
              mergeto = re.compile(mergeto)

          if mergefrom is None:
              mergefrom = r'{{mergefrombranch ([-\w]+)}}'
          if mergefrom:
              mergefrom = re.compile(mergefrom)

          versions = {}    # changeset index where we saw any particular file version
          branches = {}    # changeset index where we saw a branch
          n = len(changesets)
          i = 0
          while i < n:
              c = changesets[i]

              for f in c.entries:
                  versions[(f.rcs, f.revision)] = i

              p = None
              if c.branch in branches:
                  p = branches[c.branch]
              else:
                  # first changeset on this branch: use the latest changeset
                  # holding the parent revision of any of its files
                  for f in c.entries:
                      p = max(p, versions.get((f.rcs, f.parent), None))

              c.parents = []
              if p is not None:
                  c.parents.append(changesets[p])

              if mergefrom:
                  m = mergefrom.search(c.comment)
                  if m:
                      m = m.group(1)
                      if m == 'HEAD':
                          m = None
                      if m in branches and c.branch != m:
                          c.parents.append(changesets[branches[m]])

              if mergeto:
                  m = mergeto.search(c.comment)
                  if m:
                      try:
                          m = m.group(1)
                          if m == 'HEAD':
                              m = None
                      except IndexError:
                          # group(1) does not exist when a caller-supplied
                          # pattern has no group
                          m = None   # if no group found then merge to HEAD
                      if m in branches and c.branch != m:
                          # insert empty changeset for merge
                          cc = changeset(author=c.author, branch=m, date=c.date,
                                  comment='convert-repo: CVS merge from branch %s' % c.branch,
                                  entries=[], tags=[], parents=[changesets[branches[m]], c])
                          changesets.insert(i + 1, cc)
                          branches[m] = i + 1

                          # adjust our loop counters now we have inserted a new entry
                          n += 1
                          i += 2
                          continue

              branches[c.branch] = i
              i += 1

          # Number changesets

          for i, c in enumerate(changesets):
              c.id = i + 1

          ui.status(_('%d changeset entries\n') % len(changesets))

          return changesets

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Frank Kingswood convert: cvsps.py - code to generate changesets from a CVS repository	r6687	#
		# Mercurial built-in replacement for cvsps.
		#
		# Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
		#
		# This software may be used and distributed according to the terms
		# of the GNU General Public License, incorporated herein by reference.

		import os
		import re
		import sys
		import cPickle as pickle
		from mercurial import util
		from mercurial.i18n import _

		def listsort(list, key):
		    "sort list in place by key(item); works on Python 2.3 as well"
		    def bykey(left, right):
		        # comparison-function form of the same ordering
		        return cmp(key(left), key(right))

		    try:
		        list.sort(key=key)
		    except TypeError:
		        # list.sort() does not accept key= on Python 2.3
		        list.sort(bykey)
Frank Kingswood convert: cvsps.py - code to generate changesets from a CVS repository	r6687
		class logentry(object):
		    '''One revision of one file, as collected from the CVS log.

		    Instances are plain attribute bags; the code in this module uses
		    the following attributes:
		        .author    - author name as CVS knows it
		        .branch    - name of branch this revision is on
		        .branches  - revision tuple of branches starting at this revision
		        .comment   - commit message
		        .date      - the commit date as a (time, tz) tuple
		        .dead      - true if file revision is dead
		        .file      - Name of file
		        .lines     - a tuple (+lines, -lines) or None
		        .parent    - Previous revision of this entry
		        .rcs       - name of file as returned from CVS
		        .revision  - revision number as tuple
		        .tags      - list of tags on the file
		    '''
		    def __init__(self, **entries):
		        # every keyword argument becomes an instance attribute
		        vars(self).update(entries)

		class logerror(Exception):
		    '''Error raised while collecting the CVS log.'''

def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    >>> getrepopath('/foo/bar')
    '/foo/bar'
    >>> getrepopath('c:/foo/bar')
    'c:/foo/bar'
    >>> getrepopath(':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:c:/foo/bar')
    'c:/foo/bar'
    >>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
    'c:/foo/bar'
    """
    # According to CVS manual, CVS paths are expressed like:
    # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # Windows absolute paths start with a drive letter such as 'c:',
    # which looks just like another ':'-separated component. To tell
    # them apart we assume a drive letter is exactly one character and
    # every other CVS component before the path has at least two.
    pieces = cvspath.split(':')
    repopath = pieces[-1]
    if len(pieces) > 1:
        before = pieces[-2]
        if len(before) > 1:
            # A regular CVS component precedes the path, so only a port
            # number can still be glued to its front. A port directly
            # followed by a Windows drive letter is ambiguous; we assume
            # that never happens and treat it as a CVS component.
            repopath = repopath.lstrip('0123456789')
        else:
            # single character (or empty) component: a drive letter
            repopath = before + ':' + repopath
    return repopath

def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog

    Runs "cvs log" or "cvs rlog" and parses its output into a list of
    logentry objects, one per file revision.

    ui        - used for status/note/debug output
    directory - directory to log; when None the current directory must be
                a CVS sandbox and the value is read from CVS/Repository
                (and root from CVS/Root, if that file exists)
    root      - CVS root; falls back to the CVSROOT environment variable
                when empty
    rlog      - run "cvs rlog" when true, "cvs log" otherwise
    cache     - when true, the resulting log is pickled under ~/.hg.cvsps;
                when equal to 'update', an existing cache is loaded first
                and only revisions newer than its last entry are requested

    Returns the list of logentry objects (cached entries first when a
    cache was loaded).

    Raises logerror when the current directory is not a CVS sandbox or
    when the cache overlaps the newly read entries, Exception when cvs
    reports an error, and AssertionError on unexpected output.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    def readstripped(path):
        "return the stripped contents of path, closing the file afterwards"
        fp = open(path)
        try:
            return fp.read().strip()
        finally:
            fp.close()

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile('RCS file: (.+)$')
    re_01 = re.compile(r'cvs \[r?log aborted\]: (.+)$')
    # The alternations below used to be written '\|', which matches a
    # literal '|' character instead of alternating.
    # NOTE(review): re_02 still ends in '\n' although the trailing newline
    # is stripped from every line before matching, so it looks like it can
    # never match; left as is because enabling it would turn every
    # "cvs rlog: ..." notice into a fatal error -- confirm intent.
    re_02 = re.compile('cvs (r?log|server): (.+)\n$')
    re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
    re_10 = re.compile('Working file: (.+)$')
    re_20 = re.compile('symbolic names:')
    re_30 = re.compile('\t(.+): ([\\d.]+)$')
    re_31 = re.compile('----------------------------$')
    re_32 = re.compile('=============================================================================$')
    re_50 = re.compile(r'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
    re_70 = re.compile('branches: (.+);$')

    prefix = ''   # leading path to strip of what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            prefix = readstripped(os.path.join('CVS', 'Repository'))
            if prefix == ".":
                prefix = ""
            directory = prefix
        except IOError:
            raise logerror('Not a CVS sandbox')

        if prefix and not prefix.endswith(os.sep):
            prefix += os.sep

        # Use the Root file in the sandbox, if it exists
        try:
            root = readstripped(os.path.join('CVS', 'Root'))
        except IOError:
            pass

    if not root:
        root = os.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumerics, concatenated in a way that does not mix up the
        # various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":") + [directory, "cache"]
        cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir,
                                 '.'.join([s for s in cachefile if s]))

    if cache == 'update':
        try:
            ui.note(_('reading cvs log cache %s\n') % cachefile)
            # NOTE: unpickling runs whatever the cache file contains; the
            # file lives in the user's own ~/.hg.cvsps directory.
            fp = open(cachefile)
            try:
                oldlog = pickle.load(fp)
            finally:
                fp.close()
            ui.note(_('cache has %d log entries\n') % len(oldlog))
        except Exception:
            # an unreadable cache is not fatal, we simply start over
            e = sys.exc_info()[1]
            ui.note(_('error reading cache: %r\n') % e)

        if oldlog:
            date = oldlog[-1].date    # last commit date as a (time,tz) tuple
            date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith('/'):
            p += '/'
        prefix = p + util.normpath(prefix)
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}     # dictionary of revisions on current file with their tags
    state = 0
    store = False # set when a new record can be appended

    cmd = [util.shellquote(arg) for arg in cmd]
    ui.note(_("running %s\n") % (' '.join(cmd)))
    ui.debug(_("prefix=%r directory=%r root=%r\n") % (prefix, directory, root))

    for line in util.popen(' '.join(cmd)):
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # rlog prints no 'Working file' line, so derive the
                    # file name from the RCS path: drop the last two
                    # characters (presumably the ',v' suffix), the
                    # repository prefix and any Attic directory
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise Exception(match.group(1))
            match = re_02.match(line)
            if match:
                raise Exception(match.group(2))
            if re_03.match(line):
                raise Exception(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            # (no transition in this function ever sets state to 4)
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('Must have at least some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs), file=scache(filename),
                         revision=tuple([int(x) for x in match.group(1).split('.')]),
                         branches=[], parent=None)
            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            if d[2] == '/':
                # Y2K
                d = '19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + ' UTC'
            e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None
            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                              for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = util.sort([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache('\n'.join(e.comment))

            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            log.append(e)

            if len(log) % 100 == 0:
                ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')

    listsort(log, key=lambda x:(x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            # first revision seen on this branch: the parent is the
            # revision the branch number was derived from
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            listsort(log, key=lambda x:x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror('Log cache overlaps with new log entries,'
                               ' re-run without cache.')

            log = oldlog + log

            # write the new cachefile
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            fp = open(cachefile, 'w')
            try:
                pickle.dump(log, fp)
            finally:
                # close explicitly so the cache is flushed to disk
                fp.close()
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    return log


		class changeset(object):
		    '''A group of log entries that is treated as a single commit.

		    Instances are plain attribute bags; the code in this module uses
		    the following attributes:
		        .author    - author name as CVS knows it
		        .branch    - name of branch this changeset is on, or None
		        .comment   - commit message
		        .date      - the commit date as a (time,tz) tuple
		        .entries   - list of logentry objects in this changeset
		        .parents   - list of one or two parent changesets
		        .tags      - list of tags on this changeset
		    '''
		    def __init__(self, **entries):
		        # every keyword argument becomes an instance attribute
		        vars(self).update(entries)

		def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
		    '''Convert log into changesets.

		    ui        - used for status output
		    log       - list of logentry objects; it is sorted in place
		    fuzz      - log entries with the same comment, author and branch are
		                put in one changeset while each entry's date is no more
		                than fuzz later than the date of the previous entry
		    mergefrom - regular expression searched for in commit messages;
		                group 1 names a branch that becomes a second parent.
		                None selects '{{mergefrombranch BRANCHNAME}}', any other
		                false value disables the feature
		    mergeto   - regular expression searched for in commit messages;
		                group 1 names a branch on which a dummy merge changeset
		                is inserted. None selects '{{mergetobranch BRANCHNAME}}',
		                any other false value disables the feature

		    Returns the list of changeset objects ordered by date, with .tags,
		    .parents and a 1-based .id set on each of them.
		    '''

		    def pathcompare(l, r):
		        'Mimic cvsps sorting order'
		        l = l.split('/')
		        r = r.split('/')
		        nl = len(l)
		        nr = len(r)
		        n = min(nl, nr)
		        for i in range(n):
		            if i + 1 == nl and nl < nr:
		                return -1
		            elif i + 1 == nr and nl > nr:
		                return +1
		            elif l[i] < r[i]:
		                return -1
		            elif l[i] > r[i]:
		                return +1
		        return 0

		    def entitycompare(l, r):
		        'compare two log entries by file path'
		        return pathcompare(l.file, r.file)

		    def cscmp(l, r):
		        'order changesets by date, then by file revision ancestry'
		        d = sum(l.date) - sum(r.date)
		        if d:
		            return d

		        # detect vendor branches and initial commits on a branch
		        lrevs = {}
		        for e in l.entries:
		            lrevs[e.rcs] = e.revision
		        rrevs = {}
		        for e in r.entries:
		            rrevs[e.rcs] = e.revision

		        d = 0
		        for e in l.entries:
		            if rrevs.get(e.rcs, None) == e.parent:
		                assert not d
		                d = 1
		                break

		        for e in r.entries:
		            if lrevs.get(e.rcs, None) == e.parent:
		                assert not d
		                d = -1
		                break

		        return d

		    ui.status(_('creating changesets\n'))

		    # Merge changesets

		    listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))

		    changesets = []
		    files = {}    # files already present in the current changeset
		    c = None
		    for e in log:

		        # Check if log entry belongs to the current changeset or not.
		        if not (c and
		                e.comment == c.comment and
		                e.author == c.author and
		                e.branch == c.branch and
		                ((c.date[0] + c.date[1]) <=
		                 (e.date[0] + e.date[1]) <=
		                 (c.date[0] + c.date[1]) + fuzz) and
		                e.file not in files):
		            c = changeset(comment=e.comment, author=e.author,
		                          branch=e.branch, date=e.date, entries=[])
		            changesets.append(c)
		            files = {}
		            if len(changesets) % 100 == 0:
		                t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
		                ui.status(util.ellipsis(t, 80) + '\n')

		        c.entries.append(e)
		        files[e.file] = True
		        c.date = e.date       # changeset date is date of latest commit in it

		    # Sort files in each changeset

		    for c in changesets:
		        c.entries.sort(entitycompare)

		    # Sort changesets by date

		    changesets.sort(cscmp)

		    # Collect tags

		    globaltags = {}
		    for c in changesets:
		        for e in c.entries:
		            for tag in e.tags:
		                # remember which is the latest changeset to have this tag
		                globaltags[tag] = c

		    for c in changesets:
		        tags = {}
		        for e in c.entries:
		            for tag in e.tags:
		                tags[tag] = True
		        # remember tags only if this is the latest changeset to have it
		        c.tags = util.sort([tag for tag in tags if globaltags[tag] is c])

		    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
		    # by inserting dummy changesets with two parents, and handle
		    # {{mergefrombranch BRANCHNAME}} by setting two parents.

		    if mergeto is None:
		        mergeto = r'{{mergetobranch ([-\w]+)}}'
		    if mergeto:
		        mergeto = re.compile(mergeto)

		    if mergefrom is None:
		        mergefrom = r'{{mergefrombranch ([-\w]+)}}'
		    if mergefrom:
		        mergefrom = re.compile(mergefrom)

		    versions = {}    # changeset index where we saw any particular file version
		    branches = {}    # changeset index where we saw a branch
		    n = len(changesets)
		    i = 0
		    while i<n:
		        c = changesets[i]

		        for f in c.entries:
		            versions[(f.rcs, f.revision)] = i

		        p = None
		        if c.branch in branches:
		            p = branches[c.branch]
		        else:
		            # first changeset on this branch: use the latest changeset
		            # holding the parent revision of any of its files
		            for f in c.entries:
		                p = max(p, versions.get((f.rcs, f.parent), None))

		        c.parents = []
		        if p is not None:
		            c.parents.append(changesets[p])

		        if mergefrom:
		            m = mergefrom.search(c.comment)
		            if m:
		                m = m.group(1)
		                if m == 'HEAD':
		                    m = None
		                if m in branches and c.branch != m:
		                    c.parents.append(changesets[branches[m]])

		        if mergeto:
		            m = mergeto.search(c.comment)
		            if m:
		                try:
		                    m = m.group(1)
		                    if m == 'HEAD':
		                        m = None
		                except IndexError:
		                    # a user supplied pattern may have no group at all
		                    m = None   # if no group found then merge to HEAD
		                if m in branches and c.branch != m:
		                    # insert empty changeset for merge
		                    cc = changeset(author=c.author, branch=m, date=c.date,
		                                   comment='convert-repo: CVS merge from branch %s' % c.branch,
		                                   entries=[], tags=[], parents=[changesets[branches[m]], c])
		                    changesets.insert(i + 1, cc)
		                    branches[m] = i + 1

		                    # adjust our loop counters now we have inserted a new entry
		                    n += 1
		                    i += 2
		                    continue

		        branches[c.branch] = i
		        i += 1

		    # Number changesets

		    for i, c in enumerate(changesets):
		        c.id = i + 1

		    ui.status(_('%d changeset entries\n') % len(changesets))

		    return changesets