convert: fix builtin cvsps under Windows...
Patrick Mezard
r7097:d4218edd default
hgext/convert/cvsps.py
@@ -1,548 +1,587 @@
1 1 #
2 2 # Mercurial built-in replacement for cvsps.
3 3 #
4 4 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
5 5 #
6 6 # This software may be used and distributed according to the terms
7 7 # of the GNU General Public License, incorporated herein by reference.
8 8
9 9 import os
10 10 import re
11 11 import sys
12 12 import cPickle as pickle
13 13 from mercurial import util
14 14 from mercurial.i18n import _
15 15
16 16 def listsort(list, key):
17 17 "helper to sort by key in Python 2.3"
18 18 try:
19 19 list.sort(key=key)
20 20 except TypeError:
21 21 list.sort(lambda l, r: cmp(key(l), key(r)))
22 22
23 23 class logentry(object):
24 24 '''Class logentry has the following attributes:
25 25 .author - author name as CVS knows it
26 26 .branch - name of branch this revision is on
27 27 .branches - revision tuple of branches starting at this revision
28 28 .comment - commit message
29 29 .date - the commit date as a (time, tz) tuple
30 30 .dead - true if file revision is dead
31 31 .file - Name of file
32 32 .lines - a tuple (+lines, -lines) or None
33 33 .parent - Previous revision of this entry
34 34 .rcs - name of file as returned from CVS
35 35 .revision - revision number as tuple
36 36 .tags - list of tags on the file
37 37 '''
38 38 def __init__(self, **entries):
39 39 self.__dict__.update(entries)
40 40
41 41 class logerror(Exception):
42 42 pass
43 43
44 def getrepopath(cvspath):
45 """Return the repository path from a CVS path.
46
47 >>> getrepopath('/foo/bar')
48 '/foo/bar'
49 >>> getrepopath('c:/foo/bar')
50 'c:/foo/bar'
51 >>> getrepopath(':pserver:10/foo/bar')
52 '/foo/bar'
53 >>> getrepopath(':pserver:10c:/foo/bar')
54 '/foo/bar'
55 >>> getrepopath(':pserver:/foo/bar')
56 '/foo/bar'
57 >>> getrepopath(':pserver:c:/foo/bar')
58 'c:/foo/bar'
59 >>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
60 '/foo/bar'
61 >>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
62 'c:/foo/bar'
63 """
64 # According to CVS manual, CVS paths are expressed like:
65 # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
66 #
67 # Unfortunately, Windows absolute paths start with a drive letter
68 # like 'c:' making it harder to parse. Here we assume that drive
69 # letters are only one character long and any CVS component before
70 # the repository path is at least 2 characters long, and use this
71 # to disambiguate.
72 parts = cvspath.split(':')
73 if len(parts) == 1:
74 return parts[0]
75 # Here there is an ambiguous case if we have a port number
76 # immediately followed by a Windows drive letter. We assume this
77 # never happens and decide it must be a CVS path component,
78 # therefore ignoring it.
79 if len(parts[-2]) > 1:
80 return parts[-1].lstrip('0123456789')
81 return parts[-2] + ':' + parts[-1]
82
44 83 def createlog(ui, directory=None, root="", rlog=True, cache=None):
45 84 '''Collect the CVS rlog'''
46 85
47 86 # Because we store many duplicate commit log messages, reusing strings
48 87 # saves a lot of memory and pickle storage space.
49 88 _scache = {}
50 89 def scache(s):
51 90 "return a shared version of a string"
52 91 return _scache.setdefault(s, s)
53 92
54 93 ui.status(_('collecting CVS rlog\n'))
55 94
56 95 log = [] # list of logentry objects containing the CVS state
57 96
58 97 # patterns to match in CVS (r)log output, by state of use
59 98 re_00 = re.compile('RCS file: (.+)$')
60 99 re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
61 100 re_02 = re.compile('cvs (r?log|server): (.+)\n$')
62 101 re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
63 102 re_10 = re.compile('Working file: (.+)$')
64 103 re_20 = re.compile('symbolic names:')
65 104 re_30 = re.compile('\t(.+): ([\\d.]+)$')
66 105 re_31 = re.compile('----------------------------$')
67 106 re_32 = re.compile('=============================================================================$')
68 107 re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
69 108 re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
70 109 re_70 = re.compile('branches: (.+);$')
71 110
72 111 prefix = '' # leading path to strip off what we get from CVS
73 112
74 113 if directory is None:
75 114 # Current working directory
76 115
77 116 # Get the real directory in the repository
78 117 try:
79 118 prefix = file(os.path.join('CVS','Repository')).read().strip()
80 119 if prefix == ".":
81 120 prefix = ""
82 121 directory = prefix
83 122 except IOError:
84 123 raise logerror('Not a CVS sandbox')
85 124
86 if prefix and not prefix.endswith('/'):
87 prefix += '/'
125 if prefix and not prefix.endswith(os.sep):
126 prefix += os.sep
88 127
89 128 # Use the Root file in the sandbox, if it exists
90 129 try:
91 130 root = file(os.path.join('CVS','Root')).read().strip()
92 131 except IOError:
93 132 pass
94 133
95 134 if not root:
96 135 root = os.environ.get('CVSROOT', '')
97 136
98 137 # read log cache if one exists
99 138 oldlog = []
100 139 date = None
101 140
102 141 if cache:
103 142 cachedir = os.path.expanduser('~/.hg.cvsps')
104 143 if not os.path.exists(cachedir):
105 144 os.mkdir(cachedir)
106 145
107 146 # The cvsps cache pickle needs a uniquified name, based on the
108 147 # repository location. The address may have all sorts of nasties
109 148 # in it, slashes, colons and such. So here we take just the
110 149 # alphanumerics, concatenated in a way that does not mix up the
111 150 # various components, so that
112 151 # :pserver:user@server:/path
113 152 # and
114 153 # /pserver/user/server/path
115 154 # are mapped to different cache file names.
116 155 cachefile = root.split(":") + [directory, "cache"]
117 156 cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
118 157 cachefile = os.path.join(cachedir,
119 158 '.'.join([s for s in cachefile if s]))
120 159
121 160 if cache == 'update':
122 161 try:
123 162 ui.note(_('reading cvs log cache %s\n') % cachefile)
124 163 oldlog = pickle.load(file(cachefile))
125 164 ui.note(_('cache has %d log entries\n') % len(oldlog))
126 165 except Exception, e:
127 166 ui.note(_('error reading cache: %r\n') % e)
128 167
129 168 if oldlog:
130 169 date = oldlog[-1].date # last commit date as a (time,tz) tuple
131 170 date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
132 171
133 172 # build the CVS commandline
134 173 cmd = ['cvs', '-q']
135 174 if root:
136 175 cmd.append('-d%s' % root)
137 p = root.split(':')[-1]
176 p = util.normpath(getrepopath(root))
138 177 if not p.endswith('/'):
139 178 p += '/'
140 prefix = p + prefix
179 prefix = p + util.normpath(prefix)
141 180 cmd.append(['log', 'rlog'][rlog])
142 181 if date:
143 182 # no space between option and date string
144 183 cmd.append('-d>%s' % date)
145 184 cmd.append(directory)
146 185
147 186 # state machine begins here
148 187 tags = {} # dictionary of revisions on current file with their tags
149 188 state = 0
150 189 store = False # set when a new record can be appended
151 190
152 191 cmd = [util.shellquote(arg) for arg in cmd]
153 192 ui.note(_("running %s\n") % (' '.join(cmd)))
154 193 ui.debug(_("prefix=%r directory=%r root=%r\n") % (prefix, directory, root))
155 194
156 195 for line in util.popen(' '.join(cmd)):
157 196 if line.endswith('\n'):
158 197 line = line[:-1]
159 198 #ui.debug('state=%d line=%r\n' % (state, line))
160 199
161 200 if state == 0:
162 201 # initial state, consume input until we see 'RCS file'
163 202 match = re_00.match(line)
164 203 if match:
165 204 rcs = match.group(1)
166 205 tags = {}
167 206 if rlog:
168 filename = rcs[:-2]
207 filename = util.normpath(rcs[:-2])
169 208 if filename.startswith(prefix):
170 209 filename = filename[len(prefix):]
171 210 if filename.startswith('/'):
172 211 filename = filename[1:]
173 212 if filename.startswith('Attic/'):
174 213 filename = filename[6:]
175 214 else:
176 215 filename = filename.replace('/Attic/', '/')
177 216 state = 2
178 217 continue
179 218 state = 1
180 219 continue
181 220 match = re_01.match(line)
182 221 if match:
183 222 raise Exception(match.group(1))
184 223 match = re_02.match(line)
185 224 if match:
186 225 raise Exception(match.group(2))
187 226 if re_03.match(line):
188 227 raise Exception(line)
189 228
190 229 elif state == 1:
191 230 # expect 'Working file' (only when using log instead of rlog)
192 231 match = re_10.match(line)
193 232 assert match, _('RCS file must be followed by working file')
194 filename = match.group(1)
233 filename = util.normpath(match.group(1))
195 234 state = 2
196 235
197 236 elif state == 2:
198 237 # expect 'symbolic names'
199 238 if re_20.match(line):
200 239 state = 3
201 240
202 241 elif state == 3:
203 242 # read the symbolic names and store as tags
204 243 match = re_30.match(line)
205 244 if match:
206 245 rev = [int(x) for x in match.group(2).split('.')]
207 246
208 247 # Convert magic branch number to an odd-numbered one
209 248 revn = len(rev)
210 249 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
211 250 rev = rev[:-2] + rev[-1:]
212 251 rev = tuple(rev)
213 252
214 253 if rev not in tags:
215 254 tags[rev] = []
216 255 tags[rev].append(match.group(1))
217 256
218 257 elif re_31.match(line):
219 258 state = 5
220 259 elif re_32.match(line):
221 260 state = 0
222 261
223 262 elif state == 4:
224 263 # expecting '------' separator before first revision
225 264 if re_31.match(line):
226 265 state = 5
227 266 else:
228 267 assert not re_32.match(line), _('Must have at least some revisions')
229 268
230 269 elif state == 5:
231 270 # expecting revision number and possibly (ignored) lock indication
232 271 # we create the logentry here from values stored in states 0 to 4,
233 272 # as this state is re-entered for subsequent revisions of a file.
234 273 match = re_50.match(line)
235 274 assert match, _('expected revision number')
236 275 e = logentry(rcs=scache(rcs), file=scache(filename),
237 276 revision=tuple([int(x) for x in match.group(1).split('.')]),
238 277 branches=[], parent=None)
239 278 state = 6
240 279
241 280 elif state == 6:
242 281 # expecting date, author, state, lines changed
243 282 match = re_60.match(line)
244 283 assert match, _('revision must be followed by date line')
245 284 d = match.group(1)
246 285 if d[2] == '/':
247 286 # Y2K
248 287 d = '19' + d
249 288
250 289 if len(d.split()) != 3:
251 290 # cvs log dates always in GMT
252 291 d = d + ' UTC'
253 292 e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
254 293 e.author = scache(match.group(2))
255 294 e.dead = match.group(3).lower() == 'dead'
256 295
257 296 if match.group(5):
258 297 if match.group(6):
259 298 e.lines = (int(match.group(5)), int(match.group(6)))
260 299 else:
261 300 e.lines = (int(match.group(5)), 0)
262 301 elif match.group(6):
263 302 e.lines = (0, int(match.group(6)))
264 303 else:
265 304 e.lines = None
266 305 e.comment = []
267 306 state = 7
268 307
269 308 elif state == 7:
270 309 # read the revision numbers of branches that start at this revision
271 310 # or store the commit log message otherwise
272 311 m = re_70.match(line)
273 312 if m:
274 313 e.branches = [tuple([int(y) for y in x.strip().split('.')])
275 314 for x in m.group(1).split(';')]
276 315 state = 8
277 316 elif re_31.match(line):
278 317 state = 5
279 318 store = True
280 319 elif re_32.match(line):
281 320 state = 0
282 321 store = True
283 322 else:
284 323 e.comment.append(line)
285 324
286 325 elif state == 8:
287 326 # store commit log message
288 327 if re_31.match(line):
289 328 state = 5
290 329 store = True
291 330 elif re_32.match(line):
292 331 state = 0
293 332 store = True
294 333 else:
295 334 e.comment.append(line)
296 335
297 336 if store:
298 337 # clean up the results and save in the log.
299 338 store = False
300 339 e.tags = util.sort([scache(x) for x in tags.get(e.revision, [])])
301 340 e.comment = scache('\n'.join(e.comment))
302 341
303 342 revn = len(e.revision)
304 343 if revn > 3 and (revn % 2) == 0:
305 344 e.branch = tags.get(e.revision[:-1], [None])[0]
306 345 else:
307 346 e.branch = None
308 347
309 348 log.append(e)
310 349
311 350 if len(log) % 100 == 0:
312 351 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
313 352
314 353 listsort(log, key=lambda x:(x.rcs, x.revision))
315 354
316 355 # find parent revisions of individual files
317 356 versions = {}
318 357 for e in log:
319 358 branch = e.revision[:-1]
320 359 p = versions.get((e.rcs, branch), None)
321 360 if p is None:
322 361 p = e.revision[:-2]
323 362 e.parent = p
324 363 versions[(e.rcs, branch)] = e.revision
325 364
326 365 # update the log cache
327 366 if cache:
328 367 if log:
329 368 # join up the old and new logs
330 369 listsort(log, key=lambda x:x.date)
331 370
332 371 if oldlog and oldlog[-1].date >= log[0].date:
333 372 raise logerror('Log cache overlaps with new log entries,'
334 373 ' re-run without cache.')
335 374
336 375 log = oldlog + log
337 376
338 377 # write the new cachefile
339 378 ui.note(_('writing cvs log cache %s\n') % cachefile)
340 379 pickle.dump(log, file(cachefile, 'w'))
341 380 else:
342 381 log = oldlog
343 382
344 383 ui.status(_('%d log entries\n') % len(log))
345 384
346 385 return log
347 386
348 387
349 388 class changeset(object):
350 389 '''Class changeset has the following attributes:
351 390 .author - author name as CVS knows it
352 391 .branch - name of branch this changeset is on, or None
353 392 .comment - commit message
354 393 .date - the commit date as a (time,tz) tuple
355 394 .entries - list of logentry objects in this changeset
356 395 .parents - list of one or two parent changesets
357 396 .tags - list of tags on this changeset
358 397 '''
359 398 def __init__(self, **entries):
360 399 self.__dict__.update(entries)
361 400
362 401 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
363 402 '''Convert log into changesets.'''
364 403
365 404 ui.status(_('creating changesets\n'))
366 405
367 406 # Merge changesets
368 407
369 408 listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))
370 409
371 410 changesets = []
372 411 files = {}
373 412 c = None
374 413 for i, e in enumerate(log):
375 414
376 415 # Check if log entry belongs to the current changeset or not.
377 416 if not (c and
378 417 e.comment == c.comment and
379 418 e.author == c.author and
380 419 e.branch == c.branch and
381 420 ((c.date[0] + c.date[1]) <=
382 421 (e.date[0] + e.date[1]) <=
383 422 (c.date[0] + c.date[1]) + fuzz) and
384 423 e.file not in files):
385 424 c = changeset(comment=e.comment, author=e.author,
386 425 branch=e.branch, date=e.date, entries=[])
387 426 changesets.append(c)
388 427 files = {}
389 428 if len(changesets) % 100 == 0:
390 429 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
391 430 ui.status(util.ellipsis(t, 80) + '\n')
392 431
393 432 c.entries.append(e)
394 433 files[e.file] = True
395 434 c.date = e.date # changeset date is date of latest commit in it
396 435
397 436 # Sort files in each changeset
398 437
399 438 for c in changesets:
400 439 def pathcompare(l, r):
401 440 'Mimic cvsps sorting order'
402 441 l = l.split('/')
403 442 r = r.split('/')
404 443 nl = len(l)
405 444 nr = len(r)
406 445 n = min(nl, nr)
407 446 for i in range(n):
408 447 if i + 1 == nl and nl < nr:
409 448 return -1
410 449 elif i + 1 == nr and nl > nr:
411 450 return +1
412 451 elif l[i] < r[i]:
413 452 return -1
414 453 elif l[i] > r[i]:
415 454 return +1
416 455 return 0
417 456 def entitycompare(l, r):
418 457 return pathcompare(l.file, r.file)
419 458
420 459 c.entries.sort(entitycompare)
421 460
422 461 # Sort changesets by date
423 462
424 463 def cscmp(l, r):
425 464 d = sum(l.date) - sum(r.date)
426 465 if d:
427 466 return d
428 467
429 468 # detect vendor branches and initial commits on a branch
430 469 le = {}
431 470 for e in l.entries:
432 471 le[e.rcs] = e.revision
433 472 re = {}
434 473 for e in r.entries:
435 474 re[e.rcs] = e.revision
436 475
437 476 d = 0
438 477 for e in l.entries:
439 478 if re.get(e.rcs, None) == e.parent:
440 479 assert not d
441 480 d = 1
442 481 break
443 482
444 483 for e in r.entries:
445 484 if le.get(e.rcs, None) == e.parent:
446 485 assert not d
447 486 d = -1
448 487 break
449 488
450 489 return d
451 490
452 491 changesets.sort(cscmp)
453 492
454 493 # Collect tags
455 494
456 495 globaltags = {}
457 496 for c in changesets:
458 497 tags = {}
459 498 for e in c.entries:
460 499 for tag in e.tags:
461 500 # remember which is the latest changeset to have this tag
462 501 globaltags[tag] = c
463 502
464 503 for c in changesets:
465 504 tags = {}
466 505 for e in c.entries:
467 506 for tag in e.tags:
468 507 tags[tag] = True
469 508 # remember tags only if this is the latest changeset to have it
470 509 c.tags = util.sort([tag for tag in tags if globaltags[tag] is c])
471 510
472 511 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
473 512 # by inserting dummy changesets with two parents, and handle
474 513 # {{mergefrombranch BRANCHNAME}} by setting two parents.
475 514
476 515 if mergeto is None:
477 516 mergeto = r'{{mergetobranch ([-\w]+)}}'
478 517 if mergeto:
479 518 mergeto = re.compile(mergeto)
480 519
481 520 if mergefrom is None:
482 521 mergefrom = r'{{mergefrombranch ([-\w]+)}}'
483 522 if mergefrom:
484 523 mergefrom = re.compile(mergefrom)
485 524
486 525 versions = {} # changeset index where we saw any particular file version
487 526 branches = {} # changeset index where we saw a branch
488 527 n = len(changesets)
489 528 i = 0
490 529 while i<n:
491 530 c = changesets[i]
492 531
493 532 for f in c.entries:
494 533 versions[(f.rcs, f.revision)] = i
495 534
496 535 p = None
497 536 if c.branch in branches:
498 537 p = branches[c.branch]
499 538 else:
500 539 for f in c.entries:
501 540 p = max(p, versions.get((f.rcs, f.parent), None))
502 541
503 542 c.parents = []
504 543 if p is not None:
505 544 c.parents.append(changesets[p])
506 545
507 546 if mergefrom:
508 547 m = mergefrom.search(c.comment)
509 548 if m:
510 549 m = m.group(1)
511 550 if m == 'HEAD':
512 551 m = None
513 552 if m in branches and c.branch != m:
514 553 c.parents.append(changesets[branches[m]])
515 554
516 555 if mergeto:
517 556 m = mergeto.search(c.comment)
518 557 if m:
519 558 try:
520 559 m = m.group(1)
521 560 if m == 'HEAD':
522 561 m = None
523 562 except:
524 563 m = None # if no group found then merge to HEAD
525 564 if m in branches and c.branch != m:
526 565 # insert empty changeset for merge
527 566 cc = changeset(author=c.author, branch=m, date=c.date,
528 567 comment='convert-repo: CVS merge from branch %s' % c.branch,
529 568 entries=[], tags=[], parents=[changesets[branches[m]], c])
530 569 changesets.insert(i + 1, cc)
531 570 branches[m] = i + 1
532 571
533 572 # adjust our loop counters now we have inserted a new entry
534 573 n += 1
535 574 i += 2
536 575 continue
537 576
538 577 branches[c.branch] = i
539 578 i += 1
540 579
541 580 # Number changesets
542 581
543 582 for i, c in enumerate(changesets):
544 583 c.id = i + 1
545 584
546 585 ui.status(_('%d changeset entries\n') % len(changesets))
547 586
548 587 return changesets
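
The core of this fix is the new getrepopath() helper: it strips the connection components of a CVSROOT (method, user, host, port) so that only the repository path remains, without mangling Windows drive letters such as 'c:'. Below is a minimal standalone sketch of the same heuristic, mirroring the hunk above so it can be exercised with doctest outside of Mercurial; the example CVSROOT strings are illustrative only.

def getrepopath(cvspath):
    """Return the repository path portion of a CVSROOT string.

    >>> getrepopath('/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:user@host:2401/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:user@host:c:/foo/bar')
    'c:/foo/bar'
    """
    parts = cvspath.split(':')
    if len(parts) == 1:
        # plain local path, nothing to strip
        return parts[0]
    if len(parts[-2]) > 1:
        # the component before the path is host/method information, not a
        # one-letter drive; a port number glued to the path is dropped
        return parts[-1].lstrip('0123456789')
    # a single-character component is assumed to be a Windows drive letter
    # and is kept as part of the repository path
    return parts[-2] + ':' + parts[-1]

if __name__ == '__main__':
    import doctest
    doctest.testmod()

The disambiguation relies on the assumption stated in the comments of the hunk: drive letters are one character long, while every other CVSROOT component before the path is at least two characters long.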
tests/test-doctest.py
@@ -1,16 +1,19 @@
1 1 # this is a hack to make sure no escape characters are inserted into the output
2 2 import os;
3 3 if 'TERM' in os.environ:
4 4 del os.environ['TERM']
5 5 import doctest
6 6
7 7 import mercurial.changelog
8 8 # test doctest from changelog
9 9
10 10 doctest.testmod(mercurial.changelog)
11 11
12 12 import mercurial.httprepo
13 13 doctest.testmod(mercurial.httprepo)
14 14
15 15 import mercurial.util
16 16 doctest.testmod(mercurial.util)
17
18 import hgext.convert.cvsps
19 doctest.testmod(hgext.convert.cvsps)
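
The second hunk wires the cvsps module into tests/test-doctest.py so that the getrepopath doctests run as part of the Mercurial test suite. A quick way to run just these doctests by hand, assuming a Mercurial source checkout is on PYTHONPATH, is a sketch like:

import doctest
import hgext.convert.cvsps

# testmod returns (failed, attempted); a non-zero 'failed' count means one
# of the getrepopath examples no longer matches its expected output
failed, attempted = doctest.testmod(hgext.convert.cvsps, verbose=True)
print('%d of %d doctest examples failed' % (failed, attempted))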