##// END OF EJS Templates
convert: use set instead of dict
Benoit Boissinot -
r8456:e9e2a2c9 default
parent child Browse files
Show More
@@ -1,351 +1,351
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 from common import NoRepo, MissingTool, SKIPREV, mapfile
9 9 from cvs import convert_cvs
10 10 from darcs import darcs_source
11 11 from git import convert_git
12 12 from hg import mercurial_source, mercurial_sink
13 13 from subversion import svn_source, svn_sink
14 14 from monotone import monotone_source
15 15 from gnuarch import gnuarch_source
16 16 from bzr import bzr_source
17 17 from p4 import p4_source
18 18 import filemap
19 19
20 20 import os, shutil
21 21 from mercurial import hg, util, encoding
22 22 from mercurial.i18n import _
23 23
# Local encoding in effect before convert() switches encoding.encoding to
# UTF-8; recode() uses it to render log messages back in the user's encoding.
orig_encoding = 'ascii'
25 25
def recode(s):
    """Return *s* re-encoded into the pre-convert local encoding.

    Unicode input is encoded directly; byte strings are assumed to be
    UTF-8 and transcoded.  Unrepresentable characters are replaced.
    """
    if not isinstance(s, unicode):
        s = s.decode('utf-8')
    return s.encode(orig_encoding, 'replace')
31 31
# Ordered (name, constructor) pairs tried by convertsource().  Order matters:
# when no explicit source type is requested, the first converter that accepts
# the path wins.
source_converters = [
    ('cvs', convert_cvs),
    ('git', convert_git),
    ('svn', svn_source),
    ('hg', mercurial_source),
    ('darcs', darcs_source),
    ('mtn', monotone_source),
    ('gnuarch', gnuarch_source),
    ('bzr', bzr_source),
    ('p4', p4_source),
    ]

# Ordered (name, constructor) pairs tried by convertsink().
sink_converters = [
    ('hg', mercurial_sink),
    ('svn', svn_sink),
    ]
48 48
49 49 def convertsource(ui, path, type, rev):
50 50 exceptions = []
51 51 for name, source in source_converters:
52 52 try:
53 53 if not type or name == type:
54 54 return source(ui, path, rev)
55 55 except (NoRepo, MissingTool), inst:
56 56 exceptions.append(inst)
57 57 if not ui.quiet:
58 58 for inst in exceptions:
59 59 ui.write("%s\n" % inst)
60 60 raise util.Abort(_('%s: missing or unsupported repository') % path)
61 61
62 62 def convertsink(ui, path, type):
63 63 for name, sink in sink_converters:
64 64 try:
65 65 if not type or name == type:
66 66 return sink(ui, path)
67 67 except NoRepo, inst:
68 68 ui.note(_("convert: %s\n") % inst)
69 69 raise util.Abort(_('%s: unknown repository type') % path)
70 70
class converter(object):
    """Drive a conversion: walk the source history, topologically sort it,
    and replay each revision into the sink, maintaining a persistent
    source-rev -> sink-rev map for incremental runs."""

    def __init__(self, ui, source, dest, revmapfile, opts):

        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings).  (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authors'):
            self.readauthormap(opts.get('authors'))
            self.authorfile = self.dest.authorfile()

        self.splicemap = mapfile(ui, opts.get('splicemap'))
        self.branchmap = mapfile(ui, opts.get('branchmap'))

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.'''
        visit = heads
        # use a set for membership tests; already-converted revs (in
        # self.map) are skipped so incremental conversion works
        known = set()
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known.add(n)
            commit = self.cachecommit(n)
            parents[n] = []
            for p in commit.parents:
                parents[n].append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        '''Return an ordering such that every uncommitted changeset is
        preceeded by all its uncommitted ancestors.'''
        visit = parents.keys()
        seen = set()
        children = {}
        actives = []

        while visit:
            n = visit.pop(0)
            if n in seen:
                continue
            seen.add(n)
            # Ensure that nodes without parents are present in the 'children'
            # mapping.
            children.setdefault(n, [])
            hasparent = False
            for p in parents[n]:
                if not p in self.map:
                    visit.append(p)
                    hasparent = True
                children.setdefault(p, []).append(n)
            if not hasparent:
                actives.append(n)

        del seen
        del visit

        if self.opts.get('datesort'):
            dates = {}
            def getdate(n):
                # memoize parsed dates; parsing is not free
                if n not in dates:
                    dates[n] = util.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]
        else:
            prev = [None]
            def picknext(nodes):
                # Return the first eligible child of the previously converted
                # revision, or any of them.
                next = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        next = n
                        break
                prev[0] = next
                return next

        s = []
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise util.Abort(_('cycle detected between %s and %s')
                                       % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise util.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        """Persist the accumulated author map to self.authorfile, if set."""
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_('Writing author map file %s\n') % authorfile)
            ofile = open(authorfile, 'w+')
            for author in self.authors:
                ofile.write("%s=%s\n" % (author, self.authors[author]))
            ofile.close()

    def readauthormap(self, authorfile):
        """Merge 'src=dst' author mappings from *authorfile* into
        self.authors, warning on malformed lines and reporting overrides."""
        afile = open(authorfile, 'r')
        for line in afile:

            line = line.strip()
            if not line or line.startswith('#'):
                continue

            try:
                srcauthor, dstauthor = line.split('=', 1)
            except ValueError:
                msg = _('Ignoring bad line in author map file %s: %s\n')
                self.ui.warn(msg % (authorfile, line.rstrip()))
                continue

            srcauthor = srcauthor.strip()
            dstauthor = dstauthor.strip()
            # unseen key or identical remapping: just record it
            if self.authors.get(srcauthor) in (None, dstauthor):
                msg = _('mapping author %s to %s\n')
                self.ui.debug(msg % (srcauthor, dstauthor))
                self.authors[srcauthor] = dstauthor
                continue

            m = _('overriding mapping for author %s, was %s, will be %s\n')
            self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor))

        afile.close()

    def cachecommit(self, rev):
        """Fetch *rev* from the source, apply author/branch maps, cache it."""
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = self.branchmap.get(commit.branch, commit.branch)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Replay source revision *rev* into the sink and record the
        resulting node in self.map."""
        commit = self.commitcache[rev]

        changes = self.source.getchanges(rev)
        # a string result means "reuse/skip": SKIPREV or an alias rev
        if isinstance(changes, basestring):
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)
        try:
            # splicemap overrides parents; KeyError means no splice for rev
            parents = self.splicemap[rev].replace(',', ' ').split()
            self.ui.status(_('spliced in %s as parents of %s\n') %
                           (parents, rev))
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            parents = [b[0] for b in pbranches]
        newnode = self.dest.putcommit(files, copies, parents, commit, self.source)
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self):
        """Run the whole conversion: scan, sort, copy revisions, tags and
        author map; always clean up source/sink state afterwards."""
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_("scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.ui.status(_("sorting...\n"))
            t = self.toposort(parents)
            num = len(t)
            c = None

            self.ui.status(_("converting...\n"))
            for c in t:
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because encoding.encoding conver() use it as
                # 'utf-8'
                self.ui.status("%d %s\n" % (num, recode(desc)))
                self.ui.note(_("source: %s\n") % recode(c))
                self.copy(c)

            tags = self.source.gettags()
            ctags = {}
            for k in tags:
                v = tags[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev = self.dest.puttags(ctags)
                # write another hash correspondence to override the previous
                # one so we don't end up with extra tag heads
                if nrev:
                    self.map[c] = nrev

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Tear down sink then source, then close the revision map."""
        try:
            self.dest.after()
        finally:
            self.source.after()
        self.map.close()
318 318
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Entry point: convert repository *src* into *dest*.

    *dest* defaults to "<defaultdest>-hg"; *revmapfile* defaults to the
    sink's own revmap location.  Runs the whole pipeline under a UTF-8
    internal encoding, restoring log output via recode().
    """
    global orig_encoding
    orig_encoding = encoding.encoding
    encoding.encoding = 'UTF-8'

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status(_("assuming destination %s\n") % dest)

    destc = convertsink(ui, dest, opts.get('dest_type'))

    try:
        srcc = convertsource(ui, src, opts.get('source_type'),
                             opts.get('rev'))
    except Exception:
        # remove anything the sink created before failing
        for path in destc.created:
            shutil.rmtree(path, True)
        raise

    fmap = opts.get('filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        try:
            revmapfile = destc.revmapfile()
        # narrowed from a bare "except:" which also swallowed
        # KeyboardInterrupt/SystemExit
        except Exception:
            revmapfile = os.path.join(destc, "map")

    c = converter(ui, srcc, destc, revmapfile, opts)
    c.convert()
351 351
@@ -1,782 +1,781
1 1 #
2 2 # Mercurial built-in replacement for cvsps.
3 3 #
4 4 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2, incorporated herein by reference.
8 8
9 9 import os
10 10 import re
11 11 import cPickle as pickle
12 12 from mercurial import util
13 13 from mercurial.i18n import _
14 14
def listsort(list, key):
    """Sort *list* in place by *key*, tolerating Python 2.3.

    Python 2.3's list.sort() lacks the key= argument; when it is
    rejected with TypeError, emulate it with a comparison function.
    """
    try:
        list.sort(key=key)
    except TypeError:
        # pre-2.4 fallback: derive a cmp function from the key
        list.sort(lambda a, b: cmp(key(a), key(b)))
21 21
class logentry(object):
    '''A single CVS file revision parsed from (r)log output.

    Attributes:
    .author    - author name as CVS knows it
    .branch    - name of branch this revision is on
    .branches  - revision tuple of branches starting at this revision
    .comment   - commit message
    .date      - the commit date as a (time, tz) tuple
    .dead      - true if file revision is dead
    .file      - name of file
    .lines     - a tuple (+lines, -lines) or None
    .parent    - previous revision of this entry
    .rcs       - name of file as returned from CVS
    .revision  - revision number as tuple
    .tags      - list of tags on the file
    .synthetic - is this a synthetic "file ... added on ..." revision?
    .mergepoint- the branch that has been merged from (if present in rlog output)
    '''
    def __init__(self, **entries):
        self.__dict__.update(entries)

    def __repr__(self):
        rev = ".".join(map(str, self.revision))
        return "<%s at 0x%x: %s %s>" % (self.__class__.__name__,
                                        id(self), self.file, rev)
47 47
class logerror(Exception):
    """Raised when CVS (r)log output cannot be collected or parsed."""
50 50
def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    >>> getrepopath('/foo/bar')
    '/foo/bar'
    >>> getrepopath('c:/foo/bar')
    'c:/foo/bar'
    >>> getrepopath(':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:c:/foo/bar')
    'c:/foo/bar'
    >>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
    'c:/foo/bar'
    """
    # According to CVS manual, CVS paths are expressed like:
    # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # Windows absolute paths start with a one-letter drive component
    # ('c:'), while every other CVS component before the repository path
    # is at least 2 characters long; that asymmetry disambiguates.
    pieces = cvspath.split(':')
    if len(pieces) == 1:
        return pieces[0]
    tail = pieces[-1]
    penultimate = pieces[-2]
    if len(penultimate) > 1:
        # ordinary CVS component before the path; a leading port number
        # fused onto the path is stripped (ambiguous port+drive case is
        # assumed never to happen)
        return tail.lstrip('0123456789')
    # one-character component: a Windows drive letter, rejoin it
    return penultimate + ':' + tail
89 89
def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog.

    Runs 'cvs rlog' (or 'cvs log' when rlog is False) for *directory*
    under repository *root* and parses the output with a line-oriented
    state machine, returning a list of logentry objects.  With
    cache='update', a previously pickled log is loaded and only newer
    entries are fetched and appended.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile('RCS file: (.+)$')
    re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
    re_02 = re.compile('cvs (r?log|server): (.+)\n$')
    re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
    re_10 = re.compile('Working file: (.+)$')
    re_20 = re.compile('symbolic names:')
    re_30 = re.compile('\t(.+): ([\\d.]+)$')
    re_31 = re.compile('----------------------------$')
    re_32 = re.compile('=============================================================================$')
    re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?(.*mergepoint:\s+([^;]+);)?')
    re_70 = re.compile('branches: (.+);$')

    file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')

    prefix = ''   # leading path to strip of what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            prefix = file(os.path.join('CVS','Repository')).read().strip()
            if prefix == ".":
                prefix = ""
            directory = prefix
        except IOError:
            raise logerror('Not a CVS sandbox')

        if prefix and not prefix.endswith(os.sep):
            prefix += os.sep

        # Use the Root file in the sandbox, if it exists
        try:
            root = file(os.path.join('CVS','Root')).read().strip()
        except IOError:
            pass

    if not root:
        root = os.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumerics, concatenated in a way that does not mix up the
        # various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":") + [directory, "cache"]
        cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir,
                                 '.'.join([s for s in cachefile if s]))

        if cache == 'update':
            try:
                ui.note(_('reading cvs log cache %s\n') % cachefile)
                oldlog = pickle.load(file(cachefile))
                ui.note(_('cache has %d log entries\n') % len(oldlog))
            except Exception, e:
                # best-effort cache: any load failure just means a full fetch
                ui.note(_('error reading cache: %r\n') % e)

            if oldlog:
                date = oldlog[-1].date    # last commit date as a (time,tz) tuple
                date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith('/'):
            p += '/'
        prefix = p + util.normpath(prefix)
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    # states: 0 wait for 'RCS file', 1 wait for 'Working file',
    # 2 wait for 'symbolic names', 3 read tags, 4 wait for separator,
    # 5 read revision number, 6 read date/author line,
    # 7 read branches or comment, 8 read comment
    tags = {}     # dictionary of revisions on current file with their tags
    branchmap = {} # mapping between branch names and revision numbers
    state = 0
    store = False # set when a new record can be appended

    cmd = [util.shellquote(arg) for arg in cmd]
    ui.note(_("running %s\n") % (' '.join(cmd)))
    ui.debug(_("prefix=%r directory=%r root=%r\n") % (prefix, directory, root))

    pfp = util.popen(' '.join(cmd))
    peek = pfp.readline()
    while True:
        line = peek
        if line == '':
            break
        peek = pfp.readline()
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # strip repository prefix and Attic/ component to get
                    # the working file name
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise Exception(match.group(1))
            match = re_02.match(line)
            if match:
                raise Exception(match.group(2))
            if re_03.match(line):
                raise Exception(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                branchmap = {}
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))
                branchmap[match.group(1)] = match.group(2)

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('must have at least some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs), file=scache(filename),
                    revision=tuple([int(x) for x in match.group(1).split('.')]),
                    branches=[], parent=None,
                    synthetic=False)
            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            if d[2] == '/':
                # Y2K
                d = '19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + ' UTC'
            e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None

            if match.group(7): # cvsnt mergepoint
                myrev = match.group(8).split('.')
                if len(myrev) == 2: # head
                    e.mergepoint = 'HEAD'
                else:
                    myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
                    branches = [b for b in branchmap if branchmap[b] == myrev]
                    assert len(branches) == 1, 'unknown branch: %s' % e.mergepoint
                    e.mergepoint = branches[0]
            else:
                e.mergepoint = None
            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                              for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line) and re_50.match(peek):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        # When a file is added on a branch B1, CVS creates a synthetic
        # dead trunk revision 1.1 so that the branch has a root.
        # Likewise, if you merge such a file to a later branch B2 (one
        # that already existed when the file was added on B1), CVS
        # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
        # these revisions now, but mark them synthetic so
        # createchangeset() can take care of them.
        if (store and
              e.dead and
              e.revision[-1] == 1 and      # 1.1 or 1.1.x.1
              len(e.comment) == 1 and
              file_added_re.match(e.comment[0])):
            ui.debug(_('found synthetic revision in %s: %r\n')
                     % (e.rcs, e.comment[0]))
            e.synthetic = True

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache('\n'.join(e.comment))

            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            log.append(e)

            if len(log) % 100 == 0:
                ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')

    listsort(log, key=lambda x:(x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            listsort(log, key=lambda x:x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror('Log cache overlaps with new log entries,'
                               ' re-run without cache.')

            log = oldlog + log

            # write the new cachefile
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            pickle.dump(log, file(cachefile, 'w'))
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    return log
433 433
434 434
class changeset(object):
    '''A converted changeset grouping one or more logentries.

    Attributes:
    .id        - integer identifying this changeset (list index)
    .author    - author name as CVS knows it
    .branch    - name of branch this changeset is on, or None
    .comment   - commit message
    .date      - the commit date as a (time,tz) tuple
    .entries   - list of logentry objects in this changeset
    .parents   - list of one or two parent changesets
    .tags      - list of tags on this changeset
    .synthetic - from synthetic revision "file ... added on branch ..."
    .mergepoint- the branch that has been merged from (if present in rlog output)
    '''
    def __init__(self, **entries):
        self.__dict__.update(entries)

    def __repr__(self):
        ident = getattr(self, 'id', "(no id)")
        return "<%s at 0x%x: %s>" % (self.__class__.__name__,
                                     id(self), ident)
455 455
456 456 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
457 457 '''Convert log into changesets.'''
458 458
459 459 ui.status(_('creating changesets\n'))
460 460
461 461 # Merge changesets
462 462
463 463 listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))
464 464
465 465 changesets = []
466 files = {}
466 files = set()
467 467 c = None
468 468 for i, e in enumerate(log):
469 469
470 470 # Check if log entry belongs to the current changeset or not.
471 471 if not (c and
472 472 e.comment == c.comment and
473 473 e.author == c.author and
474 474 e.branch == c.branch and
475 475 ((c.date[0] + c.date[1]) <=
476 476 (e.date[0] + e.date[1]) <=
477 477 (c.date[0] + c.date[1]) + fuzz) and
478 478 e.file not in files):
479 479 c = changeset(comment=e.comment, author=e.author,
480 480 branch=e.branch, date=e.date, entries=[],
481 481 mergepoint=getattr(e, 'mergepoint', None))
482 482 changesets.append(c)
483 files = {}
483 files = set()
484 484 if len(changesets) % 100 == 0:
485 485 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
486 486 ui.status(util.ellipsis(t, 80) + '\n')
487 487
488 488 c.entries.append(e)
489 files[e.file] = True
489 files.add(e.file)
490 490 c.date = e.date # changeset date is date of latest commit in it
491 491
492 492 # Mark synthetic changesets
493 493
494 494 for c in changesets:
495 495 # Synthetic revisions always get their own changeset, because
496 496 # the log message includes the filename. E.g. if you add file3
497 497 # and file4 on a branch, you get four log entries and three
498 498 # changesets:
499 499 # "File file3 was added on branch ..." (synthetic, 1 entry)
500 500 # "File file4 was added on branch ..." (synthetic, 1 entry)
501 501 # "Add file3 and file4 to fix ..." (real, 2 entries)
502 502 # Hence the check for 1 entry here.
503 503 synth = getattr(c.entries[0], 'synthetic', None)
504 504 c.synthetic = (len(c.entries) == 1 and synth)
505 505
506 506 # Sort files in each changeset
507 507
508 508 for c in changesets:
509 509 def pathcompare(l, r):
510 510 'Mimic cvsps sorting order'
511 511 l = l.split('/')
512 512 r = r.split('/')
513 513 nl = len(l)
514 514 nr = len(r)
515 515 n = min(nl, nr)
516 516 for i in range(n):
517 517 if i + 1 == nl and nl < nr:
518 518 return -1
519 519 elif i + 1 == nr and nl > nr:
520 520 return +1
521 521 elif l[i] < r[i]:
522 522 return -1
523 523 elif l[i] > r[i]:
524 524 return +1
525 525 return 0
526 526 def entitycompare(l, r):
527 527 return pathcompare(l.file, r.file)
528 528
529 529 c.entries.sort(entitycompare)
530 530
531 531 # Sort changesets by date
532 532
533 533 def cscmp(l, r):
534 534 d = sum(l.date) - sum(r.date)
535 535 if d:
536 536 return d
537 537
538 538 # detect vendor branches and initial commits on a branch
539 539 le = {}
540 540 for e in l.entries:
541 541 le[e.rcs] = e.revision
542 542 re = {}
543 543 for e in r.entries:
544 544 re[e.rcs] = e.revision
545 545
546 546 d = 0
547 547 for e in l.entries:
548 548 if re.get(e.rcs, None) == e.parent:
549 549 assert not d
550 550 d = 1
551 551 break
552 552
553 553 for e in r.entries:
554 554 if le.get(e.rcs, None) == e.parent:
555 555 assert not d
556 556 d = -1
557 557 break
558 558
559 559 return d
560 560
561 561 changesets.sort(cscmp)
562 562
563 563 # Collect tags
564 564
565 565 globaltags = {}
566 566 for c in changesets:
567 tags = {}
568 567 for e in c.entries:
569 568 for tag in e.tags:
570 569 # remember which is the latest changeset to have this tag
571 570 globaltags[tag] = c
572 571
573 572 for c in changesets:
574 tags = {}
573 tags = set()
575 574 for e in c.entries:
576 575 for tag in e.tags:
577 tags[tag] = True
576 tags.add(tag)
578 577 # remember tags only if this is the latest changeset to have it
579 c.tags = sorted([tag for tag in tags if globaltags[tag] is c])
578 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
580 579
581 580 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
582 581 # by inserting dummy changesets with two parents, and handle
583 582 # {{mergefrombranch BRANCHNAME}} by setting two parents.
584 583
585 584 if mergeto is None:
586 585 mergeto = r'{{mergetobranch ([-\w]+)}}'
587 586 if mergeto:
588 587 mergeto = re.compile(mergeto)
589 588
590 589 if mergefrom is None:
591 590 mergefrom = r'{{mergefrombranch ([-\w]+)}}'
592 591 if mergefrom:
593 592 mergefrom = re.compile(mergefrom)
594 593
595 594 versions = {} # changeset index where we saw any particular file version
596 595 branches = {} # changeset index where we saw a branch
597 596 n = len(changesets)
598 597 i = 0
599 598 while i<n:
600 599 c = changesets[i]
601 600
602 601 for f in c.entries:
603 602 versions[(f.rcs, f.revision)] = i
604 603
605 604 p = None
606 605 if c.branch in branches:
607 606 p = branches[c.branch]
608 607 else:
609 608 for f in c.entries:
610 609 p = max(p, versions.get((f.rcs, f.parent), None))
611 610
612 611 c.parents = []
613 612 if p is not None:
614 613 p = changesets[p]
615 614
616 615 # Ensure no changeset has a synthetic changeset as a parent.
617 616 while p.synthetic:
618 617 assert len(p.parents) <= 1, \
619 618 _('synthetic changeset cannot have multiple parents')
620 619 if p.parents:
621 620 p = p.parents[0]
622 621 else:
623 622 p = None
624 623 break
625 624
626 625 if p is not None:
627 626 c.parents.append(p)
628 627
629 628 if c.mergepoint:
630 629 if c.mergepoint == 'HEAD':
631 630 c.mergepoint = None
632 631 c.parents.append(changesets[branches[c.mergepoint]])
633 632
634 633 if mergefrom:
635 634 m = mergefrom.search(c.comment)
636 635 if m:
637 636 m = m.group(1)
638 637 if m == 'HEAD':
639 638 m = None
640 639 try:
641 640 candidate = changesets[branches[m]]
642 641 except KeyError:
643 642 ui.warn(_("warning: CVS commit message references "
644 643 "non-existent branch %r:\n%s\n")
645 644 % (m, c.comment))
646 645 if m in branches and c.branch != m and not candidate.synthetic:
647 646 c.parents.append(candidate)
648 647
649 648 if mergeto:
650 649 m = mergeto.search(c.comment)
651 650 if m:
652 651 try:
653 652 m = m.group(1)
654 653 if m == 'HEAD':
655 654 m = None
656 655 except:
657 656 m = None # if no group found then merge to HEAD
658 657 if m in branches and c.branch != m:
659 658 # insert empty changeset for merge
660 659 cc = changeset(author=c.author, branch=m, date=c.date,
661 660 comment='convert-repo: CVS merge from branch %s' % c.branch,
662 661 entries=[], tags=[], parents=[changesets[branches[m]], c])
663 662 changesets.insert(i + 1, cc)
664 663 branches[m] = i + 1
665 664
666 665 # adjust our loop counters now we have inserted a new entry
667 666 n += 1
668 667 i += 2
669 668 continue
670 669
671 670 branches[c.branch] = i
672 671 i += 1
673 672
674 673 # Drop synthetic changesets (safe now that we have ensured no other
675 674 # changesets can have them as parents).
676 675 i = 0
677 676 while i < len(changesets):
678 677 if changesets[i].synthetic:
679 678 del changesets[i]
680 679 else:
681 680 i += 1
682 681
683 682 # Number changesets
684 683
685 684 for i, c in enumerate(changesets):
686 685 c.id = i + 1
687 686
688 687 ui.status(_('%d changeset entries\n') % len(changesets))
689 688
690 689 return changesets
691 690
692 691
def debugcvsps(ui, *args, **opts):
    '''Read CVS rlog for current directory or named path in repository, and
    convert the log to changesets based on matching commit log entries and dates.'''

    # Map the mutually exclusive cache options onto createlog()'s cache mode.
    if opts["new_cache"]:
        cache = "write"
    elif opts["update_cache"]:
        cache = "update"
    else:
        cache = None

    revisions = opts["revisions"]

    try:
        if args:
            # Concatenate the logs of every named path.
            log = []
            for d in args:
                log += createlog(ui, d, root=opts["root"], cache=cache)
        else:
            log = createlog(ui, root=opts["root"], cache=cache)
    except logerror, e:
        ui.write("%r\n"%e)
        return

    changesets = createchangeset(ui, log, opts["fuzz"])
    # The raw log can be very large; release it before printing.
    del log

    # Print changesets (optionally filtered)

    # off is truthy until the start revision/tag (if any) has been seen.
    off = len(revisions)
    branches = {} # latest version number in each branch
    ancestors = {} # parent branch
    for cs in changesets:

        if opts["ancestors"]:
            # Record where each branch forked off its parent branch.
            if cs.branch not in branches and cs.parents and cs.parents[0].id:
                ancestors[cs.branch] = changesets[cs.parents[0].id-1].branch, cs.parents[0].id
            branches[cs.branch] = cs.id

        # limit by branches
        if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
            continue

        if not off:
            # Note: trailing spaces on several lines here are needed to have
            # bug-for-bug compatibility with cvsps.
            ui.write('---------------------\n')
            ui.write('PatchSet %d \n' % cs.id)
            ui.write('Date: %s\n' % util.datestr(cs.date, '%Y/%m/%d %H:%M:%S %1%2'))
            ui.write('Author: %s\n' % cs.author)
            ui.write('Branch: %s\n' % (cs.branch or 'HEAD'))
            ui.write('Tag%s: %s \n' % (['', 's'][len(cs.tags)>1],
                                  ','.join(cs.tags) or '(none)'))
            if opts["parents"] and cs.parents:
                if len(cs.parents)>1:
                    ui.write('Parents: %s\n' % (','.join([str(p.id) for p in cs.parents])))
                else:
                    ui.write('Parent: %d\n' % cs.parents[0].id)

            if opts["ancestors"]:
                # Walk the ancestors map back to HEAD, printing each hop.
                b = cs.branch
                r = []
                while b:
                    b, c = ancestors[b]
                    r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
                if r:
                    ui.write('Ancestors: %s\n' % (','.join(r)))

            ui.write('Log:\n')
            ui.write('%s\n\n' % cs.comment)
            ui.write('Members: \n')
            for f in cs.entries:
                fn = f.file
                if fn.startswith(opts["prefix"]):
                    fn = fn[len(opts["prefix"]):]
                ui.write('\t%s:%s->%s%s \n' % (fn, '.'.join([str(x) for x in f.parent]) or 'INITIAL',
                                          '.'.join([str(x) for x in f.revision]), ['', '(DEAD)'][f.dead]))
            ui.write('\n')

        # have we seen the start tag?
        if revisions and off:
            if revisions[0] == str(cs.id) or \
                revisions[0] in cs.tags:
                off = False

        # see if we reached the end tag
        if len(revisions)>1 and not off:
            if revisions[1] == str(cs.id) or \
                revisions[1] in cs.tags:
                break
@@ -1,152 +1,152
1 1 # git.py - git support for the convert extension
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 import os
9 9 from mercurial import util
10 10
11 11 from common import NoRepo, commit, converter_source, checktool
12 12
class convert_git(converter_source):
    # Source converter that reads history out of a git repository by
    # shelling out to the git command-line tools.
    #
    # Windows does not support GIT_DIR= construct while other systems
    # cannot remove environment variable. Just assume none have
    # both issues.
    if hasattr(os, 'unsetenv'):
        def gitcmd(self, s):
            # Run a git command with GIT_DIR pointing at our repo,
            # restoring the previous environment afterwards.
            prevgitdir = os.environ.get('GIT_DIR')
            os.environ['GIT_DIR'] = self.path
            try:
                return util.popen(s, 'rb')
            finally:
                if prevgitdir is None:
                    del os.environ['GIT_DIR']
                else:
                    os.environ['GIT_DIR'] = prevgitdir
    else:
        def gitcmd(self, s):
            # No unsetenv: pass GIT_DIR through the shell command line instead.
            return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb')

    def __init__(self, ui, path, rev=None):
        super(convert_git, self).__init__(ui, path, rev=rev)

        # Accept both a work tree (containing .git) and a bare repo path.
        if os.path.isdir(path + "/.git"):
            path += "/.git"
        if not os.path.exists(path + "/objects"):
            raise NoRepo("%s does not look like a Git repo" % path)

        checktool('git', 'git')

        self.path = path

    def getheads(self):
        # All branch/remote heads, or just the requested revision.
        if not self.rev:
            return self.gitcmd('git rev-parse --branches --remotes').read().splitlines()
        else:
            fh = self.gitcmd("git rev-parse --verify %s" % self.rev)
            return [fh.read()[:-1]]

    def catfile(self, rev, type):
        # The null hash marks a deleted file; signal that with IOError.
        if rev == "0" * 40: raise IOError()
        fh = self.gitcmd("git cat-file %s %s" % (type, rev))
        return fh.read()

    def getfile(self, name, rev):
        return self.catfile(rev, "blob")

    def getmode(self, name, rev):
        # modecache is populated by getchanges() for the same version.
        return self.modecache[(name, rev)]

    def getchanges(self, version):
        self.modecache = {}
        fh = self.gitcmd("git diff-tree -z --root -m -r %s" % version)
        changes = []
        seen = set()
        entry = None
        # diff-tree -z alternates metadata entries and filenames,
        # NUL-separated; pair them up as we go.
        for l in fh.read().split('\x00'):
            if not entry:
                if not l.startswith(':'):
                    continue
                entry = l
                continue
            f = l
            if f not in seen:
                seen.add(f)
                entry = entry.split()
                h = entry[3]
                # entry[1] is the destination mode: 100755 executable,
                # 120000 symlink.
                p = (entry[1] == "100755")
                s = (entry[1] == "120000")
                self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
                changes.append((f, h))
            entry = None
        return (changes, {})

    def getcommit(self, version):
        c = self.catfile(version, "commit") # read the commit hash
        # Headers and message are separated by a blank line.
        end = c.find("\n\n")
        message = c[end+2:]
        message = self.recode(message)
        l = c[:end].splitlines()
        parents = []
        author = committer = None
        for e in l[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                if author[0] == "<": author = author[1:-1]
                author = self.recode(author)
            if n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                committer = " ".join(p[:-2])
                if committer[0] == "<": committer = committer[1:-1]
                committer = self.recode(committer)
            if n == "parent": parents.append(v)

        if committer and committer != author:
            message += "\ncommitter: %s\n" % committer
        # Convert git's +HHMM / -HHMM timezone into signed seconds.
        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
        date = tm + " " + str(tz)

        c = commit(parents=parents, date=date, author=author, desc=message,
                   rev=version)
        return c

    def gettags(self):
        tags = {}
        fh = self.gitcmd('git ls-remote --tags "%s"' % self.path)
        prefix = 'refs/tags/'
        for line in fh:
            line = line.strip()
            # Only dereferenced tag objects ("^{}" suffix) point at commits.
            if not line.endswith("^{}"):
                continue
            node, tag = line.split(None, 1)
            if not tag.startswith(prefix):
                continue
            tag = tag[len(prefix):-3]
            tags[tag] = node

        return tags

    def getchangedfiles(self, version, i):
        changes = []
        if i is None:
            # Files changed against the first parent.
            fh = self.gitcmd("git diff-tree --root -m -r %s" % version)
            for l in fh:
                if "\t" not in l:
                    continue
                m, f = l[:-1].split("\t")
                changes.append(f)
            fh.close()
        else:
            # Files changed against parent i+1 (merge parents).
            fh = self.gitcmd('git diff-tree --name-only --root -r %s "%s^%s" --'
                             % (version, version, i+1))
            changes = [f.rstrip('\n') for f in fh]
            fh.close()

        return changes
@@ -1,338 +1,338
1 1 # hg.py - hg backend for convert extension
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 # Notes for hg->hg conversion:
9 9 #
10 10 # * Old versions of Mercurial didn't trim the whitespace from the ends
11 11 # of commit messages, but new versions do. Changesets created by
12 12 # those older versions, then converted, may thus have different
13 13 # hashes for changesets that are otherwise identical.
14 14 #
15 15 # * By default, the source revision is stored in the converted
16 16 # revision. This will cause the converted revision to have a
17 17 # different identity than the source. To avoid this, use the
18 18 # following option: "--config convert.hg.saverev=false"
19 19
20 20
21 21 import os, time
22 22 from mercurial.i18n import _
23 23 from mercurial.node import bin, hex, nullid
24 24 from mercurial import hg, util, context, error
25 25
26 26 from common import NoRepo, commit, converter_source, converter_sink
27 27
28 28 class mercurial_sink(converter_sink):
29 29 def __init__(self, ui, path):
30 30 converter_sink.__init__(self, ui, path)
31 31 self.branchnames = ui.configbool('convert', 'hg.usebranchnames', True)
32 32 self.clonebranches = ui.configbool('convert', 'hg.clonebranches', False)
33 33 self.tagsbranch = ui.config('convert', 'hg.tagsbranch', 'default')
34 34 self.lastbranch = None
35 35 if os.path.isdir(path) and len(os.listdir(path)) > 0:
36 36 try:
37 37 self.repo = hg.repository(self.ui, path)
38 38 if not self.repo.local():
39 39 raise NoRepo(_('%s is not a local Mercurial repo') % path)
40 40 except error.RepoError, err:
41 41 ui.traceback()
42 42 raise NoRepo(err.args[0])
43 43 else:
44 44 try:
45 45 ui.status(_('initializing destination %s repository\n') % path)
46 46 self.repo = hg.repository(self.ui, path, create=True)
47 47 if not self.repo.local():
48 48 raise NoRepo(_('%s is not a local Mercurial repo') % path)
49 49 self.created.append(path)
50 50 except error.RepoError:
51 51 ui.traceback()
52 52 raise NoRepo("could not create hg repo %s as sink" % path)
53 53 self.lock = None
54 54 self.wlock = None
55 55 self.filemapmode = False
56 56
57 57 def before(self):
58 58 self.ui.debug(_('run hg sink pre-conversion action\n'))
59 59 self.wlock = self.repo.wlock()
60 60 self.lock = self.repo.lock()
61 61
62 62 def after(self):
63 63 self.ui.debug(_('run hg sink post-conversion action\n'))
64 64 self.lock.release()
65 65 self.wlock.release()
66 66
67 67 def revmapfile(self):
68 68 return os.path.join(self.path, ".hg", "shamap")
69 69
70 70 def authorfile(self):
71 71 return os.path.join(self.path, ".hg", "authormap")
72 72
73 73 def getheads(self):
74 74 h = self.repo.changelog.heads()
75 75 return [ hex(x) for x in h ]
76 76
77 77 def setbranch(self, branch, pbranches):
78 78 if not self.clonebranches:
79 79 return
80 80
81 81 setbranch = (branch != self.lastbranch)
82 82 self.lastbranch = branch
83 83 if not branch:
84 84 branch = 'default'
85 85 pbranches = [(b[0], b[1] and b[1] or 'default') for b in pbranches]
86 86 pbranch = pbranches and pbranches[0][1] or 'default'
87 87
88 88 branchpath = os.path.join(self.path, branch)
89 89 if setbranch:
90 90 self.after()
91 91 try:
92 92 self.repo = hg.repository(self.ui, branchpath)
93 93 except:
94 94 self.repo = hg.repository(self.ui, branchpath, create=True)
95 95 self.before()
96 96
97 97 # pbranches may bring revisions from other branches (merge parents)
98 98 # Make sure we have them, or pull them.
99 99 missings = {}
100 100 for b in pbranches:
101 101 try:
102 102 self.repo.lookup(b[0])
103 103 except:
104 104 missings.setdefault(b[1], []).append(b[0])
105 105
106 106 if missings:
107 107 self.after()
108 108 for pbranch, heads in missings.iteritems():
109 109 pbranchpath = os.path.join(self.path, pbranch)
110 110 prepo = hg.repository(self.ui, pbranchpath)
111 111 self.ui.note(_('pulling from %s into %s\n') % (pbranch, branch))
112 112 self.repo.pull(prepo, [prepo.lookup(h) for h in heads])
113 113 self.before()
114 114
115 115 def putcommit(self, files, copies, parents, commit, source):
116 116
117 117 files = dict(files)
118 118 def getfilectx(repo, memctx, f):
119 119 v = files[f]
120 120 data = source.getfile(f, v)
121 121 e = source.getmode(f, v)
122 122 return context.memfilectx(f, data, 'l' in e, 'x' in e, copies.get(f))
123 123
124 124 pl = []
125 125 for p in parents:
126 126 if p not in pl:
127 127 pl.append(p)
128 128 parents = pl
129 129 nparents = len(parents)
130 130 if self.filemapmode and nparents == 1:
131 131 m1node = self.repo.changelog.read(bin(parents[0]))[0]
132 132 parent = parents[0]
133 133
134 134 if len(parents) < 2: parents.append("0" * 40)
135 135 if len(parents) < 2: parents.append("0" * 40)
136 136 p2 = parents.pop(0)
137 137
138 138 text = commit.desc
139 139 extra = commit.extra.copy()
140 140 if self.branchnames and commit.branch:
141 141 extra['branch'] = commit.branch
142 142 if commit.rev:
143 143 extra['convert_revision'] = commit.rev
144 144
145 145 while parents:
146 146 p1 = p2
147 147 p2 = parents.pop(0)
148 148 ctx = context.memctx(self.repo, (p1, p2), text, files.keys(), getfilectx,
149 149 commit.author, commit.date, extra)
150 150 self.repo.commitctx(ctx)
151 151 text = "(octopus merge fixup)\n"
152 152 p2 = hex(self.repo.changelog.tip())
153 153
154 154 if self.filemapmode and nparents == 1:
155 155 man = self.repo.manifest
156 156 mnode = self.repo.changelog.read(bin(p2))[0]
157 157 if not man.cmp(m1node, man.revision(mnode)):
158 158 self.repo.rollback()
159 159 return parent
160 160 return p2
161 161
162 162 def puttags(self, tags):
163 163 try:
164 164 parentctx = self.repo[self.tagsbranch]
165 165 tagparent = parentctx.node()
166 166 except error.RepoError:
167 167 parentctx = None
168 168 tagparent = nullid
169 169
170 170 try:
171 171 oldlines = sorted(parentctx['.hgtags'].data().splitlines(1))
172 172 except:
173 173 oldlines = []
174 174
175 175 newlines = sorted([("%s %s\n" % (tags[tag], tag)) for tag in tags])
176 176 if newlines == oldlines:
177 177 return None
178 178 data = "".join(newlines)
179 179 def getfilectx(repo, memctx, f):
180 180 return context.memfilectx(f, data, False, False, None)
181 181
182 182 self.ui.status(_("updating tags\n"))
183 183 date = "%s 0" % int(time.mktime(time.gmtime()))
184 184 extra = {'branch': self.tagsbranch}
185 185 ctx = context.memctx(self.repo, (tagparent, None), "update tags",
186 186 [".hgtags"], getfilectx, "convert-repo", date,
187 187 extra)
188 188 self.repo.commitctx(ctx)
189 189 return hex(self.repo.changelog.tip())
190 190
191 191 def setfilemapmode(self, active):
192 192 self.filemapmode = active
193 193
194 194 class mercurial_source(converter_source):
195 195 def __init__(self, ui, path, rev=None):
196 196 converter_source.__init__(self, ui, path, rev)
197 197 self.ignoreerrors = ui.configbool('convert', 'hg.ignoreerrors', False)
198 self.ignored = {}
198 self.ignored = set()
199 199 self.saverev = ui.configbool('convert', 'hg.saverev', False)
200 200 try:
201 201 self.repo = hg.repository(self.ui, path)
202 202 # try to provoke an exception if this isn't really a hg
203 203 # repo, but some other bogus compatible-looking url
204 204 if not self.repo.local():
205 205 raise error.RepoError()
206 206 except error.RepoError:
207 207 ui.traceback()
208 208 raise NoRepo("%s is not a local Mercurial repo" % path)
209 209 self.lastrev = None
210 210 self.lastctx = None
211 211 self._changescache = None
212 212 self.convertfp = None
213 213 # Restrict converted revisions to startrev descendants
214 214 startnode = ui.config('convert', 'hg.startrev')
215 215 if startnode is not None:
216 216 try:
217 217 startnode = self.repo.lookup(startnode)
218 218 except error.RepoError:
219 219 raise util.Abort(_('%s is not a valid start revision')
220 220 % startnode)
221 221 startrev = self.repo.changelog.rev(startnode)
222 222 children = {startnode: 1}
223 223 for rev in self.repo.changelog.descendants(startrev):
224 224 children[self.repo.changelog.node(rev)] = 1
225 225 self.keep = children.__contains__
226 226 else:
227 227 self.keep = util.always
228 228
229 229 def changectx(self, rev):
230 230 if self.lastrev != rev:
231 231 self.lastctx = self.repo[rev]
232 232 self.lastrev = rev
233 233 return self.lastctx
234 234
235 235 def parents(self, ctx):
236 236 return [p.node() for p in ctx.parents()
237 237 if p and self.keep(p.node())]
238 238
239 239 def getheads(self):
240 240 if self.rev:
241 241 heads = [self.repo[self.rev].node()]
242 242 else:
243 243 heads = self.repo.heads()
244 244 return [hex(h) for h in heads if self.keep(h)]
245 245
246 246 def getfile(self, name, rev):
247 247 try:
248 248 return self.changectx(rev)[name].data()
249 249 except error.LookupError, err:
250 250 raise IOError(err)
251 251
252 252 def getmode(self, name, rev):
253 253 return self.changectx(rev).manifest().flags(name)
254 254
255 255 def getchanges(self, rev):
256 256 ctx = self.changectx(rev)
257 257 parents = self.parents(ctx)
258 258 if not parents:
259 259 files = sorted(ctx.manifest())
260 260 if self.ignoreerrors:
261 261 # calling getcopies() is a simple way to detect missing
262 262 # revlogs and populate self.ignored
263 263 self.getcopies(ctx, files)
264 264 return [(f, rev) for f in files if f not in self.ignored], {}
265 265 if self._changescache and self._changescache[0] == rev:
266 266 m, a, r = self._changescache[1]
267 267 else:
268 268 m, a, r = self.repo.status(parents[0], ctx.node())[:3]
269 269 # getcopies() detects missing revlogs early, run it before
270 270 # filtering the changes.
271 271 copies = self.getcopies(ctx, m + a)
272 272 changes = [(name, rev) for name in m + a + r
273 273 if name not in self.ignored]
274 274 return sorted(changes), copies
275 275
276 276 def getcopies(self, ctx, files):
277 277 copies = {}
278 278 for name in files:
279 279 if name in self.ignored:
280 280 continue
281 281 try:
282 282 copysource, copynode = ctx.filectx(name).renamed()
283 283 if copysource in self.ignored or not self.keep(copynode):
284 284 continue
285 285 copies[name] = copysource
286 286 except TypeError:
287 287 pass
288 288 except error.LookupError, e:
289 289 if not self.ignoreerrors:
290 290 raise
291 self.ignored[name] = 1
291 self.ignored.add(name)
292 292 self.ui.warn(_('ignoring: %s\n') % e)
293 293 return copies
294 294
295 295 def getcommit(self, rev):
296 296 ctx = self.changectx(rev)
297 297 parents = [hex(p) for p in self.parents(ctx)]
298 298 if self.saverev:
299 299 crev = rev
300 300 else:
301 301 crev = None
302 302 return commit(author=ctx.user(), date=util.datestr(ctx.date()),
303 303 desc=ctx.description(), rev=crev, parents=parents,
304 304 branch=ctx.branch(), extra=ctx.extra())
305 305
306 306 def gettags(self):
307 307 tags = [t for t in self.repo.tagslist() if t[0] != 'tip']
308 308 return dict([(name, hex(node)) for name, node in tags
309 309 if self.keep(node)])
310 310
311 311 def getchangedfiles(self, rev, i):
312 312 ctx = self.changectx(rev)
313 313 parents = self.parents(ctx)
314 314 if not parents and i is None:
315 315 i = 0
316 316 changes = [], ctx.manifest().keys(), []
317 317 else:
318 318 i = i or 0
319 319 changes = self.repo.status(parents[i], ctx.node())[:3]
320 320 changes = [[f for f in l if f not in self.ignored] for l in changes]
321 321
322 322 if i == 0:
323 323 self._changescache = (rev, changes)
324 324
325 325 return changes[0] + changes[1] + changes[2]
326 326
327 327 def converted(self, rev, destrev):
328 328 if self.convertfp is None:
329 329 self.convertfp = open(os.path.join(self.path, '.hg', 'shamap'),
330 330 'a')
331 331 self.convertfp.write('%s %s\n' % (destrev, rev))
332 332 self.convertfp.flush()
333 333
334 334 def before(self):
335 335 self.ui.debug(_('run hg source pre-conversion action\n'))
336 336
337 337 def after(self):
338 338 self.ui.debug(_('run hg source post-conversion action\n'))
General Comments 0
You need to be logged in to leave comments. Login now