##// END OF EJS Templates
convert-repo: linearize the tag commit
Matt Mackall -
r1387:0c7e8d34 default
parent child Browse files
Show More
@@ -1,279 +1,284 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 #
2 #
3 # This is a generalized framework for converting between SCM
3 # This is a generalized framework for converting between SCM
4 # repository formats.
4 # repository formats.
5 #
5 #
6 # In its current form, it's hardcoded to convert incrementally between
6 # In its current form, it's hardcoded to convert incrementally between
7 # git and Mercurial.
7 # git and Mercurial.
8 #
8 #
9 # To use, you must first import the first git version into Mercurial,
9 # To use, you must first import the first git version into Mercurial,
10 # and establish a mapping between the git commit hash and the hash in
10 # and establish a mapping between the git commit hash and the hash in
11 # Mercurial for that version. This mapping is kept in a simple text
11 # Mercurial for that version. This mapping is kept in a simple text
12 # file with lines like so:
12 # file with lines like so:
13 #
13 #
14 # <git hash> <mercurial hash>
14 # <git hash> <mercurial hash>
15 #
15 #
16 # To convert the rest of the repo, run:
16 # To convert the rest of the repo, run:
17 #
17 #
18 # convert-repo <git-dir> <hg-dir> <mapfile>
18 # convert-repo <git-dir> <hg-dir> <mapfile>
19 #
19 #
20 # This updates the mapfile on each commit copied, so it can be
20 # This updates the mapfile on each commit copied, so it can be
21 # interrupted and can be run repeatedly to copy new commits.
21 # interrupted and can be run repeatedly to copy new commits.
22
22
23 import sys, os, zlib, sha, time
23 import sys, os, zlib, sha, time
24 from mercurial import hg, ui, util
24 from mercurial import hg, ui, util
25
25
26 class convert_git:
26 class convert_git:
27 def __init__(self, path):
27 def __init__(self, path):
28 self.path = path
28 self.path = path
29
29
30 def getheads(self):
30 def getheads(self):
31 return [file(self.path + "/HEAD").read()[:-1]]
31 return [file(self.path + "/HEAD").read()[:-1]]
32
32
33 def catfile(self, rev, type):
33 def catfile(self, rev, type):
34 if rev == "0" * 40: raise IOError()
34 if rev == "0" * 40: raise IOError()
35 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
35 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
36 return fh.read()
36 return fh.read()
37
37
38 def getfile(self, name, rev):
38 def getfile(self, name, rev):
39 return self.catfile(rev, "blob")
39 return self.catfile(rev, "blob")
40
40
41 def getchanges(self, version):
41 def getchanges(self, version):
42 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
42 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
43 changes = []
43 changes = []
44 for l in fh:
44 for l in fh:
45 if "\t" not in l: continue
45 if "\t" not in l: continue
46 m, f = l[:-1].split("\t")
46 m, f = l[:-1].split("\t")
47 m = m.split()
47 m = m.split()
48 h = m[3]
48 h = m[3]
49 p = (m[1] == "100755")
49 p = (m[1] == "100755")
50 changes.append((f, h, p))
50 changes.append((f, h, p))
51 return changes
51 return changes
52
52
53 def getcommit(self, version):
53 def getcommit(self, version):
54 c = self.catfile(version, "commit") # read the commit hash
54 c = self.catfile(version, "commit") # read the commit hash
55 end = c.find("\n\n")
55 end = c.find("\n\n")
56 message = c[end+2:]
56 message = c[end+2:]
57 l = c[:end].splitlines()
57 l = c[:end].splitlines()
58 manifest = l[0].split()[1]
58 manifest = l[0].split()[1]
59 parents = []
59 parents = []
60 for e in l[1:]:
60 for e in l[1:]:
61 n,v = e.split(" ", 1)
61 n,v = e.split(" ", 1)
62 if n == "author":
62 if n == "author":
63 p = v.split()
63 p = v.split()
64 tm, tz = p[-2:]
64 tm, tz = p[-2:]
65 author = " ".join(p[:-2])
65 author = " ".join(p[:-2])
66 if author[0] == "<": author = author[1:-1]
66 if author[0] == "<": author = author[1:-1]
67 if n == "committer":
67 if n == "committer":
68 p = v.split()
68 p = v.split()
69 tm, tz = p[-2:]
69 tm, tz = p[-2:]
70 committer = " ".join(p[:-2])
70 committer = " ".join(p[:-2])
71 if committer[0] == "<": committer = committer[1:-1]
71 if committer[0] == "<": committer = committer[1:-1]
72 message += "\ncommitter: %s\n" % v
72 message += "\ncommitter: %s\n" % v
73 if n == "parent": parents.append(v)
73 if n == "parent": parents.append(v)
74
74
75 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
75 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
76 tz = int(tzs) * (int(tzh) * 3600 + int(tzm))
76 tz = int(tzs) * (int(tzh) * 3600 + int(tzm))
77 date = tm + " " + str(tz)
77 date = tm + " " + str(tz)
78 return (parents, author, date, message)
78 return (parents, author, date, message)
79
79
80 def gettags(self):
80 def gettags(self):
81 tags = {}
81 tags = {}
82 for f in os.listdir(self.path + "/refs/tags"):
82 for f in os.listdir(self.path + "/refs/tags"):
83 try:
83 try:
84 h = file(self.path + "/refs/tags/" + f).read().strip()
84 h = file(self.path + "/refs/tags/" + f).read().strip()
85 c = self.catfile(h, "tag") # read the commit hash
85 c = self.catfile(h, "tag") # read the commit hash
86 h = c.splitlines()[0].split()[1]
86 h = c.splitlines()[0].split()[1]
87 tags[f] = h
87 tags[f] = h
88 except:
88 except:
89 pass
89 pass
90 return tags
90 return tags
91
91
92 class convert_mercurial:
92 class convert_mercurial:
93 def __init__(self, path):
93 def __init__(self, path):
94 self.path = path
94 self.path = path
95 u = ui.ui()
95 u = ui.ui()
96 self.repo = hg.repository(u, path)
96 self.repo = hg.repository(u, path)
97
97
98 def getheads(self):
98 def getheads(self):
99 h = self.repo.changelog.heads()
99 h = self.repo.changelog.heads()
100 return [ hg.hex(x) for x in h ]
100 return [ hg.hex(x) for x in h ]
101
101
102 def putfile(self, f, e, data):
102 def putfile(self, f, e, data):
103 self.repo.wfile(f, "w").write(data)
103 self.repo.wfile(f, "w").write(data)
104 if self.repo.dirstate.state(f) == '?':
104 if self.repo.dirstate.state(f) == '?':
105 self.repo.dirstate.update([f], "a")
105 self.repo.dirstate.update([f], "a")
106
106
107 util.set_exec(self.repo.wjoin(f), e)
107 util.set_exec(self.repo.wjoin(f), e)
108
108
109 def delfile(self, f):
109 def delfile(self, f):
110 try:
110 try:
111 os.unlink(self.repo.wjoin(f))
111 os.unlink(self.repo.wjoin(f))
112 #self.repo.remove([f])
112 #self.repo.remove([f])
113 except:
113 except:
114 pass
114 pass
115
115
116 def putcommit(self, files, parents, author, dest, text):
116 def putcommit(self, files, parents, author, dest, text):
117 seen = {}
117 seen = {}
118 pl = []
118 pl = []
119 for p in parents:
119 for p in parents:
120 if p not in seen:
120 if p not in seen:
121 pl.append(p)
121 pl.append(p)
122 seen[p] = 1
122 seen[p] = 1
123 parents = pl
123 parents = pl
124
124
125 if len(parents) < 2: parents.append("0" * 40)
125 if len(parents) < 2: parents.append("0" * 40)
126 if len(parents) < 2: parents.append("0" * 40)
126 if len(parents) < 2: parents.append("0" * 40)
127 p2 = parents.pop(0)
127 p2 = parents.pop(0)
128
128
129 while parents:
129 while parents:
130 p1 = p2
130 p1 = p2
131 p2 = parents.pop(0)
131 p2 = parents.pop(0)
132 self.repo.rawcommit(files, text, author, dest,
132 self.repo.rawcommit(files, text, author, dest,
133 hg.bin(p1), hg.bin(p2))
133 hg.bin(p1), hg.bin(p2))
134 text = "(octopus merge fixup)\n"
134 text = "(octopus merge fixup)\n"
135
135
136 return hg.hex(self.repo.changelog.tip())
136 return hg.hex(self.repo.changelog.tip())
137
137
138 def puttags(self, tags):
138 def puttags(self, tags):
139 try:
139 try:
140 old = self.repo.wfile(".hgtags").read()
140 old = self.repo.wfile(".hgtags").read()
141 oldlines = old.splitlines(1)
141 oldlines = old.splitlines(1)
142 oldlines.sort()
142 oldlines.sort()
143 except:
143 except:
144 oldlines = []
144 oldlines = []
145
145
146 k = tags.keys()
146 k = tags.keys()
147 k.sort()
147 k.sort()
148 newlines = []
148 newlines = []
149 for tag in k:
149 for tag in k:
150 newlines.append("%s %s\n" % (tags[tag], tag))
150 newlines.append("%s %s\n" % (tags[tag], tag))
151
151
152 newlines.sort()
152 newlines.sort()
153
153
154 if newlines != oldlines:
154 if newlines != oldlines:
155 #print "updating tags"
155 #print "updating tags"
156 f = self.repo.wfile(".hgtags", "w")
156 f = self.repo.wfile(".hgtags", "w")
157 f.write("".join(newlines))
157 f.write("".join(newlines))
158 f.close()
158 f.close()
159 if not oldlines: self.repo.add([".hgtags"])
159 if not oldlines: self.repo.add([".hgtags"])
160 date = "%s 0" % int(time.mktime(time.gmtime()))
160 date = "%s 0" % int(time.mktime(time.gmtime()))
161 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
161 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
162 date, self.repo.changelog.tip(), hg.nullid)
162 date, self.repo.changelog.tip(), hg.nullid)
163 return hg.hex(self.repo.changelog.tip())
163
164
164 class convert:
165 class convert:
165 def __init__(self, source, dest, mapfile):
166 def __init__(self, source, dest, mapfile):
166 self.source = source
167 self.source = source
167 self.dest = dest
168 self.dest = dest
168 self.mapfile = mapfile
169 self.mapfile = mapfile
169 self.commitcache = {}
170 self.commitcache = {}
170
171
171 self.map = {}
172 self.map = {}
172 for l in file(self.mapfile):
173 for l in file(self.mapfile):
173 sv, dv = l[:-1].split()
174 sv, dv = l[:-1].split()
174 self.map[sv] = dv
175 self.map[sv] = dv
175
176
176 def walktree(self, heads):
177 def walktree(self, heads):
177 visit = heads
178 visit = heads
178 known = {}
179 known = {}
179 parents = {}
180 parents = {}
180 while visit:
181 while visit:
181 n = visit.pop(0)
182 n = visit.pop(0)
182 if n in known or n in self.map: continue
183 if n in known or n in self.map: continue
183 known[n] = 1
184 known[n] = 1
184 self.commitcache[n] = self.source.getcommit(n)
185 self.commitcache[n] = self.source.getcommit(n)
185 cp = self.commitcache[n][0]
186 cp = self.commitcache[n][0]
186 for p in cp:
187 for p in cp:
187 parents.setdefault(n, []).append(p)
188 parents.setdefault(n, []).append(p)
188 visit.append(p)
189 visit.append(p)
189
190
190 return parents
191 return parents
191
192
192 def toposort(self, parents):
193 def toposort(self, parents):
193 visit = parents.keys()
194 visit = parents.keys()
194 seen = {}
195 seen = {}
195 children = {}
196 children = {}
196
197
197 while visit:
198 while visit:
198 n = visit.pop(0)
199 n = visit.pop(0)
199 if n in seen: continue
200 if n in seen: continue
200 seen[n] = 1
201 seen[n] = 1
201 pc = 0
202 pc = 0
202 if n in parents:
203 if n in parents:
203 for p in parents[n]:
204 for p in parents[n]:
204 if p not in self.map: pc += 1
205 if p not in self.map: pc += 1
205 visit.append(p)
206 visit.append(p)
206 children.setdefault(p, []).append(n)
207 children.setdefault(p, []).append(n)
207 if not pc: root = n
208 if not pc: root = n
208
209
209 s = []
210 s = []
210 removed = {}
211 removed = {}
211 visit = children.keys()
212 visit = children.keys()
212 while visit:
213 while visit:
213 n = visit.pop(0)
214 n = visit.pop(0)
214 if n in removed: continue
215 if n in removed: continue
215 dep = 0
216 dep = 0
216 if n in parents:
217 if n in parents:
217 for p in parents[n]:
218 for p in parents[n]:
218 if p in self.map: continue
219 if p in self.map: continue
219 if p not in removed:
220 if p not in removed:
220 # we're still dependent
221 # we're still dependent
221 visit.append(n)
222 visit.append(n)
222 dep = 1
223 dep = 1
223 break
224 break
224
225
225 if not dep:
226 if not dep:
226 # all n's parents are in the list
227 # all n's parents are in the list
227 removed[n] = 1
228 removed[n] = 1
228 s.append(n)
229 s.append(n)
229 if n in children:
230 if n in children:
230 for c in children[n]:
231 for c in children[n]:
231 visit.insert(0, c)
232 visit.insert(0, c)
232
233
233 return s
234 return s
234
235
235 def copy(self, rev):
236 def copy(self, rev):
236 p, a, d, t = self.commitcache[rev]
237 p, a, d, t = self.commitcache[rev]
237 files = self.source.getchanges(rev)
238 files = self.source.getchanges(rev)
238
239
239 for f,v,e in files:
240 for f,v,e in files:
240 try:
241 try:
241 data = self.source.getfile(f, v)
242 data = self.source.getfile(f, v)
242 except IOError, inst:
243 except IOError, inst:
243 self.dest.delfile(f)
244 self.dest.delfile(f)
244 else:
245 else:
245 self.dest.putfile(f, e, data)
246 self.dest.putfile(f, e, data)
246
247
247 r = [self.map[v] for v in p]
248 r = [self.map[v] for v in p]
248 f = [f for f,v,e in files]
249 f = [f for f,v,e in files]
249 self.map[rev] = self.dest.putcommit(f, r, a, d, t)
250 self.map[rev] = self.dest.putcommit(f, r, a, d, t)
250 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
251 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
251
252
252 def convert(self):
253 def convert(self):
253 heads = self.source.getheads()
254 heads = self.source.getheads()
254 parents = self.walktree(heads)
255 parents = self.walktree(heads)
255 t = self.toposort(parents)
256 t = self.toposort(parents)
256 num = len(t)
257 num = len(t)
257
258
258 for c in t:
259 for c in t:
259 num -= 1
260 num -= 1
260 if c in self.map: continue
261 if c in self.map: continue
261 desc = self.commitcache[c][3].splitlines()[0]
262 desc = self.commitcache[c][3].splitlines()[0]
262 #print num, desc
263 #print num, desc
263 self.copy(c)
264 self.copy(c)
264
265
265 tags = self.source.gettags()
266 tags = self.source.gettags()
266 ctags = {}
267 ctags = {}
267 for k in tags:
268 for k in tags:
268 v = tags[k]
269 v = tags[k]
269 if v in self.map:
270 if v in self.map:
270 ctags[k] = self.map[v]
271 ctags[k] = self.map[v]
271
272
272 self.dest.puttags(ctags)
273 if ctags:
274 nrev = self.dest.puttags(ctags)
275 # write another hash correspondence to override the previous
276 # one so we don't end up with extra tag heads
277 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
273
278
274 gitpath, hgpath, mapfile = sys.argv[1:]
279 gitpath, hgpath, mapfile = sys.argv[1:]
275 if os.path.isdir(gitpath + "/.git"):
280 if os.path.isdir(gitpath + "/.git"):
276 gitpath += "/.git"
281 gitpath += "/.git"
277
282
278 c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
283 c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
279 c.convert()
284 c.convert()
General Comments 0
You need to be logged in to leave comments. Login now