##// END OF EJS Templates
convert-repo: change duplicate elimination
Matt Mackall -
r1388:5eb2d3c5 default
parent child Browse files
Show More
@@ -1,284 +1,284 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 #
2 #
3 # This is a generalized framework for converting between SCM
3 # This is a generalized framework for converting between SCM
4 # repository formats.
4 # repository formats.
5 #
5 #
6 # In its current form, it's hardcoded to convert incrementally between
6 # In its current form, it's hardcoded to convert incrementally between
7 # git and Mercurial.
7 # git and Mercurial.
8 #
8 #
9 # To use, you must first import the first git version into Mercurial,
9 # To use, you must first import the first git version into Mercurial,
10 # and establish a mapping between the git commit hash and the hash in
10 # and establish a mapping between the git commit hash and the hash in
11 # Mercurial for that version. This mapping is kept in a simple text
11 # Mercurial for that version. This mapping is kept in a simple text
12 # file with lines like so:
12 # file with lines like so:
13 #
13 #
14 # <git hash> <mercurial hash>
14 # <git hash> <mercurial hash>
15 #
15 #
16 # To convert the rest of the repo, run:
16 # To convert the rest of the repo, run:
17 #
17 #
18 # convert-repo <git-dir> <hg-dir> <mapfile>
18 # convert-repo <git-dir> <hg-dir> <mapfile>
19 #
19 #
20 # This updates the mapfile on each commit copied, so it can be
20 # This updates the mapfile on each commit copied, so it can be
21 # interrupted and can be run repeatedly to copy new commits.
21 # interrupted and can be run repeatedly to copy new commits.
22
22
23 import sys, os, zlib, sha, time
23 import sys, os, zlib, sha, time
24 from mercurial import hg, ui, util
24 from mercurial import hg, ui, util
25
25
class convert_git:
    """Conversion source that reads commits, files and tags from a git repository.

    Objects are read by running the ``git-cat-file`` and ``git-diff-tree``
    command line tools with GIT_DIR set to ``path``.
    """

    def __init__(self, path):
        # Used as GIT_DIR and as the directory holding HEAD and refs/tags.
        self.path = path

    def getheads(self):
        """Return a one-element list with the contents of the HEAD file,
        minus its final character (the trailing newline)."""
        fh = open(self.path + "/HEAD")
        try:
            return [fh.read()[:-1]]
        finally:
            fh.close()

    def catfile(self, rev, type):
        """Return the output of ``git-cat-file <type> <rev>``.

        Raises IOError for the all-zero hash.  Errors reported by git itself
        are discarded (stderr goes to /dev/null), so a missing object yields
        an empty string rather than an exception.
        """
        if rev == "0" * 40:
            raise IOError()
        # NOTE(review): path/type/rev are interpolated into a shell command
        # unquoted; paths containing spaces or shell metacharacters will break.
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
        try:
            return fh.read()
        finally:
            fh.close()

    def getfile(self, name, rev):
        """Return the blob content for hash ``rev``; ``name`` is not used."""
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        """Return a list of (filename, hash, is_executable) for ``version``.

        Parsed from ``git-diff-tree --root -m -r`` output: only lines that
        contain a tab are considered; the text after the tab is the filename,
        the fourth whitespace-separated field before it is the hash and the
        second field is compared against "100755" for the executable flag.
        """
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
        changes = []
        try:
            for line in fh:
                if "\t" not in line:
                    continue
                # split only on the first tab so the unpack cannot fail
                meta, fname = line[:-1].split("\t", 1)
                fields = meta.split()
                changes.append((fname, fields[3], fields[1] == "100755"))
        finally:
            fh.close()
        return changes

    def getcommit(self, version):
        """Parse commit ``version`` and return (parents, author, date, message).

        ``date`` is "<timestamp> <offset>" where the offset is the commit's
        +HHMM/-HHMM timezone converted to seconds.  When a committer line is
        present its timestamp replaces the author's, and the raw committer
        line is appended to the message.

        Raises IOError when the commit cannot be read or carries no
        author/committer line.
        """
        raw = self.catfile(version, "commit")
        if not raw:
            raise IOError("cannot read commit %s" % version)

        split = raw.find("\n\n")
        if split == -1:
            # header without a message body
            split = len(raw)
        message = raw[split + 2:]
        header = raw[:split].splitlines()

        parents = []
        author = tm = tz = None
        # header[0] is the "tree" line, which the conversion does not need
        for line in header[1:]:
            if " " not in line:
                continue
            key, value = line.split(" ", 1)
            if key == "author":
                fields = value.split()
                tm, tz = fields[-2:]
                author = " ".join(fields[:-2])
                # a bare "<address>" loses its angle brackets
                if author[:1] == "<":
                    author = author[1:-1]
            if key == "committer":
                fields = value.split()
                tm, tz = fields[-2:]
                message += "\ncommitter: %s\n" % value
            if key == "parent":
                parents.append(value)

        if tm is None:
            raise IOError("commit %s has no author or committer line" % version)

        # tz looks like "+0530": sign, two hour digits, two minute digits
        sign, hours, minutes = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
        # minutes must be scaled to seconds as well as hours
        # NOTE(review): the sign is passed through exactly as git writes it;
        # confirm this matches the offset convention the destination expects.
        offset = int(sign) * (int(hours) * 3600 + int(minutes) * 60)
        date = tm + " " + str(offset)
        return (parents, author, date, message)

    def gettags(self):
        """Return a dict mapping each name under refs/tags to the hash on
        the first line of its tag object.

        Entries that cannot be read or parsed (for example when
        ``git-cat-file tag`` produces no output) are skipped.
        """
        tags = {}
        for name in os.listdir(self.path + "/refs/tags"):
            try:
                fh = open(self.path + "/refs/tags/" + name)
                try:
                    ref = fh.read().strip()
                finally:
                    fh.close()
                tagobj = self.catfile(ref, "tag")
                # second word of the tag object's first line
                tags[name] = tagobj.splitlines()[0].split()[1]
            except (IOError, OSError, IndexError):
                # best effort: ignore anything that is not a readable tag object
                pass
        return tags
91
91
class convert_mercurial:
    """Conversion destination that writes files and commits into the
    Mercurial repository at ``path``."""

    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the hex form of every changelog head."""
        return [hg.hex(node) for node in self.repo.changelog.heads()]

    def putfile(self, f, e, data):
        """Write ``data`` to working file ``f``, mark it added if the
        dirstate reports it as '?', and set its exec flag to ``e``."""
        fh = self.repo.wfile(f, "w")
        try:
            fh.write(data)
        finally:
            fh.close()
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Unlink working file ``f``; a file that cannot be unlinked is ignored."""
        try:
            os.unlink(self.repo.wjoin(f))
        except OSError:
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit ``files`` with the given parents and return the hex of the
        resulting changelog tip.

        ``dest`` is handed to rawcommit in the date position (callers in this
        file pass the commit's date string).  Duplicate parents are dropped,
        the list is padded with the null hash up to two entries, and more
        than two parents are committed as a chain of two-parent merges.
        """
        seen = {}
        unique = []
        for p in parents:
            if p not in seen:
                unique.append(p)
                seen[p] = 1

        # rawcommit always gets two parents; missing ones are the null hash
        while len(unique) < 2:
            unique.append("0" * 40)
        p2 = unique.pop(0)

        while unique:
            p1 = p2
            p2 = unique.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            text = "(octopus merge fixup)\n"
            # any further parent is merged onto the commit just created,
            # not onto the parent that was already merged
            p2 = hg.hex(self.repo.changelog.tip())

        return hg.hex(self.repo.changelog.tip())

    def puttags(self, tags):
        """Rewrite .hgtags from ``tags`` (name -> hash) and commit it.

        Returns the hex of the new tip when .hgtags had to change, and None
        when the file already holds exactly these tags.
        """
        try:
            fh = self.repo.wfile(".hgtags")
            try:
                old = fh.read()
            finally:
                fh.close()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except Exception:
            # an unreadable .hgtags is treated as "no tags yet"
            oldlines = []

        newlines = ["%s %s\n" % (tags[name], name) for name in tags]
        newlines.sort()

        if newlines == oldlines:
            return None

        f = self.repo.wfile(".hgtags", "w")
        try:
            f.write("".join(newlines))
        finally:
            f.close()
        if not oldlines:
            self.repo.add([".hgtags"])
        # time.time() is already seconds since the epoch; going through
        # mktime(gmtime()) would shift it by the local UTC offset
        date = "%s 0" % int(time.time())
        self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                            date, self.repo.changelog.tip(), hg.nullid)
        return hg.hex(self.repo.changelog.tip())
164
164
class convert:
    """Drive an incremental conversion from ``source`` to ``dest``.

    ``mapfile`` is a text file of "<source hash> <dest hash>" lines.  It is
    loaded on construction and appended to after every converted commit, so
    an interrupted run can be resumed.
    """

    def __init__(self, source, dest, mapfile):
        self.source = source
        self.dest = dest
        self.mapfile = mapfile
        self.commitcache = {}

        self.map = {}
        fh = open(self.mapfile)
        try:
            for line in fh:
                fields = line.split()
                if not fields:
                    continue
                # later lines override earlier ones for the same source hash
                sv, dv = fields
                self.map[sv] = dv
        finally:
            fh.close()

    def _record(self, src, dst):
        """Append one "<src> <dst>" correspondence to the map file."""
        fh = open(self.mapfile, "a")
        try:
            fh.write("%s %s\n" % (src, dst))
        finally:
            fh.close()

    def walktree(self, heads):
        """Walk back from ``heads`` and return {commit: [parents]} for every
        commit reached that is not in the map yet.

        Each visited commit is also stored in ``self.commitcache``.  Commits
        without parents get no entry in the result.
        """
        visit = list(heads)  # work on a copy; the caller's list is left alone
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            for p in self.commitcache[n][0]:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        """Return the commits of ``parents`` ordered so that every commit
        comes after all of its parents that are not already in the map.

        The result can include commits that are already mapped; callers
        filter those out.
        """
        # first pass: invert the parent graph into a child graph
        visit = list(parents.keys())
        seen = {}
        children = {}
        while visit:
            n = visit.pop(0)
            if n in seen:
                continue
            seen[n] = 1
            for p in parents.get(n, []):
                visit.append(p)
                children.setdefault(p, []).append(n)

        # second pass: emit a commit once none of its parents is pending
        ordered = []
        removed = {}
        visit = list(children.keys())
        while visit:
            n = visit.pop(0)
            if n in removed:
                continue
            blocked = False
            for p in parents.get(n, []):
                if p in self.map:
                    continue
                if p not in removed:
                    # still waiting on a parent: retry after the rest
                    visit.append(n)
                    blocked = True
                    break

            if not blocked:
                removed[n] = 1
                ordered.append(n)
                # children become candidates right away
                for c in children.get(n, []):
                    visit.insert(0, c)

        return ordered

    def copy(self, rev):
        """Convert the cached commit ``rev``: replay its file changes into the
        destination, commit them, and record the new hash in the map."""
        p, a, d, t = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f, v, e in files:
            try:
                data = self.source.getfile(f, v)
            except IOError:
                # the source has no content for this hash: treat as a deletion
                self.dest.delfile(f)
            else:
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in p]
        names = [f for f, v, e in files]
        self.map[rev] = self.dest.putcommit(names, r, a, d, t)
        self._record(rev, self.map[rev])

    def convert(self):
        """Convert every commit reachable from the source heads that is not
        mapped yet, then bring the destination's tags up to date."""
        heads = self.source.getheads()
        parents = self.walktree(heads)
        t = self.toposort(parents)
        t = [n for n in t if n not in self.map]

        last = None
        for c in t:
            self.copy(c)
            last = c

        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            if v in self.map:
                ctags[k] = self.map[v]

        if ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads; skipped when no
            # tag commit was made or nothing was converted in this run
            if nrev and last is not None:
                self._record(last, nrev)
278
278
def main(args):
    """Run a git -> Mercurial conversion for ``args``:
    [git-dir, hg-dir, mapfile]."""
    if len(args) != 3:
        sys.stderr.write("usage: convert-repo <git-dir> <hg-dir> <mapfile>\n")
        sys.exit(1)

    gitpath, hgpath, mapfile = args
    # accept a working tree as well as a bare GIT_DIR
    if os.path.isdir(gitpath + "/.git"):
        gitpath += "/.git"

    c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
    c.convert()

if __name__ == "__main__":
    main(sys.argv[1:])
General Comments 0
You need to be logged in to leave comments. Login now