##// END OF EJS Templates
convert-repo: change duplicate elimination
Matt Mackall -
r1388:5eb2d3c5 default
parent child Browse files
Show More
@@ -1,284 +1,284 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # This is a generalized framework for converting between SCM
4 4 # repository formats.
5 5 #
6 6 # In its current form, it's hardcoded to convert incrementally between
7 7 # git and Mercurial.
8 8 #
9 9 # To use, you must first import the first git version into Mercurial,
10 10 # and establish a mapping between the git commit hash and the hash in
11 11 # Mercurial for that version. This mapping is kept in a simple text
12 12 # file with lines like so:
13 13 #
14 14 # <git hash> <mercurial hash>
15 15 #
16 16 # To convert the rest of the repo, run:
17 17 #
18 18 # convert-repo <git-dir> <hg-dir> <mapfile>
19 19 #
20 20 # This updates the mapfile on each commit copied, so it can be
21 21 # interrupted and can be run repeatedly to copy new commits.
22 22
23 23 import sys, os, zlib, sha, time
24 24 from mercurial import hg, ui, util
25 25
class convert_git:
    """Conversion source that reads commits, files and tags from git.

    Objects are read by running the ``git-cat-file`` and ``git-diff-tree``
    command-line tools with ``GIT_DIR`` set to ``path``.
    """

    def __init__(self, path):
        # directory handed to the git tools as GIT_DIR
        self.path = path

    def getheads(self):
        """Return a one-element list holding the contents of ``HEAD``."""
        fh = open(self.path + "/HEAD")
        try:
            # drop the last character (expected to be the trailing newline)
            return [fh.read()[:-1]]
        finally:
            fh.close()

    def catfile(self, rev, type):
        """Return the ``git-cat-file`` output for object ``rev`` of ``type``.

        Raises IOError for the all-zero hash; convert.copy() treats that
        IOError from getfile() as "delete this file".
        """
        if rev == "0" * 40: raise IOError()
        # NOTE(review): path and rev are interpolated into a shell command
        # without quoting; a path containing spaces or shell metacharacters
        # will not work as intended.
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
        try:
            return fh.read()
        finally:
            fh.close()

    def getfile(self, name, rev):
        """Return the blob ``rev``; ``name`` is accepted but not used."""
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        """Return ``(filename, hash, is_exec)`` for each file changed by ``version``.

        Parsed from ``git-diff-tree --root -m -r`` output: only lines that
        contain a tab are considered; ``hash`` is the fourth
        whitespace-separated field before the tab and ``is_exec`` is true
        when the second field is ``100755``.
        """
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
        try:
            changes = []
            for l in fh:
                if "\t" not in l: continue
                # split on the first tab only so the unpack cannot fail
                m, f = l[:-1].split("\t", 1)
                m = m.split()
                changes.append((f, m[3], m[1] == "100755"))
            return changes
        finally:
            fh.close()

    def getcommit(self, version):
        """Parse commit ``version`` into ``(parents, author, date, message)``.

        ``parents`` lists the values of the ``parent`` header lines,
        ``author`` is the author line without its trailing time and zone,
        ``date`` is ``"<time> <offset-in-seconds>"`` and ``message`` is the
        commit text with a ``committer:`` line appended.  When both author
        and committer lines are present, the time and zone of whichever
        comes last in the header are used.

        Raises IOError when the commit object cannot be read (empty output).
        """
        c = self.catfile(version, "commit")
        if not c:
            raise IOError("cannot read commit %s" % version)

        end = c.find("\n\n")
        if end < 0:
            # no blank separator line: the whole object is header
            header, message = c, ""
        else:
            header, message = c[:end], c[end + 2:]

        parents = []
        # the first header line is skipped, exactly as before
        for e in header.splitlines()[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                # a name that is only "<...>" loses its angle brackets
                if author.startswith("<"): author = author[1:-1]
            elif n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                message += "\ncommitter: %s\n" % v
            elif n == "parent":
                parents.append(v)

        # tz ends in "+HHMM" / "-HHMM": convert it to a signed second count.
        # Minutes are scaled by 60 (they used to be added as raw seconds).
        # NOTE(review): whether the destination expects this sign or its
        # negation is not visible in this file -- confirm.
        sign = int(tz[-5:-4] + "1")
        offset = sign * (int(tz[-4:-2]) * 3600 + int(tz[-2:]) * 60)
        date = tm + " " + str(offset)
        return (parents, author, date, message)

    def gettags(self):
        """Return ``{tag name: hash}`` for the entries of ``refs/tags``.

        Each ref file is read, the hash it names is fetched as a ``tag``
        object, and the second word of that object's first line is taken as
        the tagged hash.  Refs that cannot be resolved this way are skipped.
        """
        tagdir = self.path + "/refs/tags"
        tags = {}
        for f in os.listdir(tagdir):
            try:
                fh = open(tagdir + "/" + f)
                try:
                    h = fh.read().strip()
                finally:
                    fh.close()
                c = self.catfile(h, "tag")
                tags[f] = c.splitlines()[0].split()[1]
            except (IOError, OSError, IndexError):
                # best effort: unreadable ref, or no tag object behind it
                pass
        return tags
91 91
class convert_mercurial:
    """Conversion destination backed by a Mercurial repository.

    Files are written into the repository's working directory and commits
    are created with ``repo.rawcommit``.
    """

    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the hex form of every changelog head."""
        return [hg.hex(x) for x in self.repo.changelog.heads()]

    def putfile(self, f, e, data):
        """Write ``data`` to working file ``f`` and set its exec bit to ``e``.

        A file the dirstate reports as '?' is marked as added.
        """
        fh = self.repo.wfile(f, "w")
        try:
            fh.write(data)
        finally:
            # close before touching the mode so the data is on disk first
            fh.close()
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Remove working file ``f``; a file that cannot be removed is ignored."""
        try:
            os.unlink(self.repo.wjoin(f))
        except OSError:
            # best effort: typically the file is already gone
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit ``files`` and return the hex hash of the resulting tip.

        ``parents`` are hex hashes; duplicates are dropped (first occurrence
        wins) and the list is padded with the all-zero hash up to two
        entries.  With more than two parents a chain of commits is made:
        the first carries ``text``, every later one carries the fixed
        "(octopus merge fixup)" message and merges the next parent into the
        commit created just before it.  ``dest`` is passed to rawcommit in
        the date position.
        """
        pl = []
        for p in parents:
            if p not in pl:
                pl.append(p)

        while len(pl) < 2:
            pl.append("0" * 40)

        p2 = pl.pop(0)
        while pl:
            p1 = p2
            p2 = pl.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            text = "(octopus merge fixup)\n"
            # chain any further fixup onto the commit just made; reusing the
            # source parent here would leave that commit as a stray head
            p2 = hg.hex(self.repo.changelog.tip())

        return hg.hex(self.repo.changelog.tip())

    def puttags(self, tags):
        """Rewrite ``.hgtags`` from ``tags`` (``{name: hash}``) and commit it.

        Returns the hex hash of the new tip when the file had to change and
        None when the existing content already matched.
        """
        try:
            fh = self.repo.wfile(".hgtags")
            try:
                oldlines = sorted(fh.read().splitlines(True))
            finally:
                fh.close()
        except (IOError, OSError):
            # no readable .hgtags yet
            oldlines = []

        newlines = sorted(["%s %s\n" % (tags[tag], tag) for tag in tags])

        if newlines == oldlines:
            return None

        f = self.repo.wfile(".hgtags", "w")
        try:
            f.write("".join(newlines))
        finally:
            f.close()
        if not oldlines: self.repo.add([".hgtags"])
        # seconds since the epoch with a zero offset; time.time() is already
        # UTC-based, whereas mktime(gmtime()) is skewed by the local offset
        date = "%s 0" % int(time.time())
        self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                            date, self.repo.changelog.tip(), hg.nullid)
        return hg.hex(self.repo.changelog.tip())
164 164
class convert:
    """Copy not-yet-converted revisions from ``source`` to ``dest``.

    ``mapfile`` is a text file of ``<source hash> <dest hash>`` lines.  It is
    loaded on construction and appended to after every copied revision; when
    a source hash occurs more than once the last line wins.
    """

    def __init__(self, source, dest, mapfile):
        self.source = source
        self.dest = dest
        self.mapfile = mapfile
        # source rev -> tuple returned by source.getcommit()
        self.commitcache = {}

        # source rev -> dest rev
        self.map = {}
        fh = open(self.mapfile)
        try:
            for l in fh:
                # split() ignores the line ending, so a final line without a
                # newline no longer loses its last character
                fields = l.split()
                if not fields: continue
                sv, dv = fields
                self.map[sv] = dv
        finally:
            fh.close()

    def _recordmap(self, rev, destrev):
        """Append one ``rev destrev`` line to the mapfile."""
        fh = open(self.mapfile, "a")
        try:
            fh.write("%s %s\n" % (rev, destrev))
        finally:
            fh.close()

    def walktree(self, heads):
        """Return ``{rev: [parent, ...]}`` for unconverted revs reachable from ``heads``.

        The walk stops at revisions already present in the map.  Every
        visited commit is stored in ``self.commitcache``.  Revisions without
        parents get no entry in the result.
        """
        # work on a copy so the caller's list is left intact
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map: continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            for p in self.commitcache[n][0]:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        """Return the revisions of ``parents`` ordered parents-before-children.

        The result can also contain revisions that are already in the map
        (they show up as parents of unconverted ones); callers filter them.
        """
        # first pass: invert the parent relation into a children table
        visit = list(parents)
        seen = {}
        children = {}

        while visit:
            n = visit.pop(0)
            if n in seen: continue
            seen[n] = 1
            if n in parents:
                for p in parents[n]:
                    visit.append(p)
                    children.setdefault(p, []).append(n)

        # second pass: emit a node once all of its unconverted parents have
        # been emitted; otherwise push it to the back and retry later
        s = []
        removed = {}
        visit = list(children)
        while visit:
            n = visit.pop(0)
            if n in removed: continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map: continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        return s

    def copy(self, rev):
        """Copy revision ``rev`` to the destination and record the mapping.

        ``rev`` must be in ``self.commitcache`` and all of its parents must
        already be in ``self.map``.  A file whose content lookup raises
        IOError is deleted in the destination.
        """
        p, a, d, t = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f, v, e in files:
            try:
                data = self.source.getfile(f, v)
            except IOError:
                self.dest.delfile(f)
            else:
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in p]
        names = [f for f, v, e in files]
        self.map[rev] = self.dest.putcommit(names, r, a, d, t)
        self._recordmap(rev, self.map[rev])

    def convert(self):
        """Convert every unconverted revision, then bring the tags up to date."""
        heads = self.source.getheads()
        parents = self.walktree(heads)
        t = self.toposort(parents)
        t = [n for n in t if n not in self.map]

        last = None
        for c in t:
            self.copy(c)
            last = c

        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            if v in self.map:
                ctags[k] = self.map[v]

        if ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads -- but only when a
            # tag commit was actually made and a revision was converted in
            # this run to attach it to
            if nrev is not None and last is not None:
                self._recordmap(last, nrev)
278 278
# Command-line entry point: convert-repo <git-dir> <hg-dir> <mapfile>
gitpath, hgpath, mapfile = sys.argv[1:]

# accept the top of a git checkout as well as the metadata directory itself
dotgit = gitpath + "/.git"
if os.path.isdir(dotgit):
    gitpath = dotgit

c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
c.convert()
General Comments 0
You need to be logged in to leave comments. Login now