##// END OF EJS Templates
convert-repo: fix recoding of committer
Matt Mackall -
r3910:4bc5a240 default
parent child Browse files
Show More
@@ -1,305 +1,305 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # This is a generalized framework for converting between SCM
4 4 # repository formats.
5 5 #
6 6 # In its current form, it's hardcoded to convert incrementally between
7 7 # git and Mercurial.
8 8 #
9 9 # To use, you must first import the first git version into Mercurial,
10 10 # and establish a mapping between the git commit hash and the hash in
11 11 # Mercurial for that version. This mapping is kept in a simple text
12 12 # file with lines like so:
13 13 #
14 14 # <git hash> <mercurial hash>
15 15 #
16 16 # To convert the rest of the repo, run:
17 17 #
18 18 # convert-repo <git-dir> <hg-dir> <mapfile>
19 19 #
20 20 # This updates the mapfile on each commit copied, so it can be
21 21 # interrupted and can be run repeatedly to copy new commits.
22 22
23 23 import sys, os, zlib, sha, time
24 24
25 25 os.environ["HGENCODING"] = "utf-8"
26 26
27 27 from mercurial import hg, ui, util
28 28
def recode(s):
    """Return the byte string s re-encoded as UTF-8.

    Input that already decodes as UTF-8 comes back with the same content;
    anything else is treated as Latin-1 and transcoded to UTF-8.
    """
    try:
        return s.decode("utf-8").encode("utf-8")
    except UnicodeError:
        # Latin-1 assigns a code point to every byte value, so this decode
        # cannot fail for byte strings and no further fallback is needed.
        return s.decode("latin-1").encode("utf-8")
37 37
class convert_git:
    """Conversion source that reads a git repository by shelling out to
    the git command-line tools with GIT_DIR set to the given path."""

    def __init__(self, path):
        # path is interpolated into every "GIT_DIR=..." command line and is
        # also used directly to list <path>/refs/tags.
        self.path = path

    def getheads(self):
        """Return a one-element list with the output of
        git-rev-parse --verify HEAD, minus its final character."""
        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        # [:-1] drops the trailing newline
        return [fh.read()[:-1]]

    def catfile(self, rev, type):
        """Return the output of git-cat-file for object rev of the given type.

        Raises IOError for the all-zero hash; convert.copy relies on that
        to detect files it has to delete.  stderr of the command is
        discarded, so other failures show up as empty output.
        """
        if rev == "0" * 40:
            raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
        return fh.read()

    def getfile(self, name, rev):
        """Return the blob with hash rev; name is accepted but not used."""
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        """Return a list of (filename, hash, is_executable) tuples parsed
        from git-diff-tree --root -m -r for the given commit."""
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
        changes = []
        for l in fh:
            # lines without a tab carry no filename (e.g. the commit id line)
            if "\t" not in l:
                continue
            m, f = l[:-1].split("\t")
            m = m.split()
            h = m[3]
            p = (m[1] == "100755")
            changes.append((f, h, p))
        return changes

    def _splitident(self, v):
        """Split an author/committer header value into (name, time, tz).

        The last two whitespace-separated fields are the timestamp and the
        timezone; whatever precedes them is the identity.  An identity that
        starts with "<" has its first and last character removed.
        """
        p = v.split()
        tm, tz = p[-2:]
        name = " ".join(p[:-2])
        # startswith() instead of name[0] so an empty identity does not
        # raise IndexError
        if name.startswith("<"):
            name = name[1:-1]
        return recode(name), tm, tz

    def _tzoffset(tz):
        """Convert a git "+HHMM"/"-HHMM" zone into an offset in seconds
        with the sign inverted (zones east of UTC give negative values)."""
        sign = int(tz[-5:-4] + "1")
        hours = int(tz[-4:-2])
        minutes = int(tz[-2:])
        # minutes have to be scaled to seconds just like the hours are
        return -sign * (hours * 3600 + minutes * 60)
    _tzoffset = staticmethod(_tzoffset)

    def getcommit(self, version):
        """Parse commit object version.

        Returns (parents, author, date, message):
          parents - values of the "parent" headers, in order
          author  - recoded identity from the "author" header
          date    - "<timestamp> <offset>" built from the last author or
                    committer header seen
          message - recoded commit message with a "committer: <name>" line
                    appended for each committer header
        """
        c = self.catfile(version, "commit")
        end = c.find("\n\n")
        message = recode(c[end + 2:])
        headers = c[:end].splitlines()
        parents = []
        # the first header line is skipped, as it always has been
        for e in headers[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                author, tm, tz = self._splitident(v)
            elif n == "committer":
                committer, tm, tz = self._splitident(v)
                message += "\ncommitter: %s\n" % committer
            elif n == "parent":
                parents.append(v)

        date = tm + " " + str(self._tzoffset(tz))
        return (parents, author, date, message)

    def gettags(self):
        """Return {tag name: hash} for the entries of refs/tags.

        The hash is the second word of the first line of the tag object
        each ref points at.  Refs that cannot be read or that do not
        resolve to a tag object are skipped.
        """
        tags = {}
        for f in os.listdir(self.path + "/refs/tags"):
            try:
                fh = open(self.path + "/refs/tags/" + f)
                try:
                    h = fh.read().strip()
                finally:
                    fh.close()
                c = self.catfile(h, "tag")
                # empty output (no such tag object) raises IndexError here
                h = c.splitlines()[0].split()[1]
                tags[f] = h
            except (IOError, OSError, IndexError):
                pass
        return tags
95 95
96 96 def gettags(self):
97 97 tags = {}
98 98 for f in os.listdir(self.path + "/refs/tags"):
99 99 try:
100 100 h = file(self.path + "/refs/tags/" + f).read().strip()
101 101 c = self.catfile(h, "tag") # read the commit hash
102 102 h = c.splitlines()[0].split()[1]
103 103 tags[f] = h
104 104 except:
105 105 pass
106 106 return tags
107 107
class convert_mercurial:
    """Conversion sink that writes files, commits and tags into the
    Mercurial repository opened at path."""

    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the changelog heads as a list of hex strings."""
        return [hg.hex(x) for x in self.repo.changelog.heads()]

    def putfile(self, f, e, data):
        """Write data to working file f, then apply exec flag e to it."""
        fh = self.repo.wfile(f, "w")
        try:
            fh.write(data)
        finally:
            # close before set_exec touches the same path
            fh.close()
        # NOTE(review): '?' presumably means the dirstate does not know the
        # file yet, and "a" marks it as added -- confirm against dirstate.
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Remove working file f; a file that cannot be removed is ignored.

        Only the working copy is touched here, no repository call is made.
        """
        try:
            os.unlink(self.repo.wjoin(f))
        except OSError:
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit files on top of parents and return the resulting hex id.

        Duplicate parents are dropped and the list is padded with the
        all-zero hash up to two entries.  With more than two parents a
        chain of commits is created, each one merging the previous result
        with the next parent.  dest is handed to rawcommit unchanged as its
        fourth argument (convert.copy passes the commit date there).
        """
        seen = {}
        unique = []
        for p in parents:
            if p not in seen:
                unique.append(p)
                seen[p] = 1
        parents = unique

        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            # any further round only folds in one more parent
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from the {tag: hash} dict and commit it.

        Returns the hex id of the new tip when .hgtags had to change.
        When the file already has the wanted content nothing is written
        and the method returns None.
        """
        try:
            fh = self.repo.wfile(".hgtags")
            try:
                old = fh.read()
            finally:
                fh.close()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except (IOError, OSError):
            # no readable .hgtags yet
            oldlines = []

        newlines = ["%s %s\n" % (tags[tag], tag) for tag in tags]
        newlines.sort()

        if newlines != oldlines:
            f = self.repo.wfile(".hgtags", "w")
            f.write("".join(newlines))
            f.close()
            if not oldlines:
                self.repo.add([".hgtags"])
            # time.time() is already seconds since the epoch in UTC;
            # mktime(gmtime()) would misread the UTC fields as local time.
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())
181 181
class convert:
    """Copy the commits of source that are not yet listed in the mapfile
    into dest, in an order where parents come before their children."""

    def __init__(self, source, dest, mapfile):
        self.source = source
        self.dest = dest
        self.mapfile = mapfile
        # source revision -> tuple returned by source.getcommit()
        self.commitcache = {}

        # source revision -> destination revision; later lines of the
        # mapfile override earlier ones for the same source revision
        self.map = {}
        try:
            fh = open(self.mapfile)
            try:
                for l in fh:
                    fields = l.split()
                    if not fields:
                        # tolerate blank lines
                        continue
                    sv, dv = fields
                    self.map[sv] = dv
            finally:
                fh.close()
        except IOError:
            # a missing or unreadable mapfile means nothing is mapped yet
            pass

    def _record(self, rev, destrev):
        """Append one "<source rev> <dest rev>" line to the mapfile."""
        fh = open(self.mapfile, "a")
        try:
            fh.write("%s %s\n" % (rev, destrev))
        finally:
            fh.close()

    def walktree(self, heads):
        """Walk back from heads over all revisions that are not mapped yet.

        Every visited commit is stored in self.commitcache.  Returns a
        dict {revision: [parents]}; a revision without parents gets no key.
        """
        # work on a copy so the caller's list is left alone
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            for p in self.commitcache[n][0]:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        """Return the revisions reachable through parents, ordered so that
        every revision comes after all of its unmapped parents.

        The result starts from revisions that appear as somebody's parent,
        so it can contain already mapped revisions; convert() filters
        those out.
        """
        visit = list(parents)
        seen = {}
        children = {}

        # first pass: invert the parent relation
        while visit:
            n = visit.pop(0)
            if n in seen:
                continue
            seen[n] = 1
            for p in parents.get(n, []):
                visit.append(p)
                children.setdefault(p, []).append(n)

        # second pass: emit a revision once none of its unmapped parents
        # is still waiting
        s = []
        removed = {}
        visit = list(children)
        while visit:
            n = visit.pop(0)
            if n in removed:
                continue
            dep = 0
            for p in parents.get(n, []):
                if p in self.map:
                    continue
                if p not in removed:
                    # we're still dependent, retry n later
                    visit.append(n)
                    dep = 1
                    break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                s.append(n)
                for c in children.get(n, []):
                    visit.insert(0, c)

        return s

    def copy(self, rev):
        """Replay revision rev into dest and record the new mapping."""
        p, a, d, t = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f, v, e in files:
            try:
                data = self.source.getfile(f, v)
            except IOError:
                # the source has no content for this file: remove it
                self.dest.delfile(f)
            else:
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in p]
        names = [f for f, v, e in files]
        self.map[rev] = self.dest.putcommit(names, r, a, d, t)
        self._record(rev, self.map[rev])

    def convert(self):
        """Convert all unmapped revisions, then bring the tags up to date."""
        heads = self.source.getheads()
        parents = self.walktree(heads)
        t = self.toposort(parents)
        t = [n for n in t if n not in self.map]
        c = None

        for c in t:
            self.copy(c)

        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            if v in self.map:
                ctags[k] = self.map[v]

        # c is the last revision copied in this run, or None if there was
        # nothing to copy
        if c and ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads; skipped when
            # puttags returned nothing, so no "None" entry gets recorded
            if nrev:
                self._record(c, nrev)
299 299
# Command-line entry point: convert-repo <git-dir> <hg-dir> <mapfile>
if len(sys.argv) != 4:
    # say what is expected instead of failing on the unpacking below
    sys.stderr.write("usage: convert-repo <git-dir> <hg-dir> <mapfile>\n")
    sys.exit(1)

gitpath, hgpath, mapfile = sys.argv[1:]
# accept a directory that contains .git as well as the git directory itself
if os.path.isdir(gitpath + "/.git"):
    gitpath += "/.git"

c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
c.convert()
General Comments 0
You need to be logged in to leave comments. Login now