##// END OF EJS Templates
convert-repo: fix recoding of committer
Matt Mackall -
r3910:4bc5a240 default
parent child Browse files
Show More
@@ -1,305 +1,305 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 #
2 #
3 # This is a generalized framework for converting between SCM
3 # This is a generalized framework for converting between SCM
4 # repository formats.
4 # repository formats.
5 #
5 #
6 # In its current form, it's hardcoded to convert incrementally between
6 # In its current form, it's hardcoded to convert incrementally between
7 # git and Mercurial.
7 # git and Mercurial.
8 #
8 #
9 # To use, you must first import the first git version into Mercurial,
9 # To use, you must first import the first git version into Mercurial,
10 # and establish a mapping between the git commit hash and the hash in
10 # and establish a mapping between the git commit hash and the hash in
11 # Mercurial for that version. This mapping is kept in a simple text
11 # Mercurial for that version. This mapping is kept in a simple text
12 # file with lines like so:
12 # file with lines like so:
13 #
13 #
14 # <git hash> <mercurial hash>
14 # <git hash> <mercurial hash>
15 #
15 #
16 # To convert the rest of the repo, run:
16 # To convert the rest of the repo, run:
17 #
17 #
18 # convert-repo <git-dir> <hg-dir> <mapfile>
18 # convert-repo <git-dir> <hg-dir> <mapfile>
19 #
19 #
20 # This updates the mapfile on each commit copied, so it can be
20 # This updates the mapfile on each commit copied, so it can be
21 # interrupted and can be run repeatedly to copy new commits.
21 # interrupted and can be run repeatedly to copy new commits.
22
22
23 import sys, os, zlib, sha, time
23 import sys, os, zlib, sha, time
24
24
25 os.environ["HGENCODING"] = "utf-8"
25 os.environ["HGENCODING"] = "utf-8"
26
26
27 from mercurial import hg, ui, util
27 from mercurial import hg, ui, util
28
28
def recode(s):
    """Return the byte string *s* re-encoded as UTF-8.

    The input is first interpreted as UTF-8; if that fails it is
    interpreted as latin-1 instead.  The result is always a UTF-8
    encoded byte string.
    """
    try:
        return s.decode("utf-8").encode("utf-8")
    except UnicodeError:
        # Not valid UTF-8.  latin-1 assigns a character to every byte
        # value, so this decode cannot fail for a byte string and no
        # further fallback is needed.
        return s.decode("latin-1").encode("utf-8")
37
37
class convert_git:
    """Conversion source that reads a git repository.

    All data is obtained by running git plumbing commands through the
    shell with GIT_DIR set to ``path``.
    """

    def __init__(self, path):
        # path is passed verbatim as GIT_DIR to every git command and is
        # also used to locate refs/tags on disk.
        self.path = path

    def _run(self, cmd):
        """Run the shell command *cmd* with GIT_DIR set; return its stdout."""
        # NOTE(review): self.path is interpolated into the shell command
        # unquoted, so paths containing spaces or shell metacharacters
        # will not work.
        fh = os.popen("GIT_DIR=%s %s" % (self.path, cmd))
        try:
            return fh.read()
        finally:
            # close the pipe explicitly instead of relying on garbage
            # collection
            fh.close()

    def _tzoffset(self, tz):
        """Convert a git ``+hhmm``/``-hhmm`` zone into a seconds offset.

        The sign is inverted with respect to git's notation, e.g.
        ``+0530`` becomes ``-19800``.
        """
        sign = int(tz[-5:-4] + "1")
        hours = int(tz[-4:-2])
        minutes = int(tz[-2:])
        # minutes must be scaled to seconds, just like the hours
        return -sign * (hours * 3600 + minutes * 60)

    def _parseident(self, v):
        """Split an author/committer header value.

        Returns ``(identity, time, zone)`` where the last two
        whitespace-separated fields of *v* are the time and zone and
        everything before them is the identity, recoded to UTF-8.
        """
        p = v.split()
        tm, tz = p[-2:]
        ident = " ".join(p[:-2])
        # an identity that starts with "<" loses its first and last
        # character; slicing keeps an empty identity from raising
        if ident[:1] == "<":
            ident = ident[1:-1]
        return recode(ident), tm, tz

    def getheads(self):
        """Return a one-element list with the hash HEAD resolves to."""
        # [:-1] drops the trailing newline printed by git-rev-parse
        return [self._run("git-rev-parse --verify HEAD")[:-1]]

    def catfile(self, rev, type):
        """Return the content of object *rev* of the given git *type*.

        Raises IOError when *rev* is the all-zero hash.  Errors printed
        by git are discarded, so a failing git-cat-file yields whatever
        it wrote to stdout.
        """
        if rev == "0" * 40:
            raise IOError()
        return self._run("git-cat-file %s %s 2>/dev/null" % (type, rev))

    def getfile(self, name, rev):
        """Return the content of blob *rev*; *name* is not used."""
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        """Return the files touched by commit *version*.

        The result is a list of ``(filename, hash, isexec)`` tuples
        taken from the output of ``git-diff-tree --root -m -r``.
        """
        changes = []
        for line in self._run("git-diff-tree --root -m -r %s"
                              % version).split("\n"):
            # only lines of the form "<fields>\t<filename>" describe files
            if "\t" not in line:
                continue
            meta, fname = line.split("\t", 1)
            meta = meta.split()
            # field 1 is compared against the executable file mode,
            # field 3 is handed back as the hash to fetch the file with
            changes.append((fname, meta[3], meta[1] == "100755"))
        return changes

    def getcommit(self, version):
        """Parse commit *version*.

        Returns ``(parents, author, date, message)``.  ``date`` is
        ``"<time> <offset>"`` built from the last author/committer
        header seen, and the message has a ``committer:`` line appended.
        Raises IOError when the commit cannot be read.
        """
        c = self.catfile(version, "commit")
        if not c:
            raise IOError("cannot read git commit %s" % version)
        end = c.find("\n\n")
        message = recode(c[end + 2:])
        headers = c[:end].splitlines()
        parents = []
        # the first header line is skipped; the rest are "<name> <value>"
        for e in headers[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                author, tm, tz = self._parseident(v)
            elif n == "committer":
                committer, tm, tz = self._parseident(v)
                message += "\ncommitter: %s\n" % committer
            elif n == "parent":
                parents.append(v)

        date = tm + " " + str(self._tzoffset(tz))
        return (parents, author, date, message)

    def gettags(self):
        """Return a dict mapping tag names to the hash each tag refers to.

        Only entries of refs/tags whose hash can be read as a git "tag"
        object are included; anything else is skipped.
        """
        tags = {}
        tagdir = self.path + "/refs/tags"
        for f in os.listdir(tagdir):
            try:
                fh = open(tagdir + "/" + f)
                try:
                    h = fh.read().strip()
                finally:
                    fh.close()
                c = self.catfile(h, "tag")
                # second word of the first line of the tag object
                tags[f] = c.splitlines()[0].split()[1]
            except (IOError, IndexError):
                # unreadable ref, or not a tag object: skip this entry
                pass
        return tags
107
107
class convert_mercurial:
    """Conversion destination that writes into a Mercurial repository."""

    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the hex hashes of the changelog heads."""
        h = self.repo.changelog.heads()
        return [hg.hex(x) for x in h]

    def putfile(self, f, e, data):
        """Write *data* to working-directory file *f*.

        The file is marked as added when the dirstate does not know it
        yet, and its executable bit is set according to *e*.
        """
        fh = self.repo.wfile(f, "w")
        try:
            fh.write(data)
        finally:
            # make sure the data is on disk before the mode is changed
            fh.close()
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Remove working-directory file *f*, ignoring a failed unlink."""
        try:
            os.unlink(self.repo.wjoin(f))
            #self.repo.remove([f])
        except OSError:
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit *files* and return the hex hash of the resulting tip.

        *parents* is a list of hex hashes; duplicates are dropped and
        the list is padded with the null hash up to two entries.  More
        than two parents are committed as a chain of two-parent commits.
        *dest* is passed to rawcommit as the commit date.
        """
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            # any further commit of the chain gets a placeholder message
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from *tags* and commit it when it changed.

        *tags* maps tag names to hex hashes.  Returns the hex hash of
        the changelog tip, whether or not a commit was made.
        """
        try:
            fh = self.repo.wfile(".hgtags")
            try:
                old = fh.read()
            finally:
                fh.close()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except IOError:
            # no readable .hgtags yet
            oldlines = []

        newlines = ["%s %s\n" % (tags[tag], tag) for tag in tags]
        newlines.sort()

        if newlines != oldlines:
            #print "updating tags"
            f = self.repo.wfile(".hgtags", "w")
            try:
                f.write("".join(newlines))
            finally:
                f.close()
            if not oldlines:
                self.repo.add([".hgtags"])
            # time.time() is already seconds since the epoch in UTC;
            # time.mktime(time.gmtime()) would misread the UTC tuple as
            # local time and skew the stamp by the local UTC offset
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
        return hg.hex(self.repo.changelog.tip())
181
181
182 class convert:
182 class convert:
183 def __init__(self, source, dest, mapfile):
183 def __init__(self, source, dest, mapfile):
184 self.source = source
184 self.source = source
185 self.dest = dest
185 self.dest = dest
186 self.mapfile = mapfile
186 self.mapfile = mapfile
187 self.commitcache = {}
187 self.commitcache = {}
188
188
189 self.map = {}
189 self.map = {}
190 try:
190 try:
191 for l in file(self.mapfile):
191 for l in file(self.mapfile):
192 sv, dv = l[:-1].split()
192 sv, dv = l[:-1].split()
193 self.map[sv] = dv
193 self.map[sv] = dv
194 except IOError:
194 except IOError:
195 pass
195 pass
196
196
197 def walktree(self, heads):
197 def walktree(self, heads):
198 visit = heads
198 visit = heads
199 known = {}
199 known = {}
200 parents = {}
200 parents = {}
201 while visit:
201 while visit:
202 n = visit.pop(0)
202 n = visit.pop(0)
203 if n in known or n in self.map: continue
203 if n in known or n in self.map: continue
204 known[n] = 1
204 known[n] = 1
205 self.commitcache[n] = self.source.getcommit(n)
205 self.commitcache[n] = self.source.getcommit(n)
206 cp = self.commitcache[n][0]
206 cp = self.commitcache[n][0]
207 for p in cp:
207 for p in cp:
208 parents.setdefault(n, []).append(p)
208 parents.setdefault(n, []).append(p)
209 visit.append(p)
209 visit.append(p)
210
210
211 return parents
211 return parents
212
212
213 def toposort(self, parents):
213 def toposort(self, parents):
214 visit = parents.keys()
214 visit = parents.keys()
215 seen = {}
215 seen = {}
216 children = {}
216 children = {}
217
217
218 while visit:
218 while visit:
219 n = visit.pop(0)
219 n = visit.pop(0)
220 if n in seen: continue
220 if n in seen: continue
221 seen[n] = 1
221 seen[n] = 1
222 pc = 0
222 pc = 0
223 if n in parents:
223 if n in parents:
224 for p in parents[n]:
224 for p in parents[n]:
225 if p not in self.map: pc += 1
225 if p not in self.map: pc += 1
226 visit.append(p)
226 visit.append(p)
227 children.setdefault(p, []).append(n)
227 children.setdefault(p, []).append(n)
228 if not pc: root = n
228 if not pc: root = n
229
229
230 s = []
230 s = []
231 removed = {}
231 removed = {}
232 visit = children.keys()
232 visit = children.keys()
233 while visit:
233 while visit:
234 n = visit.pop(0)
234 n = visit.pop(0)
235 if n in removed: continue
235 if n in removed: continue
236 dep = 0
236 dep = 0
237 if n in parents:
237 if n in parents:
238 for p in parents[n]:
238 for p in parents[n]:
239 if p in self.map: continue
239 if p in self.map: continue
240 if p not in removed:
240 if p not in removed:
241 # we're still dependent
241 # we're still dependent
242 visit.append(n)
242 visit.append(n)
243 dep = 1
243 dep = 1
244 break
244 break
245
245
246 if not dep:
246 if not dep:
247 # all n's parents are in the list
247 # all n's parents are in the list
248 removed[n] = 1
248 removed[n] = 1
249 s.append(n)
249 s.append(n)
250 if n in children:
250 if n in children:
251 for c in children[n]:
251 for c in children[n]:
252 visit.insert(0, c)
252 visit.insert(0, c)
253
253
254 return s
254 return s
255
255
256 def copy(self, rev):
256 def copy(self, rev):
257 p, a, d, t = self.commitcache[rev]
257 p, a, d, t = self.commitcache[rev]
258 files = self.source.getchanges(rev)
258 files = self.source.getchanges(rev)
259
259
260 for f,v,e in files:
260 for f,v,e in files:
261 try:
261 try:
262 data = self.source.getfile(f, v)
262 data = self.source.getfile(f, v)
263 except IOError, inst:
263 except IOError, inst:
264 self.dest.delfile(f)
264 self.dest.delfile(f)
265 else:
265 else:
266 self.dest.putfile(f, e, data)
266 self.dest.putfile(f, e, data)
267
267
268 r = [self.map[v] for v in p]
268 r = [self.map[v] for v in p]
269 f = [f for f,v,e in files]
269 f = [f for f,v,e in files]
270 self.map[rev] = self.dest.putcommit(f, r, a, d, t)
270 self.map[rev] = self.dest.putcommit(f, r, a, d, t)
271 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
271 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
272
272
273 def convert(self):
273 def convert(self):
274 heads = self.source.getheads()
274 heads = self.source.getheads()
275 parents = self.walktree(heads)
275 parents = self.walktree(heads)
276 t = self.toposort(parents)
276 t = self.toposort(parents)
277 t = [n for n in t if n not in self.map]
277 t = [n for n in t if n not in self.map]
278 num = len(t)
278 num = len(t)
279 c = None
279 c = None
280
280
281 for c in t:
281 for c in t:
282 num -= 1
282 num -= 1
283 desc = self.commitcache[c][3].splitlines()[0]
283 desc = self.commitcache[c][3].splitlines()[0]
284 #print num, desc
284 #print num, desc
285 self.copy(c)
285 self.copy(c)
286
286
287 tags = self.source.gettags()
287 tags = self.source.gettags()
288 ctags = {}
288 ctags = {}
289 for k in tags:
289 for k in tags:
290 v = tags[k]
290 v = tags[k]
291 if v in self.map:
291 if v in self.map:
292 ctags[k] = self.map[v]
292 ctags[k] = self.map[v]
293
293
294 if c and ctags:
294 if c and ctags:
295 nrev = self.dest.puttags(ctags)
295 nrev = self.dest.puttags(ctags)
296 # write another hash correspondence to override the previous
296 # write another hash correspondence to override the previous
297 # one so we don't end up with extra tag heads
297 # one so we don't end up with extra tag heads
298 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
298 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
299
299
def main(argv):
    """Run a conversion described by the command line *argv*.

    *argv* is ``[program, git-dir, hg-dir, mapfile]``.  Returns 0 after
    a conversion and 1, after printing a usage message to stderr, when
    the number of arguments is wrong.
    """
    if len(argv) != 4:
        sys.stderr.write("usage: convert-repo <git-dir> <hg-dir> <mapfile>\n")
        return 1

    gitpath, hgpath, mapfile = argv[1:]
    # accept the top of a git working tree as well as the .git directory
    if os.path.isdir(gitpath + "/.git"):
        gitpath += "/.git"

    c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
    c.convert()
    return 0

if __name__ == "__main__":
    sys.exit(main(sys.argv))
General Comments 0
You need to be logged in to leave comments. Login now