##// END OF EJS Templates
Teach convert-repo to deal with mixed charsets in git
Matt Mackall -
r3821:158fce02 default
parent child Browse files
Show More
@@ -1,290 +1,305 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 #
2 #
3 # This is a generalized framework for converting between SCM
3 # This is a generalized framework for converting between SCM
4 # repository formats.
4 # repository formats.
5 #
5 #
6 # In its current form, it's hardcoded to convert incrementally between
6 # In its current form, it's hardcoded to convert incrementally between
7 # git and Mercurial.
7 # git and Mercurial.
8 #
8 #
9 # To use, you must first import the first git version into Mercurial,
9 # To use, you must first import the first git version into Mercurial,
10 # and establish a mapping between the git commit hash and the hash in
10 # and establish a mapping between the git commit hash and the hash in
11 # Mercurial for that version. This mapping is kept in a simple text
11 # Mercurial for that version. This mapping is kept in a simple text
12 # file with lines like so:
12 # file with lines like so:
13 #
13 #
14 # <git hash> <mercurial hash>
14 # <git hash> <mercurial hash>
15 #
15 #
16 # To convert the rest of the repo, run:
16 # To convert the rest of the repo, run:
17 #
17 #
18 # convert-repo <git-dir> <hg-dir> <mapfile>
18 # convert-repo <git-dir> <hg-dir> <mapfile>
19 #
19 #
20 # This updates the mapfile on each commit copied, so it can be
20 # This updates the mapfile on each commit copied, so it can be
21 # interrupted and can be run repeatedly to copy new commits.
21 # interrupted and can be run repeatedly to copy new commits.
22
22
23 import sys, os, zlib, sha, time
23 import sys, os, zlib, sha, time
24
25 os.environ["HGENCODING"] = "utf-8"
26
24 from mercurial import hg, ui, util
27 from mercurial import hg, ui, util
25
28
def recode(s):
    """Return the byte string s re-encoded as UTF-8.

    s is first tried as UTF-8; if it does not decode cleanly it is
    treated as latin-1 instead.  This lets commit metadata written in
    either charset end up as valid UTF-8.
    """
    try:
        return s.decode("utf-8").encode("utf-8")
    except UnicodeDecodeError:
        # latin-1 assigns a character to every byte value, so this
        # fallback cannot fail for a byte string and no further
        # "replace" fallback is needed.
        return s.decode("latin-1").encode("utf-8")
37
class convert_git:
    """Source side of the conversion: reads a git repository.

    All access goes through the git command-line tools, run via the
    shell with GIT_DIR set to path.

    NOTE(review): path and revision ids are interpolated into the shell
    command lines unquoted, so a path containing spaces or shell
    metacharacters will break these commands.
    """

    def __init__(self, path):
        self.path = path

    def _read(self, cmd):
        """Run shell command cmd and return everything it prints."""
        fh = os.popen(cmd)
        try:
            return fh.read()
        finally:
            fh.close()

    def _tzoffset(self, tz):
        """Convert a git zone string such as "+0530" to an offset in seconds.

        The sign is negated: "+0100" yields -3600 and "-0800" yields
        28800.  A string without a sign character is treated as positive.
        """
        sign = int(tz[-5:-4] + "1")
        # minutes must be scaled to seconds just like the hours are
        return -sign * (int(tz[-4:-2]) * 3600 + int(tz[-2:]) * 60)

    def getheads(self):
        """Return a one-element list holding the hash HEAD resolves to."""
        out = self._read("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        # drop the trailing newline
        return [out[:-1]]

    def catfile(self, rev, type):
        """Return the output of git-cat-file for object rev of the given type.

        Raises IOError for the all-zero hash; callers use that to detect
        a deleted file.  Errors printed by git are discarded.
        """
        if rev == "0" * 40: raise IOError()
        return self._read("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
                          % (self.path, type, rev))

    def getfile(self, name, rev):
        """Return the contents of blob rev (name is not used)."""
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        """Return the files touched by commit version.

        Each entry is a (filename, blob hash, is_executable) tuple parsed
        from the tab-separated lines of git-diff-tree output.
        """
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
                      % (self.path, version))
        changes = []
        try:
            for l in fh:
                # lines without a tab carry no file entry
                if "\t" not in l: continue
                m, f = l[:-1].split("\t")
                m = m.split()
                h = m[3]
                p = (m[1] == "100755")
                changes.append((f, h, p))
        finally:
            fh.close()
        return changes

    def getcommit(self, version):
        """Parse commit version into (parents, author, date, message).

        date is "<timestamp> <offset>", taken from the last author or
        committer header line seen.  author and message are recoded to
        UTF-8, and the committer line is appended to the message.
        """
        c = self.catfile(version, "commit") # read the commit hash
        end = c.find("\n\n")
        message = recode(c[end+2:])
        l = c[:end].splitlines()
        parents = []
        # l[0] is the first header line and is skipped
        for e in l[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                # a bare "<email>" author: strip the angle brackets
                if author.startswith("<"): author = author[1:-1]
                author = recode(author)
            if n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                # recode here too, otherwise a non-UTF-8 committer name
                # would be reintroduced into the recoded message
                message += "\ncommitter: %s\n" % recode(v)
            if n == "parent": parents.append(v)

        date = tm + " " + str(self._tzoffset(tz))
        return (parents, author, date, message)

    def gettags(self):
        """Return a dict mapping tag name to the hash the tag object names.

        Entries under refs/tags that cannot be read, or for which
        git-cat-file prints no tag object, are skipped.
        """
        tags = {}
        for f in os.listdir(self.path + "/refs/tags"):
            try:
                fp = open(self.path + "/refs/tags/" + f)
                try:
                    h = fp.read().strip()
                finally:
                    fp.close()
                c = self.catfile(h, "tag") # read the commit hash
                h = c.splitlines()[0].split()[1]
                tags[f] = h
            except (IOError, OSError, IndexError):
                # unreadable ref, or empty/short cat-file output
                pass
        return tags
92
107
class convert_mercurial:
    """Destination side of the conversion: writes into the Mercurial
    repository opened at path."""

    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the changelog heads as hex strings."""
        h = self.repo.changelog.heads()
        return [hg.hex(x) for x in h]

    def putfile(self, f, e, data):
        """Write data to working-directory file f and set its exec flag to e.

        A file the dirstate reports as '?' is marked as added.
        """
        fp = self.repo.wfile(f, "w")
        try:
            fp.write(data)
        finally:
            # close explicitly so the data is on disk before set_exec
            fp.close()
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Remove f from the working directory, ignoring unlink failures."""
        try:
            os.unlink(self.repo.wjoin(f))
        except OSError:
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit files with the given parents, author, date (dest) and text.

        Duplicate parents are dropped.  With more than two parents a
        chain of commits is created, each merging in one further parent.
        Returns the hex id of the changelog tip afterwards.
        """
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        # pad with the null revision so there are always two parents
        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            # any further commits only fold in the remaining parents
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from the tags dict (name -> hex revision).

        If the sorted contents differ from the existing file, .hgtags is
        rewritten and committed, and the hex id of the new tip is
        returned.  Otherwise nothing is done and None is returned.
        """
        try:
            fp = self.repo.wfile(".hgtags")
            try:
                old = fp.read()
            finally:
                fp.close()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except IOError:
            # no readable .hgtags yet
            oldlines = []

        k = list(tags.keys())
        k.sort()
        newlines = []
        for tag in k:
            newlines.append("%s %s\n" % (tags[tag], tag))

        newlines.sort()

        if newlines != oldlines:
            f = self.repo.wfile(".hgtags", "w")
            try:
                f.write("".join(newlines))
            finally:
                f.close()
            if not oldlines: self.repo.add([".hgtags"])
            # time.time() is already seconds since the epoch in UTC;
            # mktime(gmtime()) would misread the UTC tuple as local time
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())
166
181
class convert:
    """Drives an incremental conversion from source to dest.

    mapfile is a text file of "<source rev> <dest rev>" lines.  It is
    loaded on construction and appended to after every converted commit,
    so an interrupted run can be resumed.
    """

    def __init__(self, source, dest, mapfile):
        self.source = source
        self.dest = dest
        self.mapfile = mapfile
        self.commitcache = {}

        self.map = {}
        try:
            fp = open(self.mapfile)
            try:
                for l in fp:
                    # split() already discards the newline; slicing it
                    # off by hand would truncate a final unterminated line
                    fields = l.split()
                    if not fields: continue
                    sv, dv = fields
                    self.map[sv] = dv
            finally:
                fp.close()
        except IOError:
            # a missing or unreadable map file means nothing is mapped yet
            pass

    def _mapentry(self, rev, destrev):
        """Append one "<rev> <destrev>" line to the map file."""
        fp = open(self.mapfile, "a")
        try:
            fp.write("%s %s\n" % (rev, destrev))
        finally:
            fp.close()

    def walktree(self, heads):
        """Walk back from heads over every commit not yet in the map.

        Each visited commit is fetched into self.commitcache.  Returns a
        dict mapping commit -> list of its parents; commits without
        parents get no entry.
        """
        # work on a copy so the caller's list is not emptied
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map: continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            cp = self.commitcache[n][0]
            for p in cp:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        """Return the revisions reachable through parents, parents first.

        The result can include revisions that are already in self.map;
        the caller filters those out.
        """
        visit = list(parents.keys())
        seen = {}
        children = {}

        # invert the parent relation into a child relation
        while visit:
            n = visit.pop(0)
            if n in seen: continue
            seen[n] = 1
            if n in parents:
                for p in parents[n]:
                    visit.append(p)
                    children.setdefault(p, []).append(n)

        s = []
        removed = {}
        visit = list(children.keys())
        while visit:
            n = visit.pop(0)
            if n in removed: continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map: continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        return s

    def copy(self, rev):
        """Copy source revision rev into dest and record it in the map."""
        p, a, d, t = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f, v, e in files:
            try:
                data = self.source.getfile(f, v)
            except IOError:
                # the source signals a file with no content to fetch
                self.dest.delfile(f)
            else:
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in p]
        names = [f for f, v, e in files]
        self.map[rev] = self.dest.putcommit(names, r, a, d, t)
        self._mapentry(rev, self.map[rev])

    def convert(self):
        """Convert all unmapped source revisions, then update the tags."""
        heads = self.source.getheads()
        parents = self.walktree(heads)
        t = self.toposort(parents)
        t = [n for n in t if n not in self.map]
        c = None

        for c in t:
            self.copy(c)

        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            if v in self.map:
                ctags[k] = self.map[v]

        # c is the last revision converted in this run, if any
        if c and ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads; skip it when
            # puttags reports no new revision
            if nrev:
                self._mapentry(c, nrev)
284
299
# Script entry point: convert-repo <git-dir> <hg-dir> <mapfile>
if len(sys.argv) != 4:
    sys.stderr.write("usage: convert-repo <git-dir> <hg-dir> <mapfile>\n")
    sys.exit(1)

gitpath, hgpath, mapfile = sys.argv[1:]
# if <git-dir> contains a .git subdirectory, use that as the git dir
if os.path.isdir(gitpath + "/.git"):
    gitpath += "/.git"

c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
c.convert()
General Comments 0
You need to be logged in to leave comments. Login now