##// END OF EJS Templates
Teach convert-repo to deal with mixed charsets in git
Matt Mackall -
r3821:158fce02 default
parent child Browse files
Show More
@@ -1,290 +1,305 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # This is a generalized framework for converting between SCM
4 4 # repository formats.
5 5 #
6 6 # In its current form, it's hardcoded to convert incrementally between
7 7 # git and Mercurial.
8 8 #
9 9 # To use, you must first import the first git version into Mercurial,
10 10 # and establish a mapping between the git commit hash and the hash in
11 11 # Mercurial for that version. This mapping is kept in a simple text
12 12 # file with lines like so:
13 13 #
14 14 # <git hash> <mercurial hash>
15 15 #
16 16 # To convert the rest of the repo, run:
17 17 #
18 18 # convert-repo <git-dir> <hg-dir> <mapfile>
19 19 #
20 20 # This updates the mapfile on each commit copied, so it can be
21 21 # interrupted and can be run repeatedly to copy new commits.
22 22
23 23 import sys, os, zlib, sha, time
24
25 os.environ["HGENCODING"] = "utf-8"
26
24 27 from mercurial import hg, ui, util
25 28
def recode(s):
    """Return the byte string s re-encoded as UTF-8.

    The input charset is not known in advance, so decoding is tried as
    UTF-8 first and as latin-1 second.  If both fail, the string is decoded
    as UTF-8 with undecodable bytes replaced.

    Only encoding errors trigger the next attempt; anything else (for
    example KeyboardInterrupt) propagates to the caller.
    """
    for charset in ("utf-8", "latin-1"):
        try:
            return s.decode(charset).encode("utf-8")
        except UnicodeError:
            # wrong guess for this string, try the next charset
            pass
    # last resort: keep whatever decodes and substitute the rest
    return s.decode("utf-8", "replace").encode("utf-8")
37
class convert_git:
    """Source side of the conversion: reads commits, files and tags from git.

    Everything except the tag refs is obtained by running the git
    command-line tools through the shell with GIT_DIR set to self.path.
    """

    def __init__(self, path):
        # used verbatim as GIT_DIR and as the base directory for refs/tags
        self.path = path

    def _popenread(self, cmd):
        """Run cmd through the shell and return all of its stdout."""
        # NOTE(review): callers interpolate self.path and revision ids into
        # cmd without shell quoting, so paths containing spaces or shell
        # metacharacters will not work.
        fh = os.popen(cmd)
        try:
            return fh.read()
        finally:
            fh.close()

    def _tzoffset(self, tz):
        """Convert a "+HHMM"/"-HHMM" zone string to an offset in seconds.

        The sign is inverted relative to the input, so "+0100" gives -3600.
        """
        sign = int(tz[-5:-4] + "1")
        hours = int(tz[-4:-2])
        minutes = int(tz[-2:])
        return -sign * (hours * 3600 + minutes * 60)

    def getheads(self):
        """Return a one-element list holding the id HEAD resolves to."""
        out = self._popenread("GIT_DIR=%s git-rev-parse --verify HEAD"
                              % self.path)
        # drop the trailing newline
        return [out[:-1]]

    def catfile(self, rev, type):
        """Return the output of git-cat-file for object rev of the given type.

        Raises IOError for the all-zero id.  Errors printed by git are
        discarded (stderr goes to /dev/null).
        """
        if rev == "0" * 40: raise IOError()
        return self._popenread("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
                               % (self.path, type, rev))

    def getfile(self, name, rev):
        """Return the contents of blob rev; name is not used for the lookup."""
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        """Return a list of (filename, id, is_exec) for commit version.

        Parsed from the tab-separated lines of git-diff-tree's output: id is
        the fourth whitespace-separated field before the tab, and is_exec is
        true when the second field is "100755".
        """
        out = self._popenread("GIT_DIR=%s git-diff-tree --root -m -r %s"
                              % (self.path, version))
        changes = []
        for line in out.split("\n"):
            # lines without a tab carry no file entry
            if "\t" not in line: continue
            meta, fname = line.split("\t")
            fields = meta.split()
            changes.append((fname, fields[3], fields[1] == "100755"))
        return changes

    def getcommit(self, version):
        """Return (parents, author, date, message) for commit version.

        author and message are recoded to UTF-8.  date is "<time> <offset>",
        taken from the last author/committer header seen.  A
        "committer: ..." line is appended to the message.
        """
        c = self.catfile(version, "commit")
        end = c.find("\n\n")
        message = recode(c[end+2:])
        parents = []
        # the first header line is skipped; only the ones after it are used
        for e in c[:end].splitlines()[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                # a bare "<email>" loses its angle brackets
                if author[:1] == "<": author = author[1:-1]
                author = recode(author)
            elif n == "committer":
                tm, tz = v.split()[-2:]
                # recode so the appended line matches the UTF-8 message
                message += "\ncommitter: %s\n" % recode(v)
            elif n == "parent":
                parents.append(v)

        date = tm + " " + str(self._tzoffset(tz))
        return (parents, author, date, message)

    def gettags(self):
        """Return a dict mapping each name in refs/tags to a tagged id.

        The id is the second word of the first line of the tag object.
        Refs that cannot be read or do not yield such a line are skipped.
        """
        tags = {}
        tagdir = self.path + "/refs/tags"
        for f in os.listdir(tagdir):
            try:
                fp = open(tagdir + "/" + f)
                try:
                    h = fp.read().strip()
                finally:
                    fp.close()
                c = self.catfile(h, "tag")
                tags[f] = c.splitlines()[0].split()[1]
            except (IOError, OSError, IndexError):
                # best effort: ignore anything that is not a usable tag
                pass
        return tags
92 107
class convert_mercurial:
    """Destination side of the conversion: writes into a Mercurial repo."""

    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the hex ids of the changelog heads."""
        return [hg.hex(node) for node in self.repo.changelog.heads()]

    def putfile(self, f, e, data):
        """Write data to working file f and set its exec bit from e.

        A file whose dirstate state is '?' is marked as added.
        """
        fp = self.repo.wfile(f, "w")
        try:
            fp.write(data)
        finally:
            fp.close()
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Remove working file f; a file that cannot be removed is ignored."""
        try:
            os.unlink(self.repo.wjoin(f))
        except OSError:
            # best effort, e.g. the file is already gone
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit files and return the hex id of the resulting tip.

        parents is a list of hex ids; duplicates are dropped and the list
        is padded with the all-zero id up to two entries.  With more than
        two parents, extra "(octopus merge fixup)" commits are chained, each
        merging the previous tip with the next remaining parent.
        """
        seen = {}
        pending = []
        for p in parents:
            if p not in seen:
                seen[p] = 1
                pending.append(p)

        while len(pending) < 2:
            pending.append("0" * 40)

        p2 = pending.pop(0)
        while pending:
            p1 = p2
            p2 = pending.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            # any further iteration only folds in another parent
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from tags and commit it when its content changed.

        tags maps tag name to hex id.  Returns the hex id of the changelog
        tip once any tag commit has been made.
        """
        try:
            fp = self.repo.wfile(".hgtags")
            try:
                oldlines = fp.read().splitlines(True)
            finally:
                fp.close()
            oldlines.sort()
        except (IOError, OSError):
            # NOTE(review): assumes a missing .hgtags surfaces as
            # IOError/OSError from wfile -- confirm
            oldlines = []

        newlines = ["%s %s\n" % (tags[tag], tag) for tag in tags]
        newlines.sort()

        if newlines != oldlines:
            f = self.repo.wfile(".hgtags", "w")
            try:
                f.write("".join(newlines))
            finally:
                f.close()
            if not oldlines: self.repo.add([".hgtags"])
            # time.time() is already relative to UTC, matching the "0" offset
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
        return hg.hex(self.repo.changelog.tip())
166 181
167 182 class convert:
168 183 def __init__(self, source, dest, mapfile):
169 184 self.source = source
170 185 self.dest = dest
171 186 self.mapfile = mapfile
172 187 self.commitcache = {}
173 188
174 189 self.map = {}
175 190 try:
176 191 for l in file(self.mapfile):
177 192 sv, dv = l[:-1].split()
178 193 self.map[sv] = dv
179 194 except IOError:
180 195 pass
181 196
182 197 def walktree(self, heads):
183 198 visit = heads
184 199 known = {}
185 200 parents = {}
186 201 while visit:
187 202 n = visit.pop(0)
188 203 if n in known or n in self.map: continue
189 204 known[n] = 1
190 205 self.commitcache[n] = self.source.getcommit(n)
191 206 cp = self.commitcache[n][0]
192 207 for p in cp:
193 208 parents.setdefault(n, []).append(p)
194 209 visit.append(p)
195 210
196 211 return parents
197 212
198 213 def toposort(self, parents):
199 214 visit = parents.keys()
200 215 seen = {}
201 216 children = {}
202 217
203 218 while visit:
204 219 n = visit.pop(0)
205 220 if n in seen: continue
206 221 seen[n] = 1
207 222 pc = 0
208 223 if n in parents:
209 224 for p in parents[n]:
210 225 if p not in self.map: pc += 1
211 226 visit.append(p)
212 227 children.setdefault(p, []).append(n)
213 228 if not pc: root = n
214 229
215 230 s = []
216 231 removed = {}
217 232 visit = children.keys()
218 233 while visit:
219 234 n = visit.pop(0)
220 235 if n in removed: continue
221 236 dep = 0
222 237 if n in parents:
223 238 for p in parents[n]:
224 239 if p in self.map: continue
225 240 if p not in removed:
226 241 # we're still dependent
227 242 visit.append(n)
228 243 dep = 1
229 244 break
230 245
231 246 if not dep:
232 247 # all n's parents are in the list
233 248 removed[n] = 1
234 249 s.append(n)
235 250 if n in children:
236 251 for c in children[n]:
237 252 visit.insert(0, c)
238 253
239 254 return s
240 255
241 256 def copy(self, rev):
242 257 p, a, d, t = self.commitcache[rev]
243 258 files = self.source.getchanges(rev)
244 259
245 260 for f,v,e in files:
246 261 try:
247 262 data = self.source.getfile(f, v)
248 263 except IOError, inst:
249 264 self.dest.delfile(f)
250 265 else:
251 266 self.dest.putfile(f, e, data)
252 267
253 268 r = [self.map[v] for v in p]
254 269 f = [f for f,v,e in files]
255 270 self.map[rev] = self.dest.putcommit(f, r, a, d, t)
256 271 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
257 272
258 273 def convert(self):
259 274 heads = self.source.getheads()
260 275 parents = self.walktree(heads)
261 276 t = self.toposort(parents)
262 277 t = [n for n in t if n not in self.map]
263 278 num = len(t)
264 279 c = None
265 280
266 281 for c in t:
267 282 num -= 1
268 283 desc = self.commitcache[c][3].splitlines()[0]
269 284 #print num, desc
270 285 self.copy(c)
271 286
272 287 tags = self.source.gettags()
273 288 ctags = {}
274 289 for k in tags:
275 290 v = tags[k]
276 291 if v in self.map:
277 292 ctags[k] = self.map[v]
278 293
279 294 if c and ctags:
280 295 nrev = self.dest.puttags(ctags)
281 296 # write another hash correspondence to override the previous
282 297 # one so we don't end up with extra tag heads
283 298 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
284 299
# Script entry: convert-repo <git-dir> <hg-dir> <mapfile>
if len(sys.argv) != 4:
    # report misuse plainly rather than failing on the tuple unpack below
    sys.stderr.write("usage: convert-repo <git-dir> <hg-dir> <mapfile>\n")
    sys.exit(1)

gitpath, hgpath, mapfile = sys.argv[1:]
# accept a working tree as well as a bare GIT_DIR
if os.path.isdir(gitpath + "/.git"):
    gitpath += "/.git"

c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
c.convert()
General Comments 0
You need to be logged in to leave comments. Login now