##// END OF EJS Templates
convert: properly set encoding for gnuarch converter
Brodie Rao -
r11986:d2796a3c stable
parent child Browse files
Show More
@@ -1,338 +1,338 b''
1 1 # gnuarch.py - GNU Arch support for the convert extension
2 2 #
3 3 # Copyright 2008, 2009 Aleix Conchillo Flaque <aleix@member.fsf.org>
4 4 # and others
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from common import NoRepo, commandline, commit, converter_source
10 10 from mercurial.i18n import _
11 11 from mercurial import util
12 12 import os, shutil, tempfile, stat, locale
13 13 from email.Parser import Parser
14 14
class gnuarch_source(converter_source, commandline):
    """Conversion source that reads history from a GNU Arch working tree.

    Revisions are listed and replayed by shelling out to ``baz`` or ``tla``
    (``baz`` is preferred when both are installed).  Each revision is
    materialised in a temporary directory (``self.tmppath``) from which
    file contents are read.

    NOTE(review): ``run``, ``run0``, ``runlines``, ``runlines0``,
    ``checkexit`` and ``recode`` are not defined in this class; they are
    presumably inherited from ``commandline`` / ``converter_source``.
    """

    class gnuarch_rev(object):
        """Plain record of what is known about a single GNU Arch revision."""

        def __init__(self, rev):
            self.rev = rev
            self.summary = ''
            self.date = None
            self.author = ''
            # Value of the cat-log 'Continuation-of' header, when present
            self.continuationof = None
            self.add_files = []
            self.mod_files = []
            self.del_files = []
            # Rename maps: source path -> destination path
            self.ren_files = {}
            self.ren_dirs = {}

    def __init__(self, ui, path, rev=None):
        """Check that *path* is a GNU Arch tree and pick the tool to drive.

        Raises NoRepo when *path* contains no '{arch}' entry, and
        util.Abort when neither 'baz' nor 'tla' can be found.
        """
        super(gnuarch_source, self).__init__(ui, path, rev=rev)

        if not os.path.exists(os.path.join(path, '{arch}')):
            raise NoRepo(_("%s does not look like a GNU Arch repository")
                         % path)

        # Could use checktool, but we want to check for baz or tla.
        self.execmd = None
        if util.find_exe('baz'):
            self.execmd = 'baz'
        elif util.find_exe('tla'):
            self.execmd = 'tla'
        else:
            raise util.Abort(_('cannot find a GNU Arch tool'))

        commandline.__init__(self, ui, self.execmd)

        self.path = os.path.realpath(path)
        self.tmppath = None

        self.treeversion = None
        self.lastrev = None
        self.changes = {}
        self.parents = {}
        self.tags = {}
        self.catlogparser = Parser()
        # The attribute must be named 'encoding' (it used to be stored as
        # 'locale'); presumably this is what the inherited recode() reads
        # -- TODO confirm against converter_source.
        self.encoding = locale.getpreferredencoding()
        self.archives = []

    def before(self):
        """Collect the revision graph before the conversion starts.

        Fills ``self.archives``, ``self.treeversion``, ``self.tmppath``,
        ``self.changes`` and ``self.parents``.  ``self.parents[None]``
        receives the first revision scanned, which getheads() returns.
        """
        # Get registered archives
        self.archives = [i.rstrip('\n')
                         for i in self.runlines0('archives', '-n')]

        if self.execmd == 'tla':
            output = self.run0('tree-version', self.path)
        else:
            output = self.run0('tree-version', '-d', self.path)
        self.treeversion = output.strip()

        # Get name of temporary directory; the tree version is expected to
        # look like '<archive>/<version>'
        version = self.treeversion.split('/')
        self.tmppath = os.path.join(tempfile.gettempdir(),
                                    'hg-%s' % version[1])

        # Generate parents dictionary
        self.parents[None] = []
        treeversion = self.treeversion
        child = None
        while treeversion:
            self.ui.status(_('analyzing tree version %s...\n') % treeversion)

            archive = treeversion.split('/')[0]
            if archive not in self.archives:
                self.ui.status(_('tree analysis stopped because it points to '
                                 'an unregistered archive %s...\n') % archive)
                break

            # Get the complete list of revisions for that tree version
            output, status = self.runlines('revisions', '-r', '-f',
                                           treeversion)
            self.checkexit(status, 'failed retrieveing revisions for %s'
                           % treeversion)

            # No new iteration unless a revision has a continuation-of header
            treeversion = None

            for l in output:
                rev = l.strip()
                self.changes[rev] = self.gnuarch_rev(rev)
                self.parents[rev] = []

                # Read author, date and summary; fall back to the archive
                # log when the tree-local cat-log fails
                catlog, status = self.run('cat-log', '-d', self.path, rev)
                if status:
                    catlog = self.run0('cat-archive-log', rev)
                self._parsecatlog(catlog, rev)

                # Populate the parents map
                self.parents[child].append(rev)

                # Keep track of the current revision as the child of the next
                # revision scanned
                child = rev

                # Check if we have to follow the usual incremental history
                # or if we have to 'jump' to a different treeversion given
                # by the continuation-of header.
                if self.changes[rev].continuationof:
                    treeversion = '--'.join(
                        self.changes[rev].continuationof.split('--')[:-1])
                    break

                # If we reached a base-0 revision w/o any continuation-of
                # header, it means the tree history ends here.
                if rev[-6:] == 'base-0':
                    break

    def after(self):
        """Remove the temporary tree; removal errors are ignored."""
        self.ui.debug('cleaning up %s\n' % self.tmppath)
        shutil.rmtree(self.tmppath, ignore_errors=True)

    def getheads(self):
        """Return the revisions recorded under the ``None`` parent key."""
        return self.parents[None]

    def getfile(self, name, rev):
        """Return (data, mode) for *name* as of *rev*.

        *rev* must be the revision most recently passed to getchanges(),
        otherwise util.Abort is raised.  IOError is raised when the file is
        absent from the temporary tree (i.e. it was deleted).
        """
        if rev != self.lastrev:
            raise util.Abort(_('internal calling inconsistency'))

        # Raise IOError if necessary (i.e. deleted files).
        if not os.path.exists(os.path.join(self.tmppath, name)):
            raise IOError

        return self._getfile(name, rev)

    def getchanges(self, rev):
        """Bring the temporary tree to *rev* and report what changed.

        Returns a sorted, duplicate-free list of (path, rev) pairs and a
        dict mapping copy destinations to their sources.
        """
        self._update(rev)
        revchanges = self.changes[rev]
        changes = []
        copies = {}

        touched = (revchanges.add_files + revchanges.mod_files
                   + revchanges.del_files)
        for f in touched:
            changes.append((f, rev))

        # A renamed file shows up as a change to both of its names
        for src in revchanges.ren_files:
            to = revchanges.ren_files[src]
            changes.append((src, rev))
            changes.append((to, rev))
            copies[to] = src

        for src in revchanges.ren_dirs:
            to = revchanges.ren_dirs[src]
            chgs, cps = self._rendirchanges(src, to)
            changes += [(f, rev) for f in chgs]
            copies.update(cps)

        self.lastrev = rev
        return sorted(set(changes)), copies

    def getcommit(self, rev):
        """Build the commit object for *rev* from its parsed cat-log."""
        changes = self.changes[rev]
        return commit(author=changes.author, date=changes.date,
                      desc=changes.summary, parents=self.parents[rev],
                      rev=rev)

    def gettags(self):
        """Return the tags mapping (never populated by this class)."""
        return self.tags

    def _execute(self, cmd, *args, **kwargs):
        """Run the GNU Arch tool through the shell, discarding its output.

        Every argument is shell-quoted.  Returns os.system()'s result.
        **kwargs is accepted but unused.
        """
        cmdline = [self.execmd, cmd]
        cmdline += args
        cmdline = [util.shellquote(arg) for arg in cmdline]
        cmdline += ['>', util.nulldev, '2>', util.nulldev]
        cmdline = util.quotecommand(' '.join(cmdline))
        self.ui.debug(cmdline, '\n')
        return os.system(cmdline)

    def _update(self, rev):
        """Replay *rev* onto the temporary tree and record its changes.

        When the replay command reports failure the tree is discarded and
        *rev* is fetched from scratch instead.
        """
        self.ui.debug('applying revision %s...\n' % rev)
        changeset, status = self.runlines('replay', '-d', self.tmppath,
                                          rev)
        if status:
            # Something went wrong while merging (baz or tla
            # issue?), get latest revision and try from there
            shutil.rmtree(self.tmppath, ignore_errors=True)
            self._obtainrevision(rev)
        else:
            old_rev = self.parents[rev][0]
            self.ui.debug('computing changeset between %s and %s...\n'
                          % (old_rev, rev))
            self._parsechangeset(changeset, rev)

    def _getfile(self, name, rev):
        """Return (data, mode) for *name* in the temporary tree.

        mode is 'l' for a symbolic link (data is then the link target),
        'x' when any execute bit is set, and '' otherwise.  *rev* is
        unused.
        """
        fullpath = os.path.join(self.tmppath, name)
        mode = os.lstat(fullpath).st_mode
        if stat.S_ISLNK(mode):
            return os.readlink(fullpath), 'l'

        # Close the handle explicitly rather than leaving it to the
        # garbage collector
        fp = open(fullpath, 'rb')
        try:
            data = fp.read()
        finally:
            fp.close()

        # Any of the user/group/other execute bits (octal 111)
        execbits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        if mode & execbits:
            return data, 'x'
        return data, ''

    def _exclude(self, name):
        """Tell whether *name* contains a GNU Arch bookkeeping name.

        This is a substring test, so it matches anywhere in the path.
        """
        for exc in ('{arch}', '.arch-ids', '.arch-inventory'):
            if exc in name:
                return True
        return False

    def _readcontents(self, path):
        """List the files below *path*, relative to it.

        Directories are descended into; entries rejected by _exclude()
        are skipped together with everything below them.
        """
        files = []
        contents = os.listdir(path)
        while contents:
            c = contents.pop()
            p = os.path.join(path, c)
            # os.walk could be used, but here we avoid internal GNU
            # Arch files and directories, thus saving a lot of time.
            if not self._exclude(p):
                if os.path.isdir(p):
                    contents += [os.path.join(c, f) for f in os.listdir(p)]
                else:
                    files.append(c)
        return files

    def _rendirchanges(self, src, dest):
        """Expand a directory rename into per-file changes and copies.

        The files are enumerated under *dest* in the temporary tree.
        Returns (changes, copies): every file's old and new path, and a
        dict mapping each new path to its old path.
        """
        changes = []
        copies = {}
        for f in self._readcontents(os.path.join(self.tmppath, dest)):
            s = os.path.join(src, f)
            d = os.path.join(dest, f)
            changes.append(s)
            changes.append(d)
            copies[d] = s
        return changes, copies

    def _obtainrevision(self, rev):
        """Fetch *rev* into the temporary tree and mark all files added."""
        self.ui.debug('obtaining revision %s...\n' % rev)
        status = self._execute('get', rev, self.tmppath)
        self.checkexit(status)
        self.ui.debug('analyzing revision %s...\n' % rev)
        files = self._readcontents(self.tmppath)
        self.changes[rev].add_files += files

    def _stripbasepath(self, path):
        """Drop a leading './' from *path*, if there is one."""
        if path.startswith('./'):
            return path[2:]
        return path

    def _parsecatlog(self, data, rev):
        """Fill in date, author, summary and origin of *rev* from a cat-log.

        Any failure while parsing is reported as util.Abort.
        """
        try:
            catlog = self.catlogparser.parsestr(data)
            record = self.changes[rev]

            # Commit date
            record.date = util.datestr(
                util.strdate(catlog['Standard-date'],
                             '%Y-%m-%d %H:%M:%S'))

            # Commit author
            record.author = self.recode(catlog['Creator'])

            # Commit description
            record.summary = '\n\n'.join((catlog['Summary'],
                                          catlog.get_payload()))
            record.summary = self.recode(record.summary)

            # Commit revision origin when dealing with a branch or tag
            if 'Continuation-of' in catlog:
                record.continuationof = self.recode(
                    catlog['Continuation-of'])
        except Exception:
            raise util.Abort(_('could not parse cat-log of %s') % rev)

    def _recordpath(self, target, path):
        """Append the normalised *path* to *target* unless it is excluded."""
        name = self._stripbasepath(path.strip())
        if not self._exclude(name):
            target.append(name)

    def _recordrename(self, target, spec):
        """Store the 'source destination' pair in *spec* into *target*.

        The two paths are separated by a space or, failing that, a tab.
        The pair is dropped when either side is excluded.
        """
        paths = spec.strip().split(' ')
        if len(paths) == 1:
            paths = spec.strip().split('\t')
        src = self._stripbasepath(paths[0])
        dst = self._stripbasepath(paths[1])
        if not self._exclude(src) and not self._exclude(dst):
            target[src] = dst

    def _parsechangeset(self, data, rev):
        """Sort the lines of a replay listing into the record of *rev*.

        Each line is classified by its leading marker; lines with an
        unknown marker are ignored.
        """
        record = self.changes[rev]
        # Checked in order: the longer 'M' markers must precede plain 'M'
        plain = [('A', record.add_files),    # added file
                 ('D', record.del_files),    # deleted file
                 ('Mb', record.mod_files),   # modified binary file
                 ('M->', record.mod_files),  # modified link
                 ('M', record.mod_files),    # modified file
                 ('ch', record.mod_files)]   # file <-> link conversion
        renames = [('=>', record.ren_files),  # renamed file (or link)
                   ('/>', record.ren_dirs)]   # renamed directory

        for l in data:
            l = l.strip()
            # Added and deleted directories carry no file change
            if l.startswith('A/') or l.startswith('D/'):
                continue

            for marker, target in plain:
                if l.startswith(marker):
                    self._recordpath(target, l[len(marker):])
                    break
            else:
                for marker, target in renames:
                    if l.startswith(marker):
                        self._recordrename(target, l[len(marker):])
                        break
General Comments 0
You need to be logged in to leave comments. Login now