##// END OF EJS Templates
convert: properly set encoding for gnuarch converter
Brodie Rao -
r11986:d2796a3c stable
parent child Browse files
Show More
@@ -1,338 +1,338 b''
1 # gnuarch.py - GNU Arch support for the convert extension
1 # gnuarch.py - GNU Arch support for the convert extension
2 #
2 #
3 # Copyright 2008, 2009 Aleix Conchillo Flaque <aleix@member.fsf.org>
3 # Copyright 2008, 2009 Aleix Conchillo Flaque <aleix@member.fsf.org>
4 # and others
4 # and others
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from common import NoRepo, commandline, commit, converter_source
9 from common import NoRepo, commandline, commit, converter_source
10 from mercurial.i18n import _
10 from mercurial.i18n import _
11 from mercurial import util
11 from mercurial import util
12 import os, shutil, tempfile, stat, locale
12 import os, shutil, tempfile, stat, locale
13 from email.Parser import Parser
13 from email.Parser import Parser
14
14
15 class gnuarch_source(converter_source, commandline):
15 class gnuarch_source(converter_source, commandline):
16
16
17 class gnuarch_rev(object):
17 class gnuarch_rev(object):
18 def __init__(self, rev):
18 def __init__(self, rev):
19 self.rev = rev
19 self.rev = rev
20 self.summary = ''
20 self.summary = ''
21 self.date = None
21 self.date = None
22 self.author = ''
22 self.author = ''
23 self.continuationof = None
23 self.continuationof = None
24 self.add_files = []
24 self.add_files = []
25 self.mod_files = []
25 self.mod_files = []
26 self.del_files = []
26 self.del_files = []
27 self.ren_files = {}
27 self.ren_files = {}
28 self.ren_dirs = {}
28 self.ren_dirs = {}
29
29
30 def __init__(self, ui, path, rev=None):
30 def __init__(self, ui, path, rev=None):
31 super(gnuarch_source, self).__init__(ui, path, rev=rev)
31 super(gnuarch_source, self).__init__(ui, path, rev=rev)
32
32
33 if not os.path.exists(os.path.join(path, '{arch}')):
33 if not os.path.exists(os.path.join(path, '{arch}')):
34 raise NoRepo(_("%s does not look like a GNU Arch repository")
34 raise NoRepo(_("%s does not look like a GNU Arch repository")
35 % path)
35 % path)
36
36
37 # Could use checktool, but we want to check for baz or tla.
37 # Could use checktool, but we want to check for baz or tla.
38 self.execmd = None
38 self.execmd = None
39 if util.find_exe('baz'):
39 if util.find_exe('baz'):
40 self.execmd = 'baz'
40 self.execmd = 'baz'
41 else:
41 else:
42 if util.find_exe('tla'):
42 if util.find_exe('tla'):
43 self.execmd = 'tla'
43 self.execmd = 'tla'
44 else:
44 else:
45 raise util.Abort(_('cannot find a GNU Arch tool'))
45 raise util.Abort(_('cannot find a GNU Arch tool'))
46
46
47 commandline.__init__(self, ui, self.execmd)
47 commandline.__init__(self, ui, self.execmd)
48
48
49 self.path = os.path.realpath(path)
49 self.path = os.path.realpath(path)
50 self.tmppath = None
50 self.tmppath = None
51
51
52 self.treeversion = None
52 self.treeversion = None
53 self.lastrev = None
53 self.lastrev = None
54 self.changes = {}
54 self.changes = {}
55 self.parents = {}
55 self.parents = {}
56 self.tags = {}
56 self.tags = {}
57 self.catlogparser = Parser()
57 self.catlogparser = Parser()
58 self.locale = locale.getpreferredencoding()
58 self.encoding = locale.getpreferredencoding()
59 self.archives = []
59 self.archives = []
60
60
61 def before(self):
61 def before(self):
62 # Get registered archives
62 # Get registered archives
63 self.archives = [i.rstrip('\n')
63 self.archives = [i.rstrip('\n')
64 for i in self.runlines0('archives', '-n')]
64 for i in self.runlines0('archives', '-n')]
65
65
66 if self.execmd == 'tla':
66 if self.execmd == 'tla':
67 output = self.run0('tree-version', self.path)
67 output = self.run0('tree-version', self.path)
68 else:
68 else:
69 output = self.run0('tree-version', '-d', self.path)
69 output = self.run0('tree-version', '-d', self.path)
70 self.treeversion = output.strip()
70 self.treeversion = output.strip()
71
71
72 # Get name of temporary directory
72 # Get name of temporary directory
73 version = self.treeversion.split('/')
73 version = self.treeversion.split('/')
74 self.tmppath = os.path.join(tempfile.gettempdir(),
74 self.tmppath = os.path.join(tempfile.gettempdir(),
75 'hg-%s' % version[1])
75 'hg-%s' % version[1])
76
76
77 # Generate parents dictionary
77 # Generate parents dictionary
78 self.parents[None] = []
78 self.parents[None] = []
79 treeversion = self.treeversion
79 treeversion = self.treeversion
80 child = None
80 child = None
81 while treeversion:
81 while treeversion:
82 self.ui.status(_('analyzing tree version %s...\n') % treeversion)
82 self.ui.status(_('analyzing tree version %s...\n') % treeversion)
83
83
84 archive = treeversion.split('/')[0]
84 archive = treeversion.split('/')[0]
85 if archive not in self.archives:
85 if archive not in self.archives:
86 self.ui.status(_('tree analysis stopped because it points to '
86 self.ui.status(_('tree analysis stopped because it points to '
87 'an unregistered archive %s...\n') % archive)
87 'an unregistered archive %s...\n') % archive)
88 break
88 break
89
89
90 # Get the complete list of revisions for that tree version
90 # Get the complete list of revisions for that tree version
91 output, status = self.runlines('revisions', '-r', '-f', treeversion)
91 output, status = self.runlines('revisions', '-r', '-f', treeversion)
92 self.checkexit(status, 'failed retrieveing revisions for %s'
92 self.checkexit(status, 'failed retrieveing revisions for %s'
93 % treeversion)
93 % treeversion)
94
94
95 # No new iteration unless a revision has a continuation-of header
95 # No new iteration unless a revision has a continuation-of header
96 treeversion = None
96 treeversion = None
97
97
98 for l in output:
98 for l in output:
99 rev = l.strip()
99 rev = l.strip()
100 self.changes[rev] = self.gnuarch_rev(rev)
100 self.changes[rev] = self.gnuarch_rev(rev)
101 self.parents[rev] = []
101 self.parents[rev] = []
102
102
103 # Read author, date and summary
103 # Read author, date and summary
104 catlog, status = self.run('cat-log', '-d', self.path, rev)
104 catlog, status = self.run('cat-log', '-d', self.path, rev)
105 if status:
105 if status:
106 catlog = self.run0('cat-archive-log', rev)
106 catlog = self.run0('cat-archive-log', rev)
107 self._parsecatlog(catlog, rev)
107 self._parsecatlog(catlog, rev)
108
108
109 # Populate the parents map
109 # Populate the parents map
110 self.parents[child].append(rev)
110 self.parents[child].append(rev)
111
111
112 # Keep track of the current revision as the child of the next
112 # Keep track of the current revision as the child of the next
113 # revision scanned
113 # revision scanned
114 child = rev
114 child = rev
115
115
116 # Check if we have to follow the usual incremental history
116 # Check if we have to follow the usual incremental history
117 # or if we have to 'jump' to a different treeversion given
117 # or if we have to 'jump' to a different treeversion given
118 # by the continuation-of header.
118 # by the continuation-of header.
119 if self.changes[rev].continuationof:
119 if self.changes[rev].continuationof:
120 treeversion = '--'.join(
120 treeversion = '--'.join(
121 self.changes[rev].continuationof.split('--')[:-1])
121 self.changes[rev].continuationof.split('--')[:-1])
122 break
122 break
123
123
124 # If we reached a base-0 revision w/o any continuation-of
124 # If we reached a base-0 revision w/o any continuation-of
125 # header, it means the tree history ends here.
125 # header, it means the tree history ends here.
126 if rev[-6:] == 'base-0':
126 if rev[-6:] == 'base-0':
127 break
127 break
128
128
129 def after(self):
129 def after(self):
130 self.ui.debug('cleaning up %s\n' % self.tmppath)
130 self.ui.debug('cleaning up %s\n' % self.tmppath)
131 shutil.rmtree(self.tmppath, ignore_errors=True)
131 shutil.rmtree(self.tmppath, ignore_errors=True)
132
132
133 def getheads(self):
133 def getheads(self):
134 return self.parents[None]
134 return self.parents[None]
135
135
136 def getfile(self, name, rev):
136 def getfile(self, name, rev):
137 if rev != self.lastrev:
137 if rev != self.lastrev:
138 raise util.Abort(_('internal calling inconsistency'))
138 raise util.Abort(_('internal calling inconsistency'))
139
139
140 # Raise IOError if necessary (i.e. deleted files).
140 # Raise IOError if necessary (i.e. deleted files).
141 if not os.path.exists(os.path.join(self.tmppath, name)):
141 if not os.path.exists(os.path.join(self.tmppath, name)):
142 raise IOError
142 raise IOError
143
143
144 return self._getfile(name, rev)
144 return self._getfile(name, rev)
145
145
146 def getchanges(self, rev):
146 def getchanges(self, rev):
147 self._update(rev)
147 self._update(rev)
148 changes = []
148 changes = []
149 copies = {}
149 copies = {}
150
150
151 for f in self.changes[rev].add_files:
151 for f in self.changes[rev].add_files:
152 changes.append((f, rev))
152 changes.append((f, rev))
153
153
154 for f in self.changes[rev].mod_files:
154 for f in self.changes[rev].mod_files:
155 changes.append((f, rev))
155 changes.append((f, rev))
156
156
157 for f in self.changes[rev].del_files:
157 for f in self.changes[rev].del_files:
158 changes.append((f, rev))
158 changes.append((f, rev))
159
159
160 for src in self.changes[rev].ren_files:
160 for src in self.changes[rev].ren_files:
161 to = self.changes[rev].ren_files[src]
161 to = self.changes[rev].ren_files[src]
162 changes.append((src, rev))
162 changes.append((src, rev))
163 changes.append((to, rev))
163 changes.append((to, rev))
164 copies[to] = src
164 copies[to] = src
165
165
166 for src in self.changes[rev].ren_dirs:
166 for src in self.changes[rev].ren_dirs:
167 to = self.changes[rev].ren_dirs[src]
167 to = self.changes[rev].ren_dirs[src]
168 chgs, cps = self._rendirchanges(src, to)
168 chgs, cps = self._rendirchanges(src, to)
169 changes += [(f, rev) for f in chgs]
169 changes += [(f, rev) for f in chgs]
170 copies.update(cps)
170 copies.update(cps)
171
171
172 self.lastrev = rev
172 self.lastrev = rev
173 return sorted(set(changes)), copies
173 return sorted(set(changes)), copies
174
174
175 def getcommit(self, rev):
175 def getcommit(self, rev):
176 changes = self.changes[rev]
176 changes = self.changes[rev]
177 return commit(author=changes.author, date=changes.date,
177 return commit(author=changes.author, date=changes.date,
178 desc=changes.summary, parents=self.parents[rev], rev=rev)
178 desc=changes.summary, parents=self.parents[rev], rev=rev)
179
179
180 def gettags(self):
180 def gettags(self):
181 return self.tags
181 return self.tags
182
182
183 def _execute(self, cmd, *args, **kwargs):
183 def _execute(self, cmd, *args, **kwargs):
184 cmdline = [self.execmd, cmd]
184 cmdline = [self.execmd, cmd]
185 cmdline += args
185 cmdline += args
186 cmdline = [util.shellquote(arg) for arg in cmdline]
186 cmdline = [util.shellquote(arg) for arg in cmdline]
187 cmdline += ['>', util.nulldev, '2>', util.nulldev]
187 cmdline += ['>', util.nulldev, '2>', util.nulldev]
188 cmdline = util.quotecommand(' '.join(cmdline))
188 cmdline = util.quotecommand(' '.join(cmdline))
189 self.ui.debug(cmdline, '\n')
189 self.ui.debug(cmdline, '\n')
190 return os.system(cmdline)
190 return os.system(cmdline)
191
191
192 def _update(self, rev):
192 def _update(self, rev):
193 self.ui.debug('applying revision %s...\n' % rev)
193 self.ui.debug('applying revision %s...\n' % rev)
194 changeset, status = self.runlines('replay', '-d', self.tmppath,
194 changeset, status = self.runlines('replay', '-d', self.tmppath,
195 rev)
195 rev)
196 if status:
196 if status:
197 # Something went wrong while merging (baz or tla
197 # Something went wrong while merging (baz or tla
198 # issue?), get latest revision and try from there
198 # issue?), get latest revision and try from there
199 shutil.rmtree(self.tmppath, ignore_errors=True)
199 shutil.rmtree(self.tmppath, ignore_errors=True)
200 self._obtainrevision(rev)
200 self._obtainrevision(rev)
201 else:
201 else:
202 old_rev = self.parents[rev][0]
202 old_rev = self.parents[rev][0]
203 self.ui.debug('computing changeset between %s and %s...\n'
203 self.ui.debug('computing changeset between %s and %s...\n'
204 % (old_rev, rev))
204 % (old_rev, rev))
205 self._parsechangeset(changeset, rev)
205 self._parsechangeset(changeset, rev)
206
206
207 def _getfile(self, name, rev):
207 def _getfile(self, name, rev):
208 mode = os.lstat(os.path.join(self.tmppath, name)).st_mode
208 mode = os.lstat(os.path.join(self.tmppath, name)).st_mode
209 if stat.S_ISLNK(mode):
209 if stat.S_ISLNK(mode):
210 data = os.readlink(os.path.join(self.tmppath, name))
210 data = os.readlink(os.path.join(self.tmppath, name))
211 mode = mode and 'l' or ''
211 mode = mode and 'l' or ''
212 else:
212 else:
213 data = open(os.path.join(self.tmppath, name), 'rb').read()
213 data = open(os.path.join(self.tmppath, name), 'rb').read()
214 mode = (mode & 0111) and 'x' or ''
214 mode = (mode & 0111) and 'x' or ''
215 return data, mode
215 return data, mode
216
216
217 def _exclude(self, name):
217 def _exclude(self, name):
218 exclude = ['{arch}', '.arch-ids', '.arch-inventory']
218 exclude = ['{arch}', '.arch-ids', '.arch-inventory']
219 for exc in exclude:
219 for exc in exclude:
220 if name.find(exc) != -1:
220 if name.find(exc) != -1:
221 return True
221 return True
222 return False
222 return False
223
223
224 def _readcontents(self, path):
224 def _readcontents(self, path):
225 files = []
225 files = []
226 contents = os.listdir(path)
226 contents = os.listdir(path)
227 while len(contents) > 0:
227 while len(contents) > 0:
228 c = contents.pop()
228 c = contents.pop()
229 p = os.path.join(path, c)
229 p = os.path.join(path, c)
230 # os.walk could be used, but here we avoid internal GNU
230 # os.walk could be used, but here we avoid internal GNU
231 # Arch files and directories, thus saving a lot time.
231 # Arch files and directories, thus saving a lot time.
232 if not self._exclude(p):
232 if not self._exclude(p):
233 if os.path.isdir(p):
233 if os.path.isdir(p):
234 contents += [os.path.join(c, f) for f in os.listdir(p)]
234 contents += [os.path.join(c, f) for f in os.listdir(p)]
235 else:
235 else:
236 files.append(c)
236 files.append(c)
237 return files
237 return files
238
238
239 def _rendirchanges(self, src, dest):
239 def _rendirchanges(self, src, dest):
240 changes = []
240 changes = []
241 copies = {}
241 copies = {}
242 files = self._readcontents(os.path.join(self.tmppath, dest))
242 files = self._readcontents(os.path.join(self.tmppath, dest))
243 for f in files:
243 for f in files:
244 s = os.path.join(src, f)
244 s = os.path.join(src, f)
245 d = os.path.join(dest, f)
245 d = os.path.join(dest, f)
246 changes.append(s)
246 changes.append(s)
247 changes.append(d)
247 changes.append(d)
248 copies[d] = s
248 copies[d] = s
249 return changes, copies
249 return changes, copies
250
250
251 def _obtainrevision(self, rev):
251 def _obtainrevision(self, rev):
252 self.ui.debug('obtaining revision %s...\n' % rev)
252 self.ui.debug('obtaining revision %s...\n' % rev)
253 output = self._execute('get', rev, self.tmppath)
253 output = self._execute('get', rev, self.tmppath)
254 self.checkexit(output)
254 self.checkexit(output)
255 self.ui.debug('analyzing revision %s...\n' % rev)
255 self.ui.debug('analyzing revision %s...\n' % rev)
256 files = self._readcontents(self.tmppath)
256 files = self._readcontents(self.tmppath)
257 self.changes[rev].add_files += files
257 self.changes[rev].add_files += files
258
258
259 def _stripbasepath(self, path):
259 def _stripbasepath(self, path):
260 if path.startswith('./'):
260 if path.startswith('./'):
261 return path[2:]
261 return path[2:]
262 return path
262 return path
263
263
264 def _parsecatlog(self, data, rev):
264 def _parsecatlog(self, data, rev):
265 try:
265 try:
266 catlog = self.catlogparser.parsestr(data)
266 catlog = self.catlogparser.parsestr(data)
267
267
268 # Commit date
268 # Commit date
269 self.changes[rev].date = util.datestr(
269 self.changes[rev].date = util.datestr(
270 util.strdate(catlog['Standard-date'],
270 util.strdate(catlog['Standard-date'],
271 '%Y-%m-%d %H:%M:%S'))
271 '%Y-%m-%d %H:%M:%S'))
272
272
273 # Commit author
273 # Commit author
274 self.changes[rev].author = self.recode(catlog['Creator'])
274 self.changes[rev].author = self.recode(catlog['Creator'])
275
275
276 # Commit description
276 # Commit description
277 self.changes[rev].summary = '\n\n'.join((catlog['Summary'],
277 self.changes[rev].summary = '\n\n'.join((catlog['Summary'],
278 catlog.get_payload()))
278 catlog.get_payload()))
279 self.changes[rev].summary = self.recode(self.changes[rev].summary)
279 self.changes[rev].summary = self.recode(self.changes[rev].summary)
280
280
281 # Commit revision origin when dealing with a branch or tag
281 # Commit revision origin when dealing with a branch or tag
282 if 'Continuation-of' in catlog:
282 if 'Continuation-of' in catlog:
283 self.changes[rev].continuationof = self.recode(
283 self.changes[rev].continuationof = self.recode(
284 catlog['Continuation-of'])
284 catlog['Continuation-of'])
285 except Exception:
285 except Exception:
286 raise util.Abort(_('could not parse cat-log of %s') % rev)
286 raise util.Abort(_('could not parse cat-log of %s') % rev)
287
287
288 def _parsechangeset(self, data, rev):
288 def _parsechangeset(self, data, rev):
289 for l in data:
289 for l in data:
290 l = l.strip()
290 l = l.strip()
291 # Added file (ignore added directory)
291 # Added file (ignore added directory)
292 if l.startswith('A') and not l.startswith('A/'):
292 if l.startswith('A') and not l.startswith('A/'):
293 file = self._stripbasepath(l[1:].strip())
293 file = self._stripbasepath(l[1:].strip())
294 if not self._exclude(file):
294 if not self._exclude(file):
295 self.changes[rev].add_files.append(file)
295 self.changes[rev].add_files.append(file)
296 # Deleted file (ignore deleted directory)
296 # Deleted file (ignore deleted directory)
297 elif l.startswith('D') and not l.startswith('D/'):
297 elif l.startswith('D') and not l.startswith('D/'):
298 file = self._stripbasepath(l[1:].strip())
298 file = self._stripbasepath(l[1:].strip())
299 if not self._exclude(file):
299 if not self._exclude(file):
300 self.changes[rev].del_files.append(file)
300 self.changes[rev].del_files.append(file)
301 # Modified binary file
301 # Modified binary file
302 elif l.startswith('Mb'):
302 elif l.startswith('Mb'):
303 file = self._stripbasepath(l[2:].strip())
303 file = self._stripbasepath(l[2:].strip())
304 if not self._exclude(file):
304 if not self._exclude(file):
305 self.changes[rev].mod_files.append(file)
305 self.changes[rev].mod_files.append(file)
306 # Modified link
306 # Modified link
307 elif l.startswith('M->'):
307 elif l.startswith('M->'):
308 file = self._stripbasepath(l[3:].strip())
308 file = self._stripbasepath(l[3:].strip())
309 if not self._exclude(file):
309 if not self._exclude(file):
310 self.changes[rev].mod_files.append(file)
310 self.changes[rev].mod_files.append(file)
311 # Modified file
311 # Modified file
312 elif l.startswith('M'):
312 elif l.startswith('M'):
313 file = self._stripbasepath(l[1:].strip())
313 file = self._stripbasepath(l[1:].strip())
314 if not self._exclude(file):
314 if not self._exclude(file):
315 self.changes[rev].mod_files.append(file)
315 self.changes[rev].mod_files.append(file)
316 # Renamed file (or link)
316 # Renamed file (or link)
317 elif l.startswith('=>'):
317 elif l.startswith('=>'):
318 files = l[2:].strip().split(' ')
318 files = l[2:].strip().split(' ')
319 if len(files) == 1:
319 if len(files) == 1:
320 files = l[2:].strip().split('\t')
320 files = l[2:].strip().split('\t')
321 src = self._stripbasepath(files[0])
321 src = self._stripbasepath(files[0])
322 dst = self._stripbasepath(files[1])
322 dst = self._stripbasepath(files[1])
323 if not self._exclude(src) and not self._exclude(dst):
323 if not self._exclude(src) and not self._exclude(dst):
324 self.changes[rev].ren_files[src] = dst
324 self.changes[rev].ren_files[src] = dst
325 # Conversion from file to link or from link to file (modified)
325 # Conversion from file to link or from link to file (modified)
326 elif l.startswith('ch'):
326 elif l.startswith('ch'):
327 file = self._stripbasepath(l[2:].strip())
327 file = self._stripbasepath(l[2:].strip())
328 if not self._exclude(file):
328 if not self._exclude(file):
329 self.changes[rev].mod_files.append(file)
329 self.changes[rev].mod_files.append(file)
330 # Renamed directory
330 # Renamed directory
331 elif l.startswith('/>'):
331 elif l.startswith('/>'):
332 dirs = l[2:].strip().split(' ')
332 dirs = l[2:].strip().split(' ')
333 if len(dirs) == 1:
333 if len(dirs) == 1:
334 dirs = l[2:].strip().split('\t')
334 dirs = l[2:].strip().split('\t')
335 src = self._stripbasepath(dirs[0])
335 src = self._stripbasepath(dirs[0])
336 dst = self._stripbasepath(dirs[1])
336 dst = self._stripbasepath(dirs[1])
337 if not self._exclude(src) and not self._exclude(dst):
337 if not self._exclude(src) and not self._exclude(dst):
338 self.changes[rev].ren_dirs[src] = dst
338 self.changes[rev].ren_dirs[src] = dst
General Comments 0
You need to be logged in to leave comments. Login now