##// END OF EJS Templates
convert/bzr: expect unicode metadata, encode in UTF-8 (issue3232)...
Patrick Mezard -
r16059:f5b6046f default
parent child Browse files
Show More
@@ -1,278 +1,265 b''
1 # bzr.py - bzr support for the convert extension
1 # bzr.py - bzr support for the convert extension
2 #
2 #
3 # Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others
3 # Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # This module is for handling 'bzr', that was formerly known as Bazaar-NG;
8 # This module is for handling 'bzr', that was formerly known as Bazaar-NG;
9 # it cannot access 'bar' repositories, but they were never used very much
9 # it cannot access 'bar' repositories, but they were never used very much
10
10
11 import os
11 import os
12 from mercurial import demandimport
12 from mercurial import demandimport
13 # these do not work with demandimport, blacklist
13 # these do not work with demandimport, blacklist
14 demandimport.ignore.extend([
14 demandimport.ignore.extend([
15 'bzrlib.transactions',
15 'bzrlib.transactions',
16 'bzrlib.urlutils',
16 'bzrlib.urlutils',
17 'ElementPath',
17 'ElementPath',
18 ])
18 ])
19
19
20 from mercurial.i18n import _
20 from mercurial.i18n import _
21 from mercurial import util
21 from mercurial import util
22 from common import NoRepo, commit, converter_source
22 from common import NoRepo, commit, converter_source
23
23
24 try:
24 try:
25 # bazaar imports
25 # bazaar imports
26 from bzrlib import branch, revision, errors
26 from bzrlib import branch, revision, errors
27 from bzrlib.revisionspec import RevisionSpec
27 from bzrlib.revisionspec import RevisionSpec
28 except ImportError:
28 except ImportError:
29 pass
29 pass
30
30
31 supportedkinds = ('file', 'symlink')
31 supportedkinds = ('file', 'symlink')
32
32
33 class bzr_source(converter_source):
33 class bzr_source(converter_source):
34 """Reads Bazaar repositories by using the Bazaar Python libraries"""
34 """Reads Bazaar repositories by using the Bazaar Python libraries"""
35
35
36 def __init__(self, ui, path, rev=None):
36 def __init__(self, ui, path, rev=None):
37 super(bzr_source, self).__init__(ui, path, rev=rev)
37 super(bzr_source, self).__init__(ui, path, rev=rev)
38
38
39 if not os.path.exists(os.path.join(path, '.bzr')):
39 if not os.path.exists(os.path.join(path, '.bzr')):
40 raise NoRepo(_('%s does not look like a Bazaar repository')
40 raise NoRepo(_('%s does not look like a Bazaar repository')
41 % path)
41 % path)
42
42
43 try:
43 try:
44 # access bzrlib stuff
44 # access bzrlib stuff
45 branch
45 branch
46 except NameError:
46 except NameError:
47 raise NoRepo(_('Bazaar modules could not be loaded'))
47 raise NoRepo(_('Bazaar modules could not be loaded'))
48
48
49 path = os.path.abspath(path)
49 path = os.path.abspath(path)
50 self._checkrepotype(path)
50 self._checkrepotype(path)
51 self.branch = branch.Branch.open(path)
51 self.branch = branch.Branch.open(path)
52 self.sourcerepo = self.branch.repository
52 self.sourcerepo = self.branch.repository
53 self._parentids = {}
53 self._parentids = {}
54
54
55 def _checkrepotype(self, path):
55 def _checkrepotype(self, path):
56 # Lightweight checkouts detection is informational but probably
56 # Lightweight checkouts detection is informational but probably
57 # fragile at API level. It should not terminate the conversion.
57 # fragile at API level. It should not terminate the conversion.
58 try:
58 try:
59 from bzrlib import bzrdir
59 from bzrlib import bzrdir
60 dir = bzrdir.BzrDir.open_containing(path)[0]
60 dir = bzrdir.BzrDir.open_containing(path)[0]
61 try:
61 try:
62 tree = dir.open_workingtree(recommend_upgrade=False)
62 tree = dir.open_workingtree(recommend_upgrade=False)
63 branch = tree.branch
63 branch = tree.branch
64 except (errors.NoWorkingTree, errors.NotLocalUrl):
64 except (errors.NoWorkingTree, errors.NotLocalUrl):
65 tree = None
65 tree = None
66 branch = dir.open_branch()
66 branch = dir.open_branch()
67 if (tree is not None and tree.bzrdir.root_transport.base !=
67 if (tree is not None and tree.bzrdir.root_transport.base !=
68 branch.bzrdir.root_transport.base):
68 branch.bzrdir.root_transport.base):
69 self.ui.warn(_('warning: lightweight checkouts may cause '
69 self.ui.warn(_('warning: lightweight checkouts may cause '
70 'conversion failures, try with a regular '
70 'conversion failures, try with a regular '
71 'branch instead.\n'))
71 'branch instead.\n'))
72 except:
72 except:
73 self.ui.note(_('bzr source type could not be determined\n'))
73 self.ui.note(_('bzr source type could not be determined\n'))
74
74
75 def before(self):
75 def before(self):
76 """Before the conversion begins, acquire a read lock
76 """Before the conversion begins, acquire a read lock
77 for all the operations that might need it. Fortunately
77 for all the operations that might need it. Fortunately
78 read locks don't block other reads or writes to the
78 read locks don't block other reads or writes to the
79 repository, so this shouldn't have any impact on the usage of
79 repository, so this shouldn't have any impact on the usage of
80 the source repository.
80 the source repository.
81
81
82 The alternative would be locking on every operation that
82 The alternative would be locking on every operation that
83 needs locks (there are currently two: getting the file and
83 needs locks (there are currently two: getting the file and
84 getting the parent map) and releasing immediately after,
84 getting the parent map) and releasing immediately after,
85 but this approach can take even 40% longer."""
85 but this approach can take even 40% longer."""
86 self.sourcerepo.lock_read()
86 self.sourcerepo.lock_read()
87
87
88 def after(self):
88 def after(self):
89 self.sourcerepo.unlock()
89 self.sourcerepo.unlock()
90
90
91 def getheads(self):
91 def getheads(self):
92 if not self.rev:
92 if not self.rev:
93 return [self.branch.last_revision()]
93 return [self.branch.last_revision()]
94 try:
94 try:
95 r = RevisionSpec.from_string(self.rev)
95 r = RevisionSpec.from_string(self.rev)
96 info = r.in_history(self.branch)
96 info = r.in_history(self.branch)
97 except errors.BzrError:
97 except errors.BzrError:
98 raise util.Abort(_('%s is not a valid revision in current branch')
98 raise util.Abort(_('%s is not a valid revision in current branch')
99 % self.rev)
99 % self.rev)
100 return [info.rev_id]
100 return [info.rev_id]
101
101
102 def getfile(self, name, rev):
102 def getfile(self, name, rev):
103 revtree = self.sourcerepo.revision_tree(rev)
103 revtree = self.sourcerepo.revision_tree(rev)
104 fileid = revtree.path2id(name.decode(self.encoding or 'utf-8'))
104 fileid = revtree.path2id(name.decode(self.encoding or 'utf-8'))
105 kind = None
105 kind = None
106 if fileid is not None:
106 if fileid is not None:
107 kind = revtree.kind(fileid)
107 kind = revtree.kind(fileid)
108 if kind not in supportedkinds:
108 if kind not in supportedkinds:
109 # the file is not available anymore - was deleted
109 # the file is not available anymore - was deleted
110 raise IOError(_('%s is not available in %s anymore') %
110 raise IOError(_('%s is not available in %s anymore') %
111 (name, rev))
111 (name, rev))
112 mode = self._modecache[(name, rev)]
112 mode = self._modecache[(name, rev)]
113 if kind == 'symlink':
113 if kind == 'symlink':
114 target = revtree.get_symlink_target(fileid)
114 target = revtree.get_symlink_target(fileid)
115 if target is None:
115 if target is None:
116 raise util.Abort(_('%s.%s symlink has no target')
116 raise util.Abort(_('%s.%s symlink has no target')
117 % (name, rev))
117 % (name, rev))
118 return target, mode
118 return target, mode
119 else:
119 else:
120 sio = revtree.get_file(fileid)
120 sio = revtree.get_file(fileid)
121 return sio.read(), mode
121 return sio.read(), mode
122
122
123 def getchanges(self, version):
123 def getchanges(self, version):
124 # set up caches: modecache and revtree
124 # set up caches: modecache and revtree
125 self._modecache = {}
125 self._modecache = {}
126 self._revtree = self.sourcerepo.revision_tree(version)
126 self._revtree = self.sourcerepo.revision_tree(version)
127 # get the parentids from the cache
127 # get the parentids from the cache
128 parentids = self._parentids.pop(version)
128 parentids = self._parentids.pop(version)
129 # only diff against first parent id
129 # only diff against first parent id
130 prevtree = self.sourcerepo.revision_tree(parentids[0])
130 prevtree = self.sourcerepo.revision_tree(parentids[0])
131 return self._gettreechanges(self._revtree, prevtree)
131 return self._gettreechanges(self._revtree, prevtree)
132
132
133 def getcommit(self, version):
133 def getcommit(self, version):
134 rev = self.sourcerepo.get_revision(version)
134 rev = self.sourcerepo.get_revision(version)
135 # populate parent id cache
135 # populate parent id cache
136 if not rev.parent_ids:
136 if not rev.parent_ids:
137 parents = []
137 parents = []
138 self._parentids[version] = (revision.NULL_REVISION,)
138 self._parentids[version] = (revision.NULL_REVISION,)
139 else:
139 else:
140 parents = self._filterghosts(rev.parent_ids)
140 parents = self._filterghosts(rev.parent_ids)
141 self._parentids[version] = parents
141 self._parentids[version] = parents
142
142
143 return commit(parents=parents,
143 return commit(parents=parents,
144 date='%d %d' % (rev.timestamp, -rev.timezone),
144 date='%d %d' % (rev.timestamp, -rev.timezone),
145 author=self.recode(rev.committer),
145 author=self.recode(rev.committer),
146 # bzr returns bytestrings or unicode, depending on the content
147 desc=self.recode(rev.message),
146 desc=self.recode(rev.message),
148 rev=version)
147 rev=version)
149
148
150 def gettags(self):
149 def gettags(self):
151 if not self.branch.supports_tags():
150 if not self.branch.supports_tags():
152 return {}
151 return {}
153 tagdict = self.branch.tags.get_tag_dict()
152 tagdict = self.branch.tags.get_tag_dict()
154 bytetags = {}
153 bytetags = {}
155 for name, rev in tagdict.iteritems():
154 for name, rev in tagdict.iteritems():
156 bytetags[self.recode(name)] = rev
155 bytetags[self.recode(name)] = rev
157 return bytetags
156 return bytetags
158
157
159 def getchangedfiles(self, rev, i):
158 def getchangedfiles(self, rev, i):
160 self._modecache = {}
159 self._modecache = {}
161 curtree = self.sourcerepo.revision_tree(rev)
160 curtree = self.sourcerepo.revision_tree(rev)
162 if i is not None:
161 if i is not None:
163 parentid = self._parentids[rev][i]
162 parentid = self._parentids[rev][i]
164 else:
163 else:
165 # no parent id, get the empty revision
164 # no parent id, get the empty revision
166 parentid = revision.NULL_REVISION
165 parentid = revision.NULL_REVISION
167
166
168 prevtree = self.sourcerepo.revision_tree(parentid)
167 prevtree = self.sourcerepo.revision_tree(parentid)
169 changes = [e[0] for e in self._gettreechanges(curtree, prevtree)[0]]
168 changes = [e[0] for e in self._gettreechanges(curtree, prevtree)[0]]
170 return changes
169 return changes
171
170
172 def _gettreechanges(self, current, origin):
171 def _gettreechanges(self, current, origin):
173 revid = current._revision_id
172 revid = current._revision_id
174 changes = []
173 changes = []
175 renames = {}
174 renames = {}
176 seen = set()
175 seen = set()
177 # Process the entries by reverse lexicographic name order to
176 # Process the entries by reverse lexicographic name order to
178 # handle nested renames correctly, most specific first.
177 # handle nested renames correctly, most specific first.
179 curchanges = sorted(current.iter_changes(origin),
178 curchanges = sorted(current.iter_changes(origin),
180 key=lambda c: c[1][0] or c[1][1],
179 key=lambda c: c[1][0] or c[1][1],
181 reverse=True)
180 reverse=True)
182 for (fileid, paths, changed_content, versioned, parent, name,
181 for (fileid, paths, changed_content, versioned, parent, name,
183 kind, executable) in curchanges:
182 kind, executable) in curchanges:
184
183
185 if paths[0] == u'' or paths[1] == u'':
184 if paths[0] == u'' or paths[1] == u'':
186 # ignore changes to tree root
185 # ignore changes to tree root
187 continue
186 continue
188
187
189 # bazaar tracks directories, mercurial does not, so
188 # bazaar tracks directories, mercurial does not, so
190 # we have to rename the directory contents
189 # we have to rename the directory contents
191 if kind[1] == 'directory':
190 if kind[1] == 'directory':
192 if kind[0] not in (None, 'directory'):
191 if kind[0] not in (None, 'directory'):
193 # Replacing 'something' with a directory, record it
192 # Replacing 'something' with a directory, record it
194 # so it can be removed.
193 # so it can be removed.
195 changes.append((self.recode(paths[0]), revid))
194 changes.append((self.recode(paths[0]), revid))
196
195
197 if kind[0] == 'directory' and None not in paths:
196 if kind[0] == 'directory' and None not in paths:
198 renaming = paths[0] != paths[1]
197 renaming = paths[0] != paths[1]
199 # neither an add nor a delete - a move
198 # neither an add nor a delete - a move
200 # rename all directory contents manually
199 # rename all directory contents manually
201 subdir = origin.inventory.path2id(paths[0])
200 subdir = origin.inventory.path2id(paths[0])
202 # get all child-entries of the directory
201 # get all child-entries of the directory
203 for name, entry in origin.inventory.iter_entries(subdir):
202 for name, entry in origin.inventory.iter_entries(subdir):
204 # hg does not track directory renames
203 # hg does not track directory renames
205 if entry.kind == 'directory':
204 if entry.kind == 'directory':
206 continue
205 continue
207 frompath = self.recode(paths[0] + '/' + name)
206 frompath = self.recode(paths[0] + '/' + name)
208 if frompath in seen:
207 if frompath in seen:
209 # Already handled by a more specific change entry
208 # Already handled by a more specific change entry
210 # This is important when you have:
209 # This is important when you have:
211 # a => b
210 # a => b
212 # a/c => a/c
211 # a/c => a/c
213 # Here a/c must not be renamed into b/c
212 # Here a/c must not be renamed into b/c
214 continue
213 continue
215 seen.add(frompath)
214 seen.add(frompath)
216 if not renaming:
215 if not renaming:
217 continue
216 continue
218 topath = self.recode(paths[1] + '/' + name)
217 topath = self.recode(paths[1] + '/' + name)
219 # register the files as changed
218 # register the files as changed
220 changes.append((frompath, revid))
219 changes.append((frompath, revid))
221 changes.append((topath, revid))
220 changes.append((topath, revid))
222 # add to mode cache
221 # add to mode cache
223 mode = ((entry.executable and 'x')
222 mode = ((entry.executable and 'x')
224 or (entry.kind == 'symlink' and 's')
223 or (entry.kind == 'symlink' and 's')
225 or '')
224 or '')
226 self._modecache[(topath, revid)] = mode
225 self._modecache[(topath, revid)] = mode
227 # register the change as move
226 # register the change as move
228 renames[topath] = frompath
227 renames[topath] = frompath
229
228
230 # no further changes, go to the next change
229 # no further changes, go to the next change
231 continue
230 continue
232
231
233 # we got unicode paths, need to convert them
232 # we got unicode paths, need to convert them
234 path, topath = [self.recode(part) for part in paths]
233 path, topath = paths
234 if path is not None:
235 path = self.recode(path)
236 if topath is not None:
237 topath = self.recode(topath)
235 seen.add(path or topath)
238 seen.add(path or topath)
236
239
237 if topath is None:
240 if topath is None:
238 # file deleted
241 # file deleted
239 changes.append((path, revid))
242 changes.append((path, revid))
240 continue
243 continue
241
244
242 # renamed
245 # renamed
243 if path and path != topath:
246 if path and path != topath:
244 renames[topath] = path
247 renames[topath] = path
245 changes.append((path, revid))
248 changes.append((path, revid))
246
249
247 # populate the mode cache
250 # populate the mode cache
248 kind, executable = [e[1] for e in (kind, executable)]
251 kind, executable = [e[1] for e in (kind, executable)]
249 mode = ((executable and 'x') or (kind == 'symlink' and 'l')
252 mode = ((executable and 'x') or (kind == 'symlink' and 'l')
250 or '')
253 or '')
251 self._modecache[(topath, revid)] = mode
254 self._modecache[(topath, revid)] = mode
252 changes.append((topath, revid))
255 changes.append((topath, revid))
253
256
254 return changes, renames
257 return changes, renames
255
258
256 def _filterghosts(self, ids):
259 def _filterghosts(self, ids):
257 """Filters out ghost revisions which hg does not support, see
260 """Filters out ghost revisions which hg does not support, see
258 <http://bazaar-vcs.org/GhostRevision>
261 <http://bazaar-vcs.org/GhostRevision>
259 """
262 """
260 parentmap = self.sourcerepo.get_parent_map(ids)
263 parentmap = self.sourcerepo.get_parent_map(ids)
261 parents = tuple([parent for parent in ids if parent in parentmap])
264 parents = tuple([parent for parent in ids if parent in parentmap])
262 return parents
265 return parents
263
264 def recode(self, s, encoding=None):
265 """This version of recode tries to encode unicode to bytecode,
266 and preferably using the UTF-8 codec.
267 Other types than Unicode are silently returned, this is by
268 intention, e.g. the None-type is not going to be encoded but instead
269 just passed through
270 """
271 if not encoding:
272 encoding = self.encoding or 'utf-8'
273
274 if isinstance(s, unicode):
275 return s.encode(encoding)
276 else:
277 # leave it alone
278 return s
General Comments 0
You need to be logged in to leave comments. Login now