##// END OF EJS Templates
convert/bzr: expect unicode metadata, encode in UTF-8 (issue3232)...
Patrick Mezard -
r16059:f5b6046f default
parent child Browse files
Show More
@@ -1,278 +1,265 b''
1 1 # bzr.py - bzr support for the convert extension
2 2 #
3 3 # Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # This module is for handling 'bzr', which was formerly known as Bazaar-NG;
9 9 # it cannot access 'baz' repositories, but they were never used very much
10 10
11 11 import os
12 12 from mercurial import demandimport
13 13 # these do not work with demandimport, blacklist
14 14 demandimport.ignore.extend([
15 15 'bzrlib.transactions',
16 16 'bzrlib.urlutils',
17 17 'ElementPath',
18 18 ])
19 19
20 20 from mercurial.i18n import _
21 21 from mercurial import util
22 22 from common import NoRepo, commit, converter_source
23 23
24 24 try:
25 25 # bazaar imports
26 26 from bzrlib import branch, revision, errors
27 27 from bzrlib.revisionspec import RevisionSpec
28 28 except ImportError:
29 29 pass
30 30
31 31 supportedkinds = ('file', 'symlink')
32 32
33 33 class bzr_source(converter_source):
34 34 """Reads Bazaar repositories by using the Bazaar Python libraries"""
35 35
36 36 def __init__(self, ui, path, rev=None):
37 37 super(bzr_source, self).__init__(ui, path, rev=rev)
38 38
39 39 if not os.path.exists(os.path.join(path, '.bzr')):
40 40 raise NoRepo(_('%s does not look like a Bazaar repository')
41 41 % path)
42 42
43 43 try:
44 44 # access bzrlib stuff
45 45 branch
46 46 except NameError:
47 47 raise NoRepo(_('Bazaar modules could not be loaded'))
48 48
49 49 path = os.path.abspath(path)
50 50 self._checkrepotype(path)
51 51 self.branch = branch.Branch.open(path)
52 52 self.sourcerepo = self.branch.repository
53 53 self._parentids = {}
54 54
55 55 def _checkrepotype(self, path):
56 56 # Lightweight checkouts detection is informational but probably
57 57 # fragile at API level. It should not terminate the conversion.
58 58 try:
59 59 from bzrlib import bzrdir
60 60 dir = bzrdir.BzrDir.open_containing(path)[0]
61 61 try:
62 62 tree = dir.open_workingtree(recommend_upgrade=False)
63 63 branch = tree.branch
64 64 except (errors.NoWorkingTree, errors.NotLocalUrl):
65 65 tree = None
66 66 branch = dir.open_branch()
67 67 if (tree is not None and tree.bzrdir.root_transport.base !=
68 68 branch.bzrdir.root_transport.base):
69 69 self.ui.warn(_('warning: lightweight checkouts may cause '
70 70 'conversion failures, try with a regular '
71 71 'branch instead.\n'))
72 72 except:
73 73 self.ui.note(_('bzr source type could not be determined\n'))
74 74
75 75 def before(self):
76 76 """Before the conversion begins, acquire a read lock
77 77 for all the operations that might need it. Fortunately
78 78 read locks don't block other reads or writes to the
79 79 repository, so this shouldn't have any impact on the usage of
80 80 the source repository.
81 81
82 82 The alternative would be locking on every operation that
83 83 needs locks (there are currently two: getting the file and
84 84 getting the parent map) and releasing immediately after,
85 85 but this approach can take even 40% longer."""
86 86 self.sourcerepo.lock_read()
87 87
88 88 def after(self):
89 89 self.sourcerepo.unlock()
90 90
91 91 def getheads(self):
92 92 if not self.rev:
93 93 return [self.branch.last_revision()]
94 94 try:
95 95 r = RevisionSpec.from_string(self.rev)
96 96 info = r.in_history(self.branch)
97 97 except errors.BzrError:
98 98 raise util.Abort(_('%s is not a valid revision in current branch')
99 99 % self.rev)
100 100 return [info.rev_id]
101 101
102 102 def getfile(self, name, rev):
103 103 revtree = self.sourcerepo.revision_tree(rev)
104 104 fileid = revtree.path2id(name.decode(self.encoding or 'utf-8'))
105 105 kind = None
106 106 if fileid is not None:
107 107 kind = revtree.kind(fileid)
108 108 if kind not in supportedkinds:
109 109 # the file is not available anymore - was deleted
110 110 raise IOError(_('%s is not available in %s anymore') %
111 111 (name, rev))
112 112 mode = self._modecache[(name, rev)]
113 113 if kind == 'symlink':
114 114 target = revtree.get_symlink_target(fileid)
115 115 if target is None:
116 116 raise util.Abort(_('%s.%s symlink has no target')
117 117 % (name, rev))
118 118 return target, mode
119 119 else:
120 120 sio = revtree.get_file(fileid)
121 121 return sio.read(), mode
122 122
123 123 def getchanges(self, version):
124 124 # set up caches: modecache and revtree
125 125 self._modecache = {}
126 126 self._revtree = self.sourcerepo.revision_tree(version)
127 127 # get the parentids from the cache
128 128 parentids = self._parentids.pop(version)
129 129 # only diff against first parent id
130 130 prevtree = self.sourcerepo.revision_tree(parentids[0])
131 131 return self._gettreechanges(self._revtree, prevtree)
132 132
133 133 def getcommit(self, version):
134 134 rev = self.sourcerepo.get_revision(version)
135 135 # populate parent id cache
136 136 if not rev.parent_ids:
137 137 parents = []
138 138 self._parentids[version] = (revision.NULL_REVISION,)
139 139 else:
140 140 parents = self._filterghosts(rev.parent_ids)
141 141 self._parentids[version] = parents
142 142
143 143 return commit(parents=parents,
144 144 date='%d %d' % (rev.timestamp, -rev.timezone),
145 145 author=self.recode(rev.committer),
146 # bzr returns bytestrings or unicode, depending on the content
147 146 desc=self.recode(rev.message),
148 147 rev=version)
149 148
150 149 def gettags(self):
151 150 if not self.branch.supports_tags():
152 151 return {}
153 152 tagdict = self.branch.tags.get_tag_dict()
154 153 bytetags = {}
155 154 for name, rev in tagdict.iteritems():
156 155 bytetags[self.recode(name)] = rev
157 156 return bytetags
158 157
159 158 def getchangedfiles(self, rev, i):
160 159 self._modecache = {}
161 160 curtree = self.sourcerepo.revision_tree(rev)
162 161 if i is not None:
163 162 parentid = self._parentids[rev][i]
164 163 else:
165 164 # no parent id, get the empty revision
166 165 parentid = revision.NULL_REVISION
167 166
168 167 prevtree = self.sourcerepo.revision_tree(parentid)
169 168 changes = [e[0] for e in self._gettreechanges(curtree, prevtree)[0]]
170 169 return changes
171 170
172 171 def _gettreechanges(self, current, origin):
173 172 revid = current._revision_id
174 173 changes = []
175 174 renames = {}
176 175 seen = set()
177 176 # Process the entries by reverse lexicographic name order to
178 177 # handle nested renames correctly, most specific first.
179 178 curchanges = sorted(current.iter_changes(origin),
180 179 key=lambda c: c[1][0] or c[1][1],
181 180 reverse=True)
182 181 for (fileid, paths, changed_content, versioned, parent, name,
183 182 kind, executable) in curchanges:
184 183
185 184 if paths[0] == u'' or paths[1] == u'':
186 185 # ignore changes to tree root
187 186 continue
188 187
189 188 # bazaar tracks directories, mercurial does not, so
190 189 # we have to rename the directory contents
191 190 if kind[1] == 'directory':
192 191 if kind[0] not in (None, 'directory'):
193 192 # Replacing 'something' with a directory, record it
194 193 # so it can be removed.
195 194 changes.append((self.recode(paths[0]), revid))
196 195
197 196 if kind[0] == 'directory' and None not in paths:
198 197 renaming = paths[0] != paths[1]
199 198 # neither an add nor a delete - a move
200 199 # rename all directory contents manually
201 200 subdir = origin.inventory.path2id(paths[0])
202 201 # get all child-entries of the directory
203 202 for name, entry in origin.inventory.iter_entries(subdir):
204 203 # hg does not track directory renames
205 204 if entry.kind == 'directory':
206 205 continue
207 206 frompath = self.recode(paths[0] + '/' + name)
208 207 if frompath in seen:
209 208 # Already handled by a more specific change entry
210 209 # This is important when you have:
211 210 # a => b
212 211 # a/c => a/c
213 212 # Here a/c must not be renamed into b/c
214 213 continue
215 214 seen.add(frompath)
216 215 if not renaming:
217 216 continue
218 217 topath = self.recode(paths[1] + '/' + name)
219 218 # register the files as changed
220 219 changes.append((frompath, revid))
221 220 changes.append((topath, revid))
222 221 # add to mode cache
223 222 mode = ((entry.executable and 'x')
224 223 or (entry.kind == 'symlink' and 's')
225 224 or '')
226 225 self._modecache[(topath, revid)] = mode
227 226 # register the change as move
228 227 renames[topath] = frompath
229 228
230 229 # no further changes, go to the next change
231 230 continue
232 231
233 232 # we got unicode paths, need to convert them
234 path, topath = [self.recode(part) for part in paths]
233 path, topath = paths
234 if path is not None:
235 path = self.recode(path)
236 if topath is not None:
237 topath = self.recode(topath)
235 238 seen.add(path or topath)
236 239
237 240 if topath is None:
238 241 # file deleted
239 242 changes.append((path, revid))
240 243 continue
241 244
242 245 # renamed
243 246 if path and path != topath:
244 247 renames[topath] = path
245 248 changes.append((path, revid))
246 249
247 250 # populate the mode cache
248 251 kind, executable = [e[1] for e in (kind, executable)]
249 252 mode = ((executable and 'x') or (kind == 'symlink' and 'l')
250 253 or '')
251 254 self._modecache[(topath, revid)] = mode
252 255 changes.append((topath, revid))
253 256
254 257 return changes, renames
255 258
256 259 def _filterghosts(self, ids):
257 260 """Filters out ghost revisions which hg does not support, see
258 261 <http://bazaar-vcs.org/GhostRevision>
259 262 """
260 263 parentmap = self.sourcerepo.get_parent_map(ids)
261 264 parents = tuple([parent for parent in ids if parent in parentmap])
262 265 return parents
263
264 def recode(self, s, encoding=None):
265 """This version of recode tries to encode unicode to a bytestring,
266 preferably using the UTF-8 codec.
267 Types other than unicode are silently returned unchanged; this is
268 intentional, e.g. None is not going to be encoded but is instead
269 just passed through.
270 """
271 if not encoding:
272 encoding = self.encoding or 'utf-8'
273
274 if isinstance(s, unicode):
275 return s.encode(encoding)
276 else:
277 # leave it alone
278 return s
General Comments 0
You need to be logged in to leave comments. Login now