##// END OF EJS Templates
convert/subversion: work around memory leak in svn's python bindings...
Bryan O'Sullivan -
r4946:e8f4e40f default
parent child Browse files
Show More
@@ -1,620 +1,664
1 # Subversion 1.4/1.5 Python API backend
1 # Subversion 1.4/1.5 Python API backend
2 #
2 #
3 # Copyright(C) 2007 Daniel Holth et al
3 # Copyright(C) 2007 Daniel Holth et al
4 #
4 #
5 # Configuration options:
5 # Configuration options:
6 #
6 #
7 # convert.svn.trunk
7 # convert.svn.trunk
8 # Relative path to the trunk (default: "trunk")
8 # Relative path to the trunk (default: "trunk")
9 # convert.svn.branches
9 # convert.svn.branches
10 # Relative path to tree of branches (default: "branches")
10 # Relative path to tree of branches (default: "branches")
11 #
11 #
12 # Set these in a hgrc, or on the command line as follows:
12 # Set these in a hgrc, or on the command line as follows:
13 #
13 #
14 # hg convert --config convert.svn.trunk=wackoname [...]
14 # hg convert --config convert.svn.trunk=wackoname [...]
15
15
16 import pprint
16 import pprint
17 import locale
17 import locale
18
18 import os
19 import cPickle as pickle
19 from mercurial import util
20 from mercurial import util
20
21
21 # Subversion stuff. Works best with very recent Python SVN bindings
22 # Subversion stuff. Works best with very recent Python SVN bindings
22 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
23 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
23 # these bindings.
24 # these bindings.
24
25
25 from cStringIO import StringIO
26 from cStringIO import StringIO
26
27
27 from common import NoRepo, commit, converter_source
28 from common import NoRepo, commit, converter_source
28
29
29 try:
30 try:
30 from svn.core import SubversionException, Pool
31 from svn.core import SubversionException, Pool
31 import svn.core
32 import svn.core
32 import svn.ra
33 import svn.ra
33 import svn.delta
34 import svn.delta
34 import svn
35 import svn
35 import transport
36 import transport
36 except ImportError:
37 except ImportError:
37 pass
38 pass
38
39
39 class CompatibilityException(Exception): pass
40 class CompatibilityException(Exception): pass
40
41
class changedpath(object):
    '''Plain-Python copy of a Subversion changed-path record.

    Keeps only the three fields read elsewhere in this file:
    copyfrom_path, copyfrom_rev and action.  get_log() builds these
    objects so that log entries can be pickled.
    '''

    def __init__(self, p):
        # p is any object exposing the three attributes below.
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
47
41 # SVN conversion code stolen from bzr-svn and tailor
48 # SVN conversion code stolen from bzr-svn and tailor
42 class convert_svn(converter_source):
49 class convert_svn(converter_source):
def __init__(self, ui, url, rev=None):
    '''Open the Subversion repository at url and work out its head.

    ui  -- ui object; used here to warn when the bindings are missing.
    url -- repository URL.  A trailing "@N" (N an integer) is stripped
           and used as the revision to convert up to.
    rev -- optional revision to convert up to; must parse as an integer.

    Raises NoRepo when the Subversion bindings are unavailable or the
    repository cannot be opened, and util.Abort when rev is not an
    integer.
    '''
    super(convert_svn, self).__init__(ui, url, rev=rev)

    # The guarded import at the top of the file leaves this name
    # undefined when the Subversion bindings are not installed.
    try:
        SubversionException
    except NameError:
        msg = 'subversion python bindings could not be loaded\n'
        ui.warn(msg)
        raise NoRepo(msg)

    self.lastrevs = {}

    latest = None
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    at = url.rfind('@')
    if at >= 0:
        try:
            latest = int(url[at+1:])
        except ValueError:
            # The '@' is not followed by a number (e.g. user@host):
            # leave the URL untouched.
            pass
        else:
            url = url[:at]
    self.url = url
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        # Path of the converted tree relative to the repository root.
        self.module = self.url[len(self.base):]
        self.modulemap = {} # revision, module
        self.commits = {}
        self.files = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        raise NoRepo("couldn't open SVN repo %s" % url)

    # The blacklist file is optional; get_blacklist() initialises
    # self.blacklist before it tries to open the file.
    try:
        self.get_blacklist()
    except IOError:
        pass

    self.last_changed = self.latest(self.module, latest)

    self.head = self.revid(self.last_changed)
94
101
def setrevmap(self, revmap):
    '''Remember, per module, the highest revision number found in revmap.

    Every key of revmap is split with revsplit(); the result is stored
    in self.lastrevs as {module: highest revnum}.
    '''
    newest = {}
    for revid in revmap.keys():
        uuid, module, revnum = self.revsplit(revid)
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
103
110
def exists(self, path, optrev):
    '''List path (relative to self.url) at revision optrev.

    Returns whatever svn.client.ls returns, or an empty list when
    Subversion raises an error, so the result can be used as a
    truth value.
    '''
    target = self.url.rstrip('/') + '/' + path
    try:
        listing = svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return []
    return listing
110
117
def getheads(self):
    '''Return the list of head revision ids to convert.

    When both the trunk and branches directories exist (names taken
    from convert.svn.trunk / convert.svn.branches, defaulting to
    "trunk" and "branches"), the trunk becomes the main module and one
    head is added per entry listed under the branches directory.
    Otherwise the single head computed by __init__ is used.

    Raises util.Abort when a layout was configured explicitly but the
    directories are not found.  Updates self.module, self.head and
    self.heads as a side effect.
    '''
    # detect standard /branches, /tags, /trunk layout
    optrev = svn.core.svn_opt_revision_t()
    optrev.kind = svn.core.svn_opt_revision_number
    optrev.value.number = self.last_changed
    rpath = self.url.strip('/')
    cfgtrunk = self.ui.config('convert', 'svn.trunk')
    cfgbranches = self.ui.config('convert', 'svn.branches')
    trunk = (cfgtrunk or 'trunk').strip('/')
    branches = (cfgbranches or 'branches').strip('/')
    if self.exists(trunk, optrev) and self.exists(branches, optrev):
        self.ui.note('found trunk at %r and branches at %r\n' %
                     (trunk, branches))
        oldmodule = self.module
        self.module += '/' + trunk
        lt = self.latest(self.module, self.last_changed)
        self.head = self.revid(lt)
        self.heads = [self.head]
        branchnames = svn.client.ls(rpath + '/' + branches, optrev, False,
                                    self.ctx)
        for branch in branchnames.keys():
            # oldmodule is either '' or already starts with '/' (it is
            # the part of the URL after the repository root), so it
            # must not get another leading slash.
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            brevnum = self.latest(module, self.last_changed)
            brev = self.revid(brevnum, module)
            self.ui.note('found branch %s at %d\n' % (branch, brevnum))
            self.heads.append(brev)
    elif cfgtrunk or cfgbranches:
        # No import of _ is visible among this file's top-level
        # imports, so bring it into scope here.
        from mercurial.i18n import _
        raise util.Abort(_('trunk/branch layout expected, '
                           'but not found'))
    else:
        self.ui.note('working with one branch\n')
        self.heads = [self.head]
    return self.heads
147
154
def getfile(self, file, rev):
    '''Return the data of file at rev.

    The mode reported by _getfile() is stored in self.modecache under
    (file, rev) so that getmode() can return it afterwards.
    '''
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
152
159
def getmode(self, file, rev):
    '''Return the mode cached by an earlier getfile(file, rev) call.

    Raises KeyError when nothing was cached for that pair.
    '''
    key = (file, rev)
    return self.modecache[key]
155
162
def getchanges(self, rev):
    '''Return the sorted list of files recorded for rev.

    Resets self.modecache, and drops the entry from self.files once
    it has been handed out.
    '''
    self.modecache = {}
    changed = self.files[rev]
    changed.sort()
    # caller caches the result, so free it here to release memory
    self.files.pop(rev)
    return changed
164
171
def getcommit(self, rev):
    '''Return the commit object for rev, fetching it first if needed.

    When rev has not been fetched yet, the session is re-parented to
    the module named in rev and _fetch_revisions() is run from rev's
    revision number down to the last known revision of that module
    (0 when none is recorded).  The entry is removed from
    self.commits before being returned.
    '''
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        self._fetch_revisions(from_revnum=revnum,
                              to_revnum=self.lastrevs.get(module, 0))
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
176
183
def get_log(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    '''wrapper for svn.ra.get_log.
    on a large repository, svn.ra.get_log pins huge amounts of
    memory that cannot be recovered. work around it by forking
    and writing results over a pipe.

    This is a generator.  It yields one
    (orig_paths, revnum, author, date, message) tuple per log entry;
    orig_paths is None or a dict whose values are changedpath objects.

    Raises SubversionException("child raised exception", num) when the
    child reports a Subversion error, and util.Abort when the child
    exits with a non-zero status.
    '''
    import sys
    import traceback

    def child(fp):
        protocol = -1
        def receiver(orig_paths, revnum, author, date, message, pool):
            if orig_paths is not None:
                # Replace the binding objects by changedpath copies
                # before pickling.
                for k, v in orig_paths.iteritems():
                    orig_paths[k] = changedpath(v)
            pickle.dump((orig_paths, revnum, author, date, message),
                        fp, protocol)

        try:
            # Use an ra of our own so that our parent can consume
            # our results without confusing the server.
            t = transport.SvnRaTransport(url=self.url)
            svn.ra.get_log(t.ra, paths, start, end, limit,
                           discover_changed_paths,
                           strict_node_history,
                           receiver)
        except SubversionException:
            # Send the second exception argument to the parent in
            # place of a log entry.
            num = sys.exc_info()[1].args[1]
            pickle.dump(num, fp, protocol)
        else:
            # None marks a clean end of the stream.
            pickle.dump(None, fp, protocol)
        fp.close()

    rfd, wfd = os.pipe()
    pid = os.fork()
    if pid:
        os.close(wfd)
        fp = os.fdopen(rfd, 'rb')
        failure = None
        while True:
            try:
                entry = pickle.load(fp)
            except EOFError:
                # The child went away without writing its end marker;
                # its exit status is checked below.
                break
            if entry is None:
                break
            if not isinstance(entry, tuple):
                # Anything that is not a log tuple is an error value.
                failure = entry
                break
            yield entry
        fp.close()
        # Always reap the child, even when an error is about to be
        # raised.
        ret = os.waitpid(pid, 0)[1]
        if failure is not None:
            raise SubversionException("child raised exception", failure)
        if ret:
            # No import of _ is visible among this file's top-level
            # imports, so bring it into scope here.
            from mercurial.i18n import _
            raise util.Abort(_('get_log %s') % util.explain_exit(ret))
    else:
        os.close(rfd)
        status = 1
        try:
            try:
                child(os.fdopen(wfd, 'wb'))
                status = 0
            except Exception:
                traceback.print_exc()
        finally:
            # Whatever happened, the forked copy must never return
            # into (or unwind through) the caller's code.
            os._exit(status)
238
def gettags(self):
    '''Return a dict mapping tag names to revision ids.

    Reads the log of /tags from revision 0 up to the head.  For every
    changed path below /tags/, the part after "/tags/" is used as the
    tag name and the path's copy source (copyfrom_path at
    copyfrom_rev) as the tagged revision.

    Returns an empty dict when the log cannot be read.
    '''
    # NOTE(review): paths without a copy source are not filtered out
    # here; confirm whether they can occur below /tags/.
    tags = {}
    start = self.revnum(self.head)
    try:
        for entry in self.get_log(['/tags'], 0, start):
            orig_paths, revnum, author, date, message = entry
            if orig_paths is None:
                # get_log() passes None through for entries without
                # changed paths.
                continue
            for path in orig_paths:
                if not path.startswith('/tags/'):
                    continue
                ent = orig_paths[path]
                source = ent.copyfrom_path
                rev = ent.copyfrom_rev
                tag = path.split('/', 2)[2]
                tags[tag] = self.revid(rev, module=source)
    except SubversionException:
        self.ui.note('no tags found at revision %d\n' % start)
        return {}
    return tags
198
252
199 # -- helper functions --
253 # -- helper functions --
200
254
def revid(self, revnum, module=None):
    '''Build the unicode revision id "svn:<uuid><module>@<revnum>".

    module defaults to self.module when empty or None.  A byte-string
    module is decoded with self.encoding first, so paths with
    non-ASCII characters do not break the formatting.
    '''
    if not module:
        module = self.module
    if not isinstance(module, type(u'')):
        module = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, module, revnum)
205
259
def revnum(self, rev):
    '''Return, as an integer, what follows the last "@" in rev.'''
    tail = rev.rsplit('@', 1)[-1]
    return int(tail)
208
262
def revsplit(self, rev):
    '''Split a revision id into (uuid, module, revnum).

    rev has the form produced by revid(): "svn:<uuid><module>@<revnum>".
    uuid and module are returned encoded with self.encoding; module is
    '' when the id carries no path.  revnum is an integer.

    Raises ValueError when rev has no "@" or the revision number is
    not an integer.
    '''
    # Split on the LAST '@', as revnum() does: a module path may
    # itself contain '@'.
    url, revnum = rev.rsplit('@', 1)
    revnum = int(revnum)
    parts = url.split('/', 1)
    # [4:] drops the leading "svn:".
    uuid = parts.pop(0)[4:].encode(self.encoding)
    mod = ''
    if parts:
        mod = ('/' + parts[0]).encode(self.encoding)
    return uuid, mod, revnum
218
272
def latest(self, path, stop=0):
    '''find the latest revision affecting path, up to stop

    path -- path to look up; leading and trailing slashes are ignored.
    stop -- revision to look at; the repository's latest revision
            number is used when it is 0 or None.

    Returns the created_rev of the entry found.  Raises util.Abort
    when the path cannot be found at that revision.
    '''
    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # Re-parent to the repository root for the lookup, then go
        # back to the current module.
        self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(self.module)
    except SubversionException:
        dirent = None
    if not dirent:
        # Goes through the ui debug channel, not a bare print to
        # stdout.
        self.ui.debug('%s %s\n' % (self.base, path))
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    return dirent.created_rev
234
288
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Reads "blacklist.txt" from the current directory: one revision
    number per line; lines starting with "#" and lines that are not
    integers are ignored.  self.blacklist is set to an empty set
    before the file is opened, so it exists even when opening fails.

    Raises IOError when the file cannot be opened."""
    self.blacklist = set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Close the handle explicitly rather than waiting for it to be
        # garbage collected.
        fp.close()
250
304
def is_blacklisted(self, svn_rev):
    '''Tell whether svn_rev is in the set built by get_blacklist().'''
    skipped = self.blacklist
    return svn_rev in skipped
253
307
def reparent(self, module):
    '''Point the RA session at self.base + module.

    The URL is encoded with self.encoding, logged through ui.debug
    and then handed to svn.ra.reparent.
    '''
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
258
312
259 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
313 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
260 def get_entry_from_path(path, module=self.module):
314 def get_entry_from_path(path, module=self.module):
261 # Given the repository url of this wc, say
315 # Given the repository url of this wc, say
262 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
316 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
263 # extract the "entry" portion (a relative path) from what
317 # extract the "entry" portion (a relative path) from what
264 # svn log --xml says, ie
318 # svn log --xml says, ie
265 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
319 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
266 # that is to say "tests/PloneTestCase.py"
320 # that is to say "tests/PloneTestCase.py"
267
321
268 if path.startswith(module):
322 if path.startswith(module):
269 relative = path[len(module):]
323 relative = path[len(module):]
270 if relative.startswith('/'):
324 if relative.startswith('/'):
271 return relative[1:]
325 return relative[1:]
272 else:
326 else:
273 return relative
327 return relative
274
328
275 # The path is outside our tracked tree...
329 # The path is outside our tracked tree...
276 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
330 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
277 return None
331 return None
278
332
279 received = []
280 # svn.ra.get_log requires no other calls to the ra until it completes,
281 # so we just collect the log entries and parse them afterwards
282 def receivelog(orig_paths, revnum, author, date, message, pool):
283 if self.is_blacklisted(revnum):
284 self.ui.note('skipping blacklisted revision %d\n' % revnum)
285 return
286
287 if orig_paths is None:
288 self.ui.debug('revision %d has no entries\n' % revnum)
289 return
290
291 received.append((revnum, orig_paths.items(), author, date, message))
292
293 self.child_cset = None
333 self.child_cset = None
294 def parselogentry((revnum, orig_paths, author, date, message)):
334 def parselogentry(orig_paths, revnum, author, date, message):
295 self.ui.debug("parsing revision %d\n" % revnum)
335 self.ui.debug("parsing revision %d (%d changes)\n" %
336 (revnum, len(orig_paths)))
296
337
297 if revnum in self.modulemap:
338 if revnum in self.modulemap:
298 new_module = self.modulemap[revnum]
339 new_module = self.modulemap[revnum]
299 if new_module != self.module:
340 if new_module != self.module:
300 self.module = new_module
341 self.module = new_module
301 self.reparent(self.module)
342 self.reparent(self.module)
302
343
303 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
344 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
304 copies = {}
345 copies = {}
305 entries = []
346 entries = []
306 rev = self.revid(revnum)
347 rev = self.revid(revnum)
307 parents = []
348 parents = []
308
349
309 # branch log might return entries for a parent we already have
350 # branch log might return entries for a parent we already have
310 if (rev in self.commits or
351 if (rev in self.commits or
311 (revnum < self.lastrevs.get(self.module, 0))):
352 (revnum < self.lastrevs.get(self.module, 0))):
312 return
353 return
313
354
314 try:
355 try:
315 branch = self.module.split("/")[-1]
356 branch = self.module.split("/")[-1]
316 if branch == 'trunk':
357 if branch == 'trunk':
317 branch = ''
358 branch = ''
318 except IndexError:
359 except IndexError:
319 branch = None
360 branch = None
320
361
362 orig_paths = orig_paths.items()
321 orig_paths.sort()
363 orig_paths.sort()
322 for path, ent in orig_paths:
364 for path, ent in orig_paths:
323 # self.ui.write("path %s\n" % path)
365 # self.ui.write("path %s\n" % path)
324 if path == self.module: # Follow branching back in history
366 if path == self.module: # Follow branching back in history
325 if ent:
367 if ent:
326 if ent.copyfrom_path:
368 if ent.copyfrom_path:
327 # ent.copyfrom_rev may not be the actual last revision
369 # ent.copyfrom_rev may not be the actual last revision
328 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
370 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
329 self.modulemap[prev] = ent.copyfrom_path
371 self.modulemap[prev] = ent.copyfrom_path
330 parents = [self.revid(prev, ent.copyfrom_path)]
372 parents = [self.revid(prev, ent.copyfrom_path)]
331 self.ui.note('found parent of branch %s at %d: %s\n' % \
373 self.ui.note('found parent of branch %s at %d: %s\n' % \
332 (self.module, prev, ent.copyfrom_path))
374 (self.module, prev, ent.copyfrom_path))
333 else:
375 else:
334 self.ui.debug("No copyfrom path, don't know what to do.\n")
376 self.ui.debug("No copyfrom path, don't know what to do.\n")
335 # Maybe it was added and there is no more history.
377 # Maybe it was added and there is no more history.
336 entrypath = get_entry_from_path(path, module=self.module)
378 entrypath = get_entry_from_path(path, module=self.module)
337 # self.ui.write("entrypath %s\n" % entrypath)
379 # self.ui.write("entrypath %s\n" % entrypath)
338 if entrypath is None:
380 if entrypath is None:
339 # Outside our area of interest
381 # Outside our area of interest
340 self.ui.debug("boring@%s: %s\n" % (revnum, path))
382 self.ui.debug("boring@%s: %s\n" % (revnum, path))
341 continue
383 continue
342 entry = entrypath.decode(self.encoding)
384 entry = entrypath.decode(self.encoding)
343
385
344 kind = svn.ra.check_path(self.ra, entrypath, revnum)
386 kind = svn.ra.check_path(self.ra, entrypath, revnum)
345 if kind == svn.core.svn_node_file:
387 if kind == svn.core.svn_node_file:
346 if ent.copyfrom_path:
388 if ent.copyfrom_path:
347 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
389 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
348 if copyfrom_path:
390 if copyfrom_path:
349 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
391 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
350 # It's probably important for hg that the source
392 # It's probably important for hg that the source
351 # exists in the revision's parent, not just the
393 # exists in the revision's parent, not just the
352 # ent.copyfrom_rev
394 # ent.copyfrom_rev
353 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
395 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
354 if fromkind != 0:
396 if fromkind != 0:
355 copies[self.recode(entry)] = self.recode(copyfrom_path)
397 copies[self.recode(entry)] = self.recode(copyfrom_path)
356 entries.append(self.recode(entry))
398 entries.append(self.recode(entry))
357 elif kind == 0: # gone, but had better be a deleted *file*
399 elif kind == 0: # gone, but had better be a deleted *file*
358 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
400 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
359
401
360 # if a branch is created but entries are removed in the same
402 # if a branch is created but entries are removed in the same
361 # changeset, get the right fromrev
403 # changeset, get the right fromrev
362 if parents:
404 if parents:
363 uuid, old_module, fromrev = self.revsplit(parents[0])
405 uuid, old_module, fromrev = self.revsplit(parents[0])
364 else:
406 else:
365 fromrev = revnum - 1
407 fromrev = revnum - 1
366 # might always need to be revnum - 1 in these 3 lines?
408 # might always need to be revnum - 1 in these 3 lines?
367 old_module = self.modulemap.get(fromrev, self.module)
409 old_module = self.modulemap.get(fromrev, self.module)
368
410
369 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
411 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
370 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
412 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
371
413
372 def lookup_parts(p):
414 def lookup_parts(p):
373 rc = None
415 rc = None
374 parts = p.split("/")
416 parts = p.split("/")
375 for i in range(len(parts)):
417 for i in range(len(parts)):
376 part = "/".join(parts[:i])
418 part = "/".join(parts[:i])
377 info = part, copyfrom.get(part, None)
419 info = part, copyfrom.get(part, None)
378 if info[1] is not None:
420 if info[1] is not None:
379 self.ui.debug("Found parent directory %s\n" % info[1])
421 self.ui.debug("Found parent directory %s\n" % info[1])
380 rc = info
422 rc = info
381 return rc
423 return rc
382
424
383 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
425 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
384
426
385 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
427 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
386
428
387 # need to remove fragment from lookup_parts and replace with copyfrom_path
429 # need to remove fragment from lookup_parts and replace with copyfrom_path
388 if frompath is not None:
430 if frompath is not None:
389 self.ui.debug("munge-o-matic\n")
431 self.ui.debug("munge-o-matic\n")
390 self.ui.debug(entrypath + '\n')
432 self.ui.debug(entrypath + '\n')
391 self.ui.debug(entrypath[len(frompath):] + '\n')
433 self.ui.debug(entrypath[len(frompath):] + '\n')
392 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
434 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
393 fromrev = froment.copyfrom_rev
435 fromrev = froment.copyfrom_rev
394 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
436 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
395
437
396 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
438 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
397 if fromkind == svn.core.svn_node_file: # a deleted file
439 if fromkind == svn.core.svn_node_file: # a deleted file
398 entries.append(self.recode(entry))
440 entries.append(self.recode(entry))
399 elif fromkind == svn.core.svn_node_dir:
441 elif fromkind == svn.core.svn_node_dir:
400 # print "Deleted/moved non-file:", revnum, path, ent
442 # print "Deleted/moved non-file:", revnum, path, ent
401 # children = self._find_children(path, revnum - 1)
443 # children = self._find_children(path, revnum - 1)
402 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
444 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
403 # Sometimes this is tricky. For example: in
445 # Sometimes this is tricky. For example: in
404 # The Subversion Repository revision 6940 a dir
446 # The Subversion Repository revision 6940 a dir
405 # was copied and one of its files was deleted
447 # was copied and one of its files was deleted
406 # from the new location in the same commit. This
448 # from the new location in the same commit. This
407 # code can't deal with that yet.
449 # code can't deal with that yet.
408 if ent.action == 'C':
450 if ent.action == 'C':
409 children = self._find_children(path, fromrev)
451 children = self._find_children(path, fromrev)
410 else:
452 else:
411 oroot = entrypath.strip('/')
453 oroot = entrypath.strip('/')
412 nroot = path.strip('/')
454 nroot = path.strip('/')
413 children = self._find_children(oroot, fromrev)
455 children = self._find_children(oroot, fromrev)
414 children = [s.replace(oroot,nroot) for s in children]
456 children = [s.replace(oroot,nroot) for s in children]
415 # Mark all [files, not directories] as deleted.
457 # Mark all [files, not directories] as deleted.
416 for child in children:
458 for child in children:
417 # Can we move a child directory and its
459 # Can we move a child directory and its
418 # parent in the same commit? (probably can). Could
460 # parent in the same commit? (probably can). Could
419 # cause problems if instead of revnum -1,
461 # cause problems if instead of revnum -1,
420 # we have to look in (copyfrom_path, revnum - 1)
462 # we have to look in (copyfrom_path, revnum - 1)
421 entrypath = get_entry_from_path("/" + child, module=old_module)
463 entrypath = get_entry_from_path("/" + child, module=old_module)
422 if entrypath:
464 if entrypath:
423 entry = self.recode(entrypath.decode(self.encoding))
465 entry = self.recode(entrypath.decode(self.encoding))
424 if entry in copies:
466 if entry in copies:
425 # deleted file within a copy
467 # deleted file within a copy
426 del copies[entry]
468 del copies[entry]
427 else:
469 else:
428 entries.append(entry)
470 entries.append(entry)
429 else:
471 else:
430 self.ui.debug('unknown path in revision %d: %s\n' % \
472 self.ui.debug('unknown path in revision %d: %s\n' % \
431 (revnum, path))
473 (revnum, path))
432 elif kind == svn.core.svn_node_dir:
474 elif kind == svn.core.svn_node_dir:
433 # Should probably synthesize normal file entries
475 # Should probably synthesize normal file entries
434 # and handle as above to clean up copy/rename handling.
476 # and handle as above to clean up copy/rename handling.
435
477
436 # If the directory just had a prop change,
478 # If the directory just had a prop change,
437 # then we shouldn't need to look for its children.
479 # then we shouldn't need to look for its children.
438 # Also this could create duplicate entries. Not sure
480 # Also this could create duplicate entries. Not sure
439 # whether this will matter. Maybe should make entries a set.
481 # whether this will matter. Maybe should make entries a set.
440 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
482 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
441 # This will fail if a directory was copied
483 # This will fail if a directory was copied
442 # from another branch and then some of its files
484 # from another branch and then some of its files
443 # were deleted in the same transaction.
485 # were deleted in the same transaction.
444 children = self._find_children(path, revnum)
486 children = self._find_children(path, revnum)
445 children.sort()
487 children.sort()
446 for child in children:
488 for child in children:
447 # Can we move a child directory and its
489 # Can we move a child directory and its
448 # parent in the same commit? (probably can). Could
490 # parent in the same commit? (probably can). Could
449 # cause problems if instead of revnum -1,
491 # cause problems if instead of revnum -1,
450 # we have to look in (copyfrom_path, revnum - 1)
492 # we have to look in (copyfrom_path, revnum - 1)
451 entrypath = get_entry_from_path("/" + child, module=self.module)
493 entrypath = get_entry_from_path("/" + child, module=self.module)
452 # print child, self.module, entrypath
494 # print child, self.module, entrypath
453 if entrypath:
495 if entrypath:
454 # Need to filter out directories here...
496 # Need to filter out directories here...
455 kind = svn.ra.check_path(self.ra, entrypath, revnum)
497 kind = svn.ra.check_path(self.ra, entrypath, revnum)
456 if kind != svn.core.svn_node_dir:
498 if kind != svn.core.svn_node_dir:
457 entries.append(self.recode(entrypath))
499 entries.append(self.recode(entrypath))
458
500
459 # Copies here (must copy all from source)
501 # Copies here (must copy all from source)
460 # Probably not a real problem for us if
502 # Probably not a real problem for us if
461 # source does not exist
503 # source does not exist
462
504
463 # Can do this with the copy command "hg copy"
505 # Can do this with the copy command "hg copy"
464 # if ent.copyfrom_path:
506 # if ent.copyfrom_path:
465 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
507 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
466 # module=self.module)
508 # module=self.module)
467 # copyto_entry = entrypath
509 # copyto_entry = entrypath
468 #
510 #
469 # print "copy directory", copyfrom_entry, 'to', copyto_entry
511 # print "copy directory", copyfrom_entry, 'to', copyto_entry
470 #
512 #
471 # copies.append((copyfrom_entry, copyto_entry))
513 # copies.append((copyfrom_entry, copyto_entry))
472
514
473 if ent.copyfrom_path:
515 if ent.copyfrom_path:
474 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
516 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
475 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
517 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
476 if copyfrom_entry:
518 if copyfrom_entry:
477 copyfrom[path] = ent
519 copyfrom[path] = ent
478 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
520 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
479
521
480 # Good, /probably/ a regular copy. Really should check
522 # Good, /probably/ a regular copy. Really should check
481 # to see whether the parent revision actually contains
523 # to see whether the parent revision actually contains
482 # the directory in question.
524 # the directory in question.
483 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
525 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
484 children.sort()
526 children.sort()
485 for child in children:
527 for child in children:
486 entrypath = get_entry_from_path("/" + child, module=self.module)
528 entrypath = get_entry_from_path("/" + child, module=self.module)
487 if entrypath:
529 if entrypath:
488 entry = entrypath.decode(self.encoding)
530 entry = entrypath.decode(self.encoding)
489 # print "COPY COPY From", copyfrom_entry, entry
531 # print "COPY COPY From", copyfrom_entry, entry
490 copyto_path = path + entry[len(copyfrom_entry):]
532 copyto_path = path + entry[len(copyfrom_entry):]
491 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
533 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
492 # print "COPY", entry, "COPY To", copyto_entry
534 # print "COPY", entry, "COPY To", copyto_entry
493 copies[self.recode(copyto_entry)] = self.recode(entry)
535 copies[self.recode(copyto_entry)] = self.recode(entry)
494 # copy from quux splort/quuxfile
536 # copy from quux splort/quuxfile
495
537
496 self.modulemap[revnum] = self.module # track backwards in time
538 self.modulemap[revnum] = self.module # track backwards in time
497 # a list of (filename, id) where id lets us retrieve the file.
539 # a list of (filename, id) where id lets us retrieve the file.
498 # eg in git, id is the object hash. for svn it'll be the
540 # eg in git, id is the object hash. for svn it'll be the
499 self.files[rev] = zip(entries, [rev] * len(entries))
541 self.files[rev] = zip(entries, [rev] * len(entries))
500 if not entries:
542 if not entries:
501 return
543 return
502
544
503 # Example SVN datetime. Includes microseconds.
545 # Example SVN datetime. Includes microseconds.
504 # ISO-8601 conformant
546 # ISO-8601 conformant
505 # '2007-01-04T17:35:00.902377Z'
547 # '2007-01-04T17:35:00.902377Z'
506 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
548 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
507
549
508 log = message and self.recode(message)
550 log = message and self.recode(message)
509 author = author and self.recode(author) or ''
551 author = author and self.recode(author) or ''
510
552
511 cset = commit(author=author,
553 cset = commit(author=author,
512 date=util.datestr(date),
554 date=util.datestr(date),
513 desc=log,
555 desc=log,
514 parents=parents,
556 parents=parents,
515 copies=copies,
557 copies=copies,
516 branch=branch,
558 branch=branch,
517 rev=rev.encode('utf-8'))
559 rev=rev.encode('utf-8'))
518
560
519 self.commits[rev] = cset
561 self.commits[rev] = cset
520 if self.child_cset and not self.child_cset.parents:
562 if self.child_cset and not self.child_cset.parents:
521 self.child_cset.parents = [rev]
563 self.child_cset.parents = [rev]
522 self.child_cset = cset
564 self.child_cset = cset
523
565
524 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
566 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
525 (self.module, from_revnum, to_revnum))
567 (self.module, from_revnum, to_revnum))
526
568
527 try:
569 try:
528 discover_changed_paths = True
570 discover_changed_paths = True
529 strict_node_history = False
571 strict_node_history = False
530 svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum, 0,
572 for entry in self.get_log([self.module], from_revnum, to_revnum):
531 discover_changed_paths, strict_node_history,
573 orig_paths, revnum, author, date, message = entry
532 receivelog)
574 if self.is_blacklisted(revnum):
533 self.ui.note('parsing %d log entries for "%s"\n' %
575 self.ui.note('skipping blacklisted revision %d\n' % revnum)
534 (len(received), self.module))
576 continue
535 for entry in received:
577 if orig_paths is None:
536 parselogentry(entry)
578 self.ui.debug('revision %d has no entries\n' % revnum)
579 continue
580 parselogentry(orig_paths, revnum, author, date, message)
537 except SubversionException, (_, num):
581 except SubversionException, (_, num):
538 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
582 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
539 raise NoSuchRevision(branch=self,
583 raise NoSuchRevision(branch=self,
540 revision="Revision number %d" % to_revnum)
584 revision="Revision number %d" % to_revnum)
541 raise
585 raise
542
586
543 def _getfile(self, file, rev):
587 def _getfile(self, file, rev):
544 io = StringIO()
588 io = StringIO()
545 # TODO: ra.get_file transmits the whole file instead of diffs.
589 # TODO: ra.get_file transmits the whole file instead of diffs.
546 mode = ''
590 mode = ''
547 try:
591 try:
548 revnum = self.revnum(rev)
592 revnum = self.revnum(rev)
549 if self.module != self.modulemap[revnum]:
593 if self.module != self.modulemap[revnum]:
550 self.module = self.modulemap[revnum]
594 self.module = self.modulemap[revnum]
551 self.reparent(self.module)
595 self.reparent(self.module)
552 info = svn.ra.get_file(self.ra, file, revnum, io)
596 info = svn.ra.get_file(self.ra, file, revnum, io)
553 if isinstance(info, list):
597 if isinstance(info, list):
554 info = info[-1]
598 info = info[-1]
555 mode = ("svn:executable" in info) and 'x' or ''
599 mode = ("svn:executable" in info) and 'x' or ''
556 mode = ("svn:special" in info) and 'l' or mode
600 mode = ("svn:special" in info) and 'l' or mode
557 except SubversionException, e:
601 except SubversionException, e:
558 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
602 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
559 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
603 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
560 if e.apr_err in notfound: # File not found
604 if e.apr_err in notfound: # File not found
561 raise IOError()
605 raise IOError()
562 raise
606 raise
563 data = io.getvalue()
607 data = io.getvalue()
564 if mode == 'l':
608 if mode == 'l':
565 link_prefix = "link "
609 link_prefix = "link "
566 if data.startswith(link_prefix):
610 if data.startswith(link_prefix):
567 data = data[len(link_prefix):]
611 data = data[len(link_prefix):]
568 return data, mode
612 return data, mode
569
613
def _find_children(self, path, revnum):
    """List the entries found beneath a directory at a given revision.

    path is stripped of leading/trailing slashes; every returned entry
    is that stripped path joined to the child's relative path with "/".

    The normal route walks the tree through the RA layer (get_dir2
    when the binding offers it, get_dir otherwise) and collects only
    non-directory entries.  If the binding returns a dict from getdir
    (broken up to at least SVN 1.4.3), the instance is flagged via
    self._find_children_fallback and this call, as well as all later
    ones, uses a recursive svn.client.ls instead.
    NOTE(review): the fallback returns every key of the ls result and
    does not filter out directories -- confirm callers cope with that.

    The RA session is temporarily reparented to "/" + path and is
    always reparented back to self.module, even when the listing
    raises, so a failure cannot leave the session rooted elsewhere.
    """
    path = path.strip("/")

    def _find_children_fallback(path, revnum):
        # SWIG python bindings for getdir are broken up to at least 1.4.3
        pool = Pool()
        optrev = svn.core.svn_opt_revision_t()
        optrev.kind = svn.core.svn_opt_revision_number
        optrev.value.number = revnum
        rpath = '/'.join([self.base, path]).strip('/')
        listing = svn.client.ls(rpath, optrev, True, self.ctx, pool)
        return ['%s/%s' % (path, x) for x in listing.keys()]

    # A previous call already found the RA getdir binding unusable.
    if hasattr(self, '_find_children_fallback'):
        return _find_children_fallback(path, revnum)

    self.reparent("/" + path)
    use_fallback = False
    children = []
    try:
        pool = Pool()

        def find_children_inner(children, path, revnum=revnum):
            if hasattr(svn.ra, 'get_dir2'): # Since SVN 1.4
                fields = 0xffffffff # Binding does not provide SVN_DIRENT_ALL
                getdir = svn.ra.get_dir2(self.ra, path, revnum, fields, pool)
            else:
                getdir = svn.ra.get_dir(self.ra, path, revnum, pool)
            if isinstance(getdir, dict):
                # python binding for getdir is broken up to at least 1.4.3
                raise CompatibilityException()
            dirents = getdir[0]
            if isinstance(dirents, int):
                # got here once due to infinite recursion bug
                # pprint.pprint(getdir)
                return
            names = dirents.keys()
            names.sort()
            for child in names:
                childpath = (path + "/" + child).strip("/")
                if dirents[child].kind == svn.core.svn_node_dir:
                    find_children_inner(children, childpath)
                else:
                    children.append(childpath)

        # Nested try blocks (rather than try/except/finally) keep this
        # valid on the older Python 2 releases the file targets.
        try:
            find_children_inner(children, "")
        except CompatibilityException:
            self._find_children_fallback = True
            use_fallback = True
    finally:
        # Restore the session root no matter how the walk ended.
        self.reparent(self.module)

    if use_fallback:
        return _find_children_fallback(path, revnum)
    return [path + "/" + c for c in children]
General Comments 0
You need to be logged in to leave comments. Login now