##// END OF EJS Templates
convert: svn: fix fallback directory listing code when importing a submodule
Brendan Cully -
r4770:4526ef8c default
parent child Browse files
Show More
@@ -1,522 +1,522 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import pprint
6 6 import locale
7 7
8 8 from mercurial import util
9 9
10 10 # Subversion stuff. Works best with very recent Python SVN bindings
11 11 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
12 12 # these bindings.
13 13
14 14 from cStringIO import StringIO
15 15
16 16 from common import NoRepo, commit, converter_source
17 17
18 18 try:
19 19 from svn.core import SubversionException, Pool
20 20 import svn.core
21 21 import svn.ra
22 22 import svn.delta
23 23 import svn
24 24 import transport
25 25 except ImportError:
26 26 pass
27 27
class CompatibilityException(Exception):
    """Raised when the SVN bindings hand back a directory listing we cannot use.

    _find_children() raises it when get_dir/get_dir2 returns a dict, and
    reacts by switching to its svn.client based fallback.
    """


# Not referenced anywhere else in this file.
# NOTE(review): presumably an intended batch size for fetching revisions.
nbRevisionsPerFetch = 50
31 31
class svn_entry(object):
    """Emulate a Subversion path change.

    Snapshots the ``copyfrom_path``, ``copyfrom_rev`` and ``action``
    attributes of the entry passed to the constructor.
    """
    __slots__ = ['path', 'copyfrom_path', 'copyfrom_rev', 'action']

    def __init__(self, entry):
        # 'path' is a declared slot but used to be left unassigned, so any
        # read of it raised AttributeError.  Take it from the entry when it
        # has one, otherwise default to None.
        self.path = getattr(entry, 'path', None)
        self.copyfrom_path = entry.copyfrom_path
        self.copyfrom_rev = entry.copyfrom_rev
        self.action = entry.action

    def __str__(self):
        return "%s %s %s" % (self.action, self.copyfrom_path, self.copyfrom_rev)

    def __repr__(self):
        return self.__str__()
45 45
class svn_paths(object):
    """Emulate a Subversion ordered dictionary of changed paths.

    Iterating yields the paths in sorted order; indexing with a path
    returns the value stored for it.
    """
    __slots__ = ['values', 'order']

    def __init__(self, orig_paths):
        self.order = []
        self.values = {}
        if hasattr(orig_paths, 'keys'):
            # Mapping-like input: its values are stored unchanged.
            self.order = sorted(orig_paths.keys())
            self.values.update(orig_paths)
            return
        if not orig_paths:
            return
        # Anything else that can be iterated and indexed by path: every
        # value is wrapped in an svn_entry.
        for path in orig_paths:
            self.order.append(path)
            self.values[path] = svn_entry(orig_paths[path])
        self.order.sort() # maybe the order it came in isn't so great...

    def __iter__(self):
        return iter(self.order)

    def __getitem__(self, key):
        return self.values[key]

    def __str__(self):
        # Build the text with a single join instead of repeated "+=".
        lines = ["'%s': %s,\n" % (path, self.values[path])
                 for path in self.order]
        return "{\n" + "".join(lines) + "}"

    def __repr__(self):
        return self.__str__()
78 78
79 79 # SVN conversion code stolen from bzr-svn and tailor
80 80 class convert_svn(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url and read its history.

    ui  -- object whose warn() and debug() methods are used for output
    url -- repository URL; a trailing "@<integer>" selects a revision
    rev -- optional revision, anything int() accepts

    Raises NoRepo when the Subversion bindings are not importable or the
    repository cannot be opened, and util.Abort when rev is not an integer.
    """
    try:
        SubversionException
    except NameError:
        msg = 'subversion python bindings could not be loaded\n'
        ui.warn(msg)
        raise NoRepo(msg)

    self.ui = ui
    latest = None
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    if "@" in url:
        stripped, suffix = url.rsplit("@", 1)
        try:
            pegrev = int(suffix)
        except ValueError:
            # The '@' is part of the URL itself (e.g. user@host): leave
            # both url and latest alone instead of truncating the URL
            # and keeping a non-numeric "revision".
            pass
        else:
            url, latest = stripped, pegrev

    self.url = url
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url = url)
        self.ra = self.transport.ra
        self.base = svn.ra.get_repos_root(self.ra)
        # Whatever follows the repository root in the URL.
        self.module = self.url[len(self.base):]
        self.modulemap = {} # revision, module
        self.commits = {}
        self.files = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        raise NoRepo("couldn't open SVN repo %s" % url)

    try:
        self.get_blacklist()
    except IOError:
        # The blacklist file is optional; get_blacklist() has already
        # set self.blacklist to an empty set before trying to open it.
        pass

    if not latest:
        latest = svn.ra.get_latest_revnum(self.ra)
    dirent = svn.ra.stat(self.ra, self.module, latest)
    self.last_changed = dirent.created_rev

    self.head = self.rev(self.last_changed)

    # Should lazily fetch revisions in batches of, say, 1,000...:
    self._fetch_revisions(from_revnum=self.last_changed, to_revnum=0)
132 132
def rev(self, revnum):
    """Return the revision identifier u"svn:<uuid><module>@<revnum>"."""
    # The formatted value is already a unicode string.  Calling .decode()
    # on it only forced an implicit ASCII encode first, which fails on any
    # non-ASCII character, so the call is dropped.
    return u"svn:%s%s@%s" % (self.uuid, self.module, revnum)
135 135
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Sets self.blacklist to the set of integers found, one per line, in
    the file "blacklist.txt".  Lines starting with "#" and lines that are
    not integers are skipped.  The error raised by opening the file
    propagates to the caller; self.blacklist is already an empty set by
    then."""
    self.blacklist = set()
    blacklist = self.blacklist
    # The context manager closes the file; it used to be left open.
    with open("blacklist.txt", "r") as listing:
        for line in listing:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
151 151
def is_blacklisted(self, svn_rev):
    """Tell whether svn_rev is a member of self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
154 154
def reparent(self, module):
    """Point the RA session at self.base + module.

    The resulting URL is encoded with self.encoding, written to the debug
    output and passed to svn.ra.reparent().
    """
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
159 159
def _fetch_revisions(self, from_revnum = 0, to_revnum = 347, pb=None):
    """Read the log from from_revnum to to_revnum and record every revision.

    svn.ra.get_log() is asked for the history of self.module.  For each
    revision that is not blacklisted this stores a commit object in
    self.commits[rev] and a list of (filename, rev) pairs in
    self.files[rev].  Each processed revision is set as the parent of the
    revision processed just before it, and self.modulemap records which
    module each revision number belongs to.

    pb is accepted but not used.

    Raises util.Abort when Subversion reports that a requested revision
    does not exist; other SubversionExceptions propagate unchanged.
    """
    self.parent_cset = None
    self.child_cset = None

    self.ui.debug('Fetching revisions %d to %d\n' % (from_revnum, to_revnum))

    # NOTE: the default for "module" is bound once, when this function is
    # defined, i.e. to the value self.module has on entry.
    def get_entry_from_path(path, module=self.module):
        # Given the repository url of this wc, say
        #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
        # extract the "entry" portion (a relative path) from what
        # svn log --xml says, ie
        #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
        # that is to say "tests/PloneTestCase.py"

        if path.startswith(module):
            relative = path[len(module):]
            if relative.startswith('/'):
                return relative[1:]
            else:
                return relative

        # The path is outside our tracked tree...
        self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
        return None

    received = []
    def rcvr(*arg, **args):
        # Log callback: wrap the changed paths and hand over to rcvr2.
        orig_paths, revnum, author, date, message, pool = arg
        new_orig_paths = svn_paths(orig_paths)
        rcvr2(new_orig_paths, revnum, author, date, message, pool)

    def rcvr2(orig_paths, revnum, author, date, message, pool, better_paths = None):
        # Only queue the revision; the real work happens in after_received
        # once get_log() has returned.
        if not self.is_blacklisted(revnum):
            received.append((orig_paths, revnum, author, date, message))

    def after_received(orig_paths, revnum, author, date, message):
        if revnum in self.modulemap:
            new_module = self.modulemap[revnum]
            if new_module != self.module:
                self.module = new_module
                self.reparent(self.module)

        copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
        copies = {}
        entries = []

        def lookup_parts(p):
            # Return (prefix, entry) for the longest proper prefix of p
            # that was recorded in copyfrom, or None when there is none.
            rc = None
            parts = p.split("/")
            for i in range(len(parts)):
                part = "/".join(parts[:i])
                info = part, copyfrom.get(part, None)
                if info[1] is not None:
                    # info is a 2-tuple; formatting it with a single %s
                    # raised TypeError, so only the path is printed.
                    self.ui.debug("Found parent directory %s\n" % info[0])
                    rc = info
            return rc

        self.ui.debug("Parsing revision %d\n" % revnum)
        # rev and branch are needed after the path loop as well, so they
        # are computed unconditionally.
        rev = self.rev(revnum)
        try:
            branch = self.module.split("/")[-1]
        except IndexError:
            branch = None

        if orig_paths is not None:
            for path in orig_paths:
                if path == self.module: # Follow branching back in history
                    ent = orig_paths[path]
                    if ent:
                        if ent.copyfrom_path:
                            self.modulemap[ent.copyfrom_rev] = ent.copyfrom_path
                        else:
                            self.ui.debug("No copyfrom path, don't know what to do.\n")
                            # Maybe it was added and there is no more history.
                entrypath = get_entry_from_path(path, module=self.module)
                if not entrypath:
                    # Outside our area of interest
                    self.ui.debug("boring@%s: %s\n" % (revnum, path))
                    continue
                entry = entrypath.decode(self.encoding)
                ent = orig_paths[path]

                kind = svn.ra.check_path(self.ra, entrypath, revnum)
                if kind == svn.core.svn_node_file:
                    if ent.copyfrom_path:
                        copyfrom_path = get_entry_from_path(ent.copyfrom_path)
                        if copyfrom_path:
                            self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
                            # It's probably important for hg that the source
                            # exists in the revision's parent, not just the
                            # ent.copyfrom_rev
                            fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
                            if fromkind != 0:
                                copies[self.recode(entry)] = self.recode(copyfrom_path)
                    entries.append(self.recode(entry))
                elif kind == 0: # gone, but had better be a deleted *file*
                    self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

                    fromrev = revnum - 1
                    # might always need to be revnum - 1 in these 3 lines?
                    old_module = self.modulemap.get(fromrev, self.module)
                    # entrypath still holds get_entry_from_path(path,
                    # module=self.module) from above, so reuse it instead of
                    # computing the same value twice more.
                    basepath = old_module + "/" + entrypath
                    entrypath = basepath

                    self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

                    frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

                    # need to remove fragment from lookup_parts and replace with copyfrom_path
                    if frompath is not None:
                        self.ui.debug("munge-o-matic\n")
                        self.ui.debug(entrypath + '\n')
                        self.ui.debug(entrypath[len(frompath):] + '\n')
                        entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                        fromrev = froment.copyfrom_rev
                        self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

                    fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
                    if fromkind == svn.core.svn_node_file:   # a deleted file
                        entries.append(self.recode(entry))
                    else:
                        # Sometimes this is tricky. For example: in
                        # The Subversion Repository revision 6940 a dir
                        # was copied and one of its files was deleted
                        # from the new location in the same commit. This
                        # code can't deal with that yet.
                        if ent.action == 'C':
                            children = self._find_children(path, fromrev)
                        else:
                            oroot = entrypath.strip('/')
                            nroot = path.strip('/')
                            children = self._find_children(oroot, fromrev)
                            children = [s.replace(oroot,nroot) for s in children]
                        # Mark all [files, not directories] as deleted.
                        for child in children:
                            # Can we move a child directory and its
                            # parent in the same commit? (probably can). Could
                            # cause problems if instead of revnum -1,
                            # we have to look in (copyfrom_path, revnum - 1)
                            entrypath = get_entry_from_path("/" + child, module=old_module)
                            if entrypath:
                                entry = self.recode(entrypath.decode(self.encoding))
                                if entry in copies:
                                    # deleted file within a copy
                                    del copies[entry]
                                else:
                                    entries.append(entry)
                elif kind == svn.core.svn_node_dir:
                    # Should probably synthesize normal file entries
                    # and handle as above to clean up copy/rename handling.

                    # If the directory just had a prop change,
                    # then we shouldn't need to look for its children.
                    # Also this could create duplicate entries. Not sure
                    # whether this will matter. Maybe should make entries a set.
                    # This will fail if a directory was copied
                    # from another branch and then some of its files
                    # were deleted in the same transaction.
                    children = self._find_children(path, revnum)
                    children.sort()
                    for child in children:
                        # Can we move a child directory and its
                        # parent in the same commit? (probably can). Could
                        # cause problems if instead of revnum -1,
                        # we have to look in (copyfrom_path, revnum - 1)
                        entrypath = get_entry_from_path("/" + child, module=self.module)
                        if entrypath:
                            # Need to filter out directories here...
                            child_kind = svn.ra.check_path(self.ra, entrypath, revnum)
                            if child_kind != svn.core.svn_node_dir:
                                entries.append(self.recode(entrypath))

                    # Copies here (must copy all from source)
                    # Probably not a real problem for us if
                    # source does not exist

                    if ent.copyfrom_path:
                        copyfrom_path = ent.copyfrom_path.decode(self.encoding)
                        copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
                        if copyfrom_entry:
                            copyfrom[path] = ent
                            self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))

                            # Good, /probably/ a regular copy. Really should check
                            # to see whether the parent revision actually contains
                            # the directory in question.
                            children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
                            children.sort()
                            for child in children:
                                entrypath = get_entry_from_path("/" + child, module=self.module)
                                if entrypath:
                                    entry = entrypath.decode(self.encoding)
                                    copyto_path = path + entry[len(copyfrom_entry):]
                                    copyto_entry = get_entry_from_path(copyto_path, module=self.module)
                                    copies[self.recode(copyto_entry)] = self.recode(entry)
                                    # copy from quux splort/quuxfile

        self.modulemap[revnum] = self.module # track backwards in time
        # a list of (filename, id) where id lets us retrieve the file.
        # eg in git, id is the object hash. for svn it'll be the
        # revision identifier built by self.rev().
        self.files[rev] = zip(entries, [rev] * len(entries))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        # "YYYY-MM-DDTHH:MM:SS" is 19 characters long; slicing at 18 cut
        # off the last digit of the seconds.
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message)
        author = author and self.recode(author) or ''

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=[],
                      copies=copies,
                      branch=branch)

        # The revision handled just before this one gets this one as parent.
        if self.child_cset is not None:
            self.child_cset.parents = [rev]

        self.child_cset = cset

        self.commits[rev] = cset

    try:
        discover_changed_paths = True
        strict_node_history = False
        svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum,
                       0, discover_changed_paths, strict_node_history, rcvr)
        for args in received:
            after_received(*args)
        self.last_revnum = to_revnum
    except SubversionException as e:
        # Same error-code attribute that _getfile() inspects.
        if e.apr_err == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            # NoSuchRevision is not defined or imported in this module, so
            # raising it produced a NameError; use util.Abort like __init__.
            raise util.Abort('svn: no such revision %d' % to_revnum)
        raise
415 415
def getheads(self):
    """Return a one-element list holding self.head.

    The same list is also stored as self.heads.
    """
    # svn-url@rev
    # Not safe if someone committed:
    heads = [self.head]
    self.heads = heads
    return heads
422 422
def _getfile(self, file, rev):
    """Fetch the contents and mode flag of file at revision id rev.

    The revision number is whatever follows the last "@" in rev.  Returns
    (data, mode), where mode is 'l' when the properties contain
    "svn:special", 'x' when they contain "svn:executable", '' otherwise.
    For mode 'l' a leading "link " is stripped from the data.

    Raises IOError when Subversion reports the file as not found.
    """
    buf = StringIO()
    # TODO: ra.get_file transmits the whole file instead of diffs.
    try:
        revnum = int(rev.split("@")[-1])
        wanted = self.modulemap[revnum]
        if wanted != self.module:
            # The revision lives in another module: switch the session.
            self.module = wanted
            self.reparent(wanted)
        props = svn.ra.get_file(self.ra, file, revnum, buf)
        if isinstance(props, list):
            props = props[-1]
        if "svn:special" in props:
            mode = 'l'
        elif "svn:executable" in props:
            mode = 'x'
        else:
            mode = ''
    except SubversionException as e:
        missing = (svn.core.SVN_ERR_FS_NOT_FOUND,
                   svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if e.apr_err in missing: # File not found
            raise IOError()
        raise
    data = buf.getvalue()
    marker = "link "
    if mode == 'l' and data.startswith(marker):
        data = data[len(marker):]
    return data, mode
449 449
def getfile(self, file, rev):
    """Return the contents of file at rev.

    The mode reported by _getfile() is remembered in self.modecache under
    the key (file, rev), where getmode() looks it up.
    """
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
454 454
def getmode(self, file, rev):
    """Return the mode stored in self.modecache for (file, rev)."""
    key = (file, rev)
    return self.modecache[key]
457 457
def getchanges(self, rev):
    """Return the list stored in self.files[rev], sorted in place.

    self.modecache is reset to an empty dict first.
    """
    self.modecache = {}
    changed = self.files[rev]
    changed.sort()
    return changed
464 464
def getcommit(self, rev):
    """Return the commit stored in self.commits for rev."""
    recorded = self.commits
    return recorded[rev]
467 467
def gettags(self):
    """Return an empty list; this source reports no tags."""
    no_tags = []
    return no_tags
470 470
def _find_children(self, path, revnum):
    """Return the non-directory entries found below path at revnum.

    path is taken relative to the repository root (self.base); leading and
    trailing slashes are ignored.  Each returned name is prefixed with
    path.  The RA session is reparented to path for the walk and back to
    self.module afterwards.

    When the get_dir binding returns a dict, the svn.client based fallback
    is used for this call and for every later one.
    """
    path = path.strip("/")

    def _find_children_fallback(path, revnum):
        # SWIG python bindings for getdir are broken up to at least 1.4.3
        if not hasattr(self, 'client_ctx'):
            self.client_ctx = svn.client.create_context()
        optrev = svn.core.svn_opt_revision_t()
        optrev.kind = svn.core.svn_opt_revision_number
        optrev.value.number = revnum
        # path is relative to the repository root, exactly as in the
        # reparent("/" + path) call below, so it is joined to self.base.
        # The superseded self.url variant of this line has been dropped.
        rpath = '/'.join([self.base, path]).strip('/')
        return ['%s/%s' % (path, x) for x in svn.client.ls(rpath, optrev, True, self.client_ctx).keys()]

    if hasattr(self, '_find_children_fallback'):
        return _find_children_fallback(path, revnum)

    self.reparent("/" + path)
    pool = Pool()

    children = []
    def find_children_inner(children, path, revnum = revnum):
        if hasattr(svn.ra, 'get_dir2'): # Since SVN 1.4
            fields = 0xffffffff # Binding does not provide SVN_DIRENT_ALL
            getdir = svn.ra.get_dir2(self.ra, path, revnum, fields, pool)
        else:
            getdir = svn.ra.get_dir(self.ra, path, revnum, pool)
        if isinstance(getdir, dict):
            # python binding for getdir is broken up to at least 1.4.3
            raise CompatibilityException()
        dirents = getdir[0]
        if isinstance(dirents, int):
            # got here once due to infinite recursion bug
            return
        for child in sorted(dirents.keys()):
            dirent = dirents[child]
            childpath = (path + "/" + child).strip("/")
            if dirent.kind == svn.core.svn_node_dir:
                find_children_inner(children, childpath)
            else:
                children.append(childpath)

    try:
        find_children_inner(children, "")
    except CompatibilityException:
        # Remember the breakage so later calls go straight to the fallback.
        self._find_children_fallback = True
        self.reparent(self.module)
        return _find_children_fallback(path, revnum)

    self.reparent(self.module)
    return [path + "/" + c for c in children]
General Comments 0
You need to be logged in to leave comments. Login now