##// END OF EJS Templates
convert: svn: get parent for branch creation events
Brendan Cully -
r4795:ea618c59 default
parent child Browse files
Show More
@@ -1,573 +1,576 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import pprint
6 6 import locale
7 7
8 8 from mercurial import util
9 9
10 10 # Subversion stuff. Works best with very recent Python SVN bindings
11 11 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
12 12 # these bindings.
13 13
14 14 from cStringIO import StringIO
15 15
16 16 from common import NoRepo, commit, converter_source
17 17
18 18 try:
19 19 from svn.core import SubversionException, Pool
20 20 import svn.core
21 21 import svn.ra
22 22 import svn.delta
23 23 import svn
24 24 import transport
25 25 except ImportError:
26 26 pass
27 27
class CompatibilityException(Exception):
    """Raised when the SVN Python bindings hand back an unusable result."""
29 29
30 30 LOG_BATCH_SIZE = 50
31 31
class svn_entry(object):
    """Plain-Python copy of one Subversion changed-path record.

    The copy-source path, copy-source revision and action code are read
    off the object passed to the constructor and stored on the instance.
    """
    # NOTE(review): 'path' is reserved as a slot but nothing in this class
    # assigns it; reading it before it is set raises AttributeError.
    __slots__ = ['path', 'copyfrom_path', 'copyfrom_rev', 'action']

    def __init__(self, entry):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(entry, field))

    def __str__(self):
        fields = (self.action, self.copyfrom_path, self.copyfrom_rev)
        return "%s %s %s" % fields

    def __repr__(self):
        # Same text as __str__ so entries print readably inside containers.
        return self.__str__()
45 45
class svn_paths(object):
    """Sorted, dict-like view over the changed paths of one revision.

    Iterating yields the paths in sorted order; indexing by path returns
    the value stored for that path.
    """
    __slots__ = ['values', 'order']

    def __init__(self, orig_paths):
        self.order = []
        self.values = {}
        if hasattr(orig_paths, 'keys'):
            # Mapping-like input: values are taken over untouched.
            self.order = sorted(orig_paths.keys())
            self.values.update(orig_paths)
        elif orig_paths:
            # Any other non-empty container: wrap each value in svn_entry.
            for name in orig_paths:
                self.order.append(name)
                self.values[name] = svn_entry(orig_paths[name])
            # maybe the order it came in isn't so great...
            self.order.sort()

    def __iter__(self):
        return iter(self.order)

    def __getitem__(self, key):
        return self.values[key]

    def __str__(self):
        rows = ["'%s': %s,\n" % (name, self.values[name])
                for name in self.order]
        return "{\n" + "".join(rows) + "}"

    def __repr__(self):
        return self.__str__()
78 78
79 79 # SVN conversion code stolen from bzr-svn and tailor
80 80 class convert_svn(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url and find the head to convert.

    ui  -- ui object used for warnings and debug output.
    url -- repository URL; a trailing "@N", with N an integer, selects
           revision N as the newest revision to look at.
    rev -- optional newest revision to look at; must convert to int.

    Raises NoRepo when the Subversion bindings are missing or the
    repository cannot be opened, and util.Abort when rev is not an
    integer.
    """
    try:
        SubversionException
    except NameError:
        msg = 'subversion python bindings could not be loaded\n'
        ui.warn(msg)
        raise NoRepo(msg)

    self.ui = ui
    latest = None
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    # The suffix is only stripped when it really is a number, so that
    # URLs with an '@' elsewhere (svn+ssh://user@host/repo) stay intact
    # and 'latest' never ends up holding a non-integer string.
    at = url.rfind('@')
    if at >= 0:
        try:
            latest = int(url[at + 1:])
        except ValueError:
            pass
        else:
            url = url[:at]

    self.url = url
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url = url)
        self.ra = self.transport.ra
        self.base = svn.ra.get_repos_root(self.ra)
        # Path of the converted tree relative to the repository root.
        self.module = self.url[len(self.base):]
        self.modulemap = {} # revision, module
        self.commits = {}
        self.files = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        raise NoRepo("couldn't open SVN repo %s" % url)

    try:
        self.get_blacklist()
    except IOError:
        # No readable blacklist file: nothing is blacklisted.
        pass

    self.last_changed = self.latest(self.module, latest)

    self.head = self.rev(self.last_changed)
126 126
def rev(self, revnum, module=None):
    """Build the revision identifier "svn:<uuid><module>@<revnum>".

    revnum -- Subversion revision number.
    module -- repository path to embed; falls back to self.module when
              omitted or empty.

    Returns a unicode string.
    """
    if not module:
        module = self.module
    # The formatted value is already unicode. Calling .decode() on it
    # would first re-encode it as ASCII and fail on non-ASCII paths.
    return u"svn:%s%s@%s" % (self.uuid, module, revnum)
129 131
def revnum(self, rev):
    """Return, as an int, whatever follows the last '@' in rev."""
    tail = rev.rsplit('@', 1)[-1]
    return int(tail)
132 134
def revsplit(self, rev):
    """Split a "svn:<uuid><module>@<revnum>" identifier into its parts.

    Returns (uuid, module, revnum): module always starts with '/', and
    revnum is an int.  Raises ValueError when rev has no '@' or the text
    after the last '@' is not an integer.
    """
    # Split on the LAST '@', as revnum() does: the revision number never
    # contains '@' but the module path may.
    url, revnum = rev.encode(self.encoding).rsplit('@', 1)
    revnum = int(revnum)
    parts = url.split('/', 1)
    uuid = parts.pop(0)[4:] # drop the leading "svn:"
    mod = '/'
    if parts:
        mod += parts[0]
    return uuid, mod, revnum
142 144
def latest(self, path, stop=0):
    """Find the latest revision affecting path, up to stop.

    A false stop means the repository's latest revision number is used.
    The session is pointed at the repository root for the lookup and back
    at self.module afterwards.  Raises util.Abort when the lookup fails
    or yields nothing.
    """
    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        self.reparent('')
        found = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(self.module)
    except SubversionException:
        found = None
    if found:
        return found.created_rev
    raise util.Abort('%s not found up to revision %d' % (path, stop))
158 160
def get_blacklist(self):
    """Avoid certain revision numbers.

    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Reads "blacklist.txt" from the current directory, one revision number
    per line, into the set self.blacklist.  Lines starting with '#' and
    lines that are not integers are ignored.  self.blacklist is set to an
    empty set before the file is opened, so it exists even when opening
    fails with IOError.
    """
    self.blacklist = set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Close explicitly instead of relying on garbage collection.
        fp.close()
174 176
def is_blacklisted(self, svn_rev):
    """Tell whether svn_rev is a member of self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
177 179
def reparent(self, module):
    """Point the RA session at self.base + module."""
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
182 184
183 185 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347, module=None):
184 186 # batching is broken for branches
185 187 to_revnum = 0
186 188 if not hasattr(self, 'child_rev'):
187 189 self.child_rev = from_revnum
188 190 self.child_cset = self.commits.get(self.child_rev)
189 191 else:
190 192 self.commits[self.child_rev] = self.child_cset
191 193 # batching broken
192 194 return
193 195 # if the branch was created in the middle of the last batch,
194 196 # svn log will complain that the path doesn't exist in this batch
195 197 # so we roll the parser back to the last revision where this branch appeared
196 198 revnum = self.revnum(self.child_rev)
197 199 if revnum > from_revnum:
198 200 from_revnum = revnum
199 201
200 202 self.ui.note('fetching revision log from %d to %d\n' % \
201 203 (from_revnum, to_revnum))
202 204
203 205 def get_entry_from_path(path, module=self.module):
204 206 # Given the repository url of this wc, say
205 207 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
206 208 # extract the "entry" portion (a relative path) from what
207 209 # svn log --xml says, ie
208 210 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
209 211 # that is to say "tests/PloneTestCase.py"
210 212
211 213 if path.startswith(module):
212 214 relative = path[len(module):]
213 215 if relative.startswith('/'):
214 216 return relative[1:]
215 217 else:
216 218 return relative
217 219
218 220 # The path is outside our tracked tree...
219 221 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
220 222 return None
221 223
222 224 def parselogentry(*arg, **args):
223 225 orig_paths, revnum, author, date, message, pool = arg
224 226 orig_paths = svn_paths(orig_paths)
225 227
226 228 if self.is_blacklisted(revnum):
227 229 self.ui.note('skipping blacklisted revision %d\n' % revnum)
228 230 return
229 231
230 232 self.ui.note("parsing revision %d\n" % revnum)
231 233
232 234 if orig_paths is None:
233 235 self.ui.debug('revision %d has no entries\n' % revnum)
234 236 return
235 237
236 238 if revnum in self.modulemap:
237 239 new_module = self.modulemap[revnum]
238 240 if new_module != self.module:
239 241 self.module = new_module
240 242 self.reparent(self.module)
241 243
242 244 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
243 245 copies = {}
244 246 entries = []
245 247 rev = self.rev(revnum)
248 parents = []
246 249 try:
247 250 branch = self.module.split("/")[-1]
248 251 if branch == 'trunk':
249 252 branch = ''
250 253 except IndexError:
251 254 branch = None
252 255
253 256 for path in orig_paths:
254 257 # self.ui.write("path %s\n" % path)
255 258 if path == self.module: # Follow branching back in history
259 import pdb
260 pdb.set_trace()
256 261 ent = orig_paths[path]
257 262 if ent:
258 263 if ent.copyfrom_path:
259 264 self.modulemap[ent.copyfrom_rev] = ent.copyfrom_path
265 parents = [self.rev(ent.copyfrom_rev, ent.copyfrom_path)]
260 266 else:
261 267 self.ui.debug("No copyfrom path, don't know what to do.\n")
262 268 # Maybe it was added and there is no more history.
263 269 entrypath = get_entry_from_path(path, module=self.module)
264 270 # self.ui.write("entrypath %s\n" % entrypath)
265 271 if entrypath is None:
266 272 # Outside our area of interest
267 273 self.ui.debug("boring@%s: %s\n" % (revnum, path))
268 274 continue
269 275 entry = entrypath.decode(self.encoding)
270 276 ent = orig_paths[path]
271 if not entrypath:
272 # TODO: branch creation event
273 pass
274 277
275 278 kind = svn.ra.check_path(self.ra, entrypath, revnum)
276 279 if kind == svn.core.svn_node_file:
277 280 if ent.copyfrom_path:
278 281 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
279 282 if copyfrom_path:
280 283 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
281 284 # It's probably important for hg that the source
282 285 # exists in the revision's parent, not just the
283 286 # ent.copyfrom_rev
284 287 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
285 288 if fromkind != 0:
286 289 copies[self.recode(entry)] = self.recode(copyfrom_path)
287 290 entries.append(self.recode(entry))
288 291 elif kind == 0: # gone, but had better be a deleted *file*
289 292 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
290 293
291 294 fromrev = revnum - 1
292 295 # might always need to be revnum - 1 in these 3 lines?
293 296 old_module = self.modulemap.get(fromrev, self.module)
294 297 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
295 298 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
296 299
297 300 def lookup_parts(p):
298 301 rc = None
299 302 parts = p.split("/")
300 303 for i in range(len(parts)):
301 304 part = "/".join(parts[:i])
302 305 info = part, copyfrom.get(part, None)
303 306 if info[1] is not None:
304 307 self.ui.debug("Found parent directory %s\n" % info)
305 308 rc = info
306 309 return rc
307 310
308 311 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
309 312
310 313 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
311 314
312 315 # need to remove fragment from lookup_parts and replace with copyfrom_path
313 316 if frompath is not None:
314 317 self.ui.debug("munge-o-matic\n")
315 318 self.ui.debug(entrypath + '\n')
316 319 self.ui.debug(entrypath[len(frompath):] + '\n')
317 320 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
318 321 fromrev = froment.copyfrom_rev
319 322 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
320 323
321 324 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
322 325 if fromkind == svn.core.svn_node_file: # a deleted file
323 326 entries.append(self.recode(entry))
324 327 else:
325 328 # print "Deleted/moved non-file:", revnum, path, ent
326 329 # children = self._find_children(path, revnum - 1)
327 330 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
328 331 # Sometimes this is tricky. For example: in
329 332 # The Subversion Repository revision 6940 a dir
330 333 # was copied and one of its files was deleted
331 334 # from the new location in the same commit. This
332 335 # code can't deal with that yet.
333 336 if ent.action == 'C':
334 337 children = self._find_children(path, fromrev)
335 338 else:
336 339 oroot = entrypath.strip('/')
337 340 nroot = path.strip('/')
338 341 children = self._find_children(oroot, fromrev)
339 342 children = [s.replace(oroot,nroot) for s in children]
340 343 # Mark all [files, not directories] as deleted.
341 344 for child in children:
342 345 # Can we move a child directory and its
343 346 # parent in the same commit? (probably can). Could
344 347 # cause problems if instead of revnum -1,
345 348 # we have to look in (copyfrom_path, revnum - 1)
346 349 entrypath = get_entry_from_path("/" + child, module=old_module)
347 350 if entrypath:
348 351 entry = self.recode(entrypath.decode(self.encoding))
349 352 if entry in copies:
350 353 # deleted file within a copy
351 354 del copies[entry]
352 355 else:
353 356 entries.append(entry)
354 357 elif kind == svn.core.svn_node_dir:
355 358 # Should probably synthesize normal file entries
356 359 # and handle as above to clean up copy/rename handling.
357 360
358 361 # If the directory just had a prop change,
359 362 # then we shouldn't need to look for its children.
360 363 # Also this could create duplicate entries. Not sure
361 364 # whether this will matter. Maybe should make entries a set.
362 365 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
363 366 # This will fail if a directory was copied
364 367 # from another branch and then some of its files
365 368 # were deleted in the same transaction.
366 369 children = self._find_children(path, revnum)
367 370 children.sort()
368 371 for child in children:
369 372 # Can we move a child directory and its
370 373 # parent in the same commit? (probably can). Could
371 374 # cause problems if instead of revnum -1,
372 375 # we have to look in (copyfrom_path, revnum - 1)
373 376 entrypath = get_entry_from_path("/" + child, module=self.module)
374 377 # print child, self.module, entrypath
375 378 if entrypath:
376 379 # Need to filter out directories here...
377 380 kind = svn.ra.check_path(self.ra, entrypath, revnum)
378 381 if kind != svn.core.svn_node_dir:
379 382 entries.append(self.recode(entrypath))
380 383
381 384 # Copies here (must copy all from source)
382 385 # Probably not a real problem for us if
383 386 # source does not exist
384 387
385 388 # Can do this with the copy command "hg copy"
386 389 # if ent.copyfrom_path:
387 390 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
388 391 # module=self.module)
389 392 # copyto_entry = entrypath
390 393 #
391 394 # print "copy directory", copyfrom_entry, 'to', copyto_entry
392 395 #
393 396 # copies.append((copyfrom_entry, copyto_entry))
394 397
395 398 if ent.copyfrom_path:
396 399 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
397 400 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
398 401 if copyfrom_entry:
399 402 copyfrom[path] = ent
400 403 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
401 404
402 405 # Good, /probably/ a regular copy. Really should check
403 406 # to see whether the parent revision actually contains
404 407 # the directory in question.
405 408 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
406 409 children.sort()
407 410 for child in children:
408 411 entrypath = get_entry_from_path("/" + child, module=self.module)
409 412 if entrypath:
410 413 entry = entrypath.decode(self.encoding)
411 414 # print "COPY COPY From", copyfrom_entry, entry
412 415 copyto_path = path + entry[len(copyfrom_entry):]
413 416 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
414 417 # print "COPY", entry, "COPY To", copyto_entry
415 418 copies[self.recode(copyto_entry)] = self.recode(entry)
416 419 # copy from quux splort/quuxfile
417 420
418 421 self.modulemap[revnum] = self.module # track backwards in time
419 422 # a list of (filename, id) where id lets us retrieve the file.
420 423 # eg in git, id is the object hash. for svn it'll be the
421 424 self.files[rev] = zip(entries, [rev] * len(entries))
422 425 if not entries:
423 426 return
424 427
425 428 # Example SVN datetime. Includes microseconds.
426 429 # ISO-8601 conformant
427 430 # '2007-01-04T17:35:00.902377Z'
428 431 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
429 432
430 433 log = message and self.recode(message)
431 434 author = author and self.recode(author) or ''
432 435
433 436 cset = commit(author=author,
434 437 date=util.datestr(date),
435 438 desc=log,
436 parents=[],
439 parents=parents,
437 440 copies=copies,
438 441 branch=branch)
439 442
440 443 if self.child_cset and self.child_rev != rev:
441 444 self.child_cset.parents = [rev]
442 445 self.commits[self.child_rev] = self.child_cset
443 446 self.child_cset = cset
444 447 self.child_rev = rev
445 448
446 449 if module is None:
447 450 module = self.module
448 451 try:
449 452 discover_changed_paths = True
450 453 strict_node_history = False
451 454 svn.ra.get_log(self.ra, [module], from_revnum, to_revnum, 0,
452 455 discover_changed_paths, strict_node_history,
453 456 parselogentry)
454 457 self.last_revnum = to_revnum
455 458 except SubversionException, (_, num):
456 459 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
457 460 raise NoSuchRevision(branch=self,
458 461 revision="Revision number %d" % to_revnum)
459 462 raise
460 463
def getheads(self):
    """Return a one-element list holding self.head; also kept as self.heads."""
    # svn-url@rev
    # Not safe if someone committed:
    heads = [self.head]
    self.heads = heads
    # print self.commits.keys()
    return heads
467 470
468 471 def _getfile(self, file, rev):
469 472 io = StringIO()
470 473 # TODO: ra.get_file transmits the whole file instead of diffs.
471 474 mode = ''
472 475 try:
473 476 revnum = self.revnum(rev)
474 477 if self.module != self.modulemap[revnum]:
475 478 self.module = self.modulemap[revnum]
476 479 self.reparent(self.module)
477 480 info = svn.ra.get_file(self.ra, file, revnum, io)
478 481 if isinstance(info, list):
479 482 info = info[-1]
480 483 mode = ("svn:executable" in info) and 'x' or ''
481 484 mode = ("svn:special" in info) and 'l' or mode
482 485 except SubversionException, e:
483 486 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
484 487 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
485 488 if e.apr_err in notfound: # File not found
486 489 raise IOError()
487 490 raise
488 491 data = io.getvalue()
489 492 if mode == 'l':
490 493 link_prefix = "link "
491 494 if data.startswith(link_prefix):
492 495 data = data[len(link_prefix):]
493 496 return data, mode
494 497
def getfile(self, file, rev):
    """Return the file's data and cache its mode under (file, rev)."""
    fetched = self._getfile(file, rev)
    data, mode = fetched
    self.modecache[(file, rev)] = mode
    return data
499 502
def getmode(self, file, rev):
    """Look up the mode cached for (file, rev); KeyError when not cached."""
    key = (file, rev)
    return self.modecache[key]
502 505
def getchanges(self, rev):
    """Reset the mode cache and return self.files[rev], sorted in place."""
    self.modecache = {}
    changed = self.files[rev]
    changed.sort()
    return changed
509 512
def getcommit(self, rev):
    """Return the commit stored for rev, fetching the log first if needed."""
    if rev in self.commits:
        return self.commits[rev]
    uuid, module, revnum = self.revsplit(rev)
    # Lower bound of the batch, never below revision 0.
    minrev = max(revnum - LOG_BATCH_SIZE, 0)
    self._fetch_revisions(from_revnum=revnum, to_revnum=minrev,
                          module=module)
    return self.commits[rev]
517 520
def gettags(self):
    """Return the repository's tags; this backend always reports none."""
    tags = []
    return tags
520 523
def _find_children(self, path, revnum):
    """Return the repository paths found below path at revision revnum.

    The normal route walks the tree through the RA session and collects
    every entry that is not a directory.  If the bindings return a dict
    from get_dir/get_dir2, the method switches to svn.client.ls and keeps
    using it on later calls.  The session is pointed back at self.module
    before returning.
    """
    path = path.strip("/")

    def _find_children_fallback(path, revnum):
        # SWIG python bindings for getdir are broken up to at least 1.4.3
        if not hasattr(self, 'client_ctx'):
            self.client_ctx = svn.client.create_context()
        pool = Pool()
        optrev = svn.core.svn_opt_revision_t()
        optrev.kind = svn.core.svn_opt_revision_number
        optrev.value.number = revnum
        rpath = '/'.join([self.base, path]).strip('/')
        listing = svn.client.ls(rpath, optrev, True, self.client_ctx, pool)
        return ['%s/%s' % (path, name) for name in listing.keys()]

    # A previous call already found the RA route unusable.
    if hasattr(self, '_find_children_fallback'):
        return _find_children_fallback(path, revnum)

    self.reparent("/" + path)
    pool = Pool()

    found = []
    def find_children_inner(children, path, revnum = revnum):
        if hasattr(svn.ra, 'get_dir2'): # Since SVN 1.4
            fields = 0xffffffff # Binding does not provide SVN_DIRENT_ALL
            getdir = svn.ra.get_dir2(self.ra, path, revnum, fields, pool)
        else:
            getdir = svn.ra.get_dir(self.ra, path, revnum, pool)
        if type(getdir) == dict:
            # python binding for getdir is broken up to at least 1.4.3
            raise CompatibilityException()
        dirents = getdir[0]
        if type(dirents) == int:
            # got here once due to infinite recursion bug
            # pprint.pprint(getdir)
            return
        for name in sorted(dirents.keys()):
            full = (path + "/" + name).strip("/")
            if dirents[name].kind == svn.core.svn_node_dir:
                find_children_inner(children, full)
            else:
                children.append(full)

    try:
        find_children_inner(found, "")
    except CompatibilityException:
        self._find_children_fallback = True
        self.reparent(self.module)
        return _find_children_fallback(path, revnum)

    self.reparent(self.module)
    return [path + "/" + name for name in found]
General Comments 0
You need to be logged in to leave comments. Login now