##// END OF EJS Templates
convert: svn: parse log entries as they arrive instead of deferring it
Brendan Cully -
r4793:9f20f4b2 default
parent child Browse files
Show More
@@ -1,562 +1,560 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import pprint
6 6 import locale
7 7
8 8 from mercurial import util
9 9
10 10 # Subversion stuff. Works best with very recent Python SVN bindings
11 11 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
12 12 # these bindings.
13 13
14 14 from cStringIO import StringIO
15 15
16 16 from common import NoRepo, commit, converter_source
17 17
18 18 try:
19 19 from svn.core import SubversionException, Pool
20 20 import svn.core
21 21 import svn.ra
22 22 import svn.delta
23 23 import svn
24 24 import transport
25 25 except ImportError:
26 26 pass
27 27
class CompatibilityException(Exception):
    """Signals that the installed Subversion bindings returned an unusable result.

    Raised by the directory walker when get_dir hands back a plain dict, so
    the caller can switch to the svn.client based fallback.
    """
29 29
# Number of revisions getcommit() steps back when it computes the lower bound
# (to_revnum) that it hands to _fetch_revisions().
30 30 LOG_BATCH_SIZE = 50
31 31
class svn_entry(object):
    """Plain-Python copy of a Subversion changed-path record.

    Only the copy source path, the copy source revision and the action code
    are taken from ``entry``.  'path' is reserved in __slots__ but is not
    assigned by the constructor.
    """
    __slots__ = ['path', 'copyfrom_path', 'copyfrom_rev', 'action']

    def __init__(self, entry):
        # Copy the fields in a fixed order straight off the wrapped entry.
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(entry, field))

    def __str__(self):
        # Rendered as "<action> <copyfrom_path> <copyfrom_rev>".
        fields = (self.action, self.copyfrom_path, self.copyfrom_rev)
        return "%s %s %s" % fields

    def __repr__(self):
        # The debugging form is the same text as the string form.
        return self.__str__()
45 45
class svn_paths(object):
    """Changed-path container that always iterates in sorted path order.

    ``order`` holds the sorted path names and ``values`` maps each path to
    its entry.  Input that exposes ``keys()`` is stored as-is; any other
    non-empty input is indexed path by path and each value is wrapped in
    svn_entry.
    """
    __slots__ = ['values', 'order']

    def __init__(self, orig_paths):
        self.order = []
        self.values = {}
        if hasattr(orig_paths, 'keys'):
            # Mapping-like input: keep the values untouched.
            self.order = sorted(orig_paths.keys())
            self.values.update(orig_paths)
        elif orig_paths:
            for name in orig_paths:
                self.order.append(name)
                self.values[name] = svn_entry(orig_paths[name])
            # The incoming order is not trusted; sort it.
            self.order.sort()

    def __iter__(self):
        # Iteration yields path names only, in sorted order.
        return iter(self.order)

    def __getitem__(self, key):
        return self.values[key]

    def __str__(self):
        # One "'path': entry," line per path between the braces.
        rows = ["'%s': %s,\n" % (name, self.values[name])
                for name in self.order]
        return "{\n" + "".join(rows) + "}"

    def __repr__(self):
        return self.__str__()
78 78
79 79 # SVN conversion code stolen from bzr-svn and tailor
80 80 class convert_svn(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at ``url`` and locate the head revision.

    ui  -- Mercurial ui object used for warnings and debug output.
    url -- repository URL; a trailing ``@<number>`` selects the newest
           revision to convert (useful e.g. for deleted branches).
    rev -- optional newest revision to convert; must be an integer.

    Raises NoRepo when the Subversion bindings are missing or the
    repository cannot be opened, and util.Abort when ``rev`` is not an
    integer.
    """
    try:
        SubversionException
    except NameError:
        msg = 'subversion python bindings could not be loaded\n'
        ui.warn(msg)
        raise NoRepo(msg)

    self.ui = ui
    latest = None
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    # Only strip the suffix once it has been validated as a number, so
    # URLs such as svn://user@host/repo are left untouched and a revision
    # given through ``rev`` is not replaced by garbage.
    at = url.rfind("@")
    if at >= 0:
        try:
            latest = int(url[at + 1:])
            url = url[:at]
        except ValueError:
            pass
    self.url = url
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url = url)
        self.ra = self.transport.ra
        self.base = svn.ra.get_repos_root(self.ra)
        # Path of the converted tree relative to the repository root.
        self.module = self.url[len(self.base):]
        self.modulemap = {} # revision, module
        self.commits = {}
        self.files = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        raise NoRepo("couldn't open SVN repo %s" % url)

    try:
        self.get_blacklist()
    except IOError:
        # The blacklist file is optional; get_blacklist() has already
        # installed an empty set before trying to open it.
        pass

    self.last_changed = self.latest(self.module, latest)

    self.head = self.rev(self.last_changed)
126 126
def rev(self, revnum):
    """Return the unicode revision id ``svn:<uuid><module>@<revnum>``.

    Byte-string components are decoded with self.encoding before they are
    formatted.  Formatting bytes into a unicode template and then calling
    .decode() on the unicode result only works for pure-ASCII data.
    """
    text_type = type(u'')
    parts = []
    for part in (self.uuid, self.module):
        if not isinstance(part, text_type):
            part = part.decode(self.encoding)
        parts.append(part)
    return u"svn:%s%s@%s" % (parts[0], parts[1], revnum)
129 129
def revnum(self, rev):
    """Return the integer revision number that follows the last '@' in ``rev``."""
    tail = rev.rsplit('@', 1)[-1]
    return int(tail)
132 132
def latest(self, path, stop=0):
    """Find the latest revision affecting ``path``, up to ``stop``.

    A false ``stop`` means the newest revision reported by the repository.
    The session is reparented to the repository root for the stat call and
    back to self.module afterwards.  Raises util.Abort when nothing is
    found or Subversion reports an error.
    """
    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    found = None
    try:
        self.reparent('')
        found = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(self.module)
    except SubversionException:
        # Any Subversion failure, including the reparent back, counts as
        # "not found".
        found = None
    if found:
        return found.created_rev
    raise util.Abort('%s not found up to revision %d' % (path, stop))
148 148
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Reads "blacklist.txt" from the current directory: one revision number
    per line, lines starting with '#' are comments, anything else that is
    not an integer is ignored.  self.blacklist is reset to an empty set
    before the file is opened, so it exists even when opening fails.

    Raises IOError when the file cannot be opened.
    """
    self.blacklist = set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Close the handle explicitly rather than waiting for garbage
        # collection.
        fp.close()
164 164
def is_blacklisted(self, svn_rev):
    """Tell whether ``svn_rev`` is a member of self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
167 167
def reparent(self, module):
    """Point the RA session at ``module`` below the repository root.

    The target URL is self.base + module, encoded with self.encoding; it is
    written to the debug log before the session is moved.
    """
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
172 172
# Fetch the Subversion log for self.module and turn each log entry into a
# changed-file list (self.files[rev]) and a commit object that is linked to
# the previously parsed changeset through self.child_cset / self.child_rev.
#
# from_revnum -- start revision handed to svn.ra.get_log.
# to_revnum   -- ignored: it is reset to 0 by the first statement.
# pb          -- not referenced anywhere in this method.
#
# On the first call from_revnum is remembered in self.child_rev.  On any later
# call the pending self.child_cset is stored in self.commits and the method
# returns without contacting the repository.
#
# Raises NoSuchRevision when Subversion reports SVN_ERR_FS_NO_SUCH_REVISION;
# other SubversionExceptions are re-raised unchanged.
#
# NOTE(review): this listing is a rendered changeset diff with indentation
# stripped.  Lines carrying a single line number appear to exist on only one
# side of the change: rcvr / rcvr2 / after_received and the
# `for args in received` loop look like the removed deferred path,
# parselogentry like its replacement -- confirm against the repository.
173 173 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347, pb=None):
174 174 # batching is broken for branches
175 175 to_revnum = 0
176 176 if not hasattr(self, 'child_rev'):
177 177 self.child_rev = from_revnum
178 178 self.child_cset = self.commits.get(self.child_rev)
179 179 else:
180 180 self.commits[self.child_rev] = self.child_cset
181 181 # batching broken
182 182 return
# NOTE(review): with indentation lost it is unclear whether the roll-back
# code below sits inside the else branch (and is therefore unreachable after
# the return above) -- TODO confirm.
183 183 # if the branch was created in the middle of the last batch,
184 184 # svn log will complain that the path doesn't exist in this batch
185 185 # so we roll the parser back to the last revision where this branch appeared
186 186 revnum = self.revnum(self.child_rev)
187 187 if revnum > from_revnum:
188 188 from_revnum = revnum
189 189
190 190 self.ui.note('fetching revision log from %d to %d\n' % \
191 191 (from_revnum, to_revnum))
192 192
193 193 def get_entry_from_path(path, module=self.module):
194 194 # Given the repository url of this wc, say
195 195 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
196 196 # extract the "entry" portion (a relative path) from what
197 197 # svn log --xml says, ie
198 198 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
199 199 # that is to say "tests/PloneTestCase.py"
200 200
201 201 if path.startswith(module):
202 202 relative = path[len(module):]
203 203 if relative.startswith('/'):
204 204 return relative[1:]
205 205 else:
206 206 return relative
207 207
208 208 # The path is outside our tracked tree...
209 209 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
210 210 return None
211 211
212 received = []
213 def rcvr(*arg, **args):
# Log receiver handed to svn.ra.get_log; it unpacks its positional arguments
# as (orig_paths, revnum, author, date, message, pool).
212 def parselogentry(*arg, **args):
214 213 orig_paths, revnum, author, date, message, pool = arg
215 new_orig_paths = svn_paths(orig_paths)
216 rcvr2(new_orig_paths, revnum, author, date, message, pool)
214 orig_paths = svn_paths(orig_paths)
217 215
218 def rcvr2(orig_paths, revnum, author, date, message, pool, better_paths = None):
219 if not self.is_blacklisted(revnum):
220 received.append((orig_paths, revnum, author, date, message))
216 if self.is_blacklisted(revnum):
217 self.ui.note('skipping blacklisted revision %d\n' % revnum)
218 return
219
220 self.ui.note("parsing revision %d\n" % revnum)
221 221
222 def after_received(orig_paths, revnum, author, date, message):
223 self.ui.note("parsing revision %d\n" % revnum)
# NOTE(review): orig_paths has already been wrapped in svn_paths(...) above,
# so this None check can no longer be true.
224 222 if orig_paths is None:
223 self.ui.debug('revision %d has no entries\n' % revnum)
225 224 return
226 225
# Switch to the module recorded for this revision, if one was recorded while
# following a branch copy further down.
227 226 if revnum in self.modulemap:
228 227 new_module = self.modulemap[revnum]
229 228 if new_module != self.module:
230 229 self.module = new_module
231 230 self.reparent(self.module)
232 231
233 232 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
234 233 copies = {}
235 234 entries = []
236 235 rev = self.rev(revnum)
# The branch name is the last component of the module path; 'trunk' is mapped
# to the empty string.
237 236 try:
238 237 branch = self.module.split("/")[-1]
239 238 if branch == 'trunk':
240 239 branch = ''
241 240 except IndexError:
242 241 branch = None
243 242
244 243 for path in orig_paths:
245 244 # self.ui.write("path %s\n" % path)
246 245 if path == self.module: # Follow branching back in history
247 246 ent = orig_paths[path]
248 247 if ent:
249 248 if ent.copyfrom_path:
250 249 self.modulemap[ent.copyfrom_rev] = ent.copyfrom_path
251 250 else:
252 251 self.ui.debug("No copyfrom path, don't know what to do.\n")
253 252 # Maybe it was added and there is no more history.
254 253 entrypath = get_entry_from_path(path, module=self.module)
255 254 # self.ui.write("entrypath %s\n" % entrypath)
256 255 if entrypath is None:
257 256 # Outside our area of interest
258 257 self.ui.debug("boring@%s: %s\n" % (revnum, path))
259 258 continue
260 259 entry = entrypath.decode(self.encoding)
261 260 ent = orig_paths[path]
262 261 if not entrypath:
263 262 # TODO: branch creation event
264 263 pass
265 264
266 265 kind = svn.ra.check_path(self.ra, entrypath, revnum)
267 266 if kind == svn.core.svn_node_file:
268 267 if ent.copyfrom_path:
269 268 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
270 269 if copyfrom_path:
271 270 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
272 271 # It's probably important for hg that the source
273 272 # exists in the revision's parent, not just the
274 273 # ent.copyfrom_rev
275 274 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
276 275 if fromkind != 0:
277 276 copies[self.recode(entry)] = self.recode(copyfrom_path)
278 277 entries.append(self.recode(entry))
279 278 elif kind == 0: # gone, but had better be a deleted *file*
280 279 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
281 280
282 281 fromrev = revnum - 1
283 282 # might always need to be revnum - 1 in these 3 lines?
284 283 old_module = self.modulemap.get(fromrev, self.module)
285 284 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
286 285 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
287 286
288 287 def lookup_parts(p):
289 288 rc = None
290 289 parts = p.split("/")
291 290 for i in range(len(parts)):
292 291 part = "/".join(parts[:i])
293 292 info = part, copyfrom.get(part, None)
294 293 if info[1] is not None:
295 294 self.ui.debug("Found parent directory %s\n" % info)
296 295 rc = info
297 296 return rc
298 297
299 298 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
300 299
301 300 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
302 301
303 302 # need to remove fragment from lookup_parts and replace with copyfrom_path
304 303 if frompath is not None:
305 304 self.ui.debug("munge-o-matic\n")
306 305 self.ui.debug(entrypath + '\n')
307 306 self.ui.debug(entrypath[len(frompath):] + '\n')
308 307 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
309 308 fromrev = froment.copyfrom_rev
310 309 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
311 310
312 311 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
313 312 if fromkind == svn.core.svn_node_file: # a deleted file
314 313 entries.append(self.recode(entry))
315 314 else:
316 315 # print "Deleted/moved non-file:", revnum, path, ent
317 316 # children = self._find_children(path, revnum - 1)
318 317 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
319 318 # Sometimes this is tricky. For example: in
320 319 # The Subversion Repository revision 6940 a dir
321 320 # was copied and one of its files was deleted
322 321 # from the new location in the same commit. This
323 322 # code can't deal with that yet.
324 323 if ent.action == 'C':
325 324 children = self._find_children(path, fromrev)
326 325 else:
327 326 oroot = entrypath.strip('/')
328 327 nroot = path.strip('/')
329 328 children = self._find_children(oroot, fromrev)
330 329 children = [s.replace(oroot,nroot) for s in children]
331 330 # Mark all [files, not directories] as deleted.
332 331 for child in children:
333 332 # Can we move a child directory and its
334 333 # parent in the same commit? (probably can). Could
335 334 # cause problems if instead of revnum -1,
336 335 # we have to look in (copyfrom_path, revnum - 1)
337 336 entrypath = get_entry_from_path("/" + child, module=old_module)
338 337 if entrypath:
339 338 entry = self.recode(entrypath.decode(self.encoding))
340 339 if entry in copies:
341 340 # deleted file within a copy
342 341 del copies[entry]
343 342 else:
344 343 entries.append(entry)
345 344 elif kind == svn.core.svn_node_dir:
346 345 # Should probably synthesize normal file entries
347 346 # and handle as above to clean up copy/rename handling.
348 347
349 348 # If the directory just had a prop change,
350 349 # then we shouldn't need to look for its children.
351 350 # Also this could create duplicate entries. Not sure
352 351 # whether this will matter. Maybe should make entries a set.
353 352 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
354 353 # This will fail if a directory was copied
355 354 # from another branch and then some of its files
356 355 # were deleted in the same transaction.
357 356 children = self._find_children(path, revnum)
358 357 children.sort()
359 358 for child in children:
360 359 # Can we move a child directory and its
361 360 # parent in the same commit? (probably can). Could
362 361 # cause problems if instead of revnum -1,
363 362 # we have to look in (copyfrom_path, revnum - 1)
364 363 entrypath = get_entry_from_path("/" + child, module=self.module)
365 364 # print child, self.module, entrypath
366 365 if entrypath:
367 366 # Need to filter out directories here...
368 367 kind = svn.ra.check_path(self.ra, entrypath, revnum)
369 368 if kind != svn.core.svn_node_dir:
370 369 entries.append(self.recode(entrypath))
371 370
372 371 # Copies here (must copy all from source)
373 372 # Probably not a real problem for us if
374 373 # source does not exist
375 374
376 375 # Can do this with the copy command "hg copy"
377 376 # if ent.copyfrom_path:
378 377 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
379 378 # module=self.module)
380 379 # copyto_entry = entrypath
381 380 #
382 381 # print "copy directory", copyfrom_entry, 'to', copyto_entry
383 382 #
384 383 # copies.append((copyfrom_entry, copyto_entry))
385 384
386 385 if ent.copyfrom_path:
387 386 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
388 387 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
389 388 if copyfrom_entry:
390 389 copyfrom[path] = ent
391 390 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
392 391
393 392 # Good, /probably/ a regular copy. Really should check
394 393 # to see whether the parent revision actually contains
395 394 # the directory in question.
396 395 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
397 396 children.sort()
398 397 for child in children:
399 398 entrypath = get_entry_from_path("/" + child, module=self.module)
400 399 if entrypath:
401 400 entry = entrypath.decode(self.encoding)
402 401 # print "COPY COPY From", copyfrom_entry, entry
403 402 copyto_path = path + entry[len(copyfrom_entry):]
404 403 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
405 404 # print "COPY", entry, "COPY To", copyto_entry
406 405 copies[self.recode(copyto_entry)] = self.recode(entry)
407 406 # copy from quux splort/quuxfile
408 407
409 408 self.modulemap[revnum] = self.module # track backwards in time
410 409 # a list of (filename, id) where id lets us retrieve the file.
411 410 # eg in git, id is the object hash. for svn it'll be the
412 411 self.files[rev] = zip(entries, [rev] * len(entries))
# Revisions that touched no tracked file produce no commit object.
413 412 if not entries:
414 413 return
415 414
416 415 # Example SVN datetime. Includes microseconds.
417 416 # ISO-8601 conformant
418 417 # '2007-01-04T17:35:00.902377Z'
# NOTE(review): 'YYYY-MM-DDTHH:MM:SS' is 19 characters long, so date[:18]
# drops the last digit of the seconds -- confirm whether [:19] was intended.
419 418 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
420 419
421 420 log = message and self.recode(message)
422 421 author = author and self.recode(author) or ''
423 422
424 423 cset = commit(author=author,
425 424 date=util.datestr(date),
426 425 desc=log,
427 426 parents=[],
428 427 copies=copies,
429 428 branch=branch)
430 429
# The revision just parsed becomes the sole parent of the previously parsed
# changeset, which is then published in self.commits; the new changeset stays
# pending in self.child_cset until the next entry (or the next call) arrives.
431 430 if self.child_cset and self.child_rev != rev:
432 431 self.child_cset.parents = [rev]
433 432 self.commits[self.child_rev] = self.child_cset
434 433 self.child_cset = cset
435 434 self.child_rev = rev
436 435
437 436 try:
438 437 discover_changed_paths = True
439 438 strict_node_history = False
440 svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum,
441 0, discover_changed_paths, strict_node_history, rcvr)
442 for args in received:
443 after_received(*args)
439 svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum, 0,
440 discover_changed_paths, strict_node_history,
441 parselogentry)
444 442 self.last_revnum = to_revnum
445 443 except SubversionException, (_, num):
# NOTE(review): NoSuchRevision is not among the names imported in the visible
# part of this file -- verify that it is defined somewhere.
446 444 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
447 445 raise NoSuchRevision(branch=self,
448 446 revision="Revision number %d" % to_revnum)
449 447 raise
450 448
def getheads(self):
    """Return the list of head revision ids: just self.head.

    The list is also stored on the instance as self.heads.
    """
    # svn-url@rev
    # Not safe if someone committed:
    heads = [self.head]
    self.heads = heads
    return heads
457 455
458 456 def _getfile(self, file, rev):
459 457 io = StringIO()
460 458 # TODO: ra.get_file transmits the whole file instead of diffs.
461 459 mode = ''
462 460 try:
463 461 revnum = self.revnum(rev)
464 462 if self.module != self.modulemap[revnum]:
465 463 self.module = self.modulemap[revnum]
466 464 self.reparent(self.module)
467 465 info = svn.ra.get_file(self.ra, file, revnum, io)
468 466 if isinstance(info, list):
469 467 info = info[-1]
470 468 mode = ("svn:executable" in info) and 'x' or ''
471 469 mode = ("svn:special" in info) and 'l' or mode
472 470 except SubversionException, e:
473 471 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
474 472 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
475 473 if e.apr_err in notfound: # File not found
476 474 raise IOError()
477 475 raise
478 476 data = io.getvalue()
479 477 if mode == 'l':
480 478 link_prefix = "link "
481 479 if data.startswith(link_prefix):
482 480 data = data[len(link_prefix):]
483 481 return data, mode
484 482
def getfile(self, file, rev):
    """Return the contents of ``file`` at ``rev``.

    The mode reported by _getfile is stored in self.modecache under
    (file, rev), where getmode() looks it up.
    """
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
489 487
def getmode(self, file, rev):
    """Return the mode cached for (file, rev); KeyError when none is cached."""
    key = (file, rev)
    return self.modecache[key]
492 490
def getchanges(self, rev):
    """Return the sorted list of changes recorded for ``rev``.

    The mode cache is emptied first.  The list kept in self.files is sorted
    in place and that same list object is returned.
    """
    self.modecache = {}
    changes = self.files[rev]
    changes.sort()
    return changes
499 497
def getcommit(self, rev):
    """Return the commit stored for ``rev``, fetching the log when it is missing.

    The fetch is requested from the revision's number down to
    LOG_BATCH_SIZE revisions earlier, never going below 0.
    """
    if rev in self.commits:
        return self.commits[rev]
    revnum = self.revnum(rev)
    minrev = max(revnum - LOG_BATCH_SIZE, 0)
    self._fetch_revisions(from_revnum=revnum, to_revnum=minrev)
    return self.commits[rev]
506 504
def gettags(self):
    """Return the tags of the source; always an empty list here."""
    return list()
509 507
def _find_children(self, path, revnum):
    """List every non-directory path below ``path`` at revision ``revnum``.

    The tree is walked through the RA session.  When the bindings return a
    dict from get_dir, a flag is set on the instance and this call and all
    later ones use svn.client.ls instead.  The session is reparented to
    ``path`` for the walk and back to self.module afterwards.
    """
    path = path.strip("/")

    def list_with_client(path, revnum):
        # SWIG python bindings for getdir are broken up to at least 1.4.3
        if not hasattr(self, 'client_ctx'):
            self.client_ctx = svn.client.create_context()
        pool = Pool()
        optrev = svn.core.svn_opt_revision_t()
        optrev.kind = svn.core.svn_opt_revision_number
        optrev.value.number = revnum
        rpath = '/'.join([self.base, path]).strip('/')
        listing = svn.client.ls(rpath, optrev, True, self.client_ctx, pool)
        return ['%s/%s' % (path, x) for x in listing.keys()]

    # A previous call already found the RA bindings unusable.
    if hasattr(self, '_find_children_fallback'):
        return list_with_client(path, revnum)

    self.reparent("/" + path)
    pool = Pool()

    found = []
    def walk(found, path, revnum = revnum):
        if hasattr(svn.ra, 'get_dir2'): # Since SVN 1.4
            fields = 0xffffffff # Binding does not provide SVN_DIRENT_ALL
            getdir = svn.ra.get_dir2(self.ra, path, revnum, fields, pool)
        else:
            getdir = svn.ra.get_dir(self.ra, path, revnum, pool)
        if type(getdir) == dict:
            # python binding for getdir is broken up to at least 1.4.3
            raise CompatibilityException()
        dirents = getdir[0]
        if type(dirents) == int:
            # got here once due to infinite recursion bug
            # pprint.pprint(getdir)
            return
        for name in sorted(dirents.keys()):
            full = (path + "/" + name).strip("/")
            if dirents[name].kind == svn.core.svn_node_dir:
                # Descend into directories; only other nodes are collected.
                walk(found, full)
            else:
                found.append(full)

    try:
        walk(found, "")
    except CompatibilityException:
        self._find_children_fallback = True
        self.reparent(self.module)
        return list_with_client(path, revnum)

    self.reparent(self.module)
    return [path + "/" + c for c in found]
General Comments 0
You need to be logged in to leave comments. Login now