##// END OF EJS Templates
convert: svn: disable batched fetch - get the whole log immediately
Brendan Cully -
r4775:739fd34f default
parent child Browse files
Show More
@@ -1,532 +1,542 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import pprint
6 6 import locale
7 7
8 8 from mercurial import util
9 9
10 10 # Subversion stuff. Works best with very recent Python SVN bindings
11 11 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
12 12 # these bindings.
13 13
14 14 from cStringIO import StringIO
15 15
16 16 from common import NoRepo, commit, converter_source
17 17
18 18 try:
19 19 from svn.core import SubversionException, Pool
20 20 import svn.core
21 21 import svn.ra
22 22 import svn.delta
23 23 import svn
24 24 import transport
25 25 except ImportError:
26 26 pass
27 27
class CompatibilityException(Exception):
    """Signals that the installed SVN Python bindings behave incompatibly.

    Raised by the directory walker in this module when the getdir call
    hands back a plain dict instead of the expected result.
    """


# Size of the revision window that getcommit() requests per log fetch.
LOG_BATCH_SIZE = 50
31 31
class svn_entry(object):
    """Emulate a Subversion path change."""
    __slots__ = ['path', 'copyfrom_path', 'copyfrom_rev', 'action']

    def __init__(self, entry):
        # Snapshot the three fields read from the binding's entry object.
        # The 'path' slot is declared but intentionally left unset here.
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(entry, field))

    def __str__(self):
        fields = (self.action, self.copyfrom_path, self.copyfrom_rev)
        return "%s %s %s" % fields

    def __repr__(self):
        # The debugging representation is the same as the string form.
        return str(self)
45 45
class svn_paths(object):
    """Emulate a Subversion ordered dictionary of changed paths."""
    __slots__ = ['values', 'order']

    def __init__(self, orig_paths):
        self.order = []
        self.values = {}
        if hasattr(orig_paths, 'keys'):
            # Mapping-like input: adopt its values unchanged, sorted by path.
            self.values.update(orig_paths)
            self.order = sorted(orig_paths.keys())
        elif orig_paths:
            # Anything else that is non-empty: wrap each value in svn_entry.
            for path in orig_paths:
                self.values[path] = svn_entry(orig_paths[path])
                self.order.append(path)
            self.order.sort() # maybe the order it came in isn't so great...

    def __iter__(self):
        # Iterating yields the paths in sorted order.
        return iter(self.order)

    def __getitem__(self, key):
        return self.values[key]

    def __str__(self):
        rows = ["'%s': %s,\n" % (path, self.values[path])
                for path in self.order]
        return "{\n" + "".join(rows) + "}"

    def __repr__(self):
        return str(self)
78 78
79 79 # SVN conversion code stolen from bzr-svn and tailor
80 80 class convert_svn(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url and locate its head revision.

    ui  -- object used for warnings and debug output
    url -- repository URL; a trailing "@<integer>" selects a revision
           (and takes precedence over rev)
    rev -- optional revision to convert up to; must be convertible to int

    Raises NoRepo when the Subversion bindings are missing or the
    repository cannot be opened, and util.Abort when rev is not an
    integer or the module does not exist in the selected revision.
    """
    try:
        SubversionException
    except NameError:
        # The guarded import at the top of the file failed.
        msg = 'subversion python bindings could not be loaded\n'
        ui.warn(msg)
        raise NoRepo(msg)

    self.ui = ui
    latest = None
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        stripped_url, peg_rev = url.rsplit("@", 1)
        peg_rev = int(peg_rev)
    except ValueError:
        # Either there is no "@" at all, or what follows it is not a
        # revision number (e.g. "svn+ssh://user@host/repo"). Leave url
        # and latest exactly as they were.
        pass
    else:
        # Only commit the split once the suffix is known to be a number.
        url, latest = stripped_url, peg_rev
    self.url = url
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url = url)
        self.ra = self.transport.ra
        self.base = svn.ra.get_repos_root(self.ra)
        self.module = self.url[len(self.base):]
        self.modulemap = {} # revision, module
        self.commits = {}
        self.files = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        raise NoRepo("couldn't open SVN repo %s" % url)

    try:
        self.get_blacklist()
    except IOError:
        # Best effort: the blacklist file could not be read.
        pass

    if not latest:
        latest = svn.ra.get_latest_revnum(self.ra)
    dirent = svn.ra.stat(self.ra, self.module, latest)
    if not dirent:
        raise util.Abort('module %s not found in revision %d' % (self.module, latest))
    self.last_changed = dirent.created_rev

    self.head = self.rev(self.last_changed)
131 131
def rev(self, revnum):
    """Return the unicode revision identifier "svn:<uuid><module>@<revnum>".

    The formatted value is already a unicode string, so it is returned
    directly. Calling .decode() on it (as was done before) is a no-op for
    pure-ASCII identifiers and raises UnicodeEncodeError for anything else.
    """
    return u"svn:%s%s@%s" % (self.uuid, self.module, revnum)
134 134
def revnum(self, rev):
    """Return, as an int, whatever follows the last '@' in rev."""
    tail = rev.rsplit('@', 1)[-1]
    return int(tail)
137 137
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Reads "blacklist.txt" from the current directory, one revision number
    per line, and stores the numbers in self.blacklist. Lines starting
    with "#" and lines that are not integers are skipped. self.blacklist
    is set to an empty set before the file is opened, so it exists even
    when opening the file raises IOError.
    """
    self.blacklist = set()
    blacklist = self.blacklist
    listing = open("blacklist.txt", "r")
    try:
        for line in listing:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Release the handle even if reading fails part-way through.
        listing.close()
153 153
def is_blacklisted(self, svn_rev):
    """Tell whether svn_rev is one of the revisions in self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
156 156
def reparent(self, module):
    """Point the RA session at self.base + module.

    The target URL is encoded with self.encoding, logged through
    ui.debug, and handed to svn.ra.reparent.
    """
    encoded_url = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % encoded_url)
    svn.ra.reparent(self.ra, encoded_url)
161 161
162 162 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347, pb=None):
163 # batching is broken for branches
164 to_revnum = 0
163 165 if not hasattr(self, 'child_rev'):
164 166 self.child_rev = from_revnum
165 167 self.child_cset = self.commits.get(self.child_rev)
166 168 else:
167 169 self.commits[self.child_rev] = self.child_cset
170 # batching broken
171 return
172 # if the branch was created in the middle of the last batch,
173 # svn log will complain that the path doesn't exist in this batch
174 # so we roll the parser back to the last revision where this branch appeared
175 revnum = self.revnum(self.child_rev)
176 if revnum > from_revnum:
177 from_revnum = revnum
168 178
169 179 self.ui.debug('Fetching revisions %d to %d\n' % (from_revnum, to_revnum))
170 180
171 181 def get_entry_from_path(path, module=self.module):
172 182 # Given the repository url of this wc, say
173 183 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
174 184 # extract the "entry" portion (a relative path) from what
175 185 # svn log --xml says, ie
176 186 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
177 187 # that is to say "tests/PloneTestCase.py"
178 188
179 189 if path.startswith(module):
180 190 relative = path[len(module):]
181 191 if relative.startswith('/'):
182 192 return relative[1:]
183 193 else:
184 194 return relative
185 195
186 196 # The path is outside our tracked tree...
187 197 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
188 198 return None
189 199
190 200 received = []
191 201 def rcvr(*arg, **args):
192 202 orig_paths, revnum, author, date, message, pool = arg
193 203 new_orig_paths = svn_paths(orig_paths)
194 204 rcvr2(new_orig_paths, revnum, author, date, message, pool)
195 205
196 206 def rcvr2(orig_paths, revnum, author, date, message, pool, better_paths = None):
197 207 if not self.is_blacklisted(revnum):
198 208 received.append((orig_paths, revnum, author, date, message))
199 209
200 210 def after_received(orig_paths, revnum, author, date, message):
201 211 if revnum in self.modulemap:
202 212 new_module = self.modulemap[revnum]
203 213 if new_module != self.module:
204 214 self.module = new_module
205 215 self.reparent(self.module)
206 216
207 217 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
208 218 copies = {}
209 219 entries = []
210 220 self.ui.debug("Parsing revision %d\n" % revnum)
211 221 if orig_paths is not None:
212 222 rev = self.rev(revnum)
213 223 try:
214 224 branch = self.module.split("/")[-1]
215 225 if branch == 'trunk':
216 226 branch = ''
217 227 except IndexError:
218 228 branch = None
219 229
220 230 for path in orig_paths:
221 231 # self.ui.write("path %s\n" % path)
222 232 if path == self.module: # Follow branching back in history
223 233 ent = orig_paths[path]
224 234 if ent:
225 235 if ent.copyfrom_path:
226 236 self.modulemap[ent.copyfrom_rev] = ent.copyfrom_path
227 237 else:
228 238 self.ui.debug("No copyfrom path, don't know what to do.\n")
229 239 # Maybe it was added and there is no more history.
230 240 entrypath = get_entry_from_path(path, module=self.module)
231 241 # self.ui.write("entrypath %s\n" % entrypath)
232 242 if not entrypath:
233 243 # Outside our area of interest
234 244 self.ui.debug("boring@%s: %s\n" % (revnum, path))
235 245 continue
236 246 entry = entrypath.decode(self.encoding)
237 247 ent = orig_paths[path]
238 248
239 249 kind = svn.ra.check_path(self.ra, entrypath, revnum)
240 250 if kind == svn.core.svn_node_file:
241 251 if ent.copyfrom_path:
242 252 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
243 253 if copyfrom_path:
244 254 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
245 255 # It's probably important for hg that the source
246 256 # exists in the revision's parent, not just the
247 257 # ent.copyfrom_rev
248 258 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
249 259 if fromkind != 0:
250 260 copies[self.recode(entry)] = self.recode(copyfrom_path)
251 261 entries.append(self.recode(entry))
252 262 elif kind == 0: # gone, but had better be a deleted *file*
253 263 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
254 264
255 265 fromrev = revnum - 1
256 266 # might always need to be revnum - 1 in these 3 lines?
257 267 old_module = self.modulemap.get(fromrev, self.module)
258 268 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
259 269 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
260 270
261 271 def lookup_parts(p):
262 272 rc = None
263 273 parts = p.split("/")
264 274 for i in range(len(parts)):
265 275 part = "/".join(parts[:i])
266 276 info = part, copyfrom.get(part, None)
267 277 if info[1] is not None:
268 278 self.ui.debug("Found parent directory %s\n" % info)
269 279 rc = info
270 280 return rc
271 281
272 282 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
273 283
274 284 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
275 285
276 286 # need to remove fragment from lookup_parts and replace with copyfrom_path
277 287 if frompath is not None:
278 288 self.ui.debug("munge-o-matic\n")
279 289 self.ui.debug(entrypath + '\n')
280 290 self.ui.debug(entrypath[len(frompath):] + '\n')
281 291 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
282 292 fromrev = froment.copyfrom_rev
283 293 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
284 294
285 295 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
286 296 if fromkind == svn.core.svn_node_file: # a deleted file
287 297 entries.append(self.recode(entry))
288 298 else:
289 299 # print "Deleted/moved non-file:", revnum, path, ent
290 300 # children = self._find_children(path, revnum - 1)
291 301 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
292 302 # Sometimes this is tricky. For example: in
293 303 # The Subversion Repository revision 6940 a dir
294 304 # was copied and one of its files was deleted
295 305 # from the new location in the same commit. This
296 306 # code can't deal with that yet.
297 307 if ent.action == 'C':
298 308 children = self._find_children(path, fromrev)
299 309 else:
300 310 oroot = entrypath.strip('/')
301 311 nroot = path.strip('/')
302 312 children = self._find_children(oroot, fromrev)
303 313 children = [s.replace(oroot,nroot) for s in children]
304 314 # Mark all [files, not directories] as deleted.
305 315 for child in children:
306 316 # Can we move a child directory and its
307 317 # parent in the same commit? (probably can). Could
308 318 # cause problems if instead of revnum -1,
309 319 # we have to look in (copyfrom_path, revnum - 1)
310 320 entrypath = get_entry_from_path("/" + child, module=old_module)
311 321 if entrypath:
312 322 entry = self.recode(entrypath.decode(self.encoding))
313 323 if entry in copies:
314 324 # deleted file within a copy
315 325 del copies[entry]
316 326 else:
317 327 entries.append(entry)
318 328 elif kind == svn.core.svn_node_dir:
319 329 # Should probably synthesize normal file entries
320 330 # and handle as above to clean up copy/rename handling.
321 331
322 332 # If the directory just had a prop change,
323 333 # then we shouldn't need to look for its children.
324 334 # Also this could create duplicate entries. Not sure
325 335 # whether this will matter. Maybe should make entries a set.
326 336 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
327 337 # This will fail if a directory was copied
328 338 # from another branch and then some of its files
329 339 # were deleted in the same transaction.
330 340 children = self._find_children(path, revnum)
331 341 children.sort()
332 342 for child in children:
333 343 # Can we move a child directory and its
334 344 # parent in the same commit? (probably can). Could
335 345 # cause problems if instead of revnum -1,
336 346 # we have to look in (copyfrom_path, revnum - 1)
337 347 entrypath = get_entry_from_path("/" + child, module=self.module)
338 348 # print child, self.module, entrypath
339 349 if entrypath:
340 350 # Need to filter out directories here...
341 351 kind = svn.ra.check_path(self.ra, entrypath, revnum)
342 352 if kind != svn.core.svn_node_dir:
343 353 entries.append(self.recode(entrypath))
344 354
345 355 # Copies here (must copy all from source)
346 356 # Probably not a real problem for us if
347 357 # source does not exist
348 358
349 359 # Can do this with the copy command "hg copy"
350 360 # if ent.copyfrom_path:
351 361 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
352 362 # module=self.module)
353 363 # copyto_entry = entrypath
354 364 #
355 365 # print "copy directory", copyfrom_entry, 'to', copyto_entry
356 366 #
357 367 # copies.append((copyfrom_entry, copyto_entry))
358 368
359 369 if ent.copyfrom_path:
360 370 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
361 371 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
362 372 if copyfrom_entry:
363 373 copyfrom[path] = ent
364 374 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
365 375
366 376 # Good, /probably/ a regular copy. Really should check
367 377 # to see whether the parent revision actually contains
368 378 # the directory in question.
369 379 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
370 380 children.sort()
371 381 for child in children:
372 382 entrypath = get_entry_from_path("/" + child, module=self.module)
373 383 if entrypath:
374 384 entry = entrypath.decode(self.encoding)
375 385 # print "COPY COPY From", copyfrom_entry, entry
376 386 copyto_path = path + entry[len(copyfrom_entry):]
377 387 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
378 388 # print "COPY", entry, "COPY To", copyto_entry
379 389 copies[self.recode(copyto_entry)] = self.recode(entry)
380 390 # copy from quux splort/quuxfile
381 391
382 392 self.modulemap[revnum] = self.module # track backwards in time
383 393 # a list of (filename, id) where id lets us retrieve the file.
384 394 # eg in git, id is the object hash. for svn it'll be the
385 395 self.files[rev] = zip(entries, [rev] * len(entries))
386 396
387 397 # Example SVN datetime. Includes microseconds.
388 398 # ISO-8601 conformant
389 399 # '2007-01-04T17:35:00.902377Z'
390 400 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
391 401
392 402 log = message and self.recode(message)
393 403 author = author and self.recode(author) or ''
394 404
395 405 cset = commit(author=author,
396 406 date=util.datestr(date),
397 407 desc=log,
398 408 parents=[],
399 409 copies=copies,
400 410 branch=branch)
401 411
402 412 if self.child_cset and self.child_rev != rev:
403 413 self.child_cset.parents = [rev]
404 414 self.commits[self.child_rev] = self.child_cset
405 415 self.child_cset = cset
406 416 self.child_rev = rev
407 417
408 418 try:
409 419 discover_changed_paths = True
410 420 strict_node_history = False
411 421 svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum,
412 422 0, discover_changed_paths, strict_node_history, rcvr)
413 423 for args in received:
414 424 after_received(*args)
415 425 self.last_revnum = to_revnum
416 426 except SubversionException, (_, num):
417 427 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
418 428 raise NoSuchRevision(branch=self,
419 429 revision="Revision number %d" % to_revnum)
420 430 raise
421 431
def getheads(self):
    """Return a one-element list holding self.head; also kept as self.heads."""
    # svn-url@rev
    # Not safe if someone committed:
    heads = [self.head]
    self.heads = heads
    # print self.commits.keys()
    return heads
428 438
429 439 def _getfile(self, file, rev):
430 440 io = StringIO()
431 441 # TODO: ra.get_file transmits the whole file instead of diffs.
432 442 mode = ''
433 443 try:
434 444 revnum = self.revnum(rev)
435 445 if self.module != self.modulemap[revnum]:
436 446 self.module = self.modulemap[revnum]
437 447 self.reparent(self.module)
438 448 info = svn.ra.get_file(self.ra, file, revnum, io)
439 449 if isinstance(info, list):
440 450 info = info[-1]
441 451 mode = ("svn:executable" in info) and 'x' or ''
442 452 mode = ("svn:special" in info) and 'l' or mode
443 453 except SubversionException, e:
444 454 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
445 455 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
446 456 if e.apr_err in notfound: # File not found
447 457 raise IOError()
448 458 raise
449 459 data = io.getvalue()
450 460 if mode == 'l':
451 461 link_prefix = "link "
452 462 if data.startswith(link_prefix):
453 463 data = data[len(link_prefix):]
454 464 return data, mode
455 465
def getfile(self, file, rev):
    """Return the contents of file at rev and remember its mode.

    The mode reported by _getfile is stored in self.modecache under the
    key (file, rev).
    """
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
460 470
def getmode(self, file, rev):
    """Return the mode stored in self.modecache for (file, rev)."""
    key = (file, rev)
    return self.modecache[key]
463 473
def getchanges(self, rev):
    """Return the file list recorded for rev, sorted in place.

    Also resets self.modecache to an empty dict.
    """
    self.modecache = {}
    changed = self.files[rev]
    changed.sort()
    return changed
470 480
def getcommit(self, rev):
    """Return the commit stored for rev, fetching revisions first if needed."""
    if rev not in self.commits:
        revnum = self.revnum(rev)
        # Lower bound of the requested window, clamped at revision 0.
        minrev = max(revnum - LOG_BATCH_SIZE, 0)
        self._fetch_revisions(from_revnum=revnum, to_revnum=minrev)
    return self.commits[rev]
477 487
def gettags(self):
    """Return the tags of this source; always a new empty list."""
    tags = []
    return tags
480 490
def _find_children(self, path, revnum):
    """List the entries below path as they exist in revision revnum.

    Walks the directory through the RA layer and returns "path/child"
    strings for every non-directory entry found, recursing into
    sub-directories. The session is reparented to path for the walk and
    back to self.module afterwards.

    When the getdir binding returns a plain dict (broken bindings), the
    walk is abandoned, self._find_children_fallback is set so later calls
    skip it, and the listing comes from svn.client.ls instead.
    """
    path = path.strip("/")

    def _find_children_fallback(path, revnum):
        # SWIG python bindings for getdir are broken up to at least 1.4.3
        # Import explicitly: this file itself never imports svn.client.
        import svn.client
        if not hasattr(self, 'client_ctx'):
            self.client_ctx = svn.client.create_context()
        optrev = svn.core.svn_opt_revision_t()
        optrev.kind = svn.core.svn_opt_revision_number
        optrev.value.number = revnum
        rpath = '/'.join([self.base, path]).strip('/')
        # The True argument is presumably the recurse flag -- TODO confirm.
        listing = svn.client.ls(rpath, optrev, True, self.client_ctx)
        return ['%s/%s' % (path, x) for x in listing.keys()]

    if hasattr(self, '_find_children_fallback'):
        return _find_children_fallback(path, revnum)

    self.reparent("/" + path)
    pool = Pool()

    children = []
    def find_children_inner(children, path, revnum = revnum):
        if hasattr(svn.ra, 'get_dir2'): # Since SVN 1.4
            fields = 0xffffffff # Binding does not provide SVN_DIRENT_ALL
            getdir = svn.ra.get_dir2(self.ra, path, revnum, fields, pool)
        else:
            getdir = svn.ra.get_dir(self.ra, path, revnum, pool)
        if isinstance(getdir, dict):
            # python binding for getdir is broken up to at least 1.4.3
            raise CompatibilityException()
        dirents = getdir[0]
        if isinstance(dirents, int):
            # got here once due to infinite recursion bug
            # pprint.pprint(getdir)
            return
        for child in sorted(dirents.keys()):
            dirent = dirents[child]
            child_path = (path + "/" + child).strip("/")
            if dirent.kind == svn.core.svn_node_dir:
                find_children_inner(children, child_path)
            else:
                children.append(child_path)

    try:
        find_children_inner(children, "")
    except CompatibilityException:
        self._find_children_fallback = True
        self.reparent(self.module)
        return _find_children_fallback(path, revnum)

    self.reparent(self.module)
    return [path + "/" + c for c in children]
General Comments 0
You need to be logged in to leave comments. Login now