py3: make sure regexes are bytes...
Pulkit Goyal
r36473:9e3cb58c default
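The fix is easiest to see in isolation. Mercurial passes its internal data, including the CVSROOT strings handled below, around as bytes; Python 2 str patterns match str data, but on Python 3 a str (unicode) pattern applied to bytes raises TypeError, so the patterns themselves have to become bytes (br'...'). A minimal sketch of the failure mode and the fix, using illustrative values that are not part of the diff:

    import re

    s = b':pserver:user@example.com:/cvsroot'  # convert code passes paths as bytes

    # Python 2: str pattern, str data -- works.
    # Python 3: str pattern, bytes data -- raises
    #   TypeError: cannot use a string pattern on a bytes-like object
    #re.findall(r'\w+', s)

    # The fix this commit applies: make the pattern bytes as well.
    print(re.findall(br'\w+', s))
    # [b'pserver', b'user', b'example', b'com', b'cvsroot']

The one hunk visible in this view is line 173 of cvsps.py, where cache-file name components are reduced to alphanumerics with re.findall(br'\w+', s).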
@@ -1,952 +1,952 @@ hgext/convert/cvsps.py
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import os
10 10 import re
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial import (
14 14 encoding,
15 15 error,
16 16 hook,
17 17 pycompat,
18 18 util,
19 19 )
20 20
21 21 pickle = util.pickle
22 22
23 23 class logentry(object):
24 24 '''Class logentry has the following attributes:
25 25 .author - author name as CVS knows it
26 26 .branch - name of branch this revision is on
27 27 .branches - revision tuple of branches starting at this revision
28 28 .comment - commit message
29 29 .commitid - CVS commitid or None
30 30 .date - the commit date as a (time, tz) tuple
31 31 .dead - true if file revision is dead
32 32 .file - Name of file
33 33 .lines - a tuple (+lines, -lines) or None
34 34 .parent - Previous revision of this entry
35 35 .rcs - name of file as returned from CVS
36 36 .revision - revision number as tuple
37 37 .tags - list of tags on the file
38 38 .synthetic - is this a synthetic "file ... added on ..." revision?
39 39 .mergepoint - the branch that has been merged from (if present in
40 40 rlog output) or None
41 41 .branchpoints - the branches that start at the current entry or empty
42 42 '''
43 43 def __init__(self, **entries):
44 44 self.synthetic = False
45 45 self.__dict__.update(entries)
46 46
47 47 def __repr__(self):
48 48 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
49 49 return "%s(%s)"%(type(self).__name__, ", ".join(items))
50 50
51 51 class logerror(Exception):
52 52 pass
53 53
54 54 def getrepopath(cvspath):
55 55 """Return the repository path from a CVS path.
56 56
57 57 >>> getrepopath(b'/foo/bar')
58 58 '/foo/bar'
59 59 >>> getrepopath(b'c:/foo/bar')
60 60 '/foo/bar'
61 61 >>> getrepopath(b':pserver:10/foo/bar')
62 62 '/foo/bar'
63 63 >>> getrepopath(b':pserver:10c:/foo/bar')
64 64 '/foo/bar'
65 65 >>> getrepopath(b':pserver:/foo/bar')
66 66 '/foo/bar'
67 67 >>> getrepopath(b':pserver:c:/foo/bar')
68 68 '/foo/bar'
69 69 >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
70 70 '/foo/bar'
71 71 >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
72 72 '/foo/bar'
73 73 >>> getrepopath(b'user@server/path/to/repository')
74 74 '/path/to/repository'
75 75 """
76 76 # According to the CVS manual, CVS paths are expressed like:
77 77 # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
78 78 #
79 79 # The CVS path is split into parts, and then the position of the first
80 80 # occurrence of the '/' char after the '@' is located. The repository
81 81 # path is the rest of the string from that '/' sign on, including it.
82 82
83 83 parts = cvspath.split(':')
84 84 atposition = parts[-1].find('@')
85 85 start = 0
86 86
87 87 if atposition != -1:
88 88 start = atposition
89 89
90 90 repopath = parts[-1][parts[-1].find('/', start):]
91 91 return repopath
92 92
93 93 def createlog(ui, directory=None, root="", rlog=True, cache=None):
94 94 '''Collect the CVS rlog'''
95 95
96 96 # Because we store many duplicate commit log messages, reusing strings
97 97 # saves a lot of memory and pickle storage space.
98 98 _scache = {}
99 99 def scache(s):
100 100 "return a shared version of a string"
101 101 return _scache.setdefault(s, s)
102 102
103 103 ui.status(_('collecting CVS rlog\n'))
104 104
105 105 log = [] # list of logentry objects containing the CVS state
106 106
107 107 # patterns to match in CVS (r)log output, by state of use
108 108 re_00 = re.compile('RCS file: (.+)$')
109 109 re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
110 110 re_02 = re.compile('cvs (r?log|server): (.+)\n$')
111 111 re_03 = re.compile("(Cannot access.+CVSROOT)|"
112 112 "(can't create temporary directory.+)$")
113 113 re_10 = re.compile('Working file: (.+)$')
114 114 re_20 = re.compile('symbolic names:')
115 115 re_30 = re.compile('\t(.+): ([\\d.]+)$')
116 116 re_31 = re.compile('----------------------------$')
117 117 re_32 = re.compile('======================================='
118 118 '======================================$')
119 119 re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
120 120 re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
121 121 r'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
122 122 r'(\s+commitid:\s+([^;]+);)?'
123 123 r'(.*mergepoint:\s+([^;]+);)?')
124 124 re_70 = re.compile('branches: (.+);$')
125 125
126 126 file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')
127 127
128 128 prefix = '' # leading path to strip off what we get from CVS
129 129
130 130 if directory is None:
131 131 # Current working directory
132 132
133 133 # Get the real directory in the repository
134 134 try:
135 135 prefix = open(os.path.join('CVS','Repository'), 'rb').read().strip()
136 136 directory = prefix
137 137 if prefix == ".":
138 138 prefix = ""
139 139 except IOError:
140 140 raise logerror(_('not a CVS sandbox'))
141 141
142 142 if prefix and not prefix.endswith(pycompat.ossep):
143 143 prefix += pycompat.ossep
144 144
145 145 # Use the Root file in the sandbox, if it exists
146 146 try:
147 147 root = open(os.path.join('CVS','Root'), 'rb').read().strip()
148 148 except IOError:
149 149 pass
150 150
151 151 if not root:
152 152 root = encoding.environ.get('CVSROOT', '')
153 153
154 154 # read log cache if one exists
155 155 oldlog = []
156 156 date = None
157 157
158 158 if cache:
159 159 cachedir = os.path.expanduser('~/.hg.cvsps')
160 160 if not os.path.exists(cachedir):
161 161 os.mkdir(cachedir)
162 162
163 163 # The cvsps cache pickle needs a uniquified name, based on the
164 164 # repository location. The address may have all sorts of nasties
165 165 # in it, slashes, colons and such. So here we take just the
166 166 # alphanumeric characters, concatenated in a way that does not
167 167 # mix up the various components, so that
168 168 # :pserver:user@server:/path
169 169 # and
170 170 # /pserver/user/server/path
171 171 # are mapped to different cache file names.
172 172 cachefile = root.split(":") + [directory, "cache"]
173 cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
173 cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
174 174 cachefile = os.path.join(cachedir,
175 175 '.'.join([s for s in cachefile if s]))
176 176
177 177 if cache == 'update':
178 178 try:
179 179 ui.note(_('reading cvs log cache %s\n') % cachefile)
180 180 oldlog = pickle.load(open(cachefile, 'rb'))
181 181 for e in oldlog:
182 182 if not (util.safehasattr(e, 'branchpoints') and
183 183 util.safehasattr(e, 'commitid') and
184 184 util.safehasattr(e, 'mergepoint')):
185 185 ui.status(_('ignoring old cache\n'))
186 186 oldlog = []
187 187 break
188 188
189 189 ui.note(_('cache has %d log entries\n') % len(oldlog))
190 190 except Exception as e:
191 191 ui.note(_('error reading cache: %r\n') % e)
192 192
193 193 if oldlog:
194 194 date = oldlog[-1].date # last commit date as a (time,tz) tuple
195 195 date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
196 196
197 197 # build the CVS commandline
198 198 cmd = ['cvs', '-q']
199 199 if root:
200 200 cmd.append('-d%s' % root)
201 201 p = util.normpath(getrepopath(root))
202 202 if not p.endswith('/'):
203 203 p += '/'
204 204 if prefix:
205 205 # looks like normpath replaces "" by "."
206 206 prefix = p + util.normpath(prefix)
207 207 else:
208 208 prefix = p
209 209 cmd.append(['log', 'rlog'][rlog])
210 210 if date:
211 211 # no space between option and date string
212 212 cmd.append('-d>%s' % date)
213 213 cmd.append(directory)
214 214
215 215 # state machine begins here
216 216 tags = {} # dictionary of revisions on current file with their tags
217 217 branchmap = {} # mapping between branch names and revision numbers
218 218 rcsmap = {}
219 219 state = 0
220 220 store = False # set when a new record can be appended
221 221
222 222 cmd = [util.shellquote(arg) for arg in cmd]
223 223 ui.note(_("running %s\n") % (' '.join(cmd)))
224 224 ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
225 225
226 226 pfp = util.popen(' '.join(cmd))
227 227 peek = pfp.readline()
228 228 while True:
229 229 line = peek
230 230 if line == '':
231 231 break
232 232 peek = pfp.readline()
233 233 if line.endswith('\n'):
234 234 line = line[:-1]
235 235 #ui.debug('state=%d line=%r\n' % (state, line))
236 236
237 237 if state == 0:
238 238 # initial state, consume input until we see 'RCS file'
239 239 match = re_00.match(line)
240 240 if match:
241 241 rcs = match.group(1)
242 242 tags = {}
243 243 if rlog:
244 244 filename = util.normpath(rcs[:-2])
245 245 if filename.startswith(prefix):
246 246 filename = filename[len(prefix):]
247 247 if filename.startswith('/'):
248 248 filename = filename[1:]
249 249 if filename.startswith('Attic/'):
250 250 filename = filename[6:]
251 251 else:
252 252 filename = filename.replace('/Attic/', '/')
253 253 state = 2
254 254 continue
255 255 state = 1
256 256 continue
257 257 match = re_01.match(line)
258 258 if match:
259 259 raise logerror(match.group(1))
260 260 match = re_02.match(line)
261 261 if match:
262 262 raise logerror(match.group(2))
263 263 if re_03.match(line):
264 264 raise logerror(line)
265 265
266 266 elif state == 1:
267 267 # expect 'Working file' (only when using log instead of rlog)
268 268 match = re_10.match(line)
269 269 assert match, _('RCS file must be followed by working file')
270 270 filename = util.normpath(match.group(1))
271 271 state = 2
272 272
273 273 elif state == 2:
274 274 # expect 'symbolic names'
275 275 if re_20.match(line):
276 276 branchmap = {}
277 277 state = 3
278 278
279 279 elif state == 3:
280 280 # read the symbolic names and store as tags
281 281 match = re_30.match(line)
282 282 if match:
283 283 rev = [int(x) for x in match.group(2).split('.')]
284 284
285 285 # Convert magic branch number to an odd-numbered one
286 286 revn = len(rev)
287 287 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
288 288 rev = rev[:-2] + rev[-1:]
289 289 rev = tuple(rev)
290 290
291 291 if rev not in tags:
292 292 tags[rev] = []
293 293 tags[rev].append(match.group(1))
294 294 branchmap[match.group(1)] = match.group(2)
295 295
296 296 elif re_31.match(line):
297 297 state = 5
298 298 elif re_32.match(line):
299 299 state = 0
300 300
301 301 elif state == 4:
302 302 # expecting '------' separator before first revision
303 303 if re_31.match(line):
304 304 state = 5
305 305 else:
306 306 assert not re_32.match(line), _('must have at least '
307 307 'some revisions')
308 308
309 309 elif state == 5:
310 310 # expecting revision number and possibly (ignored) lock indication
311 311 # we create the logentry here from values stored in states 0 to 4,
312 312 # as this state is re-entered for subsequent revisions of a file.
313 313 match = re_50.match(line)
314 314 assert match, _('expected revision number')
315 315 e = logentry(rcs=scache(rcs),
316 316 file=scache(filename),
317 317 revision=tuple([int(x) for x in
318 318 match.group(1).split('.')]),
319 319 branches=[],
320 320 parent=None,
321 321 commitid=None,
322 322 mergepoint=None,
323 323 branchpoints=set())
324 324
325 325 state = 6
326 326
327 327 elif state == 6:
328 328 # expecting date, author, state, lines changed
329 329 match = re_60.match(line)
330 330 assert match, _('revision must be followed by date line')
331 331 d = match.group(1)
332 332 if d[2] == '/':
333 333 # Y2K
334 334 d = '19' + d
335 335
336 336 if len(d.split()) != 3:
337 337 # cvs log dates always in GMT
338 338 d = d + ' UTC'
339 339 e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S',
340 340 '%Y/%m/%d %H:%M:%S',
341 341 '%Y-%m-%d %H:%M:%S'])
342 342 e.author = scache(match.group(2))
343 343 e.dead = match.group(3).lower() == 'dead'
344 344
345 345 if match.group(5):
346 346 if match.group(6):
347 347 e.lines = (int(match.group(5)), int(match.group(6)))
348 348 else:
349 349 e.lines = (int(match.group(5)), 0)
350 350 elif match.group(6):
351 351 e.lines = (0, int(match.group(6)))
352 352 else:
353 353 e.lines = None
354 354
355 355 if match.group(7): # cvs 1.12 commitid
356 356 e.commitid = match.group(8)
357 357
358 358 if match.group(9): # cvsnt mergepoint
359 359 myrev = match.group(10).split('.')
360 360 if len(myrev) == 2: # head
361 361 e.mergepoint = 'HEAD'
362 362 else:
363 363 myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
364 364 branches = [b for b in branchmap if branchmap[b] == myrev]
365 365 assert len(branches) == 1, ('unknown branch: %s'
366 366 % e.mergepoint)
367 367 e.mergepoint = branches[0]
368 368
369 369 e.comment = []
370 370 state = 7
371 371
372 372 elif state == 7:
373 373 # read the revision numbers of branches that start at this revision
374 374 # or store the commit log message otherwise
375 375 m = re_70.match(line)
376 376 if m:
377 377 e.branches = [tuple([int(y) for y in x.strip().split('.')])
378 378 for x in m.group(1).split(';')]
379 379 state = 8
380 380 elif re_31.match(line) and re_50.match(peek):
381 381 state = 5
382 382 store = True
383 383 elif re_32.match(line):
384 384 state = 0
385 385 store = True
386 386 else:
387 387 e.comment.append(line)
388 388
389 389 elif state == 8:
390 390 # store commit log message
391 391 if re_31.match(line):
392 392 cpeek = peek
393 393 if cpeek.endswith('\n'):
394 394 cpeek = cpeek[:-1]
395 395 if re_50.match(cpeek):
396 396 state = 5
397 397 store = True
398 398 else:
399 399 e.comment.append(line)
400 400 elif re_32.match(line):
401 401 state = 0
402 402 store = True
403 403 else:
404 404 e.comment.append(line)
405 405
406 406 # When a file is added on a branch B1, CVS creates a synthetic
407 407 # dead trunk revision 1.1 so that the branch has a root.
408 408 # Likewise, if you merge such a file to a later branch B2 (one
409 409 # that already existed when the file was added on B1), CVS
410 410 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
411 411 # these revisions now, but mark them synthetic so
412 412 # createchangeset() can take care of them.
413 413 if (store and
414 414 e.dead and
415 415 e.revision[-1] == 1 and # 1.1 or 1.1.x.1
416 416 len(e.comment) == 1 and
417 417 file_added_re.match(e.comment[0])):
418 418 ui.debug('found synthetic revision in %s: %r\n'
419 419 % (e.rcs, e.comment[0]))
420 420 e.synthetic = True
421 421
422 422 if store:
423 423 # clean up the results and save in the log.
424 424 store = False
425 425 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
426 426 e.comment = scache('\n'.join(e.comment))
427 427
428 428 revn = len(e.revision)
429 429 if revn > 3 and (revn % 2) == 0:
430 430 e.branch = tags.get(e.revision[:-1], [None])[0]
431 431 else:
432 432 e.branch = None
433 433
434 434 # find the branches starting from this revision
435 435 branchpoints = set()
436 436 for branch, revision in branchmap.iteritems():
437 437 revparts = tuple([int(i) for i in revision.split('.')])
438 438 if len(revparts) < 2: # bad tags
439 439 continue
440 440 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
441 441 # normal branch
442 442 if revparts[:-2] == e.revision:
443 443 branchpoints.add(branch)
444 444 elif revparts == (1, 1, 1): # vendor branch
445 445 if revparts in e.branches:
446 446 branchpoints.add(branch)
447 447 e.branchpoints = branchpoints
448 448
449 449 log.append(e)
450 450
451 451 rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs
452 452
453 453 if len(log) % 100 == 0:
454 454 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
455 455
456 456 log.sort(key=lambda x: (x.rcs, x.revision))
457 457
458 458 # find parent revisions of individual files
459 459 versions = {}
460 460 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
461 461 rcs = e.rcs.replace('/Attic/', '/')
462 462 if rcs in rcsmap:
463 463 e.rcs = rcsmap[rcs]
464 464 branch = e.revision[:-1]
465 465 versions[(e.rcs, branch)] = e.revision
466 466
467 467 for e in log:
468 468 branch = e.revision[:-1]
469 469 p = versions.get((e.rcs, branch), None)
470 470 if p is None:
471 471 p = e.revision[:-2]
472 472 e.parent = p
473 473 versions[(e.rcs, branch)] = e.revision
474 474
475 475 # update the log cache
476 476 if cache:
477 477 if log:
478 478 # join up the old and new logs
479 479 log.sort(key=lambda x: x.date)
480 480
481 481 if oldlog and oldlog[-1].date >= log[0].date:
482 482 raise logerror(_('log cache overlaps with new log entries,'
483 483 ' re-run without cache.'))
484 484
485 485 log = oldlog + log
486 486
487 487 # write the new cachefile
488 488 ui.note(_('writing cvs log cache %s\n') % cachefile)
489 489 pickle.dump(log, open(cachefile, 'wb'))
490 490 else:
491 491 log = oldlog
492 492
493 493 ui.status(_('%d log entries\n') % len(log))
494 494
495 495 encodings = ui.configlist('convert', 'cvsps.logencoding')
496 496 if encodings:
497 497 def revstr(r):
498 498 # this is needed, because logentry.revision is a tuple of "int"
499 499 # (e.g. (1, 2) for "1.2")
500 500 return '.'.join(pycompat.maplist(pycompat.bytestr, r))
501 501
502 502 for entry in log:
503 503 comment = entry.comment
504 504 for e in encodings:
505 505 try:
506 506 entry.comment = comment.decode(e).encode('utf-8')
507 507 if ui.debugflag:
508 508 ui.debug("transcoding by %s: %s of %s\n" %
509 509 (e, revstr(entry.revision), entry.file))
510 510 break
511 511 except UnicodeDecodeError:
512 512 pass # try next encoding
513 513 except LookupError as inst: # unknown encoding, maybe
514 514 raise error.Abort(inst,
515 515 hint=_('check convert.cvsps.logencoding'
516 516 ' configuration'))
517 517 else:
518 518 raise error.Abort(_("no encoding can transcode"
519 519 " CVS log message for %s of %s")
520 520 % (revstr(entry.revision), entry.file),
521 521 hint=_('check convert.cvsps.logencoding'
522 522 ' configuration'))
523 523
524 524 hook.hook(ui, None, "cvslog", True, log=log)
525 525
526 526 return log
527 527
528 528
529 529 class changeset(object):
530 530 '''Class changeset has the following attributes:
531 531 .id - integer identifying this changeset (list index)
532 532 .author - author name as CVS knows it
533 533 .branch - name of branch this changeset is on, or None
534 534 .comment - commit message
535 535 .commitid - CVS commitid or None
536 536 .date - the commit date as a (time,tz) tuple
537 537 .entries - list of logentry objects in this changeset
538 538 .parents - list of one or two parent changesets
539 539 .tags - list of tags on this changeset
540 540 .synthetic - from synthetic revision "file ... added on branch ..."
541 541 .mergepoint - the branch that has been merged from or None
542 542 .branchpoints - the branches that start at the current entry or empty
543 543 '''
544 544 def __init__(self, **entries):
545 545 self.id = None
546 546 self.synthetic = False
547 547 self.__dict__.update(entries)
548 548
549 549 def __repr__(self):
550 550 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
551 551 return "%s(%s)"%(type(self).__name__, ", ".join(items))
552 552
553 553 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
554 554 '''Convert log into changesets.'''
555 555
556 556 ui.status(_('creating changesets\n'))
557 557
558 558 # try to order commitids by date
559 559 mindate = {}
560 560 for e in log:
561 561 if e.commitid:
562 562 mindate[e.commitid] = min(e.date, mindate.get(e.commitid, e.date))
563 563
564 564 # Merge changesets
565 565 log.sort(key=lambda x: (mindate.get(x.commitid), x.commitid, x.comment,
566 566 x.author, x.branch, x.date, x.branchpoints))
567 567
568 568 changesets = []
569 569 files = set()
570 570 c = None
571 571 for i, e in enumerate(log):
572 572
573 573 # Check if log entry belongs to the current changeset or not.
574 574
575 575 # Since CVS is file-centric, two different file revisions with
576 576 # different branchpoints should be treated as belonging to two
577 577 # different changesets (and the ordering is important and not
578 578 # honoured by cvsps at this point).
579 579 #
580 580 # Consider the following case:
581 581 # foo 1.1 branchpoints: [MYBRANCH]
582 582 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
583 583 #
584 584 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
585 585 # later version of foo may be in MYBRANCH2, so foo should be the
586 586 # first changeset and bar the next and MYBRANCH and MYBRANCH2
587 587 # should both start off of the bar changeset. No provisions are
588 588 # made to ensure that this is, in fact, what happens.
589 589 if not (c and e.branchpoints == c.branchpoints and
590 590 (# cvs commitids
591 591 (e.commitid is not None and e.commitid == c.commitid) or
592 592 (# no commitids, use fuzzy commit detection
593 593 (e.commitid is None or c.commitid is None) and
594 594 e.comment == c.comment and
595 595 e.author == c.author and
596 596 e.branch == c.branch and
597 597 ((c.date[0] + c.date[1]) <=
598 598 (e.date[0] + e.date[1]) <=
599 599 (c.date[0] + c.date[1]) + fuzz) and
600 600 e.file not in files))):
601 601 c = changeset(comment=e.comment, author=e.author,
602 602 branch=e.branch, date=e.date,
603 603 entries=[], mergepoint=e.mergepoint,
604 604 branchpoints=e.branchpoints, commitid=e.commitid)
605 605 changesets.append(c)
606 606
607 607 files = set()
608 608 if len(changesets) % 100 == 0:
609 609 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
610 610 ui.status(util.ellipsis(t, 80) + '\n')
611 611
612 612 c.entries.append(e)
613 613 files.add(e.file)
614 614 c.date = e.date # changeset date is date of latest commit in it
615 615
616 616 # Mark synthetic changesets
617 617
618 618 for c in changesets:
619 619 # Synthetic revisions always get their own changeset, because
620 620 # the log message includes the filename. E.g. if you add file3
621 621 # and file4 on a branch, you get four log entries and three
622 622 # changesets:
623 623 # "File file3 was added on branch ..." (synthetic, 1 entry)
624 624 # "File file4 was added on branch ..." (synthetic, 1 entry)
625 625 # "Add file3 and file4 to fix ..." (real, 2 entries)
626 626 # Hence the check for 1 entry here.
627 627 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
628 628
629 629 # Sort files in each changeset
630 630
631 631 def entitycompare(l, r):
632 632 'Mimic cvsps sorting order'
633 633 l = l.file.split('/')
634 634 r = r.file.split('/')
635 635 nl = len(l)
636 636 nr = len(r)
637 637 n = min(nl, nr)
638 638 for i in range(n):
639 639 if i + 1 == nl and nl < nr:
640 640 return -1
641 641 elif i + 1 == nr and nl > nr:
642 642 return +1
643 643 elif l[i] < r[i]:
644 644 return -1
645 645 elif l[i] > r[i]:
646 646 return +1
647 647 return 0
648 648
649 649 for c in changesets:
650 650 c.entries.sort(entitycompare)
651 651
652 652 # Sort changesets by date
653 653
654 654 odd = set()
655 655 def cscmp(l, r):
656 656 d = sum(l.date) - sum(r.date)
657 657 if d:
658 658 return d
659 659
660 660 # detect vendor branches and initial commits on a branch
661 661 le = {}
662 662 for e in l.entries:
663 663 le[e.rcs] = e.revision
664 664 re = {}
665 665 for e in r.entries:
666 666 re[e.rcs] = e.revision
667 667
668 668 d = 0
669 669 for e in l.entries:
670 670 if re.get(e.rcs, None) == e.parent:
671 671 assert not d
672 672 d = 1
673 673 break
674 674
675 675 for e in r.entries:
676 676 if le.get(e.rcs, None) == e.parent:
677 677 if d:
678 678 odd.add((l, r))
679 679 d = -1
680 680 break
681 681 # By this point, the changesets are sufficiently compared that
682 682 # we don't really care about ordering. However, this leaves
683 683 # some race conditions in the tests, so we compare on the
684 684 # number of files modified, the files contained in each
685 685 # changeset, and the branchpoints in the change to ensure test
686 686 # output remains stable.
687 687
688 688 # recommended replacement for cmp from
689 689 # https://docs.python.org/3.0/whatsnew/3.0.html
690 690 c = lambda x, y: (x > y) - (x < y)
691 691 # Sort bigger changes first.
692 692 if not d:
693 693 d = c(len(l.entries), len(r.entries))
694 694 # Try sorting by filename in the change.
695 695 if not d:
696 696 d = c([e.file for e in l.entries], [e.file for e in r.entries])
697 697 # Try and put changes without a branch point before ones with
698 698 # a branch point.
699 699 if not d:
700 700 d = c(len(l.branchpoints), len(r.branchpoints))
701 701 return d
702 702
703 703 changesets.sort(cscmp)
704 704
705 705 # Collect tags
706 706
707 707 globaltags = {}
708 708 for c in changesets:
709 709 for e in c.entries:
710 710 for tag in e.tags:
711 711 # remember which is the latest changeset to have this tag
712 712 globaltags[tag] = c
713 713
714 714 for c in changesets:
715 715 tags = set()
716 716 for e in c.entries:
717 717 tags.update(e.tags)
718 718 # remember tags only if this is the latest changeset to have it
719 719 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
720 720
721 721 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
722 722 # by inserting dummy changesets with two parents, and handle
723 723 # {{mergefrombranch BRANCHNAME}} by setting two parents.
724 724
725 725 if mergeto is None:
726 726 mergeto = r'{{mergetobranch ([-\w]+)}}'
727 727 if mergeto:
728 728 mergeto = re.compile(mergeto)
729 729
730 730 if mergefrom is None:
731 731 mergefrom = r'{{mergefrombranch ([-\w]+)}}'
732 732 if mergefrom:
733 733 mergefrom = re.compile(mergefrom)
734 734
735 735 versions = {} # changeset index where we saw any particular file version
736 736 branches = {} # changeset index where we saw a branch
737 737 n = len(changesets)
738 738 i = 0
739 739 while i < n:
740 740 c = changesets[i]
741 741
742 742 for f in c.entries:
743 743 versions[(f.rcs, f.revision)] = i
744 744
745 745 p = None
746 746 if c.branch in branches:
747 747 p = branches[c.branch]
748 748 else:
749 749 # first changeset on a new branch
750 750 # the parent is a changeset with the branch in its
751 751 # branchpoints such that it is the latest possible
752 752 # commit without any intervening, unrelated commits.
753 753
754 754 for candidate in xrange(i):
755 755 if c.branch not in changesets[candidate].branchpoints:
756 756 if p is not None:
757 757 break
758 758 continue
759 759 p = candidate
760 760
761 761 c.parents = []
762 762 if p is not None:
763 763 p = changesets[p]
764 764
765 765 # Ensure no changeset has a synthetic changeset as a parent.
766 766 while p.synthetic:
767 767 assert len(p.parents) <= 1, \
768 768 _('synthetic changeset cannot have multiple parents')
769 769 if p.parents:
770 770 p = p.parents[0]
771 771 else:
772 772 p = None
773 773 break
774 774
775 775 if p is not None:
776 776 c.parents.append(p)
777 777
778 778 if c.mergepoint:
779 779 if c.mergepoint == 'HEAD':
780 780 c.mergepoint = None
781 781 c.parents.append(changesets[branches[c.mergepoint]])
782 782
783 783 if mergefrom:
784 784 m = mergefrom.search(c.comment)
785 785 if m:
786 786 m = m.group(1)
787 787 if m == 'HEAD':
788 788 m = None
789 789 try:
790 790 candidate = changesets[branches[m]]
791 791 except KeyError:
792 792 ui.warn(_("warning: CVS commit message references "
793 793 "non-existent branch %r:\n%s\n")
794 794 % (m, c.comment))
795 795 if m in branches and c.branch != m and not candidate.synthetic:
796 796 c.parents.append(candidate)
797 797
798 798 if mergeto:
799 799 m = mergeto.search(c.comment)
800 800 if m:
801 801 if m.groups():
802 802 m = m.group(1)
803 803 if m == 'HEAD':
804 804 m = None
805 805 else:
806 806 m = None # if no group found then merge to HEAD
807 807 if m in branches and c.branch != m:
808 808 # insert empty changeset for merge
809 809 cc = changeset(
810 810 author=c.author, branch=m, date=c.date,
811 811 comment='convert-repo: CVS merge from branch %s'
812 812 % c.branch,
813 813 entries=[], tags=[],
814 814 parents=[changesets[branches[m]], c])
815 815 changesets.insert(i + 1, cc)
816 816 branches[m] = i + 1
817 817
818 818 # adjust our loop counters now we have inserted a new entry
819 819 n += 1
820 820 i += 2
821 821 continue
822 822
823 823 branches[c.branch] = i
824 824 i += 1
825 825
826 826 # Drop synthetic changesets (safe now that we have ensured no other
827 827 # changesets can have them as parents).
828 828 i = 0
829 829 while i < len(changesets):
830 830 if changesets[i].synthetic:
831 831 del changesets[i]
832 832 else:
833 833 i += 1
834 834
835 835 # Number changesets
836 836
837 837 for i, c in enumerate(changesets):
838 838 c.id = i + 1
839 839
840 840 if odd:
841 841 for l, r in odd:
842 842 if l.id is not None and r.id is not None:
843 843 ui.warn(_('changeset %d is both before and after %d\n')
844 844 % (l.id, r.id))
845 845
846 846 ui.status(_('%d changeset entries\n') % len(changesets))
847 847
848 848 hook.hook(ui, None, "cvschangesets", True, changesets=changesets)
849 849
850 850 return changesets
851 851
852 852
853 853 def debugcvsps(ui, *args, **opts):
854 854 '''Read CVS rlog for current directory or named path in
855 855 repository, and convert the log to changesets based on matching
856 856 commit log entries and dates.
857 857 '''
858 858 opts = pycompat.byteskwargs(opts)
859 859 if opts["new_cache"]:
860 860 cache = "write"
861 861 elif opts["update_cache"]:
862 862 cache = "update"
863 863 else:
864 864 cache = None
865 865
866 866 revisions = opts["revisions"]
867 867
868 868 try:
869 869 if args:
870 870 log = []
871 871 for d in args:
872 872 log += createlog(ui, d, root=opts["root"], cache=cache)
873 873 else:
874 874 log = createlog(ui, root=opts["root"], cache=cache)
875 875 except logerror as e:
876 876 ui.write("%r\n"%e)
877 877 return
878 878
879 879 changesets = createchangeset(ui, log, opts["fuzz"])
880 880 del log
881 881
882 882 # Print changesets (optionally filtered)
883 883
884 884 off = len(revisions)
885 885 branches = {} # latest version number in each branch
886 886 ancestors = {} # parent branch
887 887 for cs in changesets:
888 888
889 889 if opts["ancestors"]:
890 890 if cs.branch not in branches and cs.parents and cs.parents[0].id:
891 891 ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
892 892 cs.parents[0].id)
893 893 branches[cs.branch] = cs.id
894 894
895 895 # limit by branches
896 896 if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
897 897 continue
898 898
899 899 if not off:
900 900 # Note: trailing spaces on several lines here are needed to have
901 901 # bug-for-bug compatibility with cvsps.
902 902 ui.write('---------------------\n')
903 903 ui.write(('PatchSet %d \n' % cs.id))
904 904 ui.write(('Date: %s\n' % util.datestr(cs.date,
905 905 '%Y/%m/%d %H:%M:%S %1%2')))
906 906 ui.write(('Author: %s\n' % cs.author))
907 907 ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
908 908 ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
909 909 ','.join(cs.tags) or '(none)')))
910 910 if cs.branchpoints:
911 911 ui.write(('Branchpoints: %s \n') %
912 912 ', '.join(sorted(cs.branchpoints)))
913 913 if opts["parents"] and cs.parents:
914 914 if len(cs.parents) > 1:
915 915 ui.write(('Parents: %s\n' %
916 916 (','.join([str(p.id) for p in cs.parents]))))
917 917 else:
918 918 ui.write(('Parent: %d\n' % cs.parents[0].id))
919 919
920 920 if opts["ancestors"]:
921 921 b = cs.branch
922 922 r = []
923 923 while b:
924 924 b, c = ancestors[b]
925 925 r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
926 926 if r:
927 927 ui.write(('Ancestors: %s\n' % (','.join(r))))
928 928
929 929 ui.write(('Log:\n'))
930 930 ui.write('%s\n\n' % cs.comment)
931 931 ui.write(('Members: \n'))
932 932 for f in cs.entries:
933 933 fn = f.file
934 934 if fn.startswith(opts["prefix"]):
935 935 fn = fn[len(opts["prefix"]):]
936 936 ui.write('\t%s:%s->%s%s \n' % (
937 937 fn, '.'.join([str(x) for x in f.parent]) or 'INITIAL',
938 938 '.'.join([str(x) for x in f.revision]),
939 939 ['', '(DEAD)'][f.dead]))
940 940 ui.write('\n')
941 941
942 942 # have we seen the start tag?
943 943 if revisions and off:
944 944 if revisions[0] == str(cs.id) or \
945 945 revisions[0] in cs.tags:
946 946 off = False
947 947
948 948 # see if we reached the end tag
949 949 if len(revisions) > 1 and not off:
950 950 if revisions[1] == str(cs.id) or \
951 951 revisions[1] in cs.tags:
952 952 break
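Before the second file, a brief aside on the cache-naming code above (cvsps.py lines 163-175), since it contains the only change visible in this view: each component of the CVSROOT and module directory is reduced to runs of word characters and the pieces are joined so that distinct roots cannot collide. A rough Python 3 rendering with hypothetical values (the real code then places the name under ~/.hg.cvsps via os.path.join):

    import re

    root = b':pserver:user@server:/path'
    directory = b'mymodule'

    cachefile = root.split(b':') + [directory, b'cache']
    cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
    print(b'.'.join(cachefile))
    # b'pserver.user-server.path.mymodule.cache'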
@@ -1,1356 +1,1356 @@ hgext/convert/subversion.py
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 from __future__ import absolute_import
5 5
6 6 import os
7 7 import re
8 8 import tempfile
9 9 import xml.dom.minidom
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial import (
13 13 encoding,
14 14 error,
15 15 pycompat,
16 16 util,
17 17 vfs as vfsmod,
18 18 )
19 19
20 20 from . import common
21 21
22 22 pickle = util.pickle
23 23 stringio = util.stringio
24 24 propertycache = util.propertycache
25 25 urlerr = util.urlerr
26 26 urlreq = util.urlreq
27 27
28 28 commandline = common.commandline
29 29 commit = common.commit
30 30 converter_sink = common.converter_sink
31 31 converter_source = common.converter_source
32 32 decodeargs = common.decodeargs
33 33 encodeargs = common.encodeargs
34 34 makedatetimestamp = common.makedatetimestamp
35 35 mapfile = common.mapfile
36 36 MissingTool = common.MissingTool
37 37 NoRepo = common.NoRepo
38 38
39 39 # Subversion stuff. Works best with very recent Python SVN bindings
40 40 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
41 41 # these bindings.
42 42
43 43 try:
44 44 import svn
45 45 import svn.client
46 46 import svn.core
47 47 import svn.ra
48 48 import svn.delta
49 49 from . import transport
50 50 import warnings
51 51 warnings.filterwarnings('ignore',
52 52 module='svn.core',
53 53 category=DeprecationWarning)
54 54 svn.core.SubversionException # trigger import to catch error
55 55
56 56 except ImportError:
57 57 svn = None
58 58
59 59 class SvnPathNotFound(Exception):
60 60 pass
61 61
62 62 def revsplit(rev):
63 63 """Parse a revision string and return (uuid, path, revnum).
64 64 >>> revsplit(b'svn:a2147622-4a9f-4db4-a8d3-13562ff547b2'
65 65 ... b'/proj%20B/mytrunk/mytrunk@1')
66 66 ('a2147622-4a9f-4db4-a8d3-13562ff547b2', '/proj%20B/mytrunk/mytrunk', 1)
67 67 >>> revsplit(b'svn:8af66a51-67f5-4354-b62c-98d67cc7be1d@1')
68 68 ('', '', 1)
69 69 >>> revsplit(b'@7')
70 70 ('', '', 7)
71 71 >>> revsplit(b'7')
72 72 ('', '', 0)
73 73 >>> revsplit(b'bad')
74 74 ('', '', 0)
75 75 """
76 76 parts = rev.rsplit('@', 1)
77 77 revnum = 0
78 78 if len(parts) > 1:
79 79 revnum = int(parts[1])
80 80 parts = parts[0].split('/', 1)
81 81 uuid = ''
82 82 mod = ''
83 83 if len(parts) > 1 and parts[0].startswith('svn:'):
84 84 uuid = parts[0][4:]
85 85 mod = '/' + parts[1]
86 86 return uuid, mod, revnum
87 87
88 88 def quote(s):
89 89 # As of svn 1.7, many svn calls expect "canonical" paths. In
90 90 # theory, we should call svn.core.*canonicalize() on all paths
91 91 # before passing them to the API. Instead, we assume the base url
92 92 # is canonical and copy the behaviour of svn URL encoding function
93 93 # so we can extend it safely with new components. The "safe"
94 94 # characters were taken from the "svn_uri__char_validity" table in
95 95 # libsvn_subr/path.c.
96 96 return urlreq.quote(s, "!$&'()*+,-./:=@_~")
97 97
98 98 def geturl(path):
99 99 try:
100 100 return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
101 101 except svn.core.SubversionException:
102 102 # svn.client.url_from_path() fails with local repositories
103 103 pass
104 104 if os.path.isdir(path):
105 105 path = os.path.normpath(os.path.abspath(path))
106 106 if pycompat.iswindows:
107 107 path = '/' + util.normpath(path)
108 108 # Module URL is later compared with the repository URL returned
109 109 # by svn API, which is UTF-8.
110 110 path = encoding.tolocal(path)
111 111 path = 'file://%s' % quote(path)
112 112 return svn.core.svn_path_canonicalize(path)
113 113
114 114 def optrev(number):
115 115 optrev = svn.core.svn_opt_revision_t()
116 116 optrev.kind = svn.core.svn_opt_revision_number
117 117 optrev.value.number = number
118 118 return optrev
119 119
120 120 class changedpath(object):
121 121 def __init__(self, p):
122 122 self.copyfrom_path = p.copyfrom_path
123 123 self.copyfrom_rev = p.copyfrom_rev
124 124 self.action = p.action
125 125
126 126 def get_log_child(fp, url, paths, start, end, limit=0,
127 127 discover_changed_paths=True, strict_node_history=False):
128 128 protocol = -1
129 129 def receiver(orig_paths, revnum, author, date, message, pool):
130 130 paths = {}
131 131 if orig_paths is not None:
132 132 for k, v in orig_paths.iteritems():
133 133 paths[k] = changedpath(v)
134 134 pickle.dump((paths, revnum, author, date, message),
135 135 fp, protocol)
136 136
137 137 try:
138 138 # Use an ra of our own so that our parent can consume
139 139 # our results without confusing the server.
140 140 t = transport.SvnRaTransport(url=url)
141 141 svn.ra.get_log(t.ra, paths, start, end, limit,
142 142 discover_changed_paths,
143 143 strict_node_history,
144 144 receiver)
145 145 except IOError:
146 146 # Caller may interrupt the iteration
147 147 pickle.dump(None, fp, protocol)
148 148 except Exception as inst:
149 149 pickle.dump(str(inst), fp, protocol)
150 150 else:
151 151 pickle.dump(None, fp, protocol)
152 152 fp.close()
153 153 # With a large history, the cleanup process goes crazy and suddenly
154 154 # consumes a *huge* amount of memory. The output file being closed,
155 155 # there is no need for clean termination.
156 156 os._exit(0)
157 157
158 158 def debugsvnlog(ui, **opts):
159 159 """Fetch SVN log in a subprocess and channel them back to parent to
160 160 avoid memory collection issues.
161 161 """
162 162 if svn is None:
163 163 raise error.Abort(_('debugsvnlog could not load Subversion python '
164 164 'bindings'))
165 165
166 166 args = decodeargs(ui.fin.read())
167 167 get_log_child(ui.fout, *args)
168 168
169 169 class logstream(object):
170 170 """Interruptible revision log iterator."""
171 171 def __init__(self, stdout):
172 172 self._stdout = stdout
173 173
174 174 def __iter__(self):
175 175 while True:
176 176 try:
177 177 entry = pickle.load(self._stdout)
178 178 except EOFError:
179 179 raise error.Abort(_('Mercurial failed to run itself, check'
180 180 ' hg executable is in PATH'))
181 181 try:
182 182 orig_paths, revnum, author, date, message = entry
183 183 except (TypeError, ValueError):
184 184 if entry is None:
185 185 break
186 186 raise error.Abort(_("log stream exception '%s'") % entry)
187 187 yield entry
188 188
189 189 def close(self):
190 190 if self._stdout:
191 191 self._stdout.close()
192 192 self._stdout = None
193 193
194 194 class directlogstream(list):
195 195 """Direct revision log iterator.
196 196 This can be used for debugging and development but it will probably leak
197 197 memory and is not suitable for real conversions."""
198 198 def __init__(self, url, paths, start, end, limit=0,
199 199 discover_changed_paths=True, strict_node_history=False):
200 200
201 201 def receiver(orig_paths, revnum, author, date, message, pool):
202 202 paths = {}
203 203 if orig_paths is not None:
204 204 for k, v in orig_paths.iteritems():
205 205 paths[k] = changedpath(v)
206 206 self.append((paths, revnum, author, date, message))
207 207
208 208 # Use an ra of our own so that our parent can consume
209 209 # our results without confusing the server.
210 210 t = transport.SvnRaTransport(url=url)
211 211 svn.ra.get_log(t.ra, paths, start, end, limit,
212 212 discover_changed_paths,
213 213 strict_node_history,
214 214 receiver)
215 215
216 216 def close(self):
217 217 pass
218 218
219 219 # Check to see if the given path is a local Subversion repo. Verify this by
220 220 # looking for several svn-specific files and directories in the given
221 221 # directory.
222 222 def filecheck(ui, path, proto):
223 223 for x in ('locks', 'hooks', 'format', 'db'):
224 224 if not os.path.exists(os.path.join(path, x)):
225 225 return False
226 226 return True
227 227
228 228 # Check to see if a given path is the root of an svn repo over http. We verify
229 229 # this by requesting a version-controlled URL we know can't exist and looking
230 230 # for the svn-specific "not found" XML.
231 231 def httpcheck(ui, path, proto):
232 232 try:
233 233 opener = urlreq.buildopener()
234 234 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path), 'rb')
235 235 data = rsp.read()
236 236 except urlerr.httperror as inst:
237 237 if inst.code != 404:
238 238 # Except for 404 we cannot know for sure this is not an svn repo
239 239 ui.warn(_('svn: cannot probe remote repository, assume it could '
240 240 'be a subversion repository. Use --source-type if you '
241 241 'know better.\n'))
242 242 return True
243 243 data = inst.fp.read()
244 244 except Exception:
245 245 # Could be urlerr.urlerror if the URL is invalid or anything else.
246 246 return False
247 247 return '<m:human-readable errcode="160013">' in data
248 248
249 249 protomap = {'http': httpcheck,
250 250 'https': httpcheck,
251 251 'file': filecheck,
252 252 }
253 253 def issvnurl(ui, url):
254 254 try:
255 255 proto, path = url.split('://', 1)
256 256 if proto == 'file':
257 257 if (pycompat.iswindows and path[:1] == '/'
258 258 and path[1:2].isalpha() and path[2:6].lower() == '%3a/'):
259 259 path = path[:2] + ':/' + path[6:]
260 260 path = urlreq.url2pathname(path)
261 261 except ValueError:
262 262 proto = 'file'
263 263 path = os.path.abspath(url)
264 264 if proto == 'file':
265 265 path = util.pconvert(path)
266 266 check = protomap.get(proto, lambda *args: False)
267 267 while '/' in path:
268 268 if check(ui, path, proto):
269 269 return True
270 270 path = path.rsplit('/', 1)[0]
271 271 return False
272 272
273 273 # SVN conversion code stolen from bzr-svn and tailor
274 274 #
275 275 # Subversion looks like a versioned filesystem; branch structures
276 276 # are defined by convention and not enforced by the tool. First,
277 277 # we define the potential branches (modules) as "trunk" and "branches"
278 278 # children directories. Revisions are then identified by their
279 279 # module and revision number (and a repository identifier).
280 280 #
281 281 # The revision graph is really a tree (or a forest). By default, a
282 282 # revision parent is the previous revision in the same module. If the
283 283 # module directory is copied/moved from another module then the
284 284 # revision is the module root and its parent the source revision in
285 285 # the parent module. A revision has at most one parent.
286 286 #
287 287 class svn_source(converter_source):
288 288 def __init__(self, ui, repotype, url, revs=None):
289 289 super(svn_source, self).__init__(ui, repotype, url, revs=revs)
290 290
291 291 if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
292 292 (os.path.exists(url) and
293 293 os.path.exists(os.path.join(url, '.svn'))) or
294 294 issvnurl(ui, url)):
295 295 raise NoRepo(_("%s does not look like a Subversion repository")
296 296 % url)
297 297 if svn is None:
298 298 raise MissingTool(_('could not load Subversion python bindings'))
299 299
300 300 try:
301 301 version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
302 302 if version < (1, 4):
303 303 raise MissingTool(_('Subversion python bindings %d.%d found, '
304 304 '1.4 or later required') % version)
305 305 except AttributeError:
306 306 raise MissingTool(_('Subversion python bindings are too old, 1.4 '
307 307 'or later required'))
308 308
309 309 self.lastrevs = {}
310 310
311 311 latest = None
312 312 try:
313 313 # Support file://path@rev syntax. Useful e.g. to convert
314 314 # deleted branches.
315 315 at = url.rfind('@')
316 316 if at >= 0:
317 317 latest = int(url[at + 1:])
318 318 url = url[:at]
319 319 except ValueError:
320 320 pass
321 321 self.url = geturl(url)
322 322 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
323 323 try:
324 324 self.transport = transport.SvnRaTransport(url=self.url)
325 325 self.ra = self.transport.ra
326 326 self.ctx = self.transport.client
327 327 self.baseurl = svn.ra.get_repos_root(self.ra)
328 328 # Module is either empty or a repository path starting with
329 329 # a slash and not ending with a slash.
330 330 self.module = urlreq.unquote(self.url[len(self.baseurl):])
331 331 self.prevmodule = None
332 332 self.rootmodule = self.module
333 333 self.commits = {}
334 334 self.paths = {}
335 335 self.uuid = svn.ra.get_uuid(self.ra)
336 336 except svn.core.SubversionException:
337 337 ui.traceback()
338 338 svnversion = '%d.%d.%d' % (svn.core.SVN_VER_MAJOR,
339 339 svn.core.SVN_VER_MINOR,
340 340 svn.core.SVN_VER_MICRO)
341 341 raise NoRepo(_("%s does not look like a Subversion repository "
342 342 "to libsvn version %s")
343 343 % (self.url, svnversion))
344 344
345 345 if revs:
346 346 if len(revs) > 1:
347 347 raise error.Abort(_('subversion source does not support '
348 348 'specifying multiple revisions'))
349 349 try:
350 350 latest = int(revs[0])
351 351 except ValueError:
352 352 raise error.Abort(_('svn: revision %s is not an integer') %
353 353 revs[0])
354 354
355 355 trunkcfg = self.ui.config('convert', 'svn.trunk')
356 356 if trunkcfg is None:
357 357 trunkcfg = 'trunk'
358 358 self.trunkname = trunkcfg.strip('/')
359 359 self.startrev = self.ui.config('convert', 'svn.startrev')
360 360 try:
361 361 self.startrev = int(self.startrev)
362 362 if self.startrev < 0:
363 363 self.startrev = 0
364 364 except ValueError:
365 365 raise error.Abort(_('svn: start revision %s is not an integer')
366 366 % self.startrev)
367 367
368 368 try:
369 369 self.head = self.latest(self.module, latest)
370 370 except SvnPathNotFound:
371 371 self.head = None
372 372 if not self.head:
373 373 raise error.Abort(_('no revision found in module %s')
374 374 % self.module)
375 375 self.last_changed = self.revnum(self.head)
376 376
377 377 self._changescache = (None, None)
378 378
379 379 if os.path.exists(os.path.join(url, '.svn/entries')):
380 380 self.wc = url
381 381 else:
382 382 self.wc = None
383 383 self.convertfp = None
384 384
385 385 def setrevmap(self, revmap):
386 386 lastrevs = {}
387 387 for revid in revmap:
388 388 uuid, module, revnum = revsplit(revid)
389 389 lastrevnum = lastrevs.setdefault(module, revnum)
390 390 if revnum > lastrevnum:
391 391 lastrevs[module] = revnum
392 392 self.lastrevs = lastrevs
393 393
394 394 def exists(self, path, optrev):
395 395 try:
396 396 svn.client.ls(self.url.rstrip('/') + '/' + quote(path),
397 397 optrev, False, self.ctx)
398 398 return True
399 399 except svn.core.SubversionException:
400 400 return False
401 401
402 402 def getheads(self):
403 403
404 404 def isdir(path, revnum):
405 405 kind = self._checkpath(path, revnum)
406 406 return kind == svn.core.svn_node_dir
407 407
408 408 def getcfgpath(name, rev):
409 409 cfgpath = self.ui.config('convert', 'svn.' + name)
410 410 if cfgpath is not None and cfgpath.strip() == '':
411 411 return None
412 412 path = (cfgpath or name).strip('/')
413 413 if not self.exists(path, rev):
414 414 if self.module.endswith(path) and name == 'trunk':
415 415 # we are converting from inside this directory
416 416 return None
417 417 if cfgpath:
418 418 raise error.Abort(_('expected %s to be at %r, but not found'
419 419 ) % (name, path))
420 420 return None
421 421 self.ui.note(_('found %s at %r\n') % (name, path))
422 422 return path
423 423
424 424 rev = optrev(self.last_changed)
425 425 oldmodule = ''
426 426 trunk = getcfgpath('trunk', rev)
427 427 self.tags = getcfgpath('tags', rev)
428 428 branches = getcfgpath('branches', rev)
429 429
430 430 # If the project has a trunk or branches, we will extract heads
431 431 # from them. We keep the project root otherwise.
432 432 if trunk:
433 433 oldmodule = self.module or ''
434 434 self.module += '/' + trunk
435 435 self.head = self.latest(self.module, self.last_changed)
436 436 if not self.head:
437 437 raise error.Abort(_('no revision found in module %s')
438 438 % self.module)
439 439
440 440 # First head in the list is the module's head
441 441 self.heads = [self.head]
442 442 if self.tags is not None:
443 443 self.tags = '%s/%s' % (oldmodule, (self.tags or 'tags'))
444 444
445 445 # Check if branches bring a few more heads to the list
446 446 if branches:
447 447 rpath = self.url.strip('/')
448 448 branchnames = svn.client.ls(rpath + '/' + quote(branches),
449 449 rev, False, self.ctx)
450 450 for branch in sorted(branchnames):
451 451 module = '%s/%s/%s' % (oldmodule, branches, branch)
452 452 if not isdir(module, self.last_changed):
453 453 continue
454 454 brevid = self.latest(module, self.last_changed)
455 455 if not brevid:
456 456 self.ui.note(_('ignoring empty branch %s\n') % branch)
457 457 continue
458 458 self.ui.note(_('found branch %s at %d\n') %
459 459 (branch, self.revnum(brevid)))
460 460 self.heads.append(brevid)
461 461
462 462 if self.startrev and self.heads:
463 463 if len(self.heads) > 1:
464 464 raise error.Abort(_('svn: start revision is not supported '
465 465 'with more than one branch'))
466 466 revnum = self.revnum(self.heads[0])
467 467 if revnum < self.startrev:
468 468 raise error.Abort(
469 469 _('svn: no revision found after start revision %d')
470 470 % self.startrev)
471 471
472 472 return self.heads
473 473
474 474 def _getchanges(self, rev, full):
475 475 (paths, parents) = self.paths[rev]
476 476 copies = {}
477 477 if parents:
478 478 files, self.removed, copies = self.expandpaths(rev, paths, parents)
479 479 if full or not parents:
480 480 # Perform a full checkout on roots
481 481 uuid, module, revnum = revsplit(rev)
482 482 entries = svn.client.ls(self.baseurl + quote(module),
483 483 optrev(revnum), True, self.ctx)
484 484 files = [n for n, e in entries.iteritems()
485 485 if e.kind == svn.core.svn_node_file]
486 486 self.removed = set()
487 487
488 488 files.sort()
489 489 files = zip(files, [rev] * len(files))
490 490 return (files, copies)
491 491
492 492 def getchanges(self, rev, full):
493 493 # reuse cache from getchangedfiles
494 494 if self._changescache[0] == rev and not full:
495 495 (files, copies) = self._changescache[1]
496 496 else:
497 497 (files, copies) = self._getchanges(rev, full)
498 498 # caller caches the result, so free it here to release memory
499 499 del self.paths[rev]
500 500 return (files, copies, set())
501 501
502 502 def getchangedfiles(self, rev, i):
503 503 # called from filemap - cache computed values for reuse in getchanges
504 504 (files, copies) = self._getchanges(rev, False)
505 505 self._changescache = (rev, (files, copies))
506 506 return [f[0] for f in files]
507 507
508 508 def getcommit(self, rev):
509 509 if rev not in self.commits:
510 510 uuid, module, revnum = revsplit(rev)
511 511 self.module = module
512 512 self.reparent(module)
513 513 # We assume that:
514 514 # - requests for revisions after "stop" come from the
515 515 # revision graph backward traversal. Cache all of them
516 516 # down to stop, they will be used eventually.
517 517 # - requests for revisions before "stop" come to get
518 518 # isolated branches parents. Just fetch what is needed.
519 519 stop = self.lastrevs.get(module, 0)
520 520 if revnum < stop:
521 521 stop = revnum + 1
522 522 self._fetch_revisions(revnum, stop)
523 523 if rev not in self.commits:
524 524 raise error.Abort(_('svn: revision %s not found') % revnum)
525 525 revcommit = self.commits[rev]
526 526 # caller caches the result, so free it here to release memory
527 527 del self.commits[rev]
528 528 return revcommit
529 529
530 530 def checkrevformat(self, revstr, mapname='splicemap'):
531 531 """ fails if revision format does not match the correct format"""
532 532 if not re.match(r'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-'
533 533 r'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]'
534 534 r'{12,12}(.*)\@[0-9]+$',revstr):
535 535 raise error.Abort(_('%s entry %s is not a valid revision'
536 536 ' identifier') % (mapname, revstr))
537 537
538 538 def numcommits(self):
539 539 return int(self.head.rsplit('@', 1)[1]) - self.startrev
540 540
541 541 def gettags(self):
542 542 tags = {}
543 543 if self.tags is None:
544 544 return tags
545 545
546 546 # svn tags are just a convention, project branches left in a
547 547 # 'tags' directory. There is no other relationship than
548 548 # ancestry, which is expensive to discover and makes them hard
549 549 # to update incrementally. Worse, past revisions may be
550 550 # referenced by tags far away in the future, requiring a deep
551 551 # history traversal on every calculation. Current code
552 552 # performs a single backward traversal, tracking moves within
553 553 # the tags directory (tag renaming) and recording a new tag
554 554 # every time a project is copied from outside the tags
555 555 # directory. It also lists deleted tags, this behaviour may
556 556 # change in the future.
557 557 pendings = []
558 558 tagspath = self.tags
559 559 start = svn.ra.get_latest_revnum(self.ra)
560 560 stream = self._getlog([self.tags], start, self.startrev)
561 561 try:
562 562 for entry in stream:
563 563 origpaths, revnum, author, date, message = entry
564 564 if not origpaths:
565 565 origpaths = []
566 566 copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
567 567 in origpaths.iteritems() if e.copyfrom_path]
568 568 # Apply moves/copies from more specific to general
569 569 copies.sort(reverse=True)
570 570
571 571 srctagspath = tagspath
572 572 if copies and copies[-1][2] == tagspath:
573 573 # Track tags directory moves
574 574 srctagspath = copies.pop()[0]
575 575
576 576 for source, sourcerev, dest in copies:
577 577 if not dest.startswith(tagspath + '/'):
578 578 continue
579 579 for tag in pendings:
580 580 if tag[0].startswith(dest):
581 581 tagpath = source + tag[0][len(dest):]
582 582 tag[:2] = [tagpath, sourcerev]
583 583 break
584 584 else:
585 585 pendings.append([source, sourcerev, dest])
586 586
587 587 # Filter out tags with children coming from different
588 588 # parts of the repository like:
589 589 # /tags/tag.1 (from /trunk:10)
590 590 # /tags/tag.1/foo (from /branches/foo:12)
591 591 # Here /tags/tag.1 is discarded, as well as its children.
592 592 # It happens with tools like cvs2svn. Such tags cannot
593 593 # be represented in mercurial.
594 594 addeds = dict((p, e.copyfrom_path) for p, e
595 595 in origpaths.iteritems()
596 596 if e.action == 'A' and e.copyfrom_path)
597 597 badroots = set()
598 598 for destroot in addeds:
599 599 for source, sourcerev, dest in pendings:
600 600 if (not dest.startswith(destroot + '/')
601 601 or source.startswith(addeds[destroot] + '/')):
602 602 continue
603 603 badroots.add(destroot)
604 604 break
605 605
606 606 for badroot in badroots:
607 607 pendings = [p for p in pendings if p[2] != badroot
608 608 and not p[2].startswith(badroot + '/')]
609 609
610 610 # Tell tag renamings from tag creations
611 611 renamings = []
612 612 for source, sourcerev, dest in pendings:
613 613 tagname = dest.split('/')[-1]
614 614 if source.startswith(srctagspath):
615 615 renamings.append([source, sourcerev, tagname])
616 616 continue
617 617 if tagname in tags:
618 618 # Keep the latest tag value
619 619 continue
620 620 # From revision may be fake, get one with changes
621 621 try:
622 622 tagid = self.latest(source, sourcerev)
623 623 if tagid and tagname not in tags:
624 624 tags[tagname] = tagid
625 625 except SvnPathNotFound:
626 626 # It happens when we are following directories
627 627 # we assumed were copied with their parents
628 628 # but were really created in the tag
629 629 # directory.
630 630 pass
631 631 pendings = renamings
632 632 tagspath = srctagspath
633 633 finally:
634 634 stream.close()
635 635 return tags
636 636
637 637 def converted(self, rev, destrev):
638 638 if not self.wc:
639 639 return
640 640 if self.convertfp is None:
641 641 self.convertfp = open(os.path.join(self.wc, '.svn', 'hg-shamap'),
642 642 'ab')
643 643 self.convertfp.write(util.tonativeeol('%s %d\n'
644 644 % (destrev, self.revnum(rev))))
645 645 self.convertfp.flush()
646 646
647 647 def revid(self, revnum, module=None):
648 648 return 'svn:%s%s@%s' % (self.uuid, module or self.module, revnum)
649 649
650 650 def revnum(self, rev):
651 651 return int(rev.split('@')[-1])
652 652
653 653 def latest(self, path, stop=None):
654 654 """Find the latest revid affecting path, up to stop revision
655 655 number. If stop is None, default to repository latest
656 656 revision. It may return a revision in a different module,
657 657 since a branch may be moved without a change being
658 658 reported. Return None if computed module does not belong to
659 659 rootmodule subtree.
660 660 """
661 661 def findchanges(path, start, stop=None):
662 662 stream = self._getlog([path], start, stop or 1)
663 663 try:
664 664 for entry in stream:
665 665 paths, revnum, author, date, message = entry
666 666 if stop is None and paths:
667 667 # We do not know the latest changed revision,
668 668 # keep the first one with changed paths.
669 669 break
670 670 if revnum <= stop:
671 671 break
672 672
673 673 for p in paths:
674 674 if (not path.startswith(p) or
675 675 not paths[p].copyfrom_path):
676 676 continue
677 677 newpath = paths[p].copyfrom_path + path[len(p):]
678 678 self.ui.debug("branch renamed from %s to %s at %d\n" %
679 679 (path, newpath, revnum))
680 680 path = newpath
681 681 break
682 682 if not paths:
683 683 revnum = None
684 684 return revnum, path
685 685 finally:
686 686 stream.close()
687 687
688 688 if not path.startswith(self.rootmodule):
689 689 # Requests on foreign branches may be forbidden at server level
690 690 self.ui.debug('ignoring foreign branch %r\n' % path)
691 691 return None
692 692
693 693 if stop is None:
694 694 stop = svn.ra.get_latest_revnum(self.ra)
695 695 try:
696 696 prevmodule = self.reparent('')
697 697 dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
698 698 self.reparent(prevmodule)
699 699 except svn.core.SubversionException:
700 700 dirent = None
701 701 if not dirent:
702 702 raise SvnPathNotFound(_('%s not found up to revision %d')
703 703 % (path, stop))
704 704
705 705 # stat() gives us the previous revision on this line of
706 706 # development, but it might be in *another module*. Fetch the
707 707 # log and detect renames down to the latest revision.
708 708 revnum, realpath = findchanges(path, stop, dirent.created_rev)
709 709 if revnum is None:
710 710 # Tools like svnsync can create empty revisions when
711 711 # synchronizing only a subtree, for instance. Such empty
712 712 # revisions keep their original created_rev values even
713 713 # though all changes have disappeared, and can be
714 714 # returned by ra.stat(), at least when stating the root
715 715 # module. In that case, do not trust created_rev and scan
716 716 # the whole history.
717 717 revnum, realpath = findchanges(path, stop)
718 718 if revnum is None:
719 719 self.ui.debug('ignoring empty branch %r\n' % realpath)
720 720 return None
721 721
722 722 if not realpath.startswith(self.rootmodule):
723 723 self.ui.debug('ignoring foreign branch %r\n' % realpath)
724 724 return None
725 725 return self.revid(revnum, realpath)
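findchanges() follows branch renames by splicing the copyfrom_path of a changed ancestor onto the queried path. That rewrite step, isolated with made-up paths:

# Sketch of the rename-following rewrite, hypothetical paths:
path = '/branches/new/foo/bar.c'
p = '/branches/new'                  # changed path reported by the log
copyfrom_path = '/branches/old'      # where it was copied from
newpath = copyfrom_path + path[len(p):]
assert newpath == '/branches/old/foo/bar.c'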
726 726
727 727 def reparent(self, module):
728 728 """Reparent the svn transport and return the previous parent."""
729 729 if self.prevmodule == module:
730 730 return module
731 731 svnurl = self.baseurl + quote(module)
732 732 prevmodule = self.prevmodule
733 733 if prevmodule is None:
734 734 prevmodule = ''
735 735 self.ui.debug("reparent to %s\n" % svnurl)
736 736 svn.ra.reparent(self.ra, svnurl)
737 737 self.prevmodule = module
738 738 return prevmodule
739 739
740 740 def expandpaths(self, rev, paths, parents):
741 741 changed, removed = set(), set()
742 742 copies = {}
743 743
744 744 new_module, revnum = revsplit(rev)[1:]
745 745 if new_module != self.module:
746 746 self.module = new_module
747 747 self.reparent(self.module)
748 748
749 749 for i, (path, ent) in enumerate(paths):
750 750 self.ui.progress(_('scanning paths'), i, item=path,
751 751 total=len(paths), unit=_('paths'))
752 752 entrypath = self.getrelpath(path)
753 753
754 754 kind = self._checkpath(entrypath, revnum)
755 755 if kind == svn.core.svn_node_file:
756 756 changed.add(self.recode(entrypath))
757 757 if not ent.copyfrom_path or not parents:
758 758 continue
759 759 # Copy sources not in parent revisions cannot be
760 760 # represented, ignore their origin for now
761 761 pmodule, prevnum = revsplit(parents[0])[1:]
762 762 if ent.copyfrom_rev < prevnum:
763 763 continue
764 764 copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
765 765 if not copyfrom_path:
766 766 continue
767 767 self.ui.debug("copied to %s from %s@%s\n" %
768 768 (entrypath, copyfrom_path, ent.copyfrom_rev))
769 769 copies[self.recode(entrypath)] = self.recode(copyfrom_path)
770 770 elif kind == 0: # gone, but had better be a deleted *file*
771 771 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
772 772 pmodule, prevnum = revsplit(parents[0])[1:]
773 773 parentpath = pmodule + "/" + entrypath
774 774 fromkind = self._checkpath(entrypath, prevnum, pmodule)
775 775
776 776 if fromkind == svn.core.svn_node_file:
777 777 removed.add(self.recode(entrypath))
778 778 elif fromkind == svn.core.svn_node_dir:
779 779 oroot = parentpath.strip('/')
780 780 nroot = path.strip('/')
781 781 children = self._iterfiles(oroot, prevnum)
782 782 for childpath in children:
783 783 childpath = childpath.replace(oroot, nroot)
784 784 childpath = self.getrelpath("/" + childpath, pmodule)
785 785 if childpath:
786 786 removed.add(self.recode(childpath))
787 787 else:
788 788 self.ui.debug('unknown path in revision %d: %s\n' % \
789 789 (revnum, path))
790 790 elif kind == svn.core.svn_node_dir:
791 791 if ent.action == 'M':
792 792 # If the directory just had a prop change,
793 793 # then we shouldn't need to look for its children.
794 794 continue
795 795 if ent.action == 'R' and parents:
796 796 # If a directory is replacing a file, mark the previous
797 797 # file as deleted
798 798 pmodule, prevnum = revsplit(parents[0])[1:]
799 799 pkind = self._checkpath(entrypath, prevnum, pmodule)
800 800 if pkind == svn.core.svn_node_file:
801 801 removed.add(self.recode(entrypath))
802 802 elif pkind == svn.core.svn_node_dir:
803 803 # We do not know what files were kept or removed,
804 804 # mark them all as changed.
805 805 for childpath in self._iterfiles(pmodule, prevnum):
806 806 childpath = self.getrelpath("/" + childpath)
807 807 if childpath:
808 808 changed.add(self.recode(childpath))
809 809
810 810 for childpath in self._iterfiles(path, revnum):
811 811 childpath = self.getrelpath("/" + childpath)
812 812 if childpath:
813 813 changed.add(self.recode(childpath))
814 814
815 815 # Handle directory copies
816 816 if not ent.copyfrom_path or not parents:
817 817 continue
818 818 # Copy sources not in parent revisions cannot be
819 819 # represented, ignore their origin for now
820 820 pmodule, prevnum = revsplit(parents[0])[1:]
821 821 if ent.copyfrom_rev < prevnum:
822 822 continue
823 823 copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
824 824 if not copyfrompath:
825 825 continue
826 826 self.ui.debug("mark %s came from %s:%d\n"
827 827 % (path, copyfrompath, ent.copyfrom_rev))
828 828 children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
829 829 for childpath in children:
830 830 childpath = self.getrelpath("/" + childpath, pmodule)
831 831 if not childpath:
832 832 continue
833 833 copytopath = path + childpath[len(copyfrompath):]
834 834 copytopath = self.getrelpath(copytopath)
835 835 copies[self.recode(copytopath)] = self.recode(childpath)
836 836
837 837 self.ui.progress(_('scanning paths'), None)
838 838 changed.update(removed)
839 839 return (list(changed), removed, copies)
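When a deleted directory used to be a parent-side directory, the loop above rebuilds each child's old path by swapping the directory roots. That swap, isolated with made-up paths:

# Sketch of the root swap in the deleted-directory branch above:
oroot = 'trunk/olddir'               # parent-side root, stripped of '/'
nroot = 'trunk/newdir'               # current-side root
childpath = 'trunk/olddir/a/b.c'     # one file enumerated under oroot
assert childpath.replace(oroot, nroot) == 'trunk/newdir/a/b.c'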
840 840
841 841 def _fetch_revisions(self, from_revnum, to_revnum):
842 842 if from_revnum < to_revnum:
843 843 from_revnum, to_revnum = to_revnum, from_revnum
844 844
845 845 self.child_cset = None
846 846
847 847 def parselogentry(orig_paths, revnum, author, date, message):
848 848 """Return the parsed commit object or None, and True if
849 849 the revision is a branch root.
850 850 """
851 851 self.ui.debug("parsing revision %d (%d changes)\n" %
852 852 (revnum, len(orig_paths)))
853 853
854 854 branched = False
855 855 rev = self.revid(revnum)
856 856 # branch log might return entries for a parent we already have
857 857
858 858 if rev in self.commits or revnum < to_revnum:
859 859 return None, branched
860 860
861 861 parents = []
862 862 # check whether this revision is the start of a branch or part
863 863 # of a branch renaming
864 864 orig_paths = sorted(orig_paths.iteritems())
865 865 root_paths = [(p, e) for p, e in orig_paths
866 866 if self.module.startswith(p)]
867 867 if root_paths:
868 868 path, ent = root_paths[-1]
869 869 if ent.copyfrom_path:
870 870 branched = True
871 871 newpath = ent.copyfrom_path + self.module[len(path):]
872 872 # ent.copyfrom_rev may not be the actual last revision
873 873 previd = self.latest(newpath, ent.copyfrom_rev)
874 874 if previd is not None:
875 875 prevmodule, prevnum = revsplit(previd)[1:]
876 876 if prevnum >= self.startrev:
877 877 parents = [previd]
878 878 self.ui.note(
879 879 _('found parent of branch %s at %d: %s\n') %
880 880 (self.module, prevnum, prevmodule))
881 881 else:
882 882 self.ui.debug("no copyfrom path, don't know what to do.\n")
883 883
884 884 paths = []
885 885 # filter out unrelated paths
886 886 for path, ent in orig_paths:
887 887 if self.getrelpath(path) is None:
888 888 continue
889 889 paths.append((path, ent))
890 890
891 891 # Example SVN datetime. Includes microseconds.
892 892 # ISO-8601 conformant
893 893 # '2007-01-04T17:35:00.902377Z'
894 894 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
895 895 if self.ui.configbool('convert', 'localtimezone'):
896 896 date = makedatetimestamp(date[0])
897 897
898 898 if message:
899 899 log = self.recode(message)
900 900 else:
901 901 log = ''
902 902
903 903 if author:
904 904 author = self.recode(author)
905 905 else:
906 906 author = ''
907 907
908 908 try:
909 909 branch = self.module.split("/")[-1]
910 910 if branch == self.trunkname:
911 911 branch = None
912 912 except IndexError:
913 913 branch = None
914 914
915 915 cset = commit(author=author,
916 916 date=util.datestr(date, '%Y-%m-%d %H:%M:%S %1%2'),
917 917 desc=log,
918 918 parents=parents,
919 919 branch=branch,
920 920 rev=rev)
921 921
922 922 self.commits[rev] = cset
923 923 # The parents list is *shared* among self.paths and the
924 924 # commit object. Both will be updated below.
925 925 self.paths[rev] = (paths, cset.parents)
926 926 if self.child_cset and not self.child_cset.parents:
927 927 self.child_cset.parents[:] = [rev]
928 928 self.child_cset = cset
929 929 return cset, branched
930 930
931 931 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
932 932 (self.module, from_revnum, to_revnum))
933 933
934 934 try:
935 935 firstcset = None
936 936 lastonbranch = False
937 937 stream = self._getlog([self.module], from_revnum, to_revnum)
938 938 try:
939 939 for entry in stream:
940 940 paths, revnum, author, date, message = entry
941 941 if revnum < self.startrev:
942 942 lastonbranch = True
943 943 break
944 944 if not paths:
945 945 self.ui.debug('revision %d has no entries\n' % revnum)
946 946 # If we ever leave the loop on an empty
947 947 # revision, do not try to get a parent branch
948 948 lastonbranch = lastonbranch or revnum == 0
949 949 continue
950 950 cset, lastonbranch = parselogentry(paths, revnum, author,
951 951 date, message)
952 952 if cset:
953 953 firstcset = cset
954 954 if lastonbranch:
955 955 break
956 956 finally:
957 957 stream.close()
958 958
959 959 if not lastonbranch and firstcset and not firstcset.parents:
960 960 # The first revision of the sequence (the last fetched one)
961 961 # has invalid parents if not a branch root. Find the parent
962 962 # revision now, if any.
963 963 try:
964 964 firstrevnum = self.revnum(firstcset.rev)
965 965 if firstrevnum > 1:
966 966 latest = self.latest(self.module, firstrevnum - 1)
967 967 if latest:
968 968 firstcset.parents.append(latest)
969 969 except SvnPathNotFound:
970 970 pass
971 971 except svn.core.SubversionException as xxx_todo_changeme:
972 972 (inst, num) = xxx_todo_changeme.args
973 973 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
974 974 raise error.Abort(_('svn: branch has no revision %s')
975 975 % to_revnum)
976 976 raise
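parselogentry() above truncates the ISO-8601 timestamp before parsing. A quick check of that slice, using the example value from the comment:

# The datetime truncation performed before util.parsedate:
date = '2007-01-04T17:35:00.902377Z'       # example from the comment above
assert date[:19] == '2007-01-04T17:35:00'  # microseconds and 'Z' dropped
# date[:19] + ' UTC' is then parsed with the format '%Y-%m-%dT%H:%M:%S'.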
977 977
978 978 def getfile(self, file, rev):
979 979 # TODO: ra.get_file transmits the whole file instead of diffs.
980 980 if file in self.removed:
981 981 return None, None
982 982 mode = ''
983 983 try:
984 984 new_module, revnum = revsplit(rev)[1:]
985 985 if self.module != new_module:
986 986 self.module = new_module
987 987 self.reparent(self.module)
988 988 io = stringio()
989 989 info = svn.ra.get_file(self.ra, file, revnum, io)
990 990 data = io.getvalue()
991 991 # ra.get_file() seems to keep a reference on the input buffer
992 992 # preventing collection. Release it explicitly.
993 993 io.close()
994 994 if isinstance(info, list):
995 995 info = info[-1]
996 996 mode = ("svn:executable" in info) and 'x' or ''
997 997 mode = ("svn:special" in info) and 'l' or mode
998 998 except svn.core.SubversionException as e:
999 999 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
1000 1000 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
1001 1001 if e.apr_err in notfound: # File not found
1002 1002 return None, None
1003 1003 raise
1004 1004 if mode == 'l':
1005 1005 link_prefix = "link "
1006 1006 if data.startswith(link_prefix):
1007 1007 data = data[len(link_prefix):]
1008 1008 return data, mode
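Subversion stores svn:special symlinks as blobs prefixed with "link "; the stripping done above, isolated as a sketch:

# Sketch of the symlink payload convention, with a made-up target:
data, mode = 'link some/target', 'l'
link_prefix = 'link '
if mode == 'l' and data.startswith(link_prefix):
    data = data[len(link_prefix):]
assert data == 'some/target'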
1009 1009
1010 1010 def _iterfiles(self, path, revnum):
1011 1011 """Enumerate all files in path at revnum, recursively."""
1012 1012 path = path.strip('/')
1013 1013 pool = svn.core.Pool()
1014 1014 rpath = '/'.join([self.baseurl, quote(path)]).strip('/')
1015 1015 entries = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
1016 1016 if path:
1017 1017 path += '/'
1018 1018 return ((path + p) for p, e in entries.iteritems()
1019 1019 if e.kind == svn.core.svn_node_file)
1020 1020
1021 1021 def getrelpath(self, path, module=None):
1022 1022 if module is None:
1023 1023 module = self.module
1024 1024 # Given the repository url of this wc, say
1025 1025 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
1026 1026 # extract the "entry" portion (a relative path) from what
1027 1027 # svn log --xml says, i.e.
1028 1028 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
1029 1029 # that is to say "tests/PloneTestCase.py"
1030 1030 if path.startswith(module):
1031 1031 relative = path.rstrip('/')[len(module):]
1032 1032 if relative.startswith('/'):
1033 1033 return relative[1:]
1034 1034 elif relative == '':
1035 1035 return relative
1036 1036
1037 1037 # The path is outside our tracked tree...
1038 1038 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
1039 1039 return None
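The docstring example, replayed as the plain string operations getrelpath() performs:

# Replaying the comment's example:
module = '/CMFPlone/branches/Plone-2_0-branch'
path = '/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py'
relative = path.rstrip('/')[len(module):]
assert relative == '/tests/PloneTestCase.py'
assert relative[1:] == 'tests/PloneTestCase.py'   # what getrelpath returns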
1040 1040
1041 1041 def _checkpath(self, path, revnum, module=None):
1042 1042 if module is not None:
1043 1043 prevmodule = self.reparent('')
1044 1044 path = module + '/' + path
1045 1045 try:
1046 1046 # ra.check_path does not like leading slashes very much, it leads
1047 1047 # to PROPFIND subversion errors
1048 1048 return svn.ra.check_path(self.ra, path.strip('/'), revnum)
1049 1049 finally:
1050 1050 if module is not None:
1051 1051 self.reparent(prevmodule)
1052 1052
1053 1053 def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
1054 1054 strict_node_history=False):
1055 1055 # Normalize path names; svn >= 1.5 only wants paths relative to
1056 1056 # the supplied URL
1057 1057 relpaths = []
1058 1058 for p in paths:
1059 1059 if not p.startswith('/'):
1060 1060 p = self.module + '/' + p
1061 1061 relpaths.append(p.strip('/'))
1062 1062 args = [self.baseurl, relpaths, start, end, limit,
1063 1063 discover_changed_paths, strict_node_history]
1064 1064 # developer config: convert.svn.debugsvnlog
1065 1065 if not self.ui.configbool('convert', 'svn.debugsvnlog'):
1066 1066 return directlogstream(*args)
1067 1067 arg = encodeargs(args)
1068 1068 hgexe = util.hgexecutable()
1069 1069 cmd = '%s debugsvnlog' % util.shellquote(hgexe)
1070 1070 stdin, stdout = util.popen2(util.quotecommand(cmd))
1071 1071 stdin.write(arg)
1072 1072 try:
1073 1073 stdin.close()
1074 1074 except IOError:
1075 1075 raise error.Abort(_('Mercurial failed to run itself, check'
1076 1076 ' hg executable is in PATH'))
1077 1077 return logstream(stdout)
1078 1078
1079 1079 pre_revprop_change = '''#!/bin/sh
1080 1080
1081 1081 REPOS="$1"
1082 1082 REV="$2"
1083 1083 USER="$3"
1084 1084 PROPNAME="$4"
1085 1085 ACTION="$5"
1086 1086
1087 1087 if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
1088 1088 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
1089 1089 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi
1090 1090
1091 1091 echo "Changing prohibited revision property" >&2
1092 1092 exit 1
1093 1093 '''
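The hook above whitelists exactly three (action, property) pairs and rejects everything else. The same table as a small Python sketch, with the pairs taken straight from the script:

# (ACTION, PROPNAME) pairs the pre-revprop-change hook exits 0 for:
allowed = {('M', 'svn:log'),
           ('A', 'hg:convert-branch'),
           ('A', 'hg:convert-rev')}
assert ('M', 'svn:log') in allowed
assert ('M', 'svn:date') not in allowed   # any other pair exits 1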
1094 1094
1095 1095 class svn_sink(converter_sink, commandline):
1096 1096 commit_re = re.compile(r'Committed revision (\d+).', re.M)
1097 1097 uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)
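commit_re above scrapes the new revision number out of svn's commit output. A quick check against made-up output text:

import re

# Sketch: the sample output below is illustrative, not captured from svn.
commit_re = re.compile(r'Committed revision (\d+).', re.M)
output = 'Sending foo.c\nTransmitting file data .\nCommitted revision 7.\n'
assert commit_re.search(output).group(1) == '7'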
1098 1098
1099 1099 def prerun(self):
1100 1100 if self.wc:
1101 1101 os.chdir(self.wc)
1102 1102
1103 1103 def postrun(self):
1104 1104 if self.wc:
1105 1105 os.chdir(self.cwd)
1106 1106
1107 1107 def join(self, name):
1108 1108 return os.path.join(self.wc, '.svn', name)
1109 1109
1110 1110 def revmapfile(self):
1111 1111 return self.join('hg-shamap')
1112 1112
1113 1113 def authorfile(self):
1114 1114 return self.join('hg-authormap')
1115 1115
1116 1116 def __init__(self, ui, repotype, path):
1117 1117
1118 1118 converter_sink.__init__(self, ui, repotype, path)
1119 1119 commandline.__init__(self, ui, 'svn')
1120 1120 self.delete = []
1121 1121 self.setexec = []
1122 1122 self.delexec = []
1123 1123 self.copies = []
1124 1124 self.wc = None
1125 1125 self.cwd = pycompat.getcwd()
1126 1126
1127 1127 created = False
1128 1128 if os.path.isfile(os.path.join(path, '.svn', 'entries')):
1129 1129 self.wc = os.path.realpath(path)
1130 1130 self.run0('update')
1131 1131 else:
1132 if not re.search(r'^(file|http|https|svn|svn\+ssh)\://', path):
1132 if not re.search(br'^(file|http|https|svn|svn\+ssh)\://', path):
1133 1133 path = os.path.realpath(path)
1134 1134 if os.path.isdir(os.path.dirname(path)):
1135 1135 if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
1136 1136 ui.status(_('initializing svn repository %r\n') %
1137 1137 os.path.basename(path))
1138 1138 commandline(ui, 'svnadmin').run0('create', path)
1139 1139 created = path
1140 1140 path = util.normpath(path)
1141 1141 if not path.startswith('/'):
1142 1142 path = '/' + path
1143 1143 path = 'file://' + path
1144 1144
1145 1145 wcpath = os.path.join(pycompat.getcwd(), os.path.basename(path) +
1146 1146 '-wc')
1147 1147 ui.status(_('initializing svn working copy %r\n')
1148 1148 % os.path.basename(wcpath))
1149 1149 self.run0('checkout', path, wcpath)
1150 1150
1151 1151 self.wc = wcpath
1152 1152 self.opener = vfsmod.vfs(self.wc)
1153 1153 self.wopener = vfsmod.vfs(self.wc)
1154 1154 self.childmap = mapfile(ui, self.join('hg-childmap'))
1155 1155 if util.checkexec(self.wc):
1156 1156 self.is_exec = util.isexec
1157 1157 else:
1158 1158 self.is_exec = None
1159 1159
1160 1160 if created:
1161 1161 hook = os.path.join(created, 'hooks', 'pre-revprop-change')
1162 1162 fp = open(hook, 'wb')
1163 1163 fp.write(pre_revprop_change)
1164 1164 fp.close()
1165 1165 util.setflags(hook, False, True)
1166 1166
1167 1167 output = self.run0('info')
1168 1168 self.uuid = self.uuid_re.search(output).group(1).strip()
1169 1169
1170 1170 def wjoin(self, *names):
1171 1171 return os.path.join(self.wc, *names)
1172 1172
1173 1173 @propertycache
1174 1174 def manifest(self):
1175 1175 # As of svn 1.7, the "add" command fails when receiving
1176 1176 # already tracked entries, so we have to track and filter them
1177 1177 # ourselves.
1178 1178 m = set()
1179 1179 output = self.run0('ls', recursive=True, xml=True)
1180 1180 doc = xml.dom.minidom.parseString(output)
1181 1181 for e in doc.getElementsByTagName('entry'):
1182 1182 for n in e.childNodes:
1183 1183 if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name':
1184 1184 continue
1185 1185 name = ''.join(c.data for c in n.childNodes
1186 1186 if c.nodeType == c.TEXT_NODE)
1187 1187 # Entries are compared with names coming from
1188 1188 # mercurial, so they are bytes with undefined encoding. Our
1189 1189 # best bet is to assume they are in local
1190 1190 # encoding. They will be passed to command line calls
1191 1191 # later anyway, so they had better be.
1192 1192 m.add(encoding.unitolocal(name))
1193 1193 break
1194 1194 return m
1195 1195
1196 1196 def putfile(self, filename, flags, data):
1197 1197 if 'l' in flags:
1198 1198 self.wopener.symlink(data, filename)
1199 1199 else:
1200 1200 try:
1201 1201 if os.path.islink(self.wjoin(filename)):
1202 1202 os.unlink(filename)
1203 1203 except OSError:
1204 1204 pass
1205 1205 self.wopener.write(filename, data)
1206 1206
1207 1207 if self.is_exec:
1208 1208 if self.is_exec(self.wjoin(filename)):
1209 1209 if 'x' not in flags:
1210 1210 self.delexec.append(filename)
1211 1211 else:
1212 1212 if 'x' in flags:
1213 1213 self.setexec.append(filename)
1214 1214 util.setflags(self.wjoin(filename), False, 'x' in flags)
1215 1215
1216 1216 def _copyfile(self, source, dest):
1217 1217 # SVN's copy command pukes if the destination file exists, but
1218 1218 # our copyfile method expects to record a copy that has
1219 1219 # already occurred. Cross the semantic gap.
1220 1220 wdest = self.wjoin(dest)
1221 1221 exists = os.path.lexists(wdest)
1222 1222 if exists:
1223 1223 fd, tempname = tempfile.mkstemp(
1224 1224 prefix='hg-copy-', dir=os.path.dirname(wdest))
1225 1225 os.close(fd)
1226 1226 os.unlink(tempname)
1227 1227 os.rename(wdest, tempname)
1228 1228 try:
1229 1229 self.run0('copy', source, dest)
1230 1230 finally:
1231 1231 self.manifest.add(dest)
1232 1232 if exists:
1233 1233 try:
1234 1234 os.unlink(wdest)
1235 1235 except OSError:
1236 1236 pass
1237 1237 os.rename(tempname, wdest)
1238 1238
1239 1239 def dirs_of(self, files):
1240 1240 dirs = set()
1241 1241 for f in files:
1242 1242 if os.path.isdir(self.wjoin(f)):
1243 1243 dirs.add(f)
1244 1244 i = len(f)
1245 1245 for i in iter(lambda: f.rfind('/', 0, i), -1):
1246 1246 dirs.add(f[:i])
1247 1247 return dirs
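The rfind() loop in dirs_of() enumerates every ancestor directory of a path. Isolated with one made-up path (the isdir check is skipped here):

# Sketch of the ancestor enumeration for a single path:
f = 'foo/bar/baz.c'
dirs = set()
i = len(f)
for i in iter(lambda: f.rfind('/', 0, i), -1):
    dirs.add(f[:i])
assert dirs == {'foo', 'foo/bar'}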
1248 1248
1249 1249 def add_dirs(self, files):
1250 1250 add_dirs = [d for d in sorted(self.dirs_of(files))
1251 1251 if d not in self.manifest]
1252 1252 if add_dirs:
1253 1253 self.manifest.update(add_dirs)
1254 1254 self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
1255 1255 return add_dirs
1256 1256
1257 1257 def add_files(self, files):
1258 1258 files = [f for f in files if f not in self.manifest]
1259 1259 if files:
1260 1260 self.manifest.update(files)
1261 1261 self.xargs(files, 'add', quiet=True)
1262 1262 return files
1263 1263
1264 1264 def addchild(self, parent, child):
1265 1265 self.childmap[parent] = child
1266 1266
1267 1267 def revid(self, rev):
1268 1268 return u"svn:%s@%s" % (self.uuid, rev)
1269 1269
1270 1270 def putcommit(self, files, copies, parents, commit, source, revmap, full,
1271 1271 cleanp2):
1272 1272 for parent in parents:
1273 1273 try:
1274 1274 return self.revid(self.childmap[parent])
1275 1275 except KeyError:
1276 1276 pass
1277 1277
1278 1278 # Apply changes to working copy
1279 1279 for f, v in files:
1280 1280 data, mode = source.getfile(f, v)
1281 1281 if data is None:
1282 1282 self.delete.append(f)
1283 1283 else:
1284 1284 self.putfile(f, mode, data)
1285 1285 if f in copies:
1286 1286 self.copies.append([copies[f], f])
1287 1287 if full:
1288 1288 self.delete.extend(sorted(self.manifest.difference(files)))
1289 1289 files = [f[0] for f in files]
1290 1290
1291 1291 entries = set(self.delete)
1292 1292 files = frozenset(files)
1293 1293 entries.update(self.add_dirs(files.difference(entries)))
1294 1294 if self.copies:
1295 1295 for s, d in self.copies:
1296 1296 self._copyfile(s, d)
1297 1297 self.copies = []
1298 1298 if self.delete:
1299 1299 self.xargs(self.delete, 'delete')
1300 1300 for f in self.delete:
1301 1301 self.manifest.remove(f)
1302 1302 self.delete = []
1303 1303 entries.update(self.add_files(files.difference(entries)))
1304 1304 if self.delexec:
1305 1305 self.xargs(self.delexec, 'propdel', 'svn:executable')
1306 1306 self.delexec = []
1307 1307 if self.setexec:
1308 1308 self.xargs(self.setexec, 'propset', 'svn:executable', '*')
1309 1309 self.setexec = []
1310 1310
1311 1311 fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
1312 1312 fp = os.fdopen(fd, pycompat.sysstr('wb'))
1313 1313 fp.write(util.tonativeeol(commit.desc))
1314 1314 fp.close()
1315 1315 try:
1316 1316 output = self.run0('commit',
1317 1317 username=util.shortuser(commit.author),
1318 1318 file=messagefile,
1319 1319 encoding='utf-8')
1320 1320 try:
1321 1321 rev = self.commit_re.search(output).group(1)
1322 1322 except AttributeError:
1323 1323 if parents and not files:
1324 1324 return parents[0]
1325 1325 self.ui.warn(_('unexpected svn output:\n'))
1326 1326 self.ui.warn(output)
1327 1327 raise error.Abort(_('unable to cope with svn output'))
1328 1328 if commit.rev:
1329 1329 self.run('propset', 'hg:convert-rev', commit.rev,
1330 1330 revprop=True, revision=rev)
1331 1331 if commit.branch and commit.branch != 'default':
1332 1332 self.run('propset', 'hg:convert-branch', commit.branch,
1333 1333 revprop=True, revision=rev)
1334 1334 for parent in parents:
1335 1335 self.addchild(parent, rev)
1336 1336 return self.revid(rev)
1337 1337 finally:
1338 1338 os.unlink(messagefile)
1339 1339
1340 1340 def puttags(self, tags):
1341 1341 self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
1342 1342 return None, None
1343 1343
1344 1344 def hascommitfrommap(self, rev):
1345 1345 # We trust that revisions referenced in a map are still present
1346 1346 # TODO: implement something better if necessary and feasible
1347 1347 return True
1348 1348
1349 1349 def hascommitforsplicemap(self, rev):
1350 1350 # This is not correct as one can convert to an existing subversion
1351 1351 # repository and childmap would not list all revisions. Too bad.
1352 1352 if rev in self.childmap:
1353 1353 return True
1354 1354 raise error.Abort(_('splice map revision %s not found in subversion '
1355 1355 'child map (revision lookups are not implemented)')
1356 1356 % rev)
@@ -1,481 +1,481 b''
1 1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import json
12 12 import os
13 13 import re
14 14 import socket
15 15
16 16 from mercurial.i18n import _
17 17
18 18 from mercurial import (
19 19 error,
20 20 pathutil,
21 21 url as urlmod,
22 22 util,
23 23 vfs as vfsmod,
24 24 worker,
25 25 )
26 26
27 27 from ..largefiles import lfutil
28 28
29 29 # 64 bytes for SHA256
30 _lfsre = re.compile(r'\A[a-f0-9]{64}\Z')
30 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
31 31
32 32 class lfsvfs(vfsmod.vfs):
33 33 def join(self, path):
34 34 """split the path at first two characters, like: XX/XXXXX..."""
35 35 if not _lfsre.match(path):
36 36 raise error.ProgrammingError('unexpected lfs path: %s' % path)
37 37 return super(lfsvfs, self).join(path[0:2], path[2:])
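A small sketch of the two conventions at work here: the byte-string oid regex from the top of the file, and join()'s two-character sharding (the oid value is made up):

import re

_lfsre = re.compile(br'\A[a-f0-9]{64}\Z')   # same pattern as above
oid = b'0b' * 32                            # well-formed 64-hex-char oid
assert _lfsre.match(oid)
# join() shards it into 'XX/XXXX...':
assert (oid[0:2], oid[2:]) == (b'0b', b'0b' * 31)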
38 38
39 39 def walk(self, path=None, onerror=None):
40 40 """Yield (dirpath, [], oids) tuple for blobs under path
41 41
42 42 Oids only exist in the root of this vfs, so dirpath is always ''.
43 43 """
44 44 root = os.path.normpath(self.base)
45 45 # when dirpath == root, dirpath[prefixlen:] becomes empty
46 46 # because len(dirpath) < prefixlen.
47 47 prefixlen = len(pathutil.normasprefix(root))
48 48 oids = []
49 49
50 50 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
51 51 onerror=onerror):
52 52 dirpath = dirpath[prefixlen:]
53 53
54 54 # Silently skip unexpected files and directories
55 55 if len(dirpath) == 2:
56 56 oids.extend([dirpath + f for f in files
57 57 if _lfsre.match(dirpath + f)])
58 58
59 59 yield ('', [], oids)
60 60
61 61 class filewithprogress(object):
62 62 """a file-like object that supports __len__ and read.
63 63
64 64 Useful to provide progress information for how many bytes are read.
65 65 """
66 66
67 67 def __init__(self, fp, callback):
68 68 self._fp = fp
69 69 self._callback = callback # func(readsize)
70 70 fp.seek(0, os.SEEK_END)
71 71 self._len = fp.tell()
72 72 fp.seek(0)
73 73
74 74 def __len__(self):
75 75 return self._len
76 76
77 77 def read(self, size):
78 78 if self._fp is None:
79 79 return b''
80 80 data = self._fp.read(size)
81 81 if data:
82 82 if self._callback:
83 83 self._callback(len(data))
84 84 else:
85 85 self._fp.close()
86 86 self._fp = None
87 87 return data
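A minimal use of filewithprogress with an in-memory file, showing when the callback fires and when the source is closed (a sketch, assuming the class above is in scope):

import io

seen = []
fp = filewithprogress(io.BytesIO(b'abcdef'), seen.append)
assert len(fp) == 6
assert fp.read(4) == b'abcd' and seen == [4]
assert fp.read(4) == b'ef' and seen == [4, 2]
assert fp.read(4) == b''    # source exhausted and closed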
88 88
89 89 class local(object):
90 90 """Local blobstore for large file contents.
91 91
92 92 This blobstore is used both as a cache and as a staging area for large blobs
93 93 to be uploaded to the remote blobstore.
94 94 """
95 95
96 96 def __init__(self, repo):
97 97 fullpath = repo.svfs.join('lfs/objects')
98 98 self.vfs = lfsvfs(fullpath)
99 99 usercache = lfutil._usercachedir(repo.ui, 'lfs')
100 100 self.cachevfs = lfsvfs(usercache)
101 101 self.ui = repo.ui
102 102
103 103 def open(self, oid):
104 104 """Open a read-only file descriptor to the named blob, in either the
105 105 usercache or the local store."""
106 106 # The usercache is the most likely place to hold the file. Commit will
107 107 # write to both it and the local store, as will anything that downloads
108 108 # the blobs. However, things like clone without an update won't
109 109 # populate the local store. For an init + push of a local clone,
110 110 # the usercache is the only place it _could_ be. If not present, the
111 111 # missing file message here will indicate the local repo, not the usercache.
112 112 if self.cachevfs.exists(oid):
113 113 return self.cachevfs(oid, 'rb')
114 114
115 115 return self.vfs(oid, 'rb')
116 116
117 117 def download(self, oid, src):
118 118 """Read the blob from the remote source in chunks, verify the content,
119 119 and write to this local blobstore."""
120 120 sha256 = hashlib.sha256()
121 121
122 122 with self.vfs(oid, 'wb', atomictemp=True) as fp:
123 123 for chunk in util.filechunkiter(src, size=1048576):
124 124 fp.write(chunk)
125 125 sha256.update(chunk)
126 126
127 127 realoid = sha256.hexdigest()
128 128 if realoid != oid:
129 129 raise error.Abort(_('corrupt remote lfs object: %s') % oid)
130 130
131 131 # XXX: should we verify the content of the cache, and hardlink back to
132 132 # the local store on success, but truncate, write and link on failure?
133 133 if not self.cachevfs.exists(oid):
134 134 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
135 135 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
136 136
137 137 def write(self, oid, data):
138 138 """Write blob to local blobstore.
139 139
140 140 This should only be called from the filelog during a commit or similar.
141 141 As such, there is no need to verify the data. Imports from a remote
142 142 store must use ``download()`` instead."""
143 143 with self.vfs(oid, 'wb', atomictemp=True) as fp:
144 144 fp.write(data)
145 145
146 146 # XXX: should we verify the content of the cache, and hardlink back to
147 147 # the local store on success, but truncate, write and link on failure?
148 148 if not self.cachevfs.exists(oid):
149 149 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
150 150 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
151 151
152 152 def read(self, oid, verify=True):
153 153 """Read blob from local blobstore."""
154 154 if not self.vfs.exists(oid):
155 155 blob = self._read(self.cachevfs, oid, verify)
156 156
157 157 # Even if revlog will verify the content, it needs to be verified
158 158 # now before making the hardlink to avoid propagating corrupt blobs.
159 159 # Don't abort if corruption is detected, because `hg verify` will
160 160 # give more useful info about the corruption- simply don't add the
161 161 # hardlink.
162 162 if verify or hashlib.sha256(blob).hexdigest() == oid:
163 163 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
164 164 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
165 165 else:
166 166 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
167 167 blob = self._read(self.vfs, oid, verify)
168 168 return blob
169 169
170 170 def _read(self, vfs, oid, verify):
171 171 """Read blob (after verifying) from the given store"""
172 172 blob = vfs.read(oid)
173 173 if verify:
174 174 _verify(oid, blob)
175 175 return blob
176 176
177 177 def has(self, oid):
178 178 """Returns True if the local blobstore contains the requested blob,
179 179 False otherwise."""
180 180 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
181 181
182 182 class _gitlfsremote(object):
183 183
184 184 def __init__(self, repo, url):
185 185 ui = repo.ui
186 186 self.ui = ui
187 187 baseurl, authinfo = url.authinfo()
188 188 self.baseurl = baseurl.rstrip('/')
189 189 useragent = repo.ui.config('experimental', 'lfs.user-agent')
190 190 if not useragent:
191 191 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
192 192 self.urlopener = urlmod.opener(ui, authinfo, useragent)
193 193 self.retry = ui.configint('lfs', 'retry')
194 194
195 195 def writebatch(self, pointers, fromstore):
196 196 """Batch upload from local to remote blobstore."""
197 197 self._batch(_deduplicate(pointers), fromstore, 'upload')
198 198
199 199 def readbatch(self, pointers, tostore):
200 200 """Batch download from remote to local blostore."""
201 201 self._batch(_deduplicate(pointers), tostore, 'download')
202 202
203 203 def _batchrequest(self, pointers, action):
204 204 """Get metadata about objects pointed by pointers for given action
205 205
206 206 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
207 207 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
208 208 """
209 209 objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
210 210 requestdata = json.dumps({
211 211 'objects': objects,
212 212 'operation': action,
213 213 })
214 214 batchreq = util.urlreq.request('%s/objects/batch' % self.baseurl,
215 215 data=requestdata)
216 216 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
217 217 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
218 218 try:
219 219 rawjson = self.urlopener.open(batchreq).read()
220 220 except util.urlerr.httperror as ex:
221 221 raise LfsRemoteError(_('LFS HTTP error: %s (action=%s)')
222 222 % (ex, action))
223 223 try:
224 224 response = json.loads(rawjson)
225 225 except ValueError:
226 226 raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
227 227 % rawjson)
228 228 return response
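For reference, the request body built above has this shape (oid and size are made up; the response mirrors it, with an 'actions' map added per object, as _basictransfer below relies on):

import json

requestdata = json.dumps({
    'objects': [{'oid': 'deadbeef' * 8, 'size': 12}],
    'operation': 'download',
})
# A successful response decodes to something like:
# {'objects': [{'oid': 'deadbeef...', 'size': 12,
#               'actions': {'download': {'href': ..., 'header': {...}}}}]}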
229 229
230 230 def _checkforservererror(self, pointers, responses, action):
231 231 """Scans errors from objects
232 232
233 233 Raises LfsRemoteError if any objects have an error"""
234 234 for response in responses:
235 235 # The server should return 404 when objects cannot be found. Some
236 236 # server implementations (e.g. lfs-test-server) do not set "error"
237 237 # but just remove "download" from "actions". Treat that case
238 238 # the same as a 404 error.
239 239 notfound = (response.get('error', {}).get('code') == 404
240 240 or (action == 'download'
241 241 and action not in response.get('actions', [])))
242 242 if notfound:
243 243 ptrmap = {p.oid(): p for p in pointers}
244 244 p = ptrmap.get(response['oid'], None)
245 245 if p:
246 246 filename = getattr(p, 'filename', 'unknown')
247 247 raise LfsRemoteError(
248 248 _(('LFS server error. Remote object '
249 249 'for "%s" not found: %r')) % (filename, response))
250 250 else:
251 251 raise LfsRemoteError(
252 252 _('LFS server error. Unsolicited response for oid %s')
253 253 % response['oid'])
254 254 if 'error' in response:
255 255 raise LfsRemoteError(_('LFS server error: %r') % response)
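The two response shapes the scan above treats as "not found", replayed with made-up data:

# Both shapes satisfy the notfound condition used above:
action = 'download'
resp_404 = {'oid': 'a' * 64, 'error': {'code': 404}}
resp_noact = {'oid': 'a' * 64, 'actions': {}}   # 'download' not offered
for response in (resp_404, resp_noact):
    notfound = (response.get('error', {}).get('code') == 404
                or (action == 'download'
                    and action not in response.get('actions', [])))
    assert notfound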
256 256
257 257 def _extractobjects(self, response, pointers, action):
258 258 """extract objects from response of the batch API
259 259
260 260 response: parsed JSON object returned by batch API
261 261 return response['objects'] filtered by action
262 262 raise if any object has an error
263 263 """
264 264 # Scan errors from objects - fail early
265 265 objects = response.get('objects', [])
266 266 self._checkforservererror(pointers, objects, action)
267 267
268 268 # Filter objects with the given action. Practically, this skips uploading
269 269 # objects that already exist on the server.
270 270 filteredobjects = [o for o in objects if action in o.get('actions', [])]
271 271
272 272 return filteredobjects
273 273
274 274 def _basictransfer(self, obj, action, localstore):
275 275 """Download or upload a single object using basic transfer protocol
276 276
277 277 obj: dict, an object description returned by batch API
278 278 action: string, one of ['upload', 'download']
279 279 localstore: blobstore.local
280 280
281 281 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
282 282 basic-transfers.md
283 283 """
284 284 oid = str(obj['oid'])
285 285
286 286 href = str(obj['actions'][action].get('href'))
287 287 headers = obj['actions'][action].get('header', {}).items()
288 288
289 289 request = util.urlreq.request(href)
290 290 if action == 'upload':
291 291 # If uploading blobs, read data from local blobstore.
292 292 with localstore.open(oid) as fp:
293 293 _verifyfile(oid, fp)
294 294 request.data = filewithprogress(localstore.open(oid), None)
295 295 request.get_method = lambda: 'PUT'
296 296
297 297 for k, v in headers:
298 298 request.add_header(k, v)
299 299
300 300 response = b''
301 301 try:
302 302 req = self.urlopener.open(request)
303 303 if action == 'download':
304 304 # If downloading blobs, store downloaded data to local blobstore
305 305 localstore.download(oid, req)
306 306 else:
307 307 while True:
308 308 data = req.read(1048576)
309 309 if not data:
310 310 break
311 311 response += data
312 312 if response:
313 313 self.ui.debug('lfs %s response: %s' % (action, response))
314 314 except util.urlerr.httperror as ex:
315 315 if self.ui.debugflag:
316 316 self.ui.debug('%s: %s\n' % (oid, ex.read()))
317 317 raise LfsRemoteError(_('HTTP error: %s (oid=%s, action=%s)')
318 318 % (ex, oid, action))
319 319
320 320 def _batch(self, pointers, localstore, action):
321 321 if action not in ['upload', 'download']:
322 322 raise error.ProgrammingError('invalid Git-LFS action: %s' % action)
323 323
324 324 response = self._batchrequest(pointers, action)
325 325 objects = self._extractobjects(response, pointers, action)
326 326 total = sum(x.get('size', 0) for x in objects)
327 327 sizes = {}
328 328 for obj in objects:
329 329 sizes[obj.get('oid')] = obj.get('size', 0)
330 330 topic = {'upload': _('lfs uploading'),
331 331 'download': _('lfs downloading')}[action]
332 332 if len(objects) > 1:
333 333 self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
334 334 % (len(objects), util.bytecount(total)))
335 335 self.ui.progress(topic, 0, total=total)
336 336 def transfer(chunk):
337 337 for obj in chunk:
338 338 objsize = obj.get('size', 0)
339 339 if self.ui.verbose:
340 340 if action == 'download':
341 341 msg = _('lfs: downloading %s (%s)\n')
342 342 elif action == 'upload':
343 343 msg = _('lfs: uploading %s (%s)\n')
344 344 self.ui.note(msg % (obj.get('oid'),
345 345 util.bytecount(objsize)))
346 346 retry = self.retry
347 347 while True:
348 348 try:
349 349 self._basictransfer(obj, action, localstore)
350 350 yield 1, obj.get('oid')
351 351 break
352 352 except socket.error as ex:
353 353 if retry > 0:
354 354 self.ui.note(
355 355 _('lfs: failed: %r (remaining retry %d)\n')
356 356 % (ex, retry))
357 357 retry -= 1
358 358 continue
359 359 raise
360 360
361 361 # Until https multiplexing gets sorted out
362 362 if self.ui.configbool('experimental', 'lfs.worker-enable'):
363 363 oids = worker.worker(self.ui, 0.1, transfer, (),
364 364 sorted(objects, key=lambda o: o.get('oid')))
365 365 else:
366 366 oids = transfer(sorted(objects, key=lambda o: o.get('oid')))
367 367
368 368 processed = 0
369 369 blobs = 0
370 370 for _one, oid in oids:
371 371 processed += sizes[oid]
372 372 blobs += 1
373 373 self.ui.progress(topic, processed, total=total)
374 374 self.ui.note(_('lfs: processed: %s\n') % oid)
375 375 self.ui.progress(topic, pos=None, total=total)
376 376
377 377 if blobs > 0:
378 378 if action == 'upload':
379 379 self.ui.status(_('lfs: uploaded %d files (%s)\n')
380 380 % (blobs, util.bytecount(processed)))
381 381 # TODO: coalesce the download requests, and comment this in
382 382 #elif action == 'download':
383 383 # self.ui.status(_('lfs: downloaded %d files (%s)\n')
384 384 # % (blobs, util.bytecount(processed)))
385 385
386 386 def __del__(self):
387 387 # copied from mercurial/httppeer.py
388 388 urlopener = getattr(self, 'urlopener', None)
389 389 if urlopener:
390 390 for h in urlopener.handlers:
391 391 h.close()
392 392 getattr(h, "close_all", lambda : None)()
393 393
394 394 class _dummyremote(object):
395 395 """Dummy store storing blobs to temp directory."""
396 396
397 397 def __init__(self, repo, url):
398 398 fullpath = repo.vfs.join('lfs', url.path)
399 399 self.vfs = lfsvfs(fullpath)
400 400
401 401 def writebatch(self, pointers, fromstore):
402 402 for p in _deduplicate(pointers):
403 403 content = fromstore.read(p.oid(), verify=True)
404 404 with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
405 405 fp.write(content)
406 406
407 407 def readbatch(self, pointers, tostore):
408 408 for p in _deduplicate(pointers):
409 409 with self.vfs(p.oid(), 'rb') as fp:
410 410 tostore.download(p.oid(), fp)
411 411
412 412 class _nullremote(object):
413 413 """Null store storing blobs to /dev/null."""
414 414
415 415 def __init__(self, repo, url):
416 416 pass
417 417
418 418 def writebatch(self, pointers, fromstore):
419 419 pass
420 420
421 421 def readbatch(self, pointers, tostore):
422 422 pass
423 423
424 424 class _promptremote(object):
425 425 """Prompt user to set lfs.url when accessed."""
426 426
427 427 def __init__(self, repo, url):
428 428 pass
429 429
430 430 def writebatch(self, pointers, fromstore, ui=None):
431 431 self._prompt()
432 432
433 433 def readbatch(self, pointers, tostore, ui=None):
434 434 self._prompt()
435 435
436 436 def _prompt(self):
437 437 raise error.Abort(_('lfs.url needs to be configured'))
438 438
439 439 _storemap = {
440 440 'https': _gitlfsremote,
441 441 'http': _gitlfsremote,
442 442 'file': _dummyremote,
443 443 'null': _nullremote,
444 444 None: _promptremote,
445 445 }
446 446
447 447 def _deduplicate(pointers):
448 448 """Remove any duplicate oids that exist in the list"""
449 449 reduced = util.sortdict()
450 450 for p in pointers:
451 451 reduced[p.oid()] = p
452 452 return reduced.values()
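A sketch of the dedup semantics, with a plain dict standing in for util.sortdict and a hypothetical pointer class (the last pointer seen for an oid wins):

# fakepointer is a made-up stand-in for a real lfs pointer object.
class fakepointer(object):
    def __init__(self, oid, tag):
        self._oid, self.tag = oid, tag
    def oid(self):
        return self._oid

ptrs = [fakepointer('aa', 1), fakepointer('bb', 2), fakepointer('aa', 3)]
reduced = {}
for p in ptrs:
    reduced[p.oid()] = p
assert sorted((p.oid(), p.tag) for p in reduced.values()) == \
    [('aa', 3), ('bb', 2)]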
453 453
454 454 def _verify(oid, content):
455 455 realoid = hashlib.sha256(content).hexdigest()
456 456 if realoid != oid:
457 457 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
458 458 hint=_('run hg verify'))
459 459
460 460 def _verifyfile(oid, fp):
461 461 sha256 = hashlib.sha256()
462 462 while True:
463 463 data = fp.read(1024 * 1024)
464 464 if not data:
465 465 break
466 466 sha256.update(data)
467 467 realoid = sha256.hexdigest()
468 468 if realoid != oid:
469 469 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
470 470 hint=_('run hg verify'))
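The same chunked hashing, exercised against an in-memory blob as a standalone sketch:

import hashlib
import io

blob = b'x' * (3 * 1024 * 1024)
oid = hashlib.sha256(blob).hexdigest()
fp = io.BytesIO(blob)
sha256 = hashlib.sha256()
for chunk in iter(lambda: fp.read(1024 * 1024), b''):
    sha256.update(chunk)
assert sha256.hexdigest() == oid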
471 471
472 472 def remote(repo):
473 473 """remotestore factory. return a store in _storemap depending on config"""
474 474 url = util.url(repo.ui.config('lfs', 'url') or '')
475 475 scheme = url.scheme
476 476 if scheme not in _storemap:
477 477 raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
478 478 return _storemap[scheme](repo, url)
479 479
480 480 class LfsRemoteError(error.RevlogError):
481 481 pass