py3: make sure regexes are bytes...
Pulkit Goyal
r36473:9e3cb58c default
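
The change below is a Python 3 porting fix: `re` patterns must match the type of the data they are applied to, and Mercurial handles paths and log data as bytes. The only functional edit in this diff is in the first file, where the cache-name sanitizer switches from a str pattern to a bytes pattern (r'\w+' to br'\w+'). A minimal sketch of the incompatibility, using a made-up bytes path:

    import re

    s = b':pserver:user@server:/path'  # Mercurial-style bytes path

    # Python 3 refuses to apply a str (unicode) pattern to bytes data.
    try:
        re.findall(r'\w+', s)
    except TypeError as e:
        print(e)  # cannot use a string pattern on a bytes-like object

    # A bytes pattern works, and the b'' prefix is a no-op on Python 2.
    print(re.findall(br'\w+', s))  # [b'pserver', b'user', b'server', b'path']
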
@@ -1,952 +1,952 @@
 # Mercurial built-in replacement for cvsps.
 #
 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 from __future__ import absolute_import

 import os
 import re

 from mercurial.i18n import _
 from mercurial import (
     encoding,
     error,
     hook,
     pycompat,
     util,
 )

 pickle = util.pickle

 class logentry(object):
     '''Class logentry has the following attributes:
         .author - author name as CVS knows it
         .branch - name of branch this revision is on
         .branches - revision tuple of branches starting at this revision
         .comment - commit message
         .commitid - CVS commitid or None
         .date - the commit date as a (time, tz) tuple
         .dead - true if file revision is dead
         .file - Name of file
         .lines - a tuple (+lines, -lines) or None
         .parent - Previous revision of this entry
         .rcs - name of file as returned from CVS
         .revision - revision number as tuple
         .tags - list of tags on the file
         .synthetic - is this a synthetic "file ... added on ..." revision?
         .mergepoint - the branch that has been merged from (if present in
                       rlog output) or None
         .branchpoints - the branches that start at the current entry or empty
     '''
     def __init__(self, **entries):
         self.synthetic = False
         self.__dict__.update(entries)

     def __repr__(self):
         items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
         return "%s(%s)"%(type(self).__name__, ", ".join(items))

 class logerror(Exception):
     pass

 def getrepopath(cvspath):
     """Return the repository path from a CVS path.

     >>> getrepopath(b'/foo/bar')
     '/foo/bar'
     >>> getrepopath(b'c:/foo/bar')
     '/foo/bar'
     >>> getrepopath(b':pserver:10/foo/bar')
     '/foo/bar'
     >>> getrepopath(b':pserver:10c:/foo/bar')
     '/foo/bar'
     >>> getrepopath(b':pserver:/foo/bar')
     '/foo/bar'
     >>> getrepopath(b':pserver:c:/foo/bar')
     '/foo/bar'
     >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
     '/foo/bar'
     >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
     '/foo/bar'
     >>> getrepopath(b'user@server/path/to/repository')
     '/path/to/repository'
     """
     # According to CVS manual, CVS paths are expressed like:
     # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
     #
     # CVSpath is splitted into parts and then position of the first occurrence
     # of the '/' char after the '@' is located. The solution is the rest of the
     # string after that '/' sign including it

     parts = cvspath.split(':')
     atposition = parts[-1].find('@')
     start = 0

     if atposition != -1:
         start = atposition

     repopath = parts[-1][parts[-1].find('/', start):]
     return repopath

 def createlog(ui, directory=None, root="", rlog=True, cache=None):
     '''Collect the CVS rlog'''

     # Because we store many duplicate commit log messages, reusing strings
     # saves a lot of memory and pickle storage space.
     _scache = {}
     def scache(s):
         "return a shared version of a string"
         return _scache.setdefault(s, s)

     ui.status(_('collecting CVS rlog\n'))

     log = [] # list of logentry objects containing the CVS state

     # patterns to match in CVS (r)log output, by state of use
     re_00 = re.compile('RCS file: (.+)$')
     re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
     re_02 = re.compile('cvs (r?log|server): (.+)\n$')
     re_03 = re.compile("(Cannot access.+CVSROOT)|"
                        "(can't create temporary directory.+)$")
     re_10 = re.compile('Working file: (.+)$')
     re_20 = re.compile('symbolic names:')
     re_30 = re.compile('\t(.+): ([\\d.]+)$')
     re_31 = re.compile('----------------------------$')
     re_32 = re.compile('======================================='
                        '======================================$')
     re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
     re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
                        r'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
                        r'(\s+commitid:\s+([^;]+);)?'
                        r'(.*mergepoint:\s+([^;]+);)?')
     re_70 = re.compile('branches: (.+);$')

     file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')

     prefix = '' # leading path to strip of what we get from CVS

     if directory is None:
         # Current working directory

         # Get the real directory in the repository
         try:
             prefix = open(os.path.join('CVS','Repository'), 'rb').read().strip()
             directory = prefix
             if prefix == ".":
                 prefix = ""
         except IOError:
             raise logerror(_('not a CVS sandbox'))

         if prefix and not prefix.endswith(pycompat.ossep):
             prefix += pycompat.ossep

         # Use the Root file in the sandbox, if it exists
         try:
             root = open(os.path.join('CVS','Root'), 'rb').read().strip()
         except IOError:
             pass

     if not root:
         root = encoding.environ.get('CVSROOT', '')

     # read log cache if one exists
     oldlog = []
     date = None

     if cache:
         cachedir = os.path.expanduser('~/.hg.cvsps')
         if not os.path.exists(cachedir):
             os.mkdir(cachedir)

         # The cvsps cache pickle needs a uniquified name, based on the
         # repository location. The address may have all sort of nasties
         # in it, slashes, colons and such. So here we take just the
         # alphanumeric characters, concatenated in a way that does not
         # mix up the various components, so that
         # :pserver:user@server:/path
         # and
         # /pserver/user/server/path
         # are mapped to different cache file names.
         cachefile = root.split(":") + [directory, "cache"]
-        cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
+        cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
174 cachefile = os.path.join(cachedir,
174 cachefile = os.path.join(cachedir,
175 '.'.join([s for s in cachefile if s]))
175 '.'.join([s for s in cachefile if s]))
176
176
177 if cache == 'update':
177 if cache == 'update':
178 try:
178 try:
179 ui.note(_('reading cvs log cache %s\n') % cachefile)
179 ui.note(_('reading cvs log cache %s\n') % cachefile)
180 oldlog = pickle.load(open(cachefile, 'rb'))
180 oldlog = pickle.load(open(cachefile, 'rb'))
181 for e in oldlog:
181 for e in oldlog:
182 if not (util.safehasattr(e, 'branchpoints') and
182 if not (util.safehasattr(e, 'branchpoints') and
183 util.safehasattr(e, 'commitid') and
183 util.safehasattr(e, 'commitid') and
184 util.safehasattr(e, 'mergepoint')):
184 util.safehasattr(e, 'mergepoint')):
185 ui.status(_('ignoring old cache\n'))
185 ui.status(_('ignoring old cache\n'))
186 oldlog = []
186 oldlog = []
187 break
187 break
188
188
189 ui.note(_('cache has %d log entries\n') % len(oldlog))
189 ui.note(_('cache has %d log entries\n') % len(oldlog))
190 except Exception as e:
190 except Exception as e:
191 ui.note(_('error reading cache: %r\n') % e)
191 ui.note(_('error reading cache: %r\n') % e)
192
192
193 if oldlog:
193 if oldlog:
194 date = oldlog[-1].date # last commit date as a (time,tz) tuple
194 date = oldlog[-1].date # last commit date as a (time,tz) tuple
195 date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
195 date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
196
196
197 # build the CVS commandline
197 # build the CVS commandline
198 cmd = ['cvs', '-q']
198 cmd = ['cvs', '-q']
199 if root:
199 if root:
200 cmd.append('-d%s' % root)
200 cmd.append('-d%s' % root)
201 p = util.normpath(getrepopath(root))
201 p = util.normpath(getrepopath(root))
202 if not p.endswith('/'):
202 if not p.endswith('/'):
203 p += '/'
203 p += '/'
204 if prefix:
204 if prefix:
205 # looks like normpath replaces "" by "."
205 # looks like normpath replaces "" by "."
206 prefix = p + util.normpath(prefix)
206 prefix = p + util.normpath(prefix)
207 else:
207 else:
208 prefix = p
208 prefix = p
209 cmd.append(['log', 'rlog'][rlog])
209 cmd.append(['log', 'rlog'][rlog])
210 if date:
210 if date:
211 # no space between option and date string
211 # no space between option and date string
212 cmd.append('-d>%s' % date)
212 cmd.append('-d>%s' % date)
213 cmd.append(directory)
213 cmd.append(directory)
214
214
215 # state machine begins here
215 # state machine begins here
216 tags = {} # dictionary of revisions on current file with their tags
216 tags = {} # dictionary of revisions on current file with their tags
217 branchmap = {} # mapping between branch names and revision numbers
217 branchmap = {} # mapping between branch names and revision numbers
218 rcsmap = {}
218 rcsmap = {}
219 state = 0
219 state = 0
220 store = False # set when a new record can be appended
220 store = False # set when a new record can be appended
221
221
222 cmd = [util.shellquote(arg) for arg in cmd]
222 cmd = [util.shellquote(arg) for arg in cmd]
223 ui.note(_("running %s\n") % (' '.join(cmd)))
223 ui.note(_("running %s\n") % (' '.join(cmd)))
224 ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
224 ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
225
225
226 pfp = util.popen(' '.join(cmd))
226 pfp = util.popen(' '.join(cmd))
227 peek = pfp.readline()
227 peek = pfp.readline()
228 while True:
228 while True:
229 line = peek
229 line = peek
230 if line == '':
230 if line == '':
231 break
231 break
232 peek = pfp.readline()
232 peek = pfp.readline()
233 if line.endswith('\n'):
233 if line.endswith('\n'):
234 line = line[:-1]
234 line = line[:-1]
235 #ui.debug('state=%d line=%r\n' % (state, line))
235 #ui.debug('state=%d line=%r\n' % (state, line))
236
236
237 if state == 0:
237 if state == 0:
238 # initial state, consume input until we see 'RCS file'
238 # initial state, consume input until we see 'RCS file'
239 match = re_00.match(line)
239 match = re_00.match(line)
240 if match:
240 if match:
241 rcs = match.group(1)
241 rcs = match.group(1)
242 tags = {}
242 tags = {}
243 if rlog:
243 if rlog:
244 filename = util.normpath(rcs[:-2])
244 filename = util.normpath(rcs[:-2])
245 if filename.startswith(prefix):
245 if filename.startswith(prefix):
246 filename = filename[len(prefix):]
246 filename = filename[len(prefix):]
247 if filename.startswith('/'):
247 if filename.startswith('/'):
248 filename = filename[1:]
248 filename = filename[1:]
249 if filename.startswith('Attic/'):
249 if filename.startswith('Attic/'):
250 filename = filename[6:]
250 filename = filename[6:]
251 else:
251 else:
252 filename = filename.replace('/Attic/', '/')
252 filename = filename.replace('/Attic/', '/')
253 state = 2
253 state = 2
254 continue
254 continue
255 state = 1
255 state = 1
256 continue
256 continue
257 match = re_01.match(line)
257 match = re_01.match(line)
258 if match:
258 if match:
259 raise logerror(match.group(1))
259 raise logerror(match.group(1))
260 match = re_02.match(line)
260 match = re_02.match(line)
261 if match:
261 if match:
262 raise logerror(match.group(2))
262 raise logerror(match.group(2))
263 if re_03.match(line):
263 if re_03.match(line):
264 raise logerror(line)
264 raise logerror(line)
265
265
266 elif state == 1:
266 elif state == 1:
267 # expect 'Working file' (only when using log instead of rlog)
267 # expect 'Working file' (only when using log instead of rlog)
268 match = re_10.match(line)
268 match = re_10.match(line)
269 assert match, _('RCS file must be followed by working file')
269 assert match, _('RCS file must be followed by working file')
270 filename = util.normpath(match.group(1))
270 filename = util.normpath(match.group(1))
271 state = 2
271 state = 2
272
272
273 elif state == 2:
273 elif state == 2:
274 # expect 'symbolic names'
274 # expect 'symbolic names'
275 if re_20.match(line):
275 if re_20.match(line):
276 branchmap = {}
276 branchmap = {}
277 state = 3
277 state = 3
278
278
279 elif state == 3:
279 elif state == 3:
280 # read the symbolic names and store as tags
280 # read the symbolic names and store as tags
281 match = re_30.match(line)
281 match = re_30.match(line)
282 if match:
282 if match:
283 rev = [int(x) for x in match.group(2).split('.')]
283 rev = [int(x) for x in match.group(2).split('.')]
284
284
285 # Convert magic branch number to an odd-numbered one
285 # Convert magic branch number to an odd-numbered one
286 revn = len(rev)
286 revn = len(rev)
287 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
287 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
288 rev = rev[:-2] + rev[-1:]
288 rev = rev[:-2] + rev[-1:]
289 rev = tuple(rev)
289 rev = tuple(rev)
290
290
291 if rev not in tags:
291 if rev not in tags:
292 tags[rev] = []
292 tags[rev] = []
293 tags[rev].append(match.group(1))
293 tags[rev].append(match.group(1))
294 branchmap[match.group(1)] = match.group(2)
294 branchmap[match.group(1)] = match.group(2)
295
295
296 elif re_31.match(line):
296 elif re_31.match(line):
297 state = 5
297 state = 5
298 elif re_32.match(line):
298 elif re_32.match(line):
299 state = 0
299 state = 0
300
300
301 elif state == 4:
301 elif state == 4:
302 # expecting '------' separator before first revision
302 # expecting '------' separator before first revision
303 if re_31.match(line):
303 if re_31.match(line):
304 state = 5
304 state = 5
305 else:
305 else:
306 assert not re_32.match(line), _('must have at least '
306 assert not re_32.match(line), _('must have at least '
307 'some revisions')
307 'some revisions')
308
308
309 elif state == 5:
309 elif state == 5:
310 # expecting revision number and possibly (ignored) lock indication
310 # expecting revision number and possibly (ignored) lock indication
311 # we create the logentry here from values stored in states 0 to 4,
311 # we create the logentry here from values stored in states 0 to 4,
312 # as this state is re-entered for subsequent revisions of a file.
312 # as this state is re-entered for subsequent revisions of a file.
313 match = re_50.match(line)
313 match = re_50.match(line)
314 assert match, _('expected revision number')
314 assert match, _('expected revision number')
315 e = logentry(rcs=scache(rcs),
315 e = logentry(rcs=scache(rcs),
316 file=scache(filename),
316 file=scache(filename),
317 revision=tuple([int(x) for x in
317 revision=tuple([int(x) for x in
318 match.group(1).split('.')]),
318 match.group(1).split('.')]),
319 branches=[],
319 branches=[],
320 parent=None,
320 parent=None,
321 commitid=None,
321 commitid=None,
322 mergepoint=None,
322 mergepoint=None,
323 branchpoints=set())
323 branchpoints=set())
324
324
325 state = 6
325 state = 6
326
326
327 elif state == 6:
327 elif state == 6:
328 # expecting date, author, state, lines changed
328 # expecting date, author, state, lines changed
329 match = re_60.match(line)
329 match = re_60.match(line)
330 assert match, _('revision must be followed by date line')
330 assert match, _('revision must be followed by date line')
331 d = match.group(1)
331 d = match.group(1)
332 if d[2] == '/':
332 if d[2] == '/':
333 # Y2K
333 # Y2K
334 d = '19' + d
334 d = '19' + d
335
335
336 if len(d.split()) != 3:
336 if len(d.split()) != 3:
337 # cvs log dates always in GMT
337 # cvs log dates always in GMT
338 d = d + ' UTC'
338 d = d + ' UTC'
339 e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S',
339 e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S',
340 '%Y/%m/%d %H:%M:%S',
340 '%Y/%m/%d %H:%M:%S',
341 '%Y-%m-%d %H:%M:%S'])
341 '%Y-%m-%d %H:%M:%S'])
342 e.author = scache(match.group(2))
342 e.author = scache(match.group(2))
343 e.dead = match.group(3).lower() == 'dead'
343 e.dead = match.group(3).lower() == 'dead'
344
344
345 if match.group(5):
345 if match.group(5):
346 if match.group(6):
346 if match.group(6):
347 e.lines = (int(match.group(5)), int(match.group(6)))
347 e.lines = (int(match.group(5)), int(match.group(6)))
348 else:
348 else:
349 e.lines = (int(match.group(5)), 0)
349 e.lines = (int(match.group(5)), 0)
350 elif match.group(6):
350 elif match.group(6):
351 e.lines = (0, int(match.group(6)))
351 e.lines = (0, int(match.group(6)))
352 else:
352 else:
353 e.lines = None
353 e.lines = None
354
354
355 if match.group(7): # cvs 1.12 commitid
355 if match.group(7): # cvs 1.12 commitid
356 e.commitid = match.group(8)
356 e.commitid = match.group(8)
357
357
358 if match.group(9): # cvsnt mergepoint
358 if match.group(9): # cvsnt mergepoint
359 myrev = match.group(10).split('.')
359 myrev = match.group(10).split('.')
360 if len(myrev) == 2: # head
360 if len(myrev) == 2: # head
361 e.mergepoint = 'HEAD'
361 e.mergepoint = 'HEAD'
362 else:
362 else:
363 myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
363 myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
364 branches = [b for b in branchmap if branchmap[b] == myrev]
364 branches = [b for b in branchmap if branchmap[b] == myrev]
365 assert len(branches) == 1, ('unknown branch: %s'
365 assert len(branches) == 1, ('unknown branch: %s'
366 % e.mergepoint)
366 % e.mergepoint)
367 e.mergepoint = branches[0]
367 e.mergepoint = branches[0]
368
368
369 e.comment = []
369 e.comment = []
370 state = 7
370 state = 7
371
371
372 elif state == 7:
372 elif state == 7:
373 # read the revision numbers of branches that start at this revision
373 # read the revision numbers of branches that start at this revision
374 # or store the commit log message otherwise
374 # or store the commit log message otherwise
375 m = re_70.match(line)
375 m = re_70.match(line)
376 if m:
376 if m:
377 e.branches = [tuple([int(y) for y in x.strip().split('.')])
377 e.branches = [tuple([int(y) for y in x.strip().split('.')])
378 for x in m.group(1).split(';')]
378 for x in m.group(1).split(';')]
379 state = 8
379 state = 8
380 elif re_31.match(line) and re_50.match(peek):
380 elif re_31.match(line) and re_50.match(peek):
381 state = 5
381 state = 5
382 store = True
382 store = True
383 elif re_32.match(line):
383 elif re_32.match(line):
384 state = 0
384 state = 0
385 store = True
385 store = True
386 else:
386 else:
387 e.comment.append(line)
387 e.comment.append(line)
388
388
389 elif state == 8:
389 elif state == 8:
390 # store commit log message
390 # store commit log message
391 if re_31.match(line):
391 if re_31.match(line):
392 cpeek = peek
392 cpeek = peek
393 if cpeek.endswith('\n'):
393 if cpeek.endswith('\n'):
394 cpeek = cpeek[:-1]
394 cpeek = cpeek[:-1]
395 if re_50.match(cpeek):
395 if re_50.match(cpeek):
396 state = 5
396 state = 5
397 store = True
397 store = True
398 else:
398 else:
399 e.comment.append(line)
399 e.comment.append(line)
400 elif re_32.match(line):
400 elif re_32.match(line):
401 state = 0
401 state = 0
402 store = True
402 store = True
403 else:
403 else:
404 e.comment.append(line)
404 e.comment.append(line)
405
405
406 # When a file is added on a branch B1, CVS creates a synthetic
406 # When a file is added on a branch B1, CVS creates a synthetic
407 # dead trunk revision 1.1 so that the branch has a root.
407 # dead trunk revision 1.1 so that the branch has a root.
408 # Likewise, if you merge such a file to a later branch B2 (one
408 # Likewise, if you merge such a file to a later branch B2 (one
409 # that already existed when the file was added on B1), CVS
409 # that already existed when the file was added on B1), CVS
410 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
410 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
411 # these revisions now, but mark them synthetic so
411 # these revisions now, but mark them synthetic so
412 # createchangeset() can take care of them.
412 # createchangeset() can take care of them.
413 if (store and
413 if (store and
414 e.dead and
414 e.dead and
415 e.revision[-1] == 1 and # 1.1 or 1.1.x.1
415 e.revision[-1] == 1 and # 1.1 or 1.1.x.1
416 len(e.comment) == 1 and
416 len(e.comment) == 1 and
417 file_added_re.match(e.comment[0])):
417 file_added_re.match(e.comment[0])):
418 ui.debug('found synthetic revision in %s: %r\n'
418 ui.debug('found synthetic revision in %s: %r\n'
419 % (e.rcs, e.comment[0]))
419 % (e.rcs, e.comment[0]))
420 e.synthetic = True
420 e.synthetic = True
421
421
422 if store:
422 if store:
423 # clean up the results and save in the log.
423 # clean up the results and save in the log.
424 store = False
424 store = False
425 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
425 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
426 e.comment = scache('\n'.join(e.comment))
426 e.comment = scache('\n'.join(e.comment))
427
427
428 revn = len(e.revision)
428 revn = len(e.revision)
429 if revn > 3 and (revn % 2) == 0:
429 if revn > 3 and (revn % 2) == 0:
430 e.branch = tags.get(e.revision[:-1], [None])[0]
430 e.branch = tags.get(e.revision[:-1], [None])[0]
431 else:
431 else:
432 e.branch = None
432 e.branch = None
433
433
434 # find the branches starting from this revision
434 # find the branches starting from this revision
435 branchpoints = set()
435 branchpoints = set()
436 for branch, revision in branchmap.iteritems():
436 for branch, revision in branchmap.iteritems():
437 revparts = tuple([int(i) for i in revision.split('.')])
437 revparts = tuple([int(i) for i in revision.split('.')])
438 if len(revparts) < 2: # bad tags
438 if len(revparts) < 2: # bad tags
439 continue
439 continue
440 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
440 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
441 # normal branch
441 # normal branch
442 if revparts[:-2] == e.revision:
442 if revparts[:-2] == e.revision:
443 branchpoints.add(branch)
443 branchpoints.add(branch)
444 elif revparts == (1, 1, 1): # vendor branch
444 elif revparts == (1, 1, 1): # vendor branch
445 if revparts in e.branches:
445 if revparts in e.branches:
446 branchpoints.add(branch)
446 branchpoints.add(branch)
447 e.branchpoints = branchpoints
447 e.branchpoints = branchpoints
448
448
449 log.append(e)
449 log.append(e)
450
450
451 rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs
451 rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs
452
452
453 if len(log) % 100 == 0:
453 if len(log) % 100 == 0:
454 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
454 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
455
455
456 log.sort(key=lambda x: (x.rcs, x.revision))
456 log.sort(key=lambda x: (x.rcs, x.revision))
457
457
458 # find parent revisions of individual files
458 # find parent revisions of individual files
459 versions = {}
459 versions = {}
460 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
460 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
461 rcs = e.rcs.replace('/Attic/', '/')
461 rcs = e.rcs.replace('/Attic/', '/')
462 if rcs in rcsmap:
462 if rcs in rcsmap:
463 e.rcs = rcsmap[rcs]
463 e.rcs = rcsmap[rcs]
464 branch = e.revision[:-1]
464 branch = e.revision[:-1]
465 versions[(e.rcs, branch)] = e.revision
465 versions[(e.rcs, branch)] = e.revision
466
466
467 for e in log:
467 for e in log:
468 branch = e.revision[:-1]
468 branch = e.revision[:-1]
469 p = versions.get((e.rcs, branch), None)
469 p = versions.get((e.rcs, branch), None)
470 if p is None:
470 if p is None:
471 p = e.revision[:-2]
471 p = e.revision[:-2]
472 e.parent = p
472 e.parent = p
473 versions[(e.rcs, branch)] = e.revision
473 versions[(e.rcs, branch)] = e.revision
474
474
475 # update the log cache
475 # update the log cache
476 if cache:
476 if cache:
477 if log:
477 if log:
478 # join up the old and new logs
478 # join up the old and new logs
479 log.sort(key=lambda x: x.date)
479 log.sort(key=lambda x: x.date)
480
480
481 if oldlog and oldlog[-1].date >= log[0].date:
481 if oldlog and oldlog[-1].date >= log[0].date:
482 raise logerror(_('log cache overlaps with new log entries,'
482 raise logerror(_('log cache overlaps with new log entries,'
483 ' re-run without cache.'))
483 ' re-run without cache.'))
484
484
485 log = oldlog + log
485 log = oldlog + log
486
486
487 # write the new cachefile
487 # write the new cachefile
488 ui.note(_('writing cvs log cache %s\n') % cachefile)
488 ui.note(_('writing cvs log cache %s\n') % cachefile)
489 pickle.dump(log, open(cachefile, 'wb'))
489 pickle.dump(log, open(cachefile, 'wb'))
490 else:
490 else:
491 log = oldlog
491 log = oldlog
492
492
493 ui.status(_('%d log entries\n') % len(log))
493 ui.status(_('%d log entries\n') % len(log))
494
494
495 encodings = ui.configlist('convert', 'cvsps.logencoding')
495 encodings = ui.configlist('convert', 'cvsps.logencoding')
496 if encodings:
496 if encodings:
497 def revstr(r):
497 def revstr(r):
498 # this is needed, because logentry.revision is a tuple of "int"
498 # this is needed, because logentry.revision is a tuple of "int"
499 # (e.g. (1, 2) for "1.2")
499 # (e.g. (1, 2) for "1.2")
500 return '.'.join(pycompat.maplist(pycompat.bytestr, r))
500 return '.'.join(pycompat.maplist(pycompat.bytestr, r))
501
501
502 for entry in log:
502 for entry in log:
503 comment = entry.comment
503 comment = entry.comment
504 for e in encodings:
504 for e in encodings:
505 try:
505 try:
506 entry.comment = comment.decode(e).encode('utf-8')
506 entry.comment = comment.decode(e).encode('utf-8')
507 if ui.debugflag:
507 if ui.debugflag:
508 ui.debug("transcoding by %s: %s of %s\n" %
508 ui.debug("transcoding by %s: %s of %s\n" %
509 (e, revstr(entry.revision), entry.file))
509 (e, revstr(entry.revision), entry.file))
510 break
510 break
511 except UnicodeDecodeError:
511 except UnicodeDecodeError:
512 pass # try next encoding
512 pass # try next encoding
513 except LookupError as inst: # unknown encoding, maybe
513 except LookupError as inst: # unknown encoding, maybe
514 raise error.Abort(inst,
514 raise error.Abort(inst,
515 hint=_('check convert.cvsps.logencoding'
515 hint=_('check convert.cvsps.logencoding'
516 ' configuration'))
516 ' configuration'))
517 else:
517 else:
518 raise error.Abort(_("no encoding can transcode"
518 raise error.Abort(_("no encoding can transcode"
519 " CVS log message for %s of %s")
519 " CVS log message for %s of %s")
520 % (revstr(entry.revision), entry.file),
520 % (revstr(entry.revision), entry.file),
521 hint=_('check convert.cvsps.logencoding'
521 hint=_('check convert.cvsps.logencoding'
522 ' configuration'))
522 ' configuration'))
523
523
524 hook.hook(ui, None, "cvslog", True, log=log)
524 hook.hook(ui, None, "cvslog", True, log=log)
525
525
526 return log
526 return log
527
527
528
528
529 class changeset(object):
529 class changeset(object):
530 '''Class changeset has the following attributes:
530 '''Class changeset has the following attributes:
531 .id - integer identifying this changeset (list index)
531 .id - integer identifying this changeset (list index)
532 .author - author name as CVS knows it
532 .author - author name as CVS knows it
533 .branch - name of branch this changeset is on, or None
533 .branch - name of branch this changeset is on, or None
534 .comment - commit message
534 .comment - commit message
535 .commitid - CVS commitid or None
535 .commitid - CVS commitid or None
536 .date - the commit date as a (time,tz) tuple
536 .date - the commit date as a (time,tz) tuple
537 .entries - list of logentry objects in this changeset
537 .entries - list of logentry objects in this changeset
538 .parents - list of one or two parent changesets
538 .parents - list of one or two parent changesets
539 .tags - list of tags on this changeset
539 .tags - list of tags on this changeset
540 .synthetic - from synthetic revision "file ... added on branch ..."
540 .synthetic - from synthetic revision "file ... added on branch ..."
541 .mergepoint- the branch that has been merged from or None
541 .mergepoint- the branch that has been merged from or None
542 .branchpoints- the branches that start at the current entry or empty
542 .branchpoints- the branches that start at the current entry or empty
543 '''
543 '''
544 def __init__(self, **entries):
544 def __init__(self, **entries):
545 self.id = None
545 self.id = None
546 self.synthetic = False
546 self.synthetic = False
547 self.__dict__.update(entries)
547 self.__dict__.update(entries)
548
548
549 def __repr__(self):
549 def __repr__(self):
550 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
550 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
551 return "%s(%s)"%(type(self).__name__, ", ".join(items))
551 return "%s(%s)"%(type(self).__name__, ", ".join(items))
552
552
553 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
553 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
554 '''Convert log into changesets.'''
554 '''Convert log into changesets.'''
555
555
556 ui.status(_('creating changesets\n'))
556 ui.status(_('creating changesets\n'))
557
557
558 # try to order commitids by date
558 # try to order commitids by date
559 mindate = {}
559 mindate = {}
560 for e in log:
560 for e in log:
561 if e.commitid:
561 if e.commitid:
562 mindate[e.commitid] = min(e.date, mindate.get(e.commitid))
562 mindate[e.commitid] = min(e.date, mindate.get(e.commitid))
563
563
564 # Merge changesets
564 # Merge changesets
565 log.sort(key=lambda x: (mindate.get(x.commitid), x.commitid, x.comment,
565 log.sort(key=lambda x: (mindate.get(x.commitid), x.commitid, x.comment,
566 x.author, x.branch, x.date, x.branchpoints))
566 x.author, x.branch, x.date, x.branchpoints))
567
567
568 changesets = []
568 changesets = []
569 files = set()
569 files = set()
570 c = None
570 c = None
571 for i, e in enumerate(log):
571 for i, e in enumerate(log):
572
572
573 # Check if log entry belongs to the current changeset or not.
573 # Check if log entry belongs to the current changeset or not.
574
574
575 # Since CVS is file-centric, two different file revisions with
575 # Since CVS is file-centric, two different file revisions with
576 # different branchpoints should be treated as belonging to two
576 # different branchpoints should be treated as belonging to two
577 # different changesets (and the ordering is important and not
577 # different changesets (and the ordering is important and not
578 # honoured by cvsps at this point).
578 # honoured by cvsps at this point).
579 #
579 #
580 # Consider the following case:
580 # Consider the following case:
581 # foo 1.1 branchpoints: [MYBRANCH]
581 # foo 1.1 branchpoints: [MYBRANCH]
582 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
582 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
583 #
583 #
584 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
584 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
585 # later version of foo may be in MYBRANCH2, so foo should be the
585 # later version of foo may be in MYBRANCH2, so foo should be the
586 # first changeset and bar the next and MYBRANCH and MYBRANCH2
586 # first changeset and bar the next and MYBRANCH and MYBRANCH2
587 # should both start off of the bar changeset. No provisions are
587 # should both start off of the bar changeset. No provisions are
588 # made to ensure that this is, in fact, what happens.
588 # made to ensure that this is, in fact, what happens.
589 if not (c and e.branchpoints == c.branchpoints and
589 if not (c and e.branchpoints == c.branchpoints and
590 (# cvs commitids
590 (# cvs commitids
591 (e.commitid is not None and e.commitid == c.commitid) or
591 (e.commitid is not None and e.commitid == c.commitid) or
592 (# no commitids, use fuzzy commit detection
592 (# no commitids, use fuzzy commit detection
593 (e.commitid is None or c.commitid is None) and
593 (e.commitid is None or c.commitid is None) and
594 e.comment == c.comment and
594 e.comment == c.comment and
595 e.author == c.author and
595 e.author == c.author and
596 e.branch == c.branch and
596 e.branch == c.branch and
597 ((c.date[0] + c.date[1]) <=
597 ((c.date[0] + c.date[1]) <=
598 (e.date[0] + e.date[1]) <=
598 (e.date[0] + e.date[1]) <=
599 (c.date[0] + c.date[1]) + fuzz) and
599 (c.date[0] + c.date[1]) + fuzz) and
600 e.file not in files))):
600 e.file not in files))):
601 c = changeset(comment=e.comment, author=e.author,
601 c = changeset(comment=e.comment, author=e.author,
602 branch=e.branch, date=e.date,
602 branch=e.branch, date=e.date,
603 entries=[], mergepoint=e.mergepoint,
603 entries=[], mergepoint=e.mergepoint,
604 branchpoints=e.branchpoints, commitid=e.commitid)
604 branchpoints=e.branchpoints, commitid=e.commitid)
605 changesets.append(c)
605 changesets.append(c)
606
606
607 files = set()
607 files = set()
608 if len(changesets) % 100 == 0:
608 if len(changesets) % 100 == 0:
609 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
609 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
610 ui.status(util.ellipsis(t, 80) + '\n')
610 ui.status(util.ellipsis(t, 80) + '\n')
611
611
612 c.entries.append(e)
612 c.entries.append(e)
613 files.add(e.file)
613 files.add(e.file)
614 c.date = e.date # changeset date is date of latest commit in it
614 c.date = e.date # changeset date is date of latest commit in it
615
615
616 # Mark synthetic changesets
616 # Mark synthetic changesets
617
617
618 for c in changesets:
618 for c in changesets:
619 # Synthetic revisions always get their own changeset, because
619 # Synthetic revisions always get their own changeset, because
620 # the log message includes the filename. E.g. if you add file3
620 # the log message includes the filename. E.g. if you add file3
621 # and file4 on a branch, you get four log entries and three
621 # and file4 on a branch, you get four log entries and three
622 # changesets:
622 # changesets:
623 # "File file3 was added on branch ..." (synthetic, 1 entry)
623 # "File file3 was added on branch ..." (synthetic, 1 entry)
624 # "File file4 was added on branch ..." (synthetic, 1 entry)
624 # "File file4 was added on branch ..." (synthetic, 1 entry)
625 # "Add file3 and file4 to fix ..." (real, 2 entries)
625 # "Add file3 and file4 to fix ..." (real, 2 entries)
626 # Hence the check for 1 entry here.
626 # Hence the check for 1 entry here.
627 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
627 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
628
628
629 # Sort files in each changeset
629 # Sort files in each changeset
630
630
631 def entitycompare(l, r):
631 def entitycompare(l, r):
632 'Mimic cvsps sorting order'
632 'Mimic cvsps sorting order'
633 l = l.file.split('/')
633 l = l.file.split('/')
634 r = r.file.split('/')
634 r = r.file.split('/')
635 nl = len(l)
635 nl = len(l)
636 nr = len(r)
636 nr = len(r)
637 n = min(nl, nr)
637 n = min(nl, nr)
638 for i in range(n):
638 for i in range(n):
639 if i + 1 == nl and nl < nr:
639 if i + 1 == nl and nl < nr:
640 return -1
640 return -1
641 elif i + 1 == nr and nl > nr:
641 elif i + 1 == nr and nl > nr:
642 return +1
642 return +1
643 elif l[i] < r[i]:
643 elif l[i] < r[i]:
644 return -1
644 return -1
645 elif l[i] > r[i]:
645 elif l[i] > r[i]:
646 return +1
646 return +1
647 return 0
647 return 0
648
648
649 for c in changesets:
649 for c in changesets:
650 c.entries.sort(entitycompare)
650 c.entries.sort(entitycompare)
651
651
652 # Sort changesets by date
652 # Sort changesets by date
653
653
654 odd = set()
654 odd = set()
655 def cscmp(l, r):
655 def cscmp(l, r):
656 d = sum(l.date) - sum(r.date)
656 d = sum(l.date) - sum(r.date)
657 if d:
657 if d:
658 return d
658 return d
659
659
660 # detect vendor branches and initial commits on a branch
660 # detect vendor branches and initial commits on a branch
661 le = {}
661 le = {}
662 for e in l.entries:
662 for e in l.entries:
663 le[e.rcs] = e.revision
663 le[e.rcs] = e.revision
664 re = {}
664 re = {}
665 for e in r.entries:
665 for e in r.entries:
666 re[e.rcs] = e.revision
666 re[e.rcs] = e.revision
667
667
668 d = 0
668 d = 0
669 for e in l.entries:
669 for e in l.entries:
670 if re.get(e.rcs, None) == e.parent:
670 if re.get(e.rcs, None) == e.parent:
671 assert not d
671 assert not d
672 d = 1
672 d = 1
673 break
673 break
674
674
675 for e in r.entries:
675 for e in r.entries:
676 if le.get(e.rcs, None) == e.parent:
676 if le.get(e.rcs, None) == e.parent:
677 if d:
677 if d:
678 odd.add((l, r))
678 odd.add((l, r))
679 d = -1
679 d = -1
680 break
680 break
681 # By this point, the changesets are sufficiently compared that
681 # By this point, the changesets are sufficiently compared that
682 # we don't really care about ordering. However, this leaves
682 # we don't really care about ordering. However, this leaves
683 # some race conditions in the tests, so we compare on the
683 # some race conditions in the tests, so we compare on the
684 # number of files modified, the files contained in each
684 # number of files modified, the files contained in each
685 # changeset, and the branchpoints in the change to ensure test
685 # changeset, and the branchpoints in the change to ensure test
686 # output remains stable.
686 # output remains stable.
687
687
688 # recommended replacement for cmp from
688 # recommended replacement for cmp from
689 # https://docs.python.org/3.0/whatsnew/3.0.html
689 # https://docs.python.org/3.0/whatsnew/3.0.html
690 c = lambda x, y: (x > y) - (x < y)
690 c = lambda x, y: (x > y) - (x < y)
691 # Sort bigger changes first.
691 # Sort bigger changes first.
692 if not d:
692 if not d:
693 d = c(len(l.entries), len(r.entries))
693 d = c(len(l.entries), len(r.entries))
694 # Try sorting by filename in the change.
694 # Try sorting by filename in the change.
695 if not d:
695 if not d:
696 d = c([e.file for e in l.entries], [e.file for e in r.entries])
696 d = c([e.file for e in l.entries], [e.file for e in r.entries])
697 # Try and put changes without a branch point before ones with
697 # Try and put changes without a branch point before ones with
698 # a branch point.
698 # a branch point.
699 if not d:
699 if not d:
700 d = c(len(l.branchpoints), len(r.branchpoints))
700 d = c(len(l.branchpoints), len(r.branchpoints))
701 return d
701 return d
702
702
703 changesets.sort(cscmp)
703 changesets.sort(cscmp)
704
704
705 # Collect tags
705 # Collect tags
706
706
707 globaltags = {}
707 globaltags = {}
708 for c in changesets:
708 for c in changesets:
709 for e in c.entries:
709 for e in c.entries:
710 for tag in e.tags:
710 for tag in e.tags:
711 # remember which is the latest changeset to have this tag
711 # remember which is the latest changeset to have this tag
712 globaltags[tag] = c
712 globaltags[tag] = c
713
713
714 for c in changesets:
714 for c in changesets:
715 tags = set()
715 tags = set()
716 for e in c.entries:
716 for e in c.entries:
717 tags.update(e.tags)
717 tags.update(e.tags)
718 # remember tags only if this is the latest changeset to have it
718 # remember tags only if this is the latest changeset to have it
719 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
719 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
720
720
721 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
721 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
722 # by inserting dummy changesets with two parents, and handle
722 # by inserting dummy changesets with two parents, and handle
723 # {{mergefrombranch BRANCHNAME}} by setting two parents.
723 # {{mergefrombranch BRANCHNAME}} by setting two parents.
724
724
725 if mergeto is None:
725 if mergeto is None:
726 mergeto = r'{{mergetobranch ([-\w]+)}}'
726 mergeto = r'{{mergetobranch ([-\w]+)}}'
727 if mergeto:
727 if mergeto:
728 mergeto = re.compile(mergeto)
728 mergeto = re.compile(mergeto)
729
729
730 if mergefrom is None:
730 if mergefrom is None:
731 mergefrom = r'{{mergefrombranch ([-\w]+)}}'
731 mergefrom = r'{{mergefrombranch ([-\w]+)}}'
732 if mergefrom:
732 if mergefrom:
733 mergefrom = re.compile(mergefrom)
733 mergefrom = re.compile(mergefrom)
734
734
735 versions = {} # changeset index where we saw any particular file version
735 versions = {} # changeset index where we saw any particular file version
736 branches = {} # changeset index where we saw a branch
736 branches = {} # changeset index where we saw a branch
737 n = len(changesets)
737 n = len(changesets)
738 i = 0
738 i = 0
739 while i < n:
739 while i < n:
740 c = changesets[i]
740 c = changesets[i]
741
741
742 for f in c.entries:
742 for f in c.entries:
743 versions[(f.rcs, f.revision)] = i
743 versions[(f.rcs, f.revision)] = i
744
744
745 p = None
745 p = None
746 if c.branch in branches:
746 if c.branch in branches:
747 p = branches[c.branch]
747 p = branches[c.branch]
748 else:
748 else:
749 # first changeset on a new branch
749 # first changeset on a new branch
750 # the parent is a changeset with the branch in its
750 # the parent is a changeset with the branch in its
751 # branchpoints such that it is the latest possible
751 # branchpoints such that it is the latest possible
752 # commit without any intervening, unrelated commits.
752 # commit without any intervening, unrelated commits.
753
753
754 for candidate in xrange(i):
754 for candidate in xrange(i):
755 if c.branch not in changesets[candidate].branchpoints:
755 if c.branch not in changesets[candidate].branchpoints:
756 if p is not None:
756 if p is not None:
757 break
757 break
758 continue
758 continue
759 p = candidate
759 p = candidate
760
760
761 c.parents = []
761 c.parents = []
762 if p is not None:
762 if p is not None:
763 p = changesets[p]
763 p = changesets[p]
764
764
765 # Ensure no changeset has a synthetic changeset as a parent.
765 # Ensure no changeset has a synthetic changeset as a parent.
766 while p.synthetic:
766 while p.synthetic:
767 assert len(p.parents) <= 1, \
767 assert len(p.parents) <= 1, \
768 _('synthetic changeset cannot have multiple parents')
768 _('synthetic changeset cannot have multiple parents')
769 if p.parents:
769 if p.parents:
770 p = p.parents[0]
770 p = p.parents[0]
771 else:
771 else:
772 p = None
772 p = None
773 break
773 break
774
774
775 if p is not None:
775 if p is not None:
776 c.parents.append(p)
776 c.parents.append(p)
777
777
778 if c.mergepoint:
778 if c.mergepoint:
779 if c.mergepoint == 'HEAD':
779 if c.mergepoint == 'HEAD':
780 c.mergepoint = None
780 c.mergepoint = None
781 c.parents.append(changesets[branches[c.mergepoint]])
781 c.parents.append(changesets[branches[c.mergepoint]])
782
782
783 if mergefrom:
783 if mergefrom:
784 m = mergefrom.search(c.comment)
784 m = mergefrom.search(c.comment)
785 if m:
785 if m:
786 m = m.group(1)
786 m = m.group(1)
787 if m == 'HEAD':
787 if m == 'HEAD':
788 m = None
788 m = None
789 try:
789 try:
790 candidate = changesets[branches[m]]
790 candidate = changesets[branches[m]]
791 except KeyError:
791 except KeyError:
792 ui.warn(_("warning: CVS commit message references "
792 ui.warn(_("warning: CVS commit message references "
793 "non-existent branch %r:\n%s\n")
793 "non-existent branch %r:\n%s\n")
794 % (m, c.comment))
794 % (m, c.comment))
795 if m in branches and c.branch != m and not candidate.synthetic:
795 if m in branches and c.branch != m and not candidate.synthetic:
796 c.parents.append(candidate)
796 c.parents.append(candidate)
797
797
798 if mergeto:
798 if mergeto:
799 m = mergeto.search(c.comment)
799 m = mergeto.search(c.comment)
800 if m:
800 if m:
801 if m.groups():
801 if m.groups():
802 m = m.group(1)
802 m = m.group(1)
803 if m == 'HEAD':
803 if m == 'HEAD':
804 m = None
804 m = None
805 else:
805 else:
806 m = None # if no group found then merge to HEAD
806 m = None # if no group found then merge to HEAD
807 if m in branches and c.branch != m:
807 if m in branches and c.branch != m:
808 # insert empty changeset for merge
808 # insert empty changeset for merge
809 cc = changeset(
809 cc = changeset(
810 author=c.author, branch=m, date=c.date,
810 author=c.author, branch=m, date=c.date,
811 comment='convert-repo: CVS merge from branch %s'
811 comment='convert-repo: CVS merge from branch %s'
812 % c.branch,
812 % c.branch,
813 entries=[], tags=[],
813 entries=[], tags=[],
814 parents=[changesets[branches[m]], c])
814 parents=[changesets[branches[m]], c])
815 changesets.insert(i + 1, cc)
815 changesets.insert(i + 1, cc)
816 branches[m] = i + 1
816 branches[m] = i + 1
817
817
818 # adjust our loop counters now we have inserted a new entry
818 # adjust our loop counters now we have inserted a new entry
819 n += 1
819 n += 1
820 i += 2
820 i += 2
821 continue
821 continue
822
822
823 branches[c.branch] = i
823 branches[c.branch] = i
824 i += 1
824 i += 1
825
825
826 # Drop synthetic changesets (safe now that we have ensured no other
826 # Drop synthetic changesets (safe now that we have ensured no other
827 # changesets can have them as parents).
827 # changesets can have them as parents).
828 i = 0
828 i = 0
829 while i < len(changesets):
829 while i < len(changesets):
830 if changesets[i].synthetic:
830 if changesets[i].synthetic:
831 del changesets[i]
831 del changesets[i]
832 else:
832 else:
833 i += 1
833 i += 1
834
834
835 # Number changesets
835 # Number changesets
836
836
837 for i, c in enumerate(changesets):
837 for i, c in enumerate(changesets):
838 c.id = i + 1
838 c.id = i + 1
839
839
840 if odd:
840 if odd:
841 for l, r in odd:
841 for l, r in odd:
842 if l.id is not None and r.id is not None:
842 if l.id is not None and r.id is not None:
843 ui.warn(_('changeset %d is both before and after %d\n')
843 ui.warn(_('changeset %d is both before and after %d\n')
844 % (l.id, r.id))
844 % (l.id, r.id))
845
845
846 ui.status(_('%d changeset entries\n') % len(changesets))
846 ui.status(_('%d changeset entries\n') % len(changesets))
847
847
848 hook.hook(ui, None, "cvschangesets", True, changesets=changesets)
848 hook.hook(ui, None, "cvschangesets", True, changesets=changesets)
849
849
850 return changesets
850 return changesets
851
851
852
852
853 def debugcvsps(ui, *args, **opts):
853 def debugcvsps(ui, *args, **opts):
854 '''Read CVS rlog for current directory or named path in
854 '''Read CVS rlog for current directory or named path in
855 repository, and convert the log to changesets based on matching
855 repository, and convert the log to changesets based on matching
856 commit log entries and dates.
856 commit log entries and dates.
857 '''
857 '''
858 opts = pycompat.byteskwargs(opts)
858 opts = pycompat.byteskwargs(opts)
859 if opts["new_cache"]:
859 if opts["new_cache"]:
860 cache = "write"
860 cache = "write"
861 elif opts["update_cache"]:
861 elif opts["update_cache"]:
862 cache = "update"
862 cache = "update"
863 else:
863 else:
864 cache = None
864 cache = None
865
865
866 revisions = opts["revisions"]
866 revisions = opts["revisions"]
867
867
868 try:
868 try:
869 if args:
869 if args:
870 log = []
870 log = []
871 for d in args:
871 for d in args:
872 log += createlog(ui, d, root=opts["root"], cache=cache)
872 log += createlog(ui, d, root=opts["root"], cache=cache)
873 else:
873 else:
874 log = createlog(ui, root=opts["root"], cache=cache)
874 log = createlog(ui, root=opts["root"], cache=cache)
875 except logerror as e:
875 except logerror as e:
876 ui.write("%r\n"%e)
876 ui.write("%r\n"%e)
877 return
877 return
878
878
879 changesets = createchangeset(ui, log, opts["fuzz"])
879 changesets = createchangeset(ui, log, opts["fuzz"])
880 del log
880 del log
881
881
882 # Print changesets (optionally filtered)
882 # Print changesets (optionally filtered)
883
883
884 off = len(revisions)
884 off = len(revisions)
885 branches = {} # latest version number in each branch
885 branches = {} # latest version number in each branch
886 ancestors = {} # parent branch
886 ancestors = {} # parent branch
887 for cs in changesets:
887 for cs in changesets:
888
888
889 if opts["ancestors"]:
889 if opts["ancestors"]:
890 if cs.branch not in branches and cs.parents and cs.parents[0].id:
890 if cs.branch not in branches and cs.parents and cs.parents[0].id:
            ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
                                    cs.parents[0].id)
        branches[cs.branch] = cs.id

        # limit by branches
        if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
            continue

        if not off:
            # Note: trailing spaces on several lines here are needed to have
            #       bug-for-bug compatibility with cvsps.
            ui.write('---------------------\n')
            ui.write(('PatchSet %d \n' % cs.id))
            ui.write(('Date: %s\n' % util.datestr(cs.date,
                                                  '%Y/%m/%d %H:%M:%S %1%2')))
            ui.write(('Author: %s\n' % cs.author))
            ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
            ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
                                        ','.join(cs.tags) or '(none)')))
            if cs.branchpoints:
                ui.write(('Branchpoints: %s \n') %
                         ', '.join(sorted(cs.branchpoints)))
            if opts["parents"] and cs.parents:
                if len(cs.parents) > 1:
                    ui.write(('Parents: %s\n' %
                              (','.join([str(p.id) for p in cs.parents]))))
                else:
                    ui.write(('Parent: %d\n' % cs.parents[0].id))

            if opts["ancestors"]:
                b = cs.branch
                r = []
                while b:
                    b, c = ancestors[b]
                    r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
                if r:
                    ui.write(('Ancestors: %s\n' % (','.join(r))))

            ui.write(('Log:\n'))
            ui.write('%s\n\n' % cs.comment)
            ui.write(('Members: \n'))
            for f in cs.entries:
                fn = f.file
                if fn.startswith(opts["prefix"]):
                    fn = fn[len(opts["prefix"]):]
                ui.write('\t%s:%s->%s%s \n' % (
                    fn, '.'.join([str(x) for x in f.parent]) or 'INITIAL',
                    '.'.join([str(x) for x in f.revision]),
                    ['', '(DEAD)'][f.dead]))
            ui.write('\n')

        # have we seen the start tag?
        if revisions and off:
            if revisions[0] == str(cs.id) or \
                revisions[0] in cs.tags:
                off = False

        # see if we reached the end tag
        if len(revisions) > 1 and not off:
            if revisions[1] == str(cs.id) or \
                revisions[1] in cs.tags:
                break
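
# An illustrative sketch (not part of the original source) of one PatchSet
# entry as the loop above would print it; the author, date and file name
# are made up:
#
# ---------------------
# PatchSet 1
# Date: 2008/01/01 12:00:00 +0000
# Author: frank
# Branch: HEAD
# Tag: (none)
# Log:
# initial import
#
# Members:
#         a:INITIAL->1.1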
@@ -1,1356 +1,1356 @@
# Subversion 1.4/1.5 Python API backend
#
# Copyright(C) 2007 Daniel Holth et al
from __future__ import absolute_import

import os
import re
import tempfile
import xml.dom.minidom

from mercurial.i18n import _
from mercurial import (
    encoding,
    error,
    pycompat,
    util,
    vfs as vfsmod,
)

from . import common

pickle = util.pickle
stringio = util.stringio
propertycache = util.propertycache
urlerr = util.urlerr
urlreq = util.urlreq

commandline = common.commandline
commit = common.commit
converter_sink = common.converter_sink
converter_source = common.converter_source
decodeargs = common.decodeargs
encodeargs = common.encodeargs
makedatetimestamp = common.makedatetimestamp
mapfile = common.mapfile
MissingTool = common.MissingTool
NoRepo = common.NoRepo

# Subversion stuff. Works best with very recent Python SVN bindings
# e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
# these bindings.

try:
    import svn
    import svn.client
    import svn.core
    import svn.ra
    import svn.delta
    from . import transport
    import warnings
    warnings.filterwarnings('ignore',
                            module='svn.core',
                            category=DeprecationWarning)
    svn.core.SubversionException # trigger import to catch error

except ImportError:
    svn = None

class SvnPathNotFound(Exception):
    pass

def revsplit(rev):
    """Parse a revision string and return (uuid, path, revnum).
    >>> revsplit(b'svn:a2147622-4a9f-4db4-a8d3-13562ff547b2'
    ...          b'/proj%20B/mytrunk/mytrunk@1')
    ('a2147622-4a9f-4db4-a8d3-13562ff547b2', '/proj%20B/mytrunk/mytrunk', 1)
    >>> revsplit(b'svn:8af66a51-67f5-4354-b62c-98d67cc7be1d@1')
    ('', '', 1)
    >>> revsplit(b'@7')
    ('', '', 7)
    >>> revsplit(b'7')
    ('', '', 0)
    >>> revsplit(b'bad')
    ('', '', 0)
    """
    parts = rev.rsplit('@', 1)
    revnum = 0
    if len(parts) > 1:
        revnum = int(parts[1])
    parts = parts[0].split('/', 1)
    uuid = ''
    mod = ''
    if len(parts) > 1 and parts[0].startswith('svn:'):
        uuid = parts[0][4:]
        mod = '/' + parts[1]
    return uuid, mod, revnum

def quote(s):
    # As of svn 1.7, many svn calls expect "canonical" paths. In
    # theory, we should call svn.core.*canonicalize() on all paths
    # before passing them to the API. Instead, we assume the base url
    # is canonical and copy the behaviour of svn URL encoding function
    # so we can extend it safely with new components. The "safe"
    # characters were taken from the "svn_uri__char_validity" table in
    # libsvn_subr/path.c.
    return urlreq.quote(s, "!$&'()*+,-./:=@_~")
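
# For instance (an illustrative note, not part of the original source):
# quote('/proj B/trunk') gives '/proj%20B/trunk' -- the space is escaped
# while '/', ':' and '@' are preserved because they are in the safe set.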

def geturl(path):
    try:
        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
    except svn.core.SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass
    if os.path.isdir(path):
        path = os.path.normpath(os.path.abspath(path))
        if pycompat.iswindows:
            path = '/' + util.normpath(path)
        # Module URL is later compared with the repository URL returned
        # by svn API, which is UTF-8.
        path = encoding.tolocal(path)
        path = 'file://%s' % quote(path)
    return svn.core.svn_path_canonicalize(path)

def optrev(number):
    optrev = svn.core.svn_opt_revision_t()
    optrev.kind = svn.core.svn_opt_revision_number
    optrev.value.number = number
    return optrev
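
# As a usage sketch (illustrative, not part of the original source):
# optrev(42) wraps the bare revision number 42 in the svn_opt_revision_t
# structure that svn.client calls such as svn.client.ls() below expect
# for their revision argument.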

class changedpath(object):
    def __init__(self, p):
        self.copyfrom_path = p.copyfrom_path
        self.copyfrom_rev = p.copyfrom_rev
        self.action = p.action

def get_log_child(fp, url, paths, start, end, limit=0,
                  discover_changed_paths=True, strict_node_history=False):
    protocol = -1
    def receiver(orig_paths, revnum, author, date, message, pool):
        paths = {}
        if orig_paths is not None:
            for k, v in orig_paths.iteritems():
                paths[k] = changedpath(v)
        pickle.dump((paths, revnum, author, date, message),
                    fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    except Exception as inst:
        pickle.dump(str(inst), fp, protocol)
    else:
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
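
# To summarize the stream format above (a sketch, not part of the original
# module): the child emits a pickle stream of
# (paths, revnum, author, date, message) tuples, terminated by a None
# sentinel on completion or interruption, or by an error-message string on
# failure; logstream below unpickles entries until it sees that sentinel.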

def debugsvnlog(ui, **opts):
159 """Fetch SVN log in a subprocess and channel them back to parent to
159 """Fetch SVN log in a subprocess and channel them back to parent to
160 avoid memory collection issues.
160 avoid memory collection issues.
161 """
161 """
162 if svn is None:
162 if svn is None:
163 raise error.Abort(_('debugsvnlog could not load Subversion python '
163 raise error.Abort(_('debugsvnlog could not load Subversion python '
164 'bindings'))
164 'bindings'))
165
165
166 args = decodeargs(ui.fin.read())
166 args = decodeargs(ui.fin.read())
167 get_log_child(ui.fout, *args)
167 get_log_child(ui.fout, *args)
168
168
169 class logstream(object):
169 class logstream(object):
170 """Interruptible revision log iterator."""
170 """Interruptible revision log iterator."""
171 def __init__(self, stdout):
171 def __init__(self, stdout):
172 self._stdout = stdout
172 self._stdout = stdout
173
173
174 def __iter__(self):
174 def __iter__(self):
175 while True:
175 while True:
176 try:
176 try:
177 entry = pickle.load(self._stdout)
177 entry = pickle.load(self._stdout)
178 except EOFError:
178 except EOFError:
179 raise error.Abort(_('Mercurial failed to run itself, check'
179 raise error.Abort(_('Mercurial failed to run itself, check'
180 ' hg executable is in PATH'))
180 ' hg executable is in PATH'))
181 try:
181 try:
182 orig_paths, revnum, author, date, message = entry
182 orig_paths, revnum, author, date, message = entry
183 except (TypeError, ValueError):
183 except (TypeError, ValueError):
184 if entry is None:
184 if entry is None:
185 break
185 break
186 raise error.Abort(_("log stream exception '%s'") % entry)
186 raise error.Abort(_("log stream exception '%s'") % entry)
187 yield entry
187 yield entry
188
188
189 def close(self):
189 def close(self):
190 if self._stdout:
190 if self._stdout:
191 self._stdout.close()
191 self._stdout.close()
192 self._stdout = None
192 self._stdout = None
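
# A minimal consumption sketch (hypothetical names, not part of the
# original module), assuming `child` is the subprocess running
# `hg debugsvnlog` with the encoded argument blob on its stdin:
#
#   stream = logstream(child.stdout)
#   for paths, revnum, author, date, message in stream:
#       pass  # process one log entry
#   stream.close()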

class directlogstream(list):
    """Direct revision log iterator.
    This can be used for debugging and development but it will probably leak
    memory and is not suitable for real conversions."""
    def __init__(self, url, paths, start, end, limit=0,
                 discover_changed_paths=True, strict_node_history=False):

        def receiver(orig_paths, revnum, author, date, message, pool):
            paths = {}
            if orig_paths is not None:
                for k, v in orig_paths.iteritems():
                    paths[k] = changedpath(v)
            self.append((paths, revnum, author, date, message))

        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)

    def close(self):
        pass

# Check to see if the given path is a local Subversion repo. Verify this by
# looking for several svn-specific files and directories in the given
# directory.
def filecheck(ui, path, proto):
    for x in ('locks', 'hooks', 'format', 'db'):
        if not os.path.exists(os.path.join(path, x)):
            return False
    return True

# Check to see if a given path is the root of an svn repo over http. We verify
# this by requesting a version-controlled URL we know can't exist and looking
# for the svn-specific "not found" XML.
def httpcheck(ui, path, proto):
    try:
        opener = urlreq.buildopener()
        rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path), 'rb')
        data = rsp.read()
    except urlerr.httperror as inst:
        if inst.code != 404:
            # Except for 404 we cannot know for sure this is not an svn repo
            ui.warn(_('svn: cannot probe remote repository, assume it could '
                      'be a subversion repository. Use --source-type if you '
                      'know better.\n'))
            return True
        data = inst.fp.read()
    except Exception:
        # Could be urlerr.urlerror if the URL is invalid or anything else.
        return False
    return '<m:human-readable errcode="160013">' in data

protomap = {'http': httpcheck,
            'https': httpcheck,
            'file': filecheck,
            }
def issvnurl(ui, url):
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            if (pycompat.iswindows and path[:1] == '/'
                  and path[1:2].isalpha() and path[2:6].lower() == '%3a/'):
                path = path[:2] + ':/' + path[6:]
            path = urlreq.url2pathname(path)
    except ValueError:
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = util.pconvert(path)
    check = protomap.get(proto, lambda *args: False)
    while '/' in path:
        if check(ui, path, proto):
            return True
        path = path.rsplit('/', 1)[0]
    return False
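
# For example (a hypothetical URL, purely illustrative):
# issvnurl(ui, 'http://host/svn/repo/trunk') probes 'host/svn/repo/trunk',
# then 'host/svn/repo', then 'host/svn', and so on, returning True as soon
# as one prefix responds like a Subversion repository root.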

# SVN conversion code stolen from bzr-svn and tailor
#
# Subversion looks like a versioned filesystem, branches structures
# are defined by conventions and not enforced by the tool. First,
# we define the potential branches (modules) as "trunk" and "branches"
# children directories. Revisions are then identified by their
# module and revision number (and a repository identifier).
#
# The revision graph is really a tree (or a forest). By default, a
# revision parent is the previous revision in the same module. If the
# module directory is copied/moved from another module then the
# revision is the module root and its parent the source revision in
# the parent module. A revision has at most one parent.
#
class svn_source(converter_source):
    def __init__(self, ui, repotype, url, revs=None):
        super(svn_source, self).__init__(ui, repotype, url, revs=revs)

        if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
                (os.path.exists(url) and
                 os.path.exists(os.path.join(url, '.svn'))) or
                issvnurl(ui, url)):
            raise NoRepo(_("%s does not look like a Subversion repository")
                         % url)
        if svn is None:
            raise MissingTool(_('could not load Subversion python bindings'))

        try:
            version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
            if version < (1, 4):
                raise MissingTool(_('Subversion python bindings %d.%d found, '
                                    '1.4 or later required') % version)
        except AttributeError:
            raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                                'or later required'))

        self.lastrevs = {}

        latest = None
        try:
            # Support file://path@rev syntax. Useful e.g. to convert
            # deleted branches.
            at = url.rfind('@')
            if at >= 0:
                latest = int(url[at + 1:])
                url = url[:at]
        except ValueError:
            pass
        self.url = geturl(url)
        self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
        try:
            self.transport = transport.SvnRaTransport(url=self.url)
            self.ra = self.transport.ra
            self.ctx = self.transport.client
            self.baseurl = svn.ra.get_repos_root(self.ra)
            # Module is either empty or a repository path starting with
            # a slash and not ending with a slash.
            self.module = urlreq.unquote(self.url[len(self.baseurl):])
            self.prevmodule = None
            self.rootmodule = self.module
            self.commits = {}
            self.paths = {}
            self.uuid = svn.ra.get_uuid(self.ra)
        except svn.core.SubversionException:
            ui.traceback()
            svnversion = '%d.%d.%d' % (svn.core.SVN_VER_MAJOR,
                                       svn.core.SVN_VER_MINOR,
                                       svn.core.SVN_VER_MICRO)
            raise NoRepo(_("%s does not look like a Subversion repository "
                           "to libsvn version %s")
                         % (self.url, svnversion))

        if revs:
            if len(revs) > 1:
                raise error.Abort(_('subversion source does not support '
                                    'specifying multiple revisions'))
            try:
                latest = int(revs[0])
            except ValueError:
                raise error.Abort(_('svn: revision %s is not an integer') %
                                  revs[0])

        trunkcfg = self.ui.config('convert', 'svn.trunk')
        if trunkcfg is None:
            trunkcfg = 'trunk'
        self.trunkname = trunkcfg.strip('/')
        self.startrev = self.ui.config('convert', 'svn.startrev')
        try:
            self.startrev = int(self.startrev)
            if self.startrev < 0:
                self.startrev = 0
        except ValueError:
            raise error.Abort(_('svn: start revision %s is not an integer')
                              % self.startrev)

        try:
            self.head = self.latest(self.module, latest)
        except SvnPathNotFound:
            self.head = None
        if not self.head:
            raise error.Abort(_('no revision found in module %s')
                              % self.module)
        self.last_changed = self.revnum(self.head)

        self._changescache = (None, None)

        if os.path.exists(os.path.join(url, '.svn/entries')):
            self.wc = url
        else:
            self.wc = None
        self.convertfp = None

    def setrevmap(self, revmap):
        lastrevs = {}
        for revid in revmap:
            uuid, module, revnum = revsplit(revid)
            lastrevnum = lastrevs.setdefault(module, revnum)
            if revnum > lastrevnum:
                lastrevs[module] = revnum
        self.lastrevs = lastrevs

    def exists(self, path, optrev):
        try:
            svn.client.ls(self.url.rstrip('/') + '/' + quote(path),
                          optrev, False, self.ctx)
            return True
        except svn.core.SubversionException:
            return False

    def getheads(self):

        def isdir(path, revnum):
            kind = self._checkpath(path, revnum)
            return kind == svn.core.svn_node_dir

        def getcfgpath(name, rev):
            cfgpath = self.ui.config('convert', 'svn.' + name)
            if cfgpath is not None and cfgpath.strip() == '':
                return None
            path = (cfgpath or name).strip('/')
            if not self.exists(path, rev):
                if self.module.endswith(path) and name == 'trunk':
                    # we are converting from inside this directory
                    return None
                if cfgpath:
                    raise error.Abort(_('expected %s to be at %r, but not found'
                                        ) % (name, path))
                return None
            self.ui.note(_('found %s at %r\n') % (name, path))
            return path

        rev = optrev(self.last_changed)
        oldmodule = ''
        trunk = getcfgpath('trunk', rev)
        self.tags = getcfgpath('tags', rev)
        branches = getcfgpath('branches', rev)

        # If the project has a trunk or branches, we will extract heads
        # from them. We keep the project root otherwise.
        if trunk:
            oldmodule = self.module or ''
            self.module += '/' + trunk
            self.head = self.latest(self.module, self.last_changed)
            if not self.head:
                raise error.Abort(_('no revision found in module %s')
                                  % self.module)

        # First head in the list is the module's head
        self.heads = [self.head]
        if self.tags is not None:
            self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

        # Check if branches bring a few more heads to the list
        if branches:
            rpath = self.url.strip('/')
            branchnames = svn.client.ls(rpath + '/' + quote(branches),
                                        rev, False, self.ctx)
            for branch in sorted(branchnames):
                module = '%s/%s/%s' % (oldmodule, branches, branch)
                if not isdir(module, self.last_changed):
                    continue
                brevid = self.latest(module, self.last_changed)
                if not brevid:
                    self.ui.note(_('ignoring empty branch %s\n') % branch)
                    continue
                self.ui.note(_('found branch %s at %d\n') %
                             (branch, self.revnum(brevid)))
                self.heads.append(brevid)

        if self.startrev and self.heads:
            if len(self.heads) > 1:
                raise error.Abort(_('svn: start revision is not supported '
                                    'with more than one branch'))
            revnum = self.revnum(self.heads[0])
            if revnum < self.startrev:
                raise error.Abort(
                    _('svn: no revision found after start revision %d')
                    % self.startrev)

        return self.heads

    def _getchanges(self, rev, full):
        (paths, parents) = self.paths[rev]
        copies = {}
        if parents:
            files, self.removed, copies = self.expandpaths(rev, paths, parents)
        if full or not parents:
            # Perform a full checkout on roots
            uuid, module, revnum = revsplit(rev)
            entries = svn.client.ls(self.baseurl + quote(module),
                                    optrev(revnum), True, self.ctx)
            files = [n for n, e in entries.iteritems()
                     if e.kind == svn.core.svn_node_file]
            self.removed = set()

        files.sort()
        files = zip(files, [rev] * len(files))
        return (files, copies)

    def getchanges(self, rev, full):
        # reuse cache from getchangedfiles
        if self._changescache[0] == rev and not full:
            (files, copies) = self._changescache[1]
        else:
            (files, copies) = self._getchanges(rev, full)
            # caller caches the result, so free it here to release memory
            del self.paths[rev]
        return (files, copies, set())

    def getchangedfiles(self, rev, i):
        # called from filemap - cache computed values for reuse in getchanges
        (files, copies) = self._getchanges(rev, False)
        self._changescache = (rev, (files, copies))
        return [f[0] for f in files]

    def getcommit(self, rev):
        if rev not in self.commits:
            uuid, module, revnum = revsplit(rev)
            self.module = module
            self.reparent(module)
            # We assume that:
            # - requests for revisions after "stop" come from the
            #   revision graph backward traversal. Cache all of them
            #   down to stop, they will be used eventually.
            # - requests for revisions before "stop" come to get
            #   isolated branches parents. Just fetch what is needed.
            stop = self.lastrevs.get(module, 0)
            if revnum < stop:
                stop = revnum + 1
            self._fetch_revisions(revnum, stop)
            if rev not in self.commits:
                raise error.Abort(_('svn: revision %s not found') % revnum)
        revcommit = self.commits[rev]
        # caller caches the result, so free it here to release memory
        del self.commits[rev]
        return revcommit

    def checkrevformat(self, revstr, mapname='splicemap'):
        """ fails if revision format does not match the correct format"""
        if not re.match(r'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-'
                        r'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]'
                        r'{12,12}(.*)\@[0-9]+$',revstr):
            raise error.Abort(_('%s entry %s is not a valid revision'
                                ' identifier') % (mapname, revstr))
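
    # A revision identifier accepted by the pattern above looks like
    # (reusing the uuid from the revsplit doctest, purely illustrative):
    # 'svn:a2147622-4a9f-4db4-a8d3-13562ff547b2/trunk@5'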

    def numcommits(self):
        return int(self.head.rsplit('@', 1)[1]) - self.startrev

    def gettags(self):
        tags = {}
        if self.tags is None:
            return tags

        # svn tags are just a convention, project branches left in a
        # 'tags' directory. There is no other relationship than
        # ancestry, which is expensive to discover and makes them hard
        # to update incrementally. Worse, past revisions may be
        # referenced by tags far away in the future, requiring a deep
        # history traversal on every calculation. Current code
        # performs a single backward traversal, tracking moves within
        # the tags directory (tag renaming) and recording a new tag
        # every time a project is copied from outside the tags
        # directory. It also lists deleted tags; this behaviour may
        # change in the future.
        pendings = []
        tagspath = self.tags
        start = svn.ra.get_latest_revnum(self.ra)
        stream = self._getlog([self.tags], start, self.startrev)
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                if not origpaths:
                    origpaths = []
                copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                          in origpaths.iteritems() if e.copyfrom_path]
                # Apply moves/copies from more specific to general
                copies.sort(reverse=True)

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + '/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            tagpath = source + tag[0][len(dest):]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        pendings.append([source, sourcerev, dest])

                # Filter out tags with children coming from different
                # parts of the repository like:
                # /tags/tag.1 (from /trunk:10)
                # /tags/tag.1/foo (from /branches/foo:12)
                # Here /tags/tag.1 is discarded as well as its children.
                # It happens with tools like cvs2svn. Such tags cannot
                # be represented in mercurial.
                addeds = dict((p, e.copyfrom_path) for p, e
                              in origpaths.iteritems()
                              if e.action == 'A' and e.copyfrom_path)
                badroots = set()
                for destroot in addeds:
                    for source, sourcerev, dest in pendings:
                        if (not dest.startswith(destroot + '/')
                            or source.startswith(addeds[destroot] + '/')):
                            continue
                        badroots.add(destroot)
                        break

                for badroot in badroots:
                    pendings = [p for p in pendings if p[2] != badroot
                                and not p[2].startswith(badroot + '/')]

                # Tell tag renamings from tag creations
                renamings = []
                for source, sourcerev, dest in pendings:
                    tagname = dest.split('/')[-1]
                    if source.startswith(srctagspath):
                        renamings.append([source, sourcerev, tagname])
                        continue
                    if tagname in tags:
                        # Keep the latest tag value
                        continue
                    # From revision may be fake, get one with changes
                    try:
                        tagid = self.latest(source, sourcerev)
                        if tagid and tagname not in tags:
                            tags[tagname] = tagid
                    except SvnPathNotFound:
                        # It happens when we are following directories
                        # we assumed were copied with their parents
                        # but were really created in the tag
                        # directory.
                        pass
                pendings = renamings
                tagspath = srctagspath
        finally:
            stream.close()
        return tags

    def converted(self, rev, destrev):
        if not self.wc:
            return
        if self.convertfp is None:
            self.convertfp = open(os.path.join(self.wc, '.svn', 'hg-shamap'),
                                  'ab')
        self.convertfp.write(util.tonativeeol('%s %d\n'
                                              % (destrev, self.revnum(rev))))
        self.convertfp.flush()

    def revid(self, revnum, module=None):
        return 'svn:%s%s@%s' % (self.uuid, module or self.module, revnum)

    def revnum(self, rev):
        return int(rev.split('@')[-1])
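
    # revid() and revnum() are inverses on the numeric part (illustrative,
    # uuid elided): revid(42, '/trunk') -> 'svn:<uuid>/trunk@42' and
    # revnum('svn:<uuid>/trunk@42') -> 42.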

    def latest(self, path, stop=None):
        """Find the latest revid affecting path, up to stop revision
        number. If stop is None, default to repository latest
        revision. It may return a revision in a different module,
        since a branch may be moved without a change being
        reported. Return None if computed module does not belong to
        rootmodule subtree.
        """
        def findchanges(path, start, stop=None):
            stream = self._getlog([path], start, stop or 1)
            try:
                for entry in stream:
                    paths, revnum, author, date, message = entry
                    if stop is None and paths:
                        # We do not know the latest changed revision,
                        # keep the first one with changed paths.
                        break
                    if revnum <= stop:
                        break

                    for p in paths:
                        if (not path.startswith(p) or
                            not paths[p].copyfrom_path):
                            continue
                        newpath = paths[p].copyfrom_path + path[len(p):]
                        self.ui.debug("branch renamed from %s to %s at %d\n" %
                                      (path, newpath, revnum))
                        path = newpath
                        break
                if not paths:
                    revnum = None
                return revnum, path
            finally:
                stream.close()

        if not path.startswith(self.rootmodule):
            # Requests on foreign branches may be forbidden at server level
            self.ui.debug('ignoring foreign branch %r\n' % path)
            return None

        if stop is None:
            stop = svn.ra.get_latest_revnum(self.ra)
        try:
            prevmodule = self.reparent('')
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
            self.reparent(prevmodule)
        except svn.core.SubversionException:
            dirent = None
        if not dirent:
            raise SvnPathNotFound(_('%s not found up to revision %d')
                                  % (path, stop))

        # stat() gives us the previous revision on this line of
        # development, but it might be in *another module*. Fetch the
        # log and detect renames down to the latest revision.
        revnum, realpath = findchanges(path, stop, dirent.created_rev)
        if revnum is None:
            # Tools like svnsync can create empty revisions, when
            # synchronizing only a subtree for instance. These empty
            # revisions' created_rev still have their original values
            # despite all changes having disappeared and can be
            # returned by ra.stat(), at least when stating the root
            # module. In that case, do not trust created_rev and scan
            # the whole history.
            revnum, realpath = findchanges(path, stop)
            if revnum is None:
                self.ui.debug('ignoring empty branch %r\n' % realpath)
                return None

        if not realpath.startswith(self.rootmodule):
            self.ui.debug('ignoring foreign branch %r\n' % realpath)
            return None
        return self.revid(revnum, realpath)

    def reparent(self, module):
        """Reparent the svn transport and return the previous parent."""
        if self.prevmodule == module:
            return module
        svnurl = self.baseurl + quote(module)
        prevmodule = self.prevmodule
        if prevmodule is None:
            prevmodule = ''
        self.ui.debug("reparent to %s\n" % svnurl)
        svn.ra.reparent(self.ra, svnurl)
        self.prevmodule = module
        return prevmodule

    def expandpaths(self, rev, paths, parents):
        changed, removed = set(), set()
        copies = {}

        new_module, revnum = revsplit(rev)[1:]
        if new_module != self.module:
            self.module = new_module
            self.reparent(self.module)

        for i, (path, ent) in enumerate(paths):
            self.ui.progress(_('scanning paths'), i, item=path,
                             total=len(paths), unit=_('paths'))
            entrypath = self.getrelpath(path)

            kind = self._checkpath(entrypath, revnum)
            if kind == svn.core.svn_node_file:
                changed.add(self.recode(entrypath))
                if not ent.copyfrom_path or not parents:
                    continue
                # Copy sources not in parent revisions cannot be
                # represented, ignore their origin for now
                pmodule, prevnum = revsplit(parents[0])[1:]
                if ent.copyfrom_rev < prevnum:
                    continue
                copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
                if not copyfrom_path:
                    continue
                self.ui.debug("copied to %s from %s@%s\n" %
                              (entrypath, copyfrom_path, ent.copyfrom_rev))
                copies[self.recode(entrypath)] = self.recode(copyfrom_path)
            elif kind == 0: # gone, but had better be a deleted *file*
                self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
                pmodule, prevnum = revsplit(parents[0])[1:]
                parentpath = pmodule + "/" + entrypath
                fromkind = self._checkpath(entrypath, prevnum, pmodule)

                if fromkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))
                elif fromkind == svn.core.svn_node_dir:
                    oroot = parentpath.strip('/')
                    nroot = path.strip('/')
                    children = self._iterfiles(oroot, prevnum)
                    for childpath in children:
                        childpath = childpath.replace(oroot, nroot)
                        childpath = self.getrelpath("/" + childpath, pmodule)
                        if childpath:
                            removed.add(self.recode(childpath))
                else:
                    self.ui.debug('unknown path in revision %d: %s\n' % \
                                  (revnum, path))
            elif kind == svn.core.svn_node_dir:
                if ent.action == 'M':
                    # If the directory just had a prop change,
                    # then we shouldn't need to look for its children.
                    continue
                if ent.action == 'R' and parents:
                    # If a directory is replacing a file, mark the previous
                    # file as deleted
                    pmodule, prevnum = revsplit(parents[0])[1:]
                    pkind = self._checkpath(entrypath, prevnum, pmodule)
                    if pkind == svn.core.svn_node_file:
                        removed.add(self.recode(entrypath))
                    elif pkind == svn.core.svn_node_dir:
                        # We do not know what files were kept or removed,
                        # mark them all as changed.
                        for childpath in self._iterfiles(pmodule, prevnum):
                            childpath = self.getrelpath("/" + childpath)
                            if childpath:
                                changed.add(self.recode(childpath))

                for childpath in self._iterfiles(path, revnum):
                    childpath = self.getrelpath("/" + childpath)
                    if childpath:
                        changed.add(self.recode(childpath))

                # Handle directory copies
                if not ent.copyfrom_path or not parents:
                    continue
                # Copy sources not in parent revisions cannot be
                # represented, ignore their origin for now
                pmodule, prevnum = revsplit(parents[0])[1:]
                if ent.copyfrom_rev < prevnum:
                    continue
                copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
                if not copyfrompath:
                    continue
                self.ui.debug("mark %s came from %s:%d\n"
                              % (path, copyfrompath, ent.copyfrom_rev))
                children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
                for childpath in children:
                    childpath = self.getrelpath("/" + childpath, pmodule)
                    if not childpath:
                        continue
                    copytopath = path + childpath[len(copyfrompath):]
                    copytopath = self.getrelpath(copytopath)
                    copies[self.recode(copytopath)] = self.recode(childpath)

        self.ui.progress(_('scanning paths'), None)
        changed.update(removed)
        return (list(changed), removed, copies)

    def _fetch_revisions(self, from_revnum, to_revnum):
        if from_revnum < to_revnum:
            from_revnum, to_revnum = to_revnum, from_revnum

        self.child_cset = None

        def parselogentry(orig_paths, revnum, author, date, message):
            """Return the parsed commit object or None, and True if
            the revision is a branch root.
            """
            self.ui.debug("parsing revision %d (%d changes)\n" %
                          (revnum, len(orig_paths)))

            branched = False
            rev = self.revid(revnum)
            # branch log might return entries for a parent we already have

            if rev in self.commits or revnum < to_revnum:
                return None, branched

            parents = []
            # check whether this revision is the start of a branch or part
            # of a branch renaming
            orig_paths = sorted(orig_paths.iteritems())
            root_paths = [(p, e) for p, e in orig_paths
                          if self.module.startswith(p)]
            if root_paths:
                path, ent = root_paths[-1]
                if ent.copyfrom_path:
                    branched = True
                    newpath = ent.copyfrom_path + self.module[len(path):]
                    # ent.copyfrom_rev may not be the actual last revision
                    previd = self.latest(newpath, ent.copyfrom_rev)
                    if previd is not None:
                        prevmodule, prevnum = revsplit(previd)[1:]
                        if prevnum >= self.startrev:
                            parents = [previd]
                            self.ui.note(
                                _('found parent of branch %s at %d: %s\n') %
                                (self.module, prevnum, prevmodule))
                else:
                    self.ui.debug("no copyfrom path, don't know what to do.\n")

            paths = []
            # filter out unrelated paths
            for path, ent in orig_paths:
                if self.getrelpath(path) is None:
                    continue
                paths.append((path, ent))

            # Example SVN datetime. Includes microseconds.
            # ISO-8601 conformant
            # '2007-01-04T17:35:00.902377Z'
            date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
895 if self.ui.configbool('convert', 'localtimezone'):
895 if self.ui.configbool('convert', 'localtimezone'):
896 date = makedatetimestamp(date[0])
896 date = makedatetimestamp(date[0])
897
897
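# Illustrative sketch (not part of the original changeset): the microsecond
# suffix of the SVN timestamp is discarded by the date[:19] slice before
# parsing; a standalone equivalent with the stdlib (util.parsedate is
# Mercurial-internal) would be:
import datetime
svn_date = '2007-01-04T17:35:00.902377Z'
parsed = datetime.datetime.strptime(svn_date[:19], '%Y-%m-%dT%H:%M:%S')
# parsed == datetime.datetime(2007, 1, 4, 17, 35), treated as UTC above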
898 if message:
898 if message:
899 log = self.recode(message)
899 log = self.recode(message)
900 else:
900 else:
901 log = ''
901 log = ''
902
902
903 if author:
903 if author:
904 author = self.recode(author)
904 author = self.recode(author)
905 else:
905 else:
906 author = ''
906 author = ''
907
907
908 try:
908 try:
909 branch = self.module.split("/")[-1]
909 branch = self.module.split("/")[-1]
910 if branch == self.trunkname:
910 if branch == self.trunkname:
911 branch = None
911 branch = None
912 except IndexError:
912 except IndexError:
913 branch = None
913 branch = None
914
914
915 cset = commit(author=author,
915 cset = commit(author=author,
916 date=util.datestr(date, '%Y-%m-%d %H:%M:%S %1%2'),
916 date=util.datestr(date, '%Y-%m-%d %H:%M:%S %1%2'),
917 desc=log,
917 desc=log,
918 parents=parents,
918 parents=parents,
919 branch=branch,
919 branch=branch,
920 rev=rev)
920 rev=rev)
921
921
922 self.commits[rev] = cset
922 self.commits[rev] = cset
923 # The parents list is *shared* among self.paths and the
923 # The parents list is *shared* among self.paths and the
924 # commit object. Both will be updated below.
924 # commit object. Both will be updated below.
925 self.paths[rev] = (paths, cset.parents)
925 self.paths[rev] = (paths, cset.parents)
926 if self.child_cset and not self.child_cset.parents:
926 if self.child_cset and not self.child_cset.parents:
927 self.child_cset.parents[:] = [rev]
927 self.child_cset.parents[:] = [rev]
928 self.child_cset = cset
928 self.child_cset = cset
929 return cset, branched
929 return cset, branched
930
930
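# Illustrative sketch (not part of the original changeset): self.paths[rev]
# and cset.parents reference the *same* list object, so the in-place slice
# assignment child_cset.parents[:] = [rev] above is visible through both.
parents = []
pathsentry = (['some/path'], parents)     # stands in for self.paths[rev]
csetparents = parents                     # stands in for cset.parents
csetparents[:] = ['svn:uuid@41']          # hypothetical revision id
assert pathsentry[1] == ['svn:uuid@41']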
931 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
931 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
932 (self.module, from_revnum, to_revnum))
932 (self.module, from_revnum, to_revnum))
933
933
934 try:
934 try:
935 firstcset = None
935 firstcset = None
936 lastonbranch = False
936 lastonbranch = False
937 stream = self._getlog([self.module], from_revnum, to_revnum)
937 stream = self._getlog([self.module], from_revnum, to_revnum)
938 try:
938 try:
939 for entry in stream:
939 for entry in stream:
940 paths, revnum, author, date, message = entry
940 paths, revnum, author, date, message = entry
941 if revnum < self.startrev:
941 if revnum < self.startrev:
942 lastonbranch = True
942 lastonbranch = True
943 break
943 break
944 if not paths:
944 if not paths:
945 self.ui.debug('revision %d has no entries\n' % revnum)
945 self.ui.debug('revision %d has no entries\n' % revnum)
946 # If we ever leave the loop on an empty
946 # If we ever leave the loop on an empty
947 # revision, do not try to get a parent branch
947 # revision, do not try to get a parent branch
948 lastonbranch = lastonbranch or revnum == 0
948 lastonbranch = lastonbranch or revnum == 0
949 continue
949 continue
950 cset, lastonbranch = parselogentry(paths, revnum, author,
950 cset, lastonbranch = parselogentry(paths, revnum, author,
951 date, message)
951 date, message)
952 if cset:
952 if cset:
953 firstcset = cset
953 firstcset = cset
954 if lastonbranch:
954 if lastonbranch:
955 break
955 break
956 finally:
956 finally:
957 stream.close()
957 stream.close()
958
958
959 if not lastonbranch and firstcset and not firstcset.parents:
959 if not lastonbranch and firstcset and not firstcset.parents:
960 # The first revision of the sequence (the last fetched one)
960 # The first revision of the sequence (the last fetched one)
961 # has invalid parents if not a branch root. Find the parent
961 # has invalid parents if not a branch root. Find the parent
962 # revision now, if any.
962 # revision now, if any.
963 try:
963 try:
964 firstrevnum = self.revnum(firstcset.rev)
964 firstrevnum = self.revnum(firstcset.rev)
965 if firstrevnum > 1:
965 if firstrevnum > 1:
966 latest = self.latest(self.module, firstrevnum - 1)
966 latest = self.latest(self.module, firstrevnum - 1)
967 if latest:
967 if latest:
968 firstcset.parents.append(latest)
968 firstcset.parents.append(latest)
969 except SvnPathNotFound:
969 except SvnPathNotFound:
970 pass
970 pass
971 except svn.core.SubversionException as xxx_todo_changeme:
971 except svn.core.SubversionException as xxx_todo_changeme:
972 (inst, num) = xxx_todo_changeme.args
972 (inst, num) = xxx_todo_changeme.args
973 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
973 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
974 raise error.Abort(_('svn: branch has no revision %s')
974 raise error.Abort(_('svn: branch has no revision %s')
975 % to_revnum)
975 % to_revnum)
976 raise
976 raise
977
977
978 def getfile(self, file, rev):
978 def getfile(self, file, rev):
979 # TODO: ra.get_file transmits the whole file instead of diffs.
979 # TODO: ra.get_file transmits the whole file instead of diffs.
980 if file in self.removed:
980 if file in self.removed:
981 return None, None
981 return None, None
982 mode = ''
982 mode = ''
983 try:
983 try:
984 new_module, revnum = revsplit(rev)[1:]
984 new_module, revnum = revsplit(rev)[1:]
985 if self.module != new_module:
985 if self.module != new_module:
986 self.module = new_module
986 self.module = new_module
987 self.reparent(self.module)
987 self.reparent(self.module)
988 io = stringio()
988 io = stringio()
989 info = svn.ra.get_file(self.ra, file, revnum, io)
989 info = svn.ra.get_file(self.ra, file, revnum, io)
990 data = io.getvalue()
990 data = io.getvalue()
991 # ra.get_file() seems to keep a reference on the input buffer
991 # ra.get_file() seems to keep a reference on the input buffer
992 # preventing collection. Release it explicitly.
992 # preventing collection. Release it explicitly.
993 io.close()
993 io.close()
994 if isinstance(info, list):
994 if isinstance(info, list):
995 info = info[-1]
995 info = info[-1]
996 mode = ("svn:executable" in info) and 'x' or ''
996 mode = ("svn:executable" in info) and 'x' or ''
997 mode = ("svn:special" in info) and 'l' or mode
997 mode = ("svn:special" in info) and 'l' or mode
998 except svn.core.SubversionException as e:
998 except svn.core.SubversionException as e:
999 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
999 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
1000 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
1000 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
1001 if e.apr_err in notfound: # File not found
1001 if e.apr_err in notfound: # File not found
1002 return None, None
1002 return None, None
1003 raise
1003 raise
1004 if mode == 'l':
1004 if mode == 'l':
1005 link_prefix = "link "
1005 link_prefix = "link "
1006 if data.startswith(link_prefix):
1006 if data.startswith(link_prefix):
1007 data = data[len(link_prefix):]
1007 data = data[len(link_prefix):]
1008 return data, mode
1008 return data, mode
1009
1009
1010 def _iterfiles(self, path, revnum):
1010 def _iterfiles(self, path, revnum):
1011 """Enumerate all files in path at revnum, recursively."""
1011 """Enumerate all files in path at revnum, recursively."""
1012 path = path.strip('/')
1012 path = path.strip('/')
1013 pool = svn.core.Pool()
1013 pool = svn.core.Pool()
1014 rpath = '/'.join([self.baseurl, quote(path)]).strip('/')
1014 rpath = '/'.join([self.baseurl, quote(path)]).strip('/')
1015 entries = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
1015 entries = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
1016 if path:
1016 if path:
1017 path += '/'
1017 path += '/'
1018 return ((path + p) for p, e in entries.iteritems()
1018 return ((path + p) for p, e in entries.iteritems()
1019 if e.kind == svn.core.svn_node_file)
1019 if e.kind == svn.core.svn_node_file)
1020
1020
1021 def getrelpath(self, path, module=None):
1021 def getrelpath(self, path, module=None):
1022 if module is None:
1022 if module is None:
1023 module = self.module
1023 module = self.module
1024 # Given the repository url of this wc, say
1024 # Given the repository url of this wc, say
1025 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
1025 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
1026 # extract the "entry" portion (a relative path) from what
1026 # extract the "entry" portion (a relative path) from what
1027 # svn log --xml says, i.e.
1027 # svn log --xml says, i.e.
1028 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
1028 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
1029 # that is to say "tests/PloneTestCase.py"
1029 # that is to say "tests/PloneTestCase.py"
1030 if path.startswith(module):
1030 if path.startswith(module):
1031 relative = path.rstrip('/')[len(module):]
1031 relative = path.rstrip('/')[len(module):]
1032 if relative.startswith('/'):
1032 if relative.startswith('/'):
1033 return relative[1:]
1033 return relative[1:]
1034 elif relative == '':
1034 elif relative == '':
1035 return relative
1035 return relative
1036
1036
1037 # The path is outside our tracked tree...
1037 # The path is outside our tracked tree...
1038 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
1038 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
1039 return None
1039 return None
1040
1040
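# Illustrative sketch (not part of the original changeset): the extraction
# described in the comment above, on plain strings (module value assumed).
module = '/CMFPlone/branches/Plone-2_0-branch'
path = '/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py'
relative = path.rstrip('/')[len(module):]
assert relative == '/tests/PloneTestCase.py'
assert relative[1:] == 'tests/PloneTestCase.py'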
1041 def _checkpath(self, path, revnum, module=None):
1041 def _checkpath(self, path, revnum, module=None):
1042 if module is not None:
1042 if module is not None:
1043 prevmodule = self.reparent('')
1043 prevmodule = self.reparent('')
1044 path = module + '/' + path
1044 path = module + '/' + path
1045 try:
1045 try:
1046 # ra.check_path does not like leading slashes very much, it leads
1046 # ra.check_path does not like leading slashes very much, it leads
1047 # to PROPFIND subversion errors
1047 # to PROPFIND subversion errors
1048 return svn.ra.check_path(self.ra, path.strip('/'), revnum)
1048 return svn.ra.check_path(self.ra, path.strip('/'), revnum)
1049 finally:
1049 finally:
1050 if module is not None:
1050 if module is not None:
1051 self.reparent(prevmodule)
1051 self.reparent(prevmodule)
1052
1052
1053 def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
1053 def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
1054 strict_node_history=False):
1054 strict_node_history=False):
1055 # Normalize path names, svn >= 1.5 only wants paths relative to
1055 # Normalize path names, svn >= 1.5 only wants paths relative to
1056 # supplied URL
1056 # supplied URL
1057 relpaths = []
1057 relpaths = []
1058 for p in paths:
1058 for p in paths:
1059 if not p.startswith('/'):
1059 if not p.startswith('/'):
1060 p = self.module + '/' + p
1060 p = self.module + '/' + p
1061 relpaths.append(p.strip('/'))
1061 relpaths.append(p.strip('/'))
1062 args = [self.baseurl, relpaths, start, end, limit,
1062 args = [self.baseurl, relpaths, start, end, limit,
1063 discover_changed_paths, strict_node_history]
1063 discover_changed_paths, strict_node_history]
1064 # developer config: convert.svn.debugsvnlog
1064 # developer config: convert.svn.debugsvnlog
1065 if not self.ui.configbool('convert', 'svn.debugsvnlog'):
1065 if not self.ui.configbool('convert', 'svn.debugsvnlog'):
1066 return directlogstream(*args)
1066 return directlogstream(*args)
1067 arg = encodeargs(args)
1067 arg = encodeargs(args)
1068 hgexe = util.hgexecutable()
1068 hgexe = util.hgexecutable()
1069 cmd = '%s debugsvnlog' % util.shellquote(hgexe)
1069 cmd = '%s debugsvnlog' % util.shellquote(hgexe)
1070 stdin, stdout = util.popen2(util.quotecommand(cmd))
1070 stdin, stdout = util.popen2(util.quotecommand(cmd))
1071 stdin.write(arg)
1071 stdin.write(arg)
1072 try:
1072 try:
1073 stdin.close()
1073 stdin.close()
1074 except IOError:
1074 except IOError:
1075 raise error.Abort(_('Mercurial failed to run itself, check'
1075 raise error.Abort(_('Mercurial failed to run itself, check'
1076 ' hg executable is in PATH'))
1076 ' hg executable is in PATH'))
1077 return logstream(stdout)
1077 return logstream(stdout)
1078
1078
1079 pre_revprop_change = '''#!/bin/sh
1079 pre_revprop_change = '''#!/bin/sh
1080
1080
1081 REPOS="$1"
1081 REPOS="$1"
1082 REV="$2"
1082 REV="$2"
1083 USER="$3"
1083 USER="$3"
1084 PROPNAME="$4"
1084 PROPNAME="$4"
1085 ACTION="$5"
1085 ACTION="$5"
1086
1086
1087 if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
1087 if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
1088 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
1088 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
1089 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi
1089 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi
1090
1090
1091 echo "Changing prohibited revision property" >&2
1091 echo "Changing prohibited revision property" >&2
1092 exit 1
1092 exit 1
1093 '''
1093 '''
1094
1094
1095 class svn_sink(converter_sink, commandline):
1095 class svn_sink(converter_sink, commandline):
1096 commit_re = re.compile(r'Committed revision (\d+).', re.M)
1096 commit_re = re.compile(r'Committed revision (\d+).', re.M)
1097 uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)
1097 uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)
1098
1098
1099 def prerun(self):
1099 def prerun(self):
1100 if self.wc:
1100 if self.wc:
1101 os.chdir(self.wc)
1101 os.chdir(self.wc)
1102
1102
1103 def postrun(self):
1103 def postrun(self):
1104 if self.wc:
1104 if self.wc:
1105 os.chdir(self.cwd)
1105 os.chdir(self.cwd)
1106
1106
1107 def join(self, name):
1107 def join(self, name):
1108 return os.path.join(self.wc, '.svn', name)
1108 return os.path.join(self.wc, '.svn', name)
1109
1109
1110 def revmapfile(self):
1110 def revmapfile(self):
1111 return self.join('hg-shamap')
1111 return self.join('hg-shamap')
1112
1112
1113 def authorfile(self):
1113 def authorfile(self):
1114 return self.join('hg-authormap')
1114 return self.join('hg-authormap')
1115
1115
1116 def __init__(self, ui, repotype, path):
1116 def __init__(self, ui, repotype, path):
1117
1117
1118 converter_sink.__init__(self, ui, repotype, path)
1118 converter_sink.__init__(self, ui, repotype, path)
1119 commandline.__init__(self, ui, 'svn')
1119 commandline.__init__(self, ui, 'svn')
1120 self.delete = []
1120 self.delete = []
1121 self.setexec = []
1121 self.setexec = []
1122 self.delexec = []
1122 self.delexec = []
1123 self.copies = []
1123 self.copies = []
1124 self.wc = None
1124 self.wc = None
1125 self.cwd = pycompat.getcwd()
1125 self.cwd = pycompat.getcwd()
1126
1126
1127 created = False
1127 created = False
1128 if os.path.isfile(os.path.join(path, '.svn', 'entries')):
1128 if os.path.isfile(os.path.join(path, '.svn', 'entries')):
1129 self.wc = os.path.realpath(path)
1129 self.wc = os.path.realpath(path)
1130 self.run0('update')
1130 self.run0('update')
1131 else:
1131 else:
1132 if not re.search(r'^(file|http|https|svn|svn\+ssh)\://', path):
1132 if not re.search(br'^(file|http|https|svn|svn\+ssh)\://', path):
1133 path = os.path.realpath(path)
1133 path = os.path.realpath(path)
1134 if os.path.isdir(os.path.dirname(path)):
1134 if os.path.isdir(os.path.dirname(path)):
1135 if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
1135 if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
1136 ui.status(_('initializing svn repository %r\n') %
1136 ui.status(_('initializing svn repository %r\n') %
1137 os.path.basename(path))
1137 os.path.basename(path))
1138 commandline(ui, 'svnadmin').run0('create', path)
1138 commandline(ui, 'svnadmin').run0('create', path)
1139 created = path
1139 created = path
1140 path = util.normpath(path)
1140 path = util.normpath(path)
1141 if not path.startswith('/'):
1141 if not path.startswith('/'):
1142 path = '/' + path
1142 path = '/' + path
1143 path = 'file://' + path
1143 path = 'file://' + path
1144
1144
1145 wcpath = os.path.join(pycompat.getcwd(), os.path.basename(path) +
1145 wcpath = os.path.join(pycompat.getcwd(), os.path.basename(path) +
1146 '-wc')
1146 '-wc')
1147 ui.status(_('initializing svn working copy %r\n')
1147 ui.status(_('initializing svn working copy %r\n')
1148 % os.path.basename(wcpath))
1148 % os.path.basename(wcpath))
1149 self.run0('checkout', path, wcpath)
1149 self.run0('checkout', path, wcpath)
1150
1150
1151 self.wc = wcpath
1151 self.wc = wcpath
1152 self.opener = vfsmod.vfs(self.wc)
1152 self.opener = vfsmod.vfs(self.wc)
1153 self.wopener = vfsmod.vfs(self.wc)
1153 self.wopener = vfsmod.vfs(self.wc)
1154 self.childmap = mapfile(ui, self.join('hg-childmap'))
1154 self.childmap = mapfile(ui, self.join('hg-childmap'))
1155 if util.checkexec(self.wc):
1155 if util.checkexec(self.wc):
1156 self.is_exec = util.isexec
1156 self.is_exec = util.isexec
1157 else:
1157 else:
1158 self.is_exec = None
1158 self.is_exec = None
1159
1159
1160 if created:
1160 if created:
1161 hook = os.path.join(created, 'hooks', 'pre-revprop-change')
1161 hook = os.path.join(created, 'hooks', 'pre-revprop-change')
1162 fp = open(hook, 'wb')
1162 fp = open(hook, 'wb')
1163 fp.write(pre_revprop_change)
1163 fp.write(pre_revprop_change)
1164 fp.close()
1164 fp.close()
1165 util.setflags(hook, False, True)
1165 util.setflags(hook, False, True)
1166
1166
1167 output = self.run0('info')
1167 output = self.run0('info')
1168 self.uuid = self.uuid_re.search(output).group(1).strip()
1168 self.uuid = self.uuid_re.search(output).group(1).strip()
1169
1169
1170 def wjoin(self, *names):
1170 def wjoin(self, *names):
1171 return os.path.join(self.wc, *names)
1171 return os.path.join(self.wc, *names)
1172
1172
1173 @propertycache
1173 @propertycache
1174 def manifest(self):
1174 def manifest(self):
1175 # As of svn 1.7, the "add" command fails when receiving
1175 # As of svn 1.7, the "add" command fails when receiving
1176 # already tracked entries, so we have to track and filter them
1176 # already tracked entries, so we have to track and filter them
1177 # ourselves.
1177 # ourselves.
1178 m = set()
1178 m = set()
1179 output = self.run0('ls', recursive=True, xml=True)
1179 output = self.run0('ls', recursive=True, xml=True)
1180 doc = xml.dom.minidom.parseString(output)
1180 doc = xml.dom.minidom.parseString(output)
1181 for e in doc.getElementsByTagName('entry'):
1181 for e in doc.getElementsByTagName('entry'):
1182 for n in e.childNodes:
1182 for n in e.childNodes:
1183 if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name':
1183 if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name':
1184 continue
1184 continue
1185 name = ''.join(c.data for c in n.childNodes
1185 name = ''.join(c.data for c in n.childNodes
1186 if c.nodeType == c.TEXT_NODE)
1186 if c.nodeType == c.TEXT_NODE)
1187 # Entries are compared with names coming from
1187 # Entries are compared with names coming from
1188 # mercurial, so bytes with undefined encoding. Our
1188 # mercurial, so bytes with undefined encoding. Our
1189 # best bet is to assume they are in local
1189 # best bet is to assume they are in local
1190 # encoding. They will be passed to command line calls
1190 # encoding. They will be passed to command line calls
1191 # later anyway, so they better be.
1191 # later anyway, so they better be.
1192 m.add(encoding.unitolocal(name))
1192 m.add(encoding.unitolocal(name))
1193 break
1193 break
1194 return m
1194 return m
1195
1195
1196 def putfile(self, filename, flags, data):
1196 def putfile(self, filename, flags, data):
1197 if 'l' in flags:
1197 if 'l' in flags:
1198 self.wopener.symlink(data, filename)
1198 self.wopener.symlink(data, filename)
1199 else:
1199 else:
1200 try:
1200 try:
1201 if os.path.islink(self.wjoin(filename)):
1201 if os.path.islink(self.wjoin(filename)):
1202 os.unlink(filename)
1202 os.unlink(filename)
1203 except OSError:
1203 except OSError:
1204 pass
1204 pass
1205 self.wopener.write(filename, data)
1205 self.wopener.write(filename, data)
1206
1206
1207 if self.is_exec:
1207 if self.is_exec:
1208 if self.is_exec(self.wjoin(filename)):
1208 if self.is_exec(self.wjoin(filename)):
1209 if 'x' not in flags:
1209 if 'x' not in flags:
1210 self.delexec.append(filename)
1210 self.delexec.append(filename)
1211 else:
1211 else:
1212 if 'x' in flags:
1212 if 'x' in flags:
1213 self.setexec.append(filename)
1213 self.setexec.append(filename)
1214 util.setflags(self.wjoin(filename), False, 'x' in flags)
1214 util.setflags(self.wjoin(filename), False, 'x' in flags)
1215
1215
1216 def _copyfile(self, source, dest):
1216 def _copyfile(self, source, dest):
1217 # SVN's copy command pukes if the destination file exists, but
1217 # SVN's copy command pukes if the destination file exists, but
1218 # our copyfile method expects to record a copy that has
1218 # our copyfile method expects to record a copy that has
1219 # already occurred. Cross the semantic gap.
1219 # already occurred. Cross the semantic gap.
1220 wdest = self.wjoin(dest)
1220 wdest = self.wjoin(dest)
1221 exists = os.path.lexists(wdest)
1221 exists = os.path.lexists(wdest)
1222 if exists:
1222 if exists:
1223 fd, tempname = tempfile.mkstemp(
1223 fd, tempname = tempfile.mkstemp(
1224 prefix='hg-copy-', dir=os.path.dirname(wdest))
1224 prefix='hg-copy-', dir=os.path.dirname(wdest))
1225 os.close(fd)
1225 os.close(fd)
1226 os.unlink(tempname)
1226 os.unlink(tempname)
1227 os.rename(wdest, tempname)
1227 os.rename(wdest, tempname)
1228 try:
1228 try:
1229 self.run0('copy', source, dest)
1229 self.run0('copy', source, dest)
1230 finally:
1230 finally:
1231 self.manifest.add(dest)
1231 self.manifest.add(dest)
1232 if exists:
1232 if exists:
1233 try:
1233 try:
1234 os.unlink(wdest)
1234 os.unlink(wdest)
1235 except OSError:
1235 except OSError:
1236 pass
1236 pass
1237 os.rename(tempname, wdest)
1237 os.rename(tempname, wdest)
1238
1238
1239 def dirs_of(self, files):
1239 def dirs_of(self, files):
1240 dirs = set()
1240 dirs = set()
1241 for f in files:
1241 for f in files:
1242 if os.path.isdir(self.wjoin(f)):
1242 if os.path.isdir(self.wjoin(f)):
1243 dirs.add(f)
1243 dirs.add(f)
1244 i = len(f)
1244 i = len(f)
1245 for i in iter(lambda: f.rfind('/', 0, i), -1):
1245 for i in iter(lambda: f.rfind('/', 0, i), -1):
1246 dirs.add(f[:i])
1246 dirs.add(f[:i])
1247 return dirs
1247 return dirs
1248
1248
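# Illustrative sketch (not part of the original changeset): the two-argument
# iter(callable, sentinel) form above keeps calling f.rfind('/', 0, i) until
# it returns -1, yielding every ancestor directory of a path.
f = 'a/b/c/file.txt'
i = len(f)
ancestors = []
for i in iter(lambda: f.rfind('/', 0, i), -1):
    ancestors.append(f[:i])
assert ancestors == ['a/b/c', 'a/b', 'a']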
1249 def add_dirs(self, files):
1249 def add_dirs(self, files):
1250 add_dirs = [d for d in sorted(self.dirs_of(files))
1250 add_dirs = [d for d in sorted(self.dirs_of(files))
1251 if d not in self.manifest]
1251 if d not in self.manifest]
1252 if add_dirs:
1252 if add_dirs:
1253 self.manifest.update(add_dirs)
1253 self.manifest.update(add_dirs)
1254 self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
1254 self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
1255 return add_dirs
1255 return add_dirs
1256
1256
1257 def add_files(self, files):
1257 def add_files(self, files):
1258 files = [f for f in files if f not in self.manifest]
1258 files = [f for f in files if f not in self.manifest]
1259 if files:
1259 if files:
1260 self.manifest.update(files)
1260 self.manifest.update(files)
1261 self.xargs(files, 'add', quiet=True)
1261 self.xargs(files, 'add', quiet=True)
1262 return files
1262 return files
1263
1263
1264 def addchild(self, parent, child):
1264 def addchild(self, parent, child):
1265 self.childmap[parent] = child
1265 self.childmap[parent] = child
1266
1266
1267 def revid(self, rev):
1267 def revid(self, rev):
1268 return u"svn:%s@%s" % (self.uuid, rev)
1268 return u"svn:%s@%s" % (self.uuid, rev)
1269
1269
1270 def putcommit(self, files, copies, parents, commit, source, revmap, full,
1270 def putcommit(self, files, copies, parents, commit, source, revmap, full,
1271 cleanp2):
1271 cleanp2):
1272 for parent in parents:
1272 for parent in parents:
1273 try:
1273 try:
1274 return self.revid(self.childmap[parent])
1274 return self.revid(self.childmap[parent])
1275 except KeyError:
1275 except KeyError:
1276 pass
1276 pass
1277
1277
1278 # Apply changes to working copy
1278 # Apply changes to working copy
1279 for f, v in files:
1279 for f, v in files:
1280 data, mode = source.getfile(f, v)
1280 data, mode = source.getfile(f, v)
1281 if data is None:
1281 if data is None:
1282 self.delete.append(f)
1282 self.delete.append(f)
1283 else:
1283 else:
1284 self.putfile(f, mode, data)
1284 self.putfile(f, mode, data)
1285 if f in copies:
1285 if f in copies:
1286 self.copies.append([copies[f], f])
1286 self.copies.append([copies[f], f])
1287 if full:
1287 if full:
1288 self.delete.extend(sorted(self.manifest.difference(files)))
1288 self.delete.extend(sorted(self.manifest.difference(files)))
1289 files = [f[0] for f in files]
1289 files = [f[0] for f in files]
1290
1290
1291 entries = set(self.delete)
1291 entries = set(self.delete)
1292 files = frozenset(files)
1292 files = frozenset(files)
1293 entries.update(self.add_dirs(files.difference(entries)))
1293 entries.update(self.add_dirs(files.difference(entries)))
1294 if self.copies:
1294 if self.copies:
1295 for s, d in self.copies:
1295 for s, d in self.copies:
1296 self._copyfile(s, d)
1296 self._copyfile(s, d)
1297 self.copies = []
1297 self.copies = []
1298 if self.delete:
1298 if self.delete:
1299 self.xargs(self.delete, 'delete')
1299 self.xargs(self.delete, 'delete')
1300 for f in self.delete:
1300 for f in self.delete:
1301 self.manifest.remove(f)
1301 self.manifest.remove(f)
1302 self.delete = []
1302 self.delete = []
1303 entries.update(self.add_files(files.difference(entries)))
1303 entries.update(self.add_files(files.difference(entries)))
1304 if self.delexec:
1304 if self.delexec:
1305 self.xargs(self.delexec, 'propdel', 'svn:executable')
1305 self.xargs(self.delexec, 'propdel', 'svn:executable')
1306 self.delexec = []
1306 self.delexec = []
1307 if self.setexec:
1307 if self.setexec:
1308 self.xargs(self.setexec, 'propset', 'svn:executable', '*')
1308 self.xargs(self.setexec, 'propset', 'svn:executable', '*')
1309 self.setexec = []
1309 self.setexec = []
1310
1310
1311 fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
1311 fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
1312 fp = os.fdopen(fd, pycompat.sysstr('wb'))
1312 fp = os.fdopen(fd, pycompat.sysstr('wb'))
1313 fp.write(util.tonativeeol(commit.desc))
1313 fp.write(util.tonativeeol(commit.desc))
1314 fp.close()
1314 fp.close()
1315 try:
1315 try:
1316 output = self.run0('commit',
1316 output = self.run0('commit',
1317 username=util.shortuser(commit.author),
1317 username=util.shortuser(commit.author),
1318 file=messagefile,
1318 file=messagefile,
1319 encoding='utf-8')
1319 encoding='utf-8')
1320 try:
1320 try:
1321 rev = self.commit_re.search(output).group(1)
1321 rev = self.commit_re.search(output).group(1)
1322 except AttributeError:
1322 except AttributeError:
1323 if parents and not files:
1323 if parents and not files:
1324 return parents[0]
1324 return parents[0]
1325 self.ui.warn(_('unexpected svn output:\n'))
1325 self.ui.warn(_('unexpected svn output:\n'))
1326 self.ui.warn(output)
1326 self.ui.warn(output)
1327 raise error.Abort(_('unable to cope with svn output'))
1327 raise error.Abort(_('unable to cope with svn output'))
1328 if commit.rev:
1328 if commit.rev:
1329 self.run('propset', 'hg:convert-rev', commit.rev,
1329 self.run('propset', 'hg:convert-rev', commit.rev,
1330 revprop=True, revision=rev)
1330 revprop=True, revision=rev)
1331 if commit.branch and commit.branch != 'default':
1331 if commit.branch and commit.branch != 'default':
1332 self.run('propset', 'hg:convert-branch', commit.branch,
1332 self.run('propset', 'hg:convert-branch', commit.branch,
1333 revprop=True, revision=rev)
1333 revprop=True, revision=rev)
1334 for parent in parents:
1334 for parent in parents:
1335 self.addchild(parent, rev)
1335 self.addchild(parent, rev)
1336 return self.revid(rev)
1336 return self.revid(rev)
1337 finally:
1337 finally:
1338 os.unlink(messagefile)
1338 os.unlink(messagefile)
1339
1339
1340 def puttags(self, tags):
1340 def puttags(self, tags):
1341 self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
1341 self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
1342 return None, None
1342 return None, None
1343
1343
1344 def hascommitfrommap(self, rev):
1344 def hascommitfrommap(self, rev):
1345 # We trust that revisions referenced in a map are still present
1345 # We trust that revisions referenced in a map are still present
1346 # TODO: implement something better if necessary and feasible
1346 # TODO: implement something better if necessary and feasible
1347 return True
1347 return True
1348
1348
1349 def hascommitforsplicemap(self, rev):
1349 def hascommitforsplicemap(self, rev):
1350 # This is not correct as one can convert to an existing subversion
1350 # This is not correct as one can convert to an existing subversion
1351 # repository and childmap would not list all revisions. Too bad.
1351 # repository and childmap would not list all revisions. Too bad.
1352 if rev in self.childmap:
1352 if rev in self.childmap:
1353 return True
1353 return True
1354 raise error.Abort(_('splice map revision %s not found in subversion '
1354 raise error.Abort(_('splice map revision %s not found in subversion '
1355 'child map (revision lookups are not implemented)')
1355 'child map (revision lookups are not implemented)')
1356 % rev)
1356 % rev)
@@ -1,481 +1,481 b''
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 #
2 #
3 # Copyright 2017 Facebook, Inc.
3 # Copyright 2017 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import hashlib
10 import hashlib
11 import json
11 import json
12 import os
12 import os
13 import re
13 import re
14 import socket
14 import socket
15
15
16 from mercurial.i18n import _
16 from mercurial.i18n import _
17
17
18 from mercurial import (
18 from mercurial import (
19 error,
19 error,
20 pathutil,
20 pathutil,
21 url as urlmod,
21 url as urlmod,
22 util,
22 util,
23 vfs as vfsmod,
23 vfs as vfsmod,
24 worker,
24 worker,
25 )
25 )
26
26
27 from ..largefiles import lfutil
27 from ..largefiles import lfutil
28
28
29 # 64 bytes for SHA256
29 # 64 bytes for SHA256
30 _lfsre = re.compile(r'\A[a-f0-9]{64}\Z')
30 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
31
31
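# Illustrative sketch (not part of the original changeset): Mercurial paths
# and oids are byte strings, and on Python 3 a str pattern cannot be matched
# against bytes, which is why the patterns above gain a b'' prefix.
import re
re.compile(br'\A[a-f0-9]{64}\Z').match(b'ab' * 32)      # matches
try:
    re.compile(r'\A[a-f0-9]{64}\Z').match(b'ab' * 32)    # str pattern vs bytes
except TypeError:
    pass  # raised on Python 3: cannot use a string pattern on a bytes-like object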
32 class lfsvfs(vfsmod.vfs):
32 class lfsvfs(vfsmod.vfs):
33 def join(self, path):
33 def join(self, path):
34 """split the path at first two characters, like: XX/XXXXX..."""
34 """split the path at first two characters, like: XX/XXXXX..."""
35 if not _lfsre.match(path):
35 if not _lfsre.match(path):
36 raise error.ProgrammingError('unexpected lfs path: %s' % path)
36 raise error.ProgrammingError('unexpected lfs path: %s' % path)
37 return super(lfsvfs, self).join(path[0:2], path[2:])
37 return super(lfsvfs, self).join(path[0:2], path[2:])
38
38
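# Illustrative sketch (not part of the original changeset): a 64-hex-digit
# oid is fanned out into a two-level directory layout (hypothetical oid).
oid = 'd2' + 'a8' * 31          # 64 hex characters
assert len(oid) == 64
assert oid[0:2] + '/' + oid[2:] == 'd2/' + 'a8' * 31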
39 def walk(self, path=None, onerror=None):
39 def walk(self, path=None, onerror=None):
40 """Yield (dirpath, [], oids) tuple for blobs under path
40 """Yield (dirpath, [], oids) tuple for blobs under path
41
41
42 Oids only exist in the root of this vfs, so dirpath is always ''.
42 Oids only exist in the root of this vfs, so dirpath is always ''.
43 """
43 """
44 root = os.path.normpath(self.base)
44 root = os.path.normpath(self.base)
45 # when dirpath == root, dirpath[prefixlen:] becomes empty
45 # when dirpath == root, dirpath[prefixlen:] becomes empty
46 # because len(dirpath) < prefixlen.
46 # because len(dirpath) < prefixlen.
47 prefixlen = len(pathutil.normasprefix(root))
47 prefixlen = len(pathutil.normasprefix(root))
48 oids = []
48 oids = []
49
49
50 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
50 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
51 onerror=onerror):
51 onerror=onerror):
52 dirpath = dirpath[prefixlen:]
52 dirpath = dirpath[prefixlen:]
53
53
54 # Silently skip unexpected files and directories
54 # Silently skip unexpected files and directories
55 if len(dirpath) == 2:
55 if len(dirpath) == 2:
56 oids.extend([dirpath + f for f in files
56 oids.extend([dirpath + f for f in files
57 if _lfsre.match(dirpath + f)])
57 if _lfsre.match(dirpath + f)])
58
58
59 yield ('', [], oids)
59 yield ('', [], oids)
60
60
61 class filewithprogress(object):
61 class filewithprogress(object):
62 """a file-like object that supports __len__ and read.
62 """a file-like object that supports __len__ and read.
63
63
64 Useful to provide progress information for how many bytes are read.
64 Useful to provide progress information for how many bytes are read.
65 """
65 """
66
66
67 def __init__(self, fp, callback):
67 def __init__(self, fp, callback):
68 self._fp = fp
68 self._fp = fp
69 self._callback = callback # func(readsize)
69 self._callback = callback # func(readsize)
70 fp.seek(0, os.SEEK_END)
70 fp.seek(0, os.SEEK_END)
71 self._len = fp.tell()
71 self._len = fp.tell()
72 fp.seek(0)
72 fp.seek(0)
73
73
74 def __len__(self):
74 def __len__(self):
75 return self._len
75 return self._len
76
76
77 def read(self, size):
77 def read(self, size):
78 if self._fp is None:
78 if self._fp is None:
79 return b''
79 return b''
80 data = self._fp.read(size)
80 data = self._fp.read(size)
81 if data:
81 if data:
82 if self._callback:
82 if self._callback:
83 self._callback(len(data))
83 self._callback(len(data))
84 else:
84 else:
85 self._fp.close()
85 self._fp.close()
86 self._fp = None
86 self._fp = None
87 return data
87 return data
88
88
89 class local(object):
89 class local(object):
90 """Local blobstore for large file contents.
90 """Local blobstore for large file contents.
91
91
92 This blobstore is used both as a cache and as a staging area for large blobs
92 This blobstore is used both as a cache and as a staging area for large blobs
93 to be uploaded to the remote blobstore.
93 to be uploaded to the remote blobstore.
94 """
94 """
95
95
96 def __init__(self, repo):
96 def __init__(self, repo):
97 fullpath = repo.svfs.join('lfs/objects')
97 fullpath = repo.svfs.join('lfs/objects')
98 self.vfs = lfsvfs(fullpath)
98 self.vfs = lfsvfs(fullpath)
99 usercache = lfutil._usercachedir(repo.ui, 'lfs')
99 usercache = lfutil._usercachedir(repo.ui, 'lfs')
100 self.cachevfs = lfsvfs(usercache)
100 self.cachevfs = lfsvfs(usercache)
101 self.ui = repo.ui
101 self.ui = repo.ui
102
102
103 def open(self, oid):
103 def open(self, oid):
104 """Open a read-only file descriptor to the named blob, in either the
104 """Open a read-only file descriptor to the named blob, in either the
105 usercache or the local store."""
105 usercache or the local store."""
106 # The usercache is the most likely place to hold the file. Commit will
106 # The usercache is the most likely place to hold the file. Commit will
107 # write to both it and the local store, as will anything that downloads
107 # write to both it and the local store, as will anything that downloads
108 # the blobs. However, things like clone without an update won't
108 # the blobs. However, things like clone without an update won't
109 # populate the local store. For an init + push of a local clone,
109 # populate the local store. For an init + push of a local clone,
110 # the usercache is the only place it _could_ be. If not present, the
110 # the usercache is the only place it _could_ be. If not present, the
111 # missing file msg here will indicate the local repo, not the usercache.
111 # missing file msg here will indicate the local repo, not the usercache.
112 if self.cachevfs.exists(oid):
112 if self.cachevfs.exists(oid):
113 return self.cachevfs(oid, 'rb')
113 return self.cachevfs(oid, 'rb')
114
114
115 return self.vfs(oid, 'rb')
115 return self.vfs(oid, 'rb')
116
116
117 def download(self, oid, src):
117 def download(self, oid, src):
118 """Read the blob from the remote source in chunks, verify the content,
118 """Read the blob from the remote source in chunks, verify the content,
119 and write to this local blobstore."""
119 and write to this local blobstore."""
120 sha256 = hashlib.sha256()
120 sha256 = hashlib.sha256()
121
121
122 with self.vfs(oid, 'wb', atomictemp=True) as fp:
122 with self.vfs(oid, 'wb', atomictemp=True) as fp:
123 for chunk in util.filechunkiter(src, size=1048576):
123 for chunk in util.filechunkiter(src, size=1048576):
124 fp.write(chunk)
124 fp.write(chunk)
125 sha256.update(chunk)
125 sha256.update(chunk)
126
126
127 realoid = sha256.hexdigest()
127 realoid = sha256.hexdigest()
128 if realoid != oid:
128 if realoid != oid:
129 raise error.Abort(_('corrupt remote lfs object: %s') % oid)
129 raise error.Abort(_('corrupt remote lfs object: %s') % oid)
130
130
131 # XXX: should we verify the content of the cache, and hardlink back to
131 # XXX: should we verify the content of the cache, and hardlink back to
132 # the local store on success, but truncate, write and link on failure?
132 # the local store on success, but truncate, write and link on failure?
133 if not self.cachevfs.exists(oid):
133 if not self.cachevfs.exists(oid):
134 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
134 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
135 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
135 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
136
136
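# Illustrative sketch (not part of the original changeset): the same
# chunk-at-a-time hashing used above, on an in-memory buffer.
import hashlib
data = b'x' * 3000000                      # hypothetical blob content
h = hashlib.sha256()
for start in range(0, len(data), 1048576):
    h.update(data[start:start + 1048576])
assert h.hexdigest() == hashlib.sha256(data).hexdigest()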
137 def write(self, oid, data):
137 def write(self, oid, data):
138 """Write blob to local blobstore.
138 """Write blob to local blobstore.
139
139
140 This should only be called from the filelog during a commit or similar.
140 This should only be called from the filelog during a commit or similar.
141 As such, there is no need to verify the data. Imports from a remote
141 As such, there is no need to verify the data. Imports from a remote
142 store must use ``download()`` instead."""
142 store must use ``download()`` instead."""
143 with self.vfs(oid, 'wb', atomictemp=True) as fp:
143 with self.vfs(oid, 'wb', atomictemp=True) as fp:
144 fp.write(data)
144 fp.write(data)
145
145
146 # XXX: should we verify the content of the cache, and hardlink back to
146 # XXX: should we verify the content of the cache, and hardlink back to
147 # the local store on success, but truncate, write and link on failure?
147 # the local store on success, but truncate, write and link on failure?
148 if not self.cachevfs.exists(oid):
148 if not self.cachevfs.exists(oid):
149 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
149 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
150 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
150 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
151
151
152 def read(self, oid, verify=True):
152 def read(self, oid, verify=True):
153 """Read blob from local blobstore."""
153 """Read blob from local blobstore."""
154 if not self.vfs.exists(oid):
154 if not self.vfs.exists(oid):
155 blob = self._read(self.cachevfs, oid, verify)
155 blob = self._read(self.cachevfs, oid, verify)
156
156
157 # Even if revlog will verify the content, it needs to be verified
157 # Even if revlog will verify the content, it needs to be verified
158 # now before making the hardlink to avoid propagating corrupt blobs.
158 # now before making the hardlink to avoid propagating corrupt blobs.
159 # Don't abort if corruption is detected, because `hg verify` will
159 # Don't abort if corruption is detected, because `hg verify` will
160 # give more useful info about the corruption; simply don't add the
160 # give more useful info about the corruption; simply don't add the
161 # hardlink.
161 # hardlink.
162 if verify or hashlib.sha256(blob).hexdigest() == oid:
162 if verify or hashlib.sha256(blob).hexdigest() == oid:
163 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
163 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
164 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
164 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
165 else:
165 else:
166 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
166 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
167 blob = self._read(self.vfs, oid, verify)
167 blob = self._read(self.vfs, oid, verify)
168 return blob
168 return blob
169
169
170 def _read(self, vfs, oid, verify):
170 def _read(self, vfs, oid, verify):
171 """Read blob (after verifying) from the given store"""
171 """Read blob (after verifying) from the given store"""
172 blob = vfs.read(oid)
172 blob = vfs.read(oid)
173 if verify:
173 if verify:
174 _verify(oid, blob)
174 _verify(oid, blob)
175 return blob
175 return blob
176
176
177 def has(self, oid):
177 def has(self, oid):
178 """Returns True if the local blobstore contains the requested blob,
178 """Returns True if the local blobstore contains the requested blob,
179 False otherwise."""
179 False otherwise."""
180 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
180 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
181
181
182 class _gitlfsremote(object):
182 class _gitlfsremote(object):
183
183
184 def __init__(self, repo, url):
184 def __init__(self, repo, url):
185 ui = repo.ui
185 ui = repo.ui
186 self.ui = ui
186 self.ui = ui
187 baseurl, authinfo = url.authinfo()
187 baseurl, authinfo = url.authinfo()
188 self.baseurl = baseurl.rstrip('/')
188 self.baseurl = baseurl.rstrip('/')
189 useragent = repo.ui.config('experimental', 'lfs.user-agent')
189 useragent = repo.ui.config('experimental', 'lfs.user-agent')
190 if not useragent:
190 if not useragent:
191 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
191 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
192 self.urlopener = urlmod.opener(ui, authinfo, useragent)
192 self.urlopener = urlmod.opener(ui, authinfo, useragent)
193 self.retry = ui.configint('lfs', 'retry')
193 self.retry = ui.configint('lfs', 'retry')
194
194
195 def writebatch(self, pointers, fromstore):
195 def writebatch(self, pointers, fromstore):
196 """Batch upload from local to remote blobstore."""
196 """Batch upload from local to remote blobstore."""
197 self._batch(_deduplicate(pointers), fromstore, 'upload')
197 self._batch(_deduplicate(pointers), fromstore, 'upload')
198
198
199 def readbatch(self, pointers, tostore):
199 def readbatch(self, pointers, tostore):
200 """Batch download from remote to local blostore."""
200 """Batch download from remote to local blostore."""
201 self._batch(_deduplicate(pointers), tostore, 'download')
201 self._batch(_deduplicate(pointers), tostore, 'download')
202
202
203 def _batchrequest(self, pointers, action):
203 def _batchrequest(self, pointers, action):
204 """Get metadata about objects pointed by pointers for given action
204 """Get metadata about objects pointed by pointers for given action
205
205
206 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
206 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
207 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
207 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
208 """
208 """
209 objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
209 objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
210 requestdata = json.dumps({
210 requestdata = json.dumps({
211 'objects': objects,
211 'objects': objects,
212 'operation': action,
212 'operation': action,
213 })
213 })
214 batchreq = util.urlreq.request('%s/objects/batch' % self.baseurl,
214 batchreq = util.urlreq.request('%s/objects/batch' % self.baseurl,
215 data=requestdata)
215 data=requestdata)
216 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
216 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
217 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
217 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
218 try:
218 try:
219 rawjson = self.urlopener.open(batchreq).read()
219 rawjson = self.urlopener.open(batchreq).read()
220 except util.urlerr.httperror as ex:
220 except util.urlerr.httperror as ex:
221 raise LfsRemoteError(_('LFS HTTP error: %s (action=%s)')
221 raise LfsRemoteError(_('LFS HTTP error: %s (action=%s)')
222 % (ex, action))
222 % (ex, action))
223 try:
223 try:
224 response = json.loads(rawjson)
224 response = json.loads(rawjson)
225 except ValueError:
225 except ValueError:
226 raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
226 raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
227 % rawjson)
227 % rawjson)
228 return response
228 return response
229
229
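# Illustrative sketch (not part of the original changeset): the request body
# built above for a single pointer (hypothetical oid and size); the server's
# reply has the shape documented in the git-lfs batch API.
import json
requestdata = json.dumps({
    'objects': [{'oid': '31d0' + 'ab' * 30, 'size': 12}],
    'operation': 'download',
})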
230 def _checkforservererror(self, pointers, responses, action):
230 def _checkforservererror(self, pointers, responses, action):
231 """Scans errors from objects
231 """Scans errors from objects
232
232
233 Raises LfsRemoteError if any objects have an error"""
233 Raises LfsRemoteError if any objects have an error"""
234 for response in responses:
234 for response in responses:
235 # The server should return 404 when objects cannot be found. Some
235 # The server should return 404 when objects cannot be found. Some
236 # server implementations (e.g. lfs-test-server) do not set "error"
236 # server implementations (e.g. lfs-test-server) do not set "error"
237 # but just remove "download" from "actions". Treat that case
237 # but just remove "download" from "actions". Treat that case
238 # as the same as 404 error.
238 # as the same as 404 error.
239 notfound = (response.get('error', {}).get('code') == 404
239 notfound = (response.get('error', {}).get('code') == 404
240 or (action == 'download'
240 or (action == 'download'
241 and action not in response.get('actions', [])))
241 and action not in response.get('actions', [])))
242 if notfound:
242 if notfound:
243 ptrmap = {p.oid(): p for p in pointers}
243 ptrmap = {p.oid(): p for p in pointers}
244 p = ptrmap.get(response['oid'], None)
244 p = ptrmap.get(response['oid'], None)
245 if p:
245 if p:
246 filename = getattr(p, 'filename', 'unknown')
246 filename = getattr(p, 'filename', 'unknown')
247 raise LfsRemoteError(
247 raise LfsRemoteError(
248 _(('LFS server error. Remote object '
248 _(('LFS server error. Remote object '
249 'for "%s" not found: %r')) % (filename, response))
249 'for "%s" not found: %r')) % (filename, response))
250 else:
250 else:
251 raise LfsRemoteError(
251 raise LfsRemoteError(
252 _('LFS server error. Unsolicited response for oid %s')
252 _('LFS server error. Unsolicited response for oid %s')
253 % response['oid'])
253 % response['oid'])
254 if 'error' in response:
254 if 'error' in response:
255 raise LfsRemoteError(_('LFS server error: %r') % response)
255 raise LfsRemoteError(_('LFS server error: %r') % response)
256
256
257 def _extractobjects(self, response, pointers, action):
257 def _extractobjects(self, response, pointers, action):
258 """extract objects from response of the batch API
258 """extract objects from response of the batch API
259
259
260 response: parsed JSON object returned by batch API
260 response: parsed JSON object returned by batch API
261 return response['objects'] filtered by action
261 return response['objects'] filtered by action
262 raise if any object has an error
262 raise if any object has an error
263 """
263 """
264 # Scan errors from objects - fail early
264 # Scan errors from objects - fail early
265 objects = response.get('objects', [])
265 objects = response.get('objects', [])
266 self._checkforservererror(pointers, objects, action)
266 self._checkforservererror(pointers, objects, action)
267
267
268 # Filter objects with given action. Practically, this skips uploading
268 # Filter objects with given action. Practically, this skips uploading
269 # objects which exist in the server.
269 # objects which exist in the server.
270 filteredobjects = [o for o in objects if action in o.get('actions', [])]
270 filteredobjects = [o for o in objects if action in o.get('actions', [])]
271
271
272 return filteredobjects
272 return filteredobjects
273
273
274 def _basictransfer(self, obj, action, localstore):
274 def _basictransfer(self, obj, action, localstore):
275 """Download or upload a single object using basic transfer protocol
275 """Download or upload a single object using basic transfer protocol
276
276
277 obj: dict, an object description returned by batch API
277 obj: dict, an object description returned by batch API
278 action: string, one of ['upload', 'download']
278 action: string, one of ['upload', 'download']
279 localstore: blobstore.local
279 localstore: blobstore.local
280
280
281 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
281 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
282 basic-transfers.md
282 basic-transfers.md
283 """
283 """
284 oid = str(obj['oid'])
284 oid = str(obj['oid'])
285
285
286 href = str(obj['actions'][action].get('href'))
286 href = str(obj['actions'][action].get('href'))
287 headers = obj['actions'][action].get('header', {}).items()
287 headers = obj['actions'][action].get('header', {}).items()
288
288
289 request = util.urlreq.request(href)
289 request = util.urlreq.request(href)
290 if action == 'upload':
290 if action == 'upload':
291 # If uploading blobs, read data from local blobstore.
291 # If uploading blobs, read data from local blobstore.
292 with localstore.open(oid) as fp:
292 with localstore.open(oid) as fp:
293 _verifyfile(oid, fp)
293 _verifyfile(oid, fp)
294 request.data = filewithprogress(localstore.open(oid), None)
294 request.data = filewithprogress(localstore.open(oid), None)
295 request.get_method = lambda: 'PUT'
295 request.get_method = lambda: 'PUT'
296
296
297 for k, v in headers:
297 for k, v in headers:
298 request.add_header(k, v)
298 request.add_header(k, v)
299
299
300 response = b''
300 response = b''
301 try:
301 try:
302 req = self.urlopener.open(request)
302 req = self.urlopener.open(request)
303 if action == 'download':
303 if action == 'download':
304 # If downloading blobs, store downloaded data to local blobstore
304 # If downloading blobs, store downloaded data to local blobstore
305 localstore.download(oid, req)
305 localstore.download(oid, req)
306 else:
306 else:
307 while True:
307 while True:
308 data = req.read(1048576)
308 data = req.read(1048576)
309 if not data:
309 if not data:
310 break
310 break
311 response += data
311 response += data
312 if response:
312 if response:
313 self.ui.debug('lfs %s response: %s' % (action, response))
313 self.ui.debug('lfs %s response: %s' % (action, response))
314 except util.urlerr.httperror as ex:
314 except util.urlerr.httperror as ex:
315 if self.ui.debugflag:
315 if self.ui.debugflag:
316 self.ui.debug('%s: %s\n' % (oid, ex.read()))
316 self.ui.debug('%s: %s\n' % (oid, ex.read()))
317 raise LfsRemoteError(_('HTTP error: %s (oid=%s, action=%s)')
317 raise LfsRemoteError(_('HTTP error: %s (oid=%s, action=%s)')
318 % (ex, oid, action))
318 % (ex, oid, action))

    def _batch(self, pointers, localstore, action):
        if action not in ['upload', 'download']:
            raise error.ProgrammingError('invalid Git-LFS action: %s' % action)

        response = self._batchrequest(pointers, action)
        objects = self._extractobjects(response, pointers, action)
        total = sum(x.get('size', 0) for x in objects)
        sizes = {}
        for obj in objects:
            sizes[obj.get('oid')] = obj.get('size', 0)
        topic = {'upload': _('lfs uploading'),
                 'download': _('lfs downloading')}[action]
        if len(objects) > 1:
            self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
                         % (len(objects), util.bytecount(total)))
        self.ui.progress(topic, 0, total=total)
        def transfer(chunk):
            for obj in chunk:
                objsize = obj.get('size', 0)
                if self.ui.verbose:
                    if action == 'download':
                        msg = _('lfs: downloading %s (%s)\n')
                    elif action == 'upload':
                        msg = _('lfs: uploading %s (%s)\n')
                    self.ui.note(msg % (obj.get('oid'),
                                        util.bytecount(objsize)))
                retry = self.retry
                while True:
                    try:
                        self._basictransfer(obj, action, localstore)
                        yield 1, obj.get('oid')
                        break
                    except socket.error as ex:
                        if retry > 0:
                            self.ui.note(
                                _('lfs: failed: %r (remaining retry %d)\n')
                                % (ex, retry))
                            retry -= 1
                            continue
                        raise
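        # transfer() is a generator: each object is retried up to self.retry
        # times on socket.error, and a (1, oid) pair is yielded per completed
        # transfer so the caller can track progress incrementally.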

        # Until https multiplexing gets sorted out
        if self.ui.configbool('experimental', 'lfs.worker-enable'):
            oids = worker.worker(self.ui, 0.1, transfer, (),
                                 sorted(objects, key=lambda o: o.get('oid')))
        else:
            oids = transfer(sorted(objects, key=lambda o: o.get('oid')))
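        # worker.worker() may split the sorted object list across worker
        # processes (0.1 is roughly the estimated cost per item used to decide
        # whether forking is worthwhile); the serial fallback calls the same
        # generator directly, so either branch yields (count, oid) pairs.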

        processed = 0
        blobs = 0
        for _one, oid in oids:
            processed += sizes[oid]
            blobs += 1
            self.ui.progress(topic, processed, total=total)
            self.ui.note(_('lfs: processed: %s\n') % oid)
        self.ui.progress(topic, pos=None, total=total)

        if blobs > 0:
            if action == 'upload':
                self.ui.status(_('lfs: uploaded %d files (%s)\n')
                               % (blobs, util.bytecount(processed)))
            # TODO: coalesce the download requests, and comment this in
            #elif action == 'download':
            #    self.ui.status(_('lfs: downloaded %d files (%s)\n')
            #                   % (blobs, util.bytecount(processed)))

    def __del__(self):
        # copied from mercurial/httppeer.py
        urlopener = getattr(self, 'urlopener', None)
        if urlopener:
            for h in urlopener.handlers:
                h.close()
                getattr(h, "close_all", lambda: None)()

class _dummyremote(object):
    """Dummy store storing blobs to temp directory."""

    def __init__(self, repo, url):
        fullpath = repo.vfs.join('lfs', url.path)
        self.vfs = lfsvfs(fullpath)

    def writebatch(self, pointers, fromstore):
        for p in _deduplicate(pointers):
            content = fromstore.read(p.oid(), verify=True)
            with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
                fp.write(content)

    def readbatch(self, pointers, tostore):
        for p in _deduplicate(pointers):
            with self.vfs(p.oid(), 'rb') as fp:
                tostore.download(p.oid(), fp)
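
# Note: _dummyremote actually writes under the repository's private directory
# (repo.vfs is rooted at .hg/), so an lfs.url like 'file:///tmp/store' (an
# illustrative path, not a real requirement) maps somewhere beneath .hg/lfs/.
# It exists mainly for tests.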

class _nullremote(object):
    """Null store storing blobs to /dev/null."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore):
        pass

    def readbatch(self, pointers, tostore):
        pass

class _promptremote(object):
    """Prompt user to set lfs.url when accessed."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore, ui=None):
        self._prompt()

    def readbatch(self, pointers, tostore, ui=None):
        self._prompt()

    def _prompt(self):
        raise error.Abort(_('lfs.url needs to be configured'))

_storemap = {
    'https': _gitlfsremote,
    'http': _gitlfsremote,
    'file': _dummyremote,
    'null': _nullremote,
    None: _promptremote,
}
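
# Illustrative lfs.url values and the stores they select (example URLs only):
#   https://example.com/repo.git/info/lfs -> _gitlfsremote (real Git-LFS server)
#   file:///tmp/lfs-store                 -> _dummyremote  (local directory)
#   null://                               -> _nullremote   (discard writes)
#   (lfs.url unset)                       -> _promptremote (abort with a hint)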

def _deduplicate(pointers):
    """Remove any duplicate oids that exist in the list"""
    reduced = util.sortdict()
    for p in pointers:
        reduced[p.oid()] = p
    return reduced.values()
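
# Hypothetical example: for pointers [a, b, a2] where a and a2 carry the same
# oid, _deduplicate returns one pointer per oid (the most recently assigned
# wins), with deterministic ordering courtesy of util.sortdict.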

def _verify(oid, content):
    realoid = hashlib.sha256(content).hexdigest()
    if realoid != oid:
        raise error.Abort(_('detected corrupt lfs object: %s') % oid,
                          hint=_('run hg verify'))

def _verifyfile(oid, fp):
    sha256 = hashlib.sha256()
    while True:
        data = fp.read(1024 * 1024)
        if not data:
            break
        sha256.update(data)
    realoid = sha256.hexdigest()
    if realoid != oid:
        raise error.Abort(_('detected corrupt lfs object: %s') % oid,
                          hint=_('run hg verify'))
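
# _verify hashes a blob already held in memory, while _verifyfile streams the
# file in 1 MiB chunks so large blobs verify without being read whole. Both
# checks work because a Git-LFS oid is, by definition, the SHA-256 hex digest
# of the blob's content.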

def remote(repo):
    """remotestore factory. return a store in _storemap depending on config"""
    url = util.url(repo.ui.config('lfs', 'url') or '')
    scheme = url.scheme
    if scheme not in _storemap:
        raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
    return _storemap[scheme](repo, url)
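
# A minimal configuration sketch selecting the Git-LFS store over HTTPS (the
# URL is an example, not a real endpoint):
#
#   [lfs]
#   url = https://example.com/repo.git/info/lfs
#
# With lfs.url unset, the parsed scheme is None and _promptremote is returned,
# which aborts with a hint to configure lfs.url.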

class LfsRemoteError(error.RevlogError):
    pass