##// END OF EJS Templates
repositories: implemented faster dedicated checks for empty repositories
marcink -
r698:65b1b84c default
parent child
Show More
@@ -1,742 +1,751
1 # RhodeCode VCSServer provides access to different vcs backends via network.
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2019 RhodeCode GmbH
2 # Copyright (C) 2014-2019 RhodeCode GmbH
3 #
3 #
4 # This program is free software; you can redistribute it and/or modify
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
7 # (at your option) any later version.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU General Public License
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 import collections
17 import collections
18 import logging
18 import logging
19 import os
19 import os
20 import posixpath as vcspath
20 import posixpath as vcspath
21 import re
21 import re
22 import stat
22 import stat
23 import traceback
23 import traceback
24 import urllib
24 import urllib
25 import urllib2
25 import urllib2
26 from functools import wraps
26 from functools import wraps
27
27
28 import more_itertools
28 import more_itertools
29 from dulwich import index, objects
29 from dulwich import index, objects
30 from dulwich.client import HttpGitClient, LocalGitClient
30 from dulwich.client import HttpGitClient, LocalGitClient
31 from dulwich.errors import (
31 from dulwich.errors import (
32 NotGitRepository, ChecksumMismatch, WrongObjectException,
32 NotGitRepository, ChecksumMismatch, WrongObjectException,
33 MissingCommitError, ObjectMissing, HangupException,
33 MissingCommitError, ObjectMissing, HangupException,
34 UnexpectedCommandError)
34 UnexpectedCommandError)
35 from dulwich.repo import Repo as DulwichRepo, Tag
35 from dulwich.repo import Repo as DulwichRepo, Tag
36 from dulwich.server import update_server_info
36 from dulwich.server import update_server_info
37
37
38 from vcsserver import exceptions, settings, subprocessio
38 from vcsserver import exceptions, settings, subprocessio
39 from vcsserver.utils import safe_str
39 from vcsserver.utils import safe_str
40 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original
40 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original
41 from vcsserver.hgcompat import (
41 from vcsserver.hgcompat import (
42 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
42 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
43 from vcsserver.git_lfs.lib import LFSOidStore
43 from vcsserver.git_lfs.lib import LFSOidStore
44
44
# File-mode constants used when building/inspecting git tree entries.
DIR_STAT = stat.S_IFDIR          # directory entry mode
FILE_MODE = stat.S_IFMT          # mask extracting the file-type bits
GIT_LINK = objects.S_IFGITLINK   # submodule (gitlink) entry mode

log = logging.getLogger(__name__)
50
50
51
51
52 def reraise_safe_exceptions(func):
52 def reraise_safe_exceptions(func):
53 """Converts Dulwich exceptions to something neutral."""
53 """Converts Dulwich exceptions to something neutral."""
54 @wraps(func)
54 @wraps(func)
55 def wrapper(*args, **kwargs):
55 def wrapper(*args, **kwargs):
56 try:
56 try:
57 return func(*args, **kwargs)
57 return func(*args, **kwargs)
58 except (ChecksumMismatch, WrongObjectException, MissingCommitError,
58 except (ChecksumMismatch, WrongObjectException, MissingCommitError,
59 ObjectMissing) as e:
59 ObjectMissing) as e:
60 exc = exceptions.LookupException(e)
60 exc = exceptions.LookupException(e)
61 raise exc(e)
61 raise exc(e)
62 except (HangupException, UnexpectedCommandError) as e:
62 except (HangupException, UnexpectedCommandError) as e:
63 exc = exceptions.VcsException(e)
63 exc = exceptions.VcsException(e)
64 raise exc(e)
64 raise exc(e)
65 except Exception as e:
65 except Exception as e:
66 # NOTE(marcink): becuase of how dulwich handles some exceptions
66 # NOTE(marcink): becuase of how dulwich handles some exceptions
67 # (KeyError on empty repos), we cannot track this and catch all
67 # (KeyError on empty repos), we cannot track this and catch all
68 # exceptions, it's an exceptions from other handlers
68 # exceptions, it's an exceptions from other handlers
69 #if not hasattr(e, '_vcs_kind'):
69 #if not hasattr(e, '_vcs_kind'):
70 #log.exception("Unhandled exception in git remote call")
70 #log.exception("Unhandled exception in git remote call")
71 #raise_from_original(exceptions.UnhandledException)
71 #raise_from_original(exceptions.UnhandledException)
72 raise
72 raise
73 return wrapper
73 return wrapper
74
74
75
75
class Repo(DulwichRepo):
    """
    A wrapper for dulwich Repo class.

    Since dulwich is sometimes keeping .idx file descriptors open, it leads to
    "Too many open files" error. We need to close all opened file descriptors
    once the repo object is destroyed.

    TODO: mikhail: please check if we need this wrapper after updating dulwich
    to 0.12.0 +
    """
    def __del__(self):
        # A partially constructed instance may lack object_store; only
        # fully initialized repositories need (and can take) close().
        if not hasattr(self, 'object_store'):
            return
        self.close()
90
90
91
91
class GitFactory(RepoFactory):
    """Factory creating dulwich-backed repository objects for git."""

    repo_type = 'git'

    def _create_repo(self, wire, create):
        # `create` is part of the factory interface but unused for git.
        return Repo(str_to_dulwich(wire['path']))
98
98
99
99
100 class GitRemote(object):
100 class GitRemote(object):
101
101
102 def __init__(self, factory):
102 def __init__(self, factory):
103 self._factory = factory
103 self._factory = factory
104 self.peeled_ref_marker = '^{}'
104 self.peeled_ref_marker = '^{}'
105 self._bulk_methods = {
105 self._bulk_methods = {
106 "author": self.commit_attribute,
106 "author": self.commit_attribute,
107 "date": self.get_object_attrs,
107 "date": self.get_object_attrs,
108 "message": self.commit_attribute,
108 "message": self.commit_attribute,
109 "parents": self.commit_attribute,
109 "parents": self.commit_attribute,
110 "_commit": self.revision,
110 "_commit": self.revision,
111 }
111 }
112
112
113 def _wire_to_config(self, wire):
113 def _wire_to_config(self, wire):
114 if 'config' in wire:
114 if 'config' in wire:
115 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
115 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
116 return {}
116 return {}
117
117
118 def _assign_ref(self, wire, ref, commit_id):
118 def _assign_ref(self, wire, ref, commit_id):
119 repo = self._factory.repo(wire)
119 repo = self._factory.repo(wire)
120 repo[ref] = commit_id
120 repo[ref] = commit_id
121
121
122 def _remote_conf(self, config):
122 def _remote_conf(self, config):
123 params = [
123 params = [
124 '-c', 'core.askpass=""',
124 '-c', 'core.askpass=""',
125 ]
125 ]
126 ssl_cert_dir = config.get('vcs_ssl_dir')
126 ssl_cert_dir = config.get('vcs_ssl_dir')
127 if ssl_cert_dir:
127 if ssl_cert_dir:
128 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
128 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
129 return params
129 return params
130
130
131 @reraise_safe_exceptions
131 @reraise_safe_exceptions
132 def is_empty(self, wire):
133 repo = self._factory.repo(wire)
134 try:
135 return not repo.head()
136 except Exception:
137 log.exception("failed to read object_store")
138 return True
139
140 @reraise_safe_exceptions
132 def add_object(self, wire, content):
141 def add_object(self, wire, content):
133 repo = self._factory.repo(wire)
142 repo = self._factory.repo(wire)
134 blob = objects.Blob()
143 blob = objects.Blob()
135 blob.set_raw_string(content)
144 blob.set_raw_string(content)
136 repo.object_store.add_object(blob)
145 repo.object_store.add_object(blob)
137 return blob.id
146 return blob.id
138
147
139 @reraise_safe_exceptions
148 @reraise_safe_exceptions
140 def assert_correct_path(self, wire):
149 def assert_correct_path(self, wire):
141 path = wire.get('path')
150 path = wire.get('path')
142 try:
151 try:
143 self._factory.repo(wire)
152 self._factory.repo(wire)
144 except NotGitRepository as e:
153 except NotGitRepository as e:
145 tb = traceback.format_exc()
154 tb = traceback.format_exc()
146 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
155 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
147 return False
156 return False
148
157
149 return True
158 return True
150
159
151 @reraise_safe_exceptions
160 @reraise_safe_exceptions
152 def bare(self, wire):
161 def bare(self, wire):
153 repo = self._factory.repo(wire)
162 repo = self._factory.repo(wire)
154 return repo.bare
163 return repo.bare
155
164
156 @reraise_safe_exceptions
165 @reraise_safe_exceptions
157 def blob_as_pretty_string(self, wire, sha):
166 def blob_as_pretty_string(self, wire, sha):
158 repo = self._factory.repo(wire)
167 repo = self._factory.repo(wire)
159 return repo[sha].as_pretty_string()
168 return repo[sha].as_pretty_string()
160
169
161 @reraise_safe_exceptions
170 @reraise_safe_exceptions
162 def blob_raw_length(self, wire, sha):
171 def blob_raw_length(self, wire, sha):
163 repo = self._factory.repo(wire)
172 repo = self._factory.repo(wire)
164 blob = repo[sha]
173 blob = repo[sha]
165 return blob.raw_length()
174 return blob.raw_length()
166
175
167 def _parse_lfs_pointer(self, raw_content):
176 def _parse_lfs_pointer(self, raw_content):
168
177
169 spec_string = 'version https://git-lfs.github.com/spec'
178 spec_string = 'version https://git-lfs.github.com/spec'
170 if raw_content and raw_content.startswith(spec_string):
179 if raw_content and raw_content.startswith(spec_string):
171 pattern = re.compile(r"""
180 pattern = re.compile(r"""
172 (?:\n)?
181 (?:\n)?
173 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
182 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
174 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
183 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
175 ^size[ ](?P<oid_size>[0-9]+)\n
184 ^size[ ](?P<oid_size>[0-9]+)\n
176 (?:\n)?
185 (?:\n)?
177 """, re.VERBOSE | re.MULTILINE)
186 """, re.VERBOSE | re.MULTILINE)
178 match = pattern.match(raw_content)
187 match = pattern.match(raw_content)
179 if match:
188 if match:
180 return match.groupdict()
189 return match.groupdict()
181
190
182 return {}
191 return {}
183
192
184 @reraise_safe_exceptions
193 @reraise_safe_exceptions
185 def is_large_file(self, wire, sha):
194 def is_large_file(self, wire, sha):
186 repo = self._factory.repo(wire)
195 repo = self._factory.repo(wire)
187 blob = repo[sha]
196 blob = repo[sha]
188 return self._parse_lfs_pointer(blob.as_raw_string())
197 return self._parse_lfs_pointer(blob.as_raw_string())
189
198
190 @reraise_safe_exceptions
199 @reraise_safe_exceptions
191 def in_largefiles_store(self, wire, oid):
200 def in_largefiles_store(self, wire, oid):
192 repo = self._factory.repo(wire)
201 repo = self._factory.repo(wire)
193 conf = self._wire_to_config(wire)
202 conf = self._wire_to_config(wire)
194
203
195 store_location = conf.get('vcs_git_lfs_store_location')
204 store_location = conf.get('vcs_git_lfs_store_location')
196 if store_location:
205 if store_location:
197 repo_name = repo.path
206 repo_name = repo.path
198 store = LFSOidStore(
207 store = LFSOidStore(
199 oid=oid, repo=repo_name, store_location=store_location)
208 oid=oid, repo=repo_name, store_location=store_location)
200 return store.has_oid()
209 return store.has_oid()
201
210
202 return False
211 return False
203
212
204 @reraise_safe_exceptions
213 @reraise_safe_exceptions
205 def store_path(self, wire, oid):
214 def store_path(self, wire, oid):
206 repo = self._factory.repo(wire)
215 repo = self._factory.repo(wire)
207 conf = self._wire_to_config(wire)
216 conf = self._wire_to_config(wire)
208
217
209 store_location = conf.get('vcs_git_lfs_store_location')
218 store_location = conf.get('vcs_git_lfs_store_location')
210 if store_location:
219 if store_location:
211 repo_name = repo.path
220 repo_name = repo.path
212 store = LFSOidStore(
221 store = LFSOidStore(
213 oid=oid, repo=repo_name, store_location=store_location)
222 oid=oid, repo=repo_name, store_location=store_location)
214 return store.oid_path
223 return store.oid_path
215 raise ValueError('Unable to fetch oid with path {}'.format(oid))
224 raise ValueError('Unable to fetch oid with path {}'.format(oid))
216
225
217 @reraise_safe_exceptions
226 @reraise_safe_exceptions
218 def bulk_request(self, wire, rev, pre_load):
227 def bulk_request(self, wire, rev, pre_load):
219 result = {}
228 result = {}
220 for attr in pre_load:
229 for attr in pre_load:
221 try:
230 try:
222 method = self._bulk_methods[attr]
231 method = self._bulk_methods[attr]
223 args = [wire, rev]
232 args = [wire, rev]
224 if attr == "date":
233 if attr == "date":
225 args.extend(["commit_time", "commit_timezone"])
234 args.extend(["commit_time", "commit_timezone"])
226 elif attr in ["author", "message", "parents"]:
235 elif attr in ["author", "message", "parents"]:
227 args.append(attr)
236 args.append(attr)
228 result[attr] = method(*args)
237 result[attr] = method(*args)
229 except KeyError as e:
238 except KeyError as e:
230 raise exceptions.VcsException(e)(
239 raise exceptions.VcsException(e)(
231 "Unknown bulk attribute: %s" % attr)
240 "Unknown bulk attribute: %s" % attr)
232 return result
241 return result
233
242
234 def _build_opener(self, url):
243 def _build_opener(self, url):
235 handlers = []
244 handlers = []
236 url_obj = url_parser(url)
245 url_obj = url_parser(url)
237 _, authinfo = url_obj.authinfo()
246 _, authinfo = url_obj.authinfo()
238
247
239 if authinfo:
248 if authinfo:
240 # create a password manager
249 # create a password manager
241 passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
250 passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
242 passmgr.add_password(*authinfo)
251 passmgr.add_password(*authinfo)
243
252
244 handlers.extend((httpbasicauthhandler(passmgr),
253 handlers.extend((httpbasicauthhandler(passmgr),
245 httpdigestauthhandler(passmgr)))
254 httpdigestauthhandler(passmgr)))
246
255
247 return urllib2.build_opener(*handlers)
256 return urllib2.build_opener(*handlers)
248
257
    @reraise_safe_exceptions
    def check_url(self, url, config):
        """
        Verify that `url` points at a reachable git repository.

        Probes the smart-HTTP ``info/refs`` endpoint with an auth-aware
        opener and checks the response looks like git protocol data.
        Raises ``exceptions.URLError`` when the url cannot be opened or
        does not look like a git repository; returns True otherwise.
        `config` is accepted for interface symmetry but not used here.
        """
        url_obj = url_parser(url)
        test_uri, _ = url_obj.authinfo()
        # hide credentials before the url gets logged
        url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
        url_obj.query = obfuscate_qs(url_obj.query)
        cleaned_uri = str(url_obj)
        log.info("Checking URL for remote cloning/import: %s", cleaned_uri)

        if not test_uri.endswith('info/refs'):
            test_uri = test_uri.rstrip('/') + '/info/refs'

        o = self._build_opener(url)
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

        q = {"service": 'git-upload-pack'}
        qs = '?%s' % urllib.urlencode(q)
        cu = "%s%s" % (test_uri, qs)
        req = urllib2.Request(cu, None, {})

        try:
            log.debug("Trying to open URL %s", cleaned_uri)
            resp = o.open(req)
            if resp.code != 200:
                raise exceptions.URLError()('Return Code is not 200')
        except Exception as e:
            log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
            # means it cannot be cloned
            raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))

        # now detect if it's proper git repo
        gitdata = resp.read()
        if 'service=git-upload-pack' in gitdata:
            pass
        elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
            # old style git can return some other format !
            pass
        else:
            raise exceptions.URLError()(
                "url [%s] does not look like an git" % (cleaned_uri,))

        return True
291
300
    @reraise_safe_exceptions
    def clone(self, wire, url, deferred, valid_refs, update_after_clone):
        """
        Clone by pulling all refs from `url` and applying those matching
        `valid_refs` (prefix match) while skipping refs ending with
        `deferred`; optionally checks out HEAD afterwards.
        """
        # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
        remote_refs = self.pull(wire, url, apply_refs=False)
        repo = self._factory.repo(wire)
        # str.startswith needs a tuple for multiple prefixes
        if isinstance(valid_refs, list):
            valid_refs = tuple(valid_refs)

        for k in remote_refs:
            # only parse heads/tags and skip so called deferred tags
            if k.startswith(valid_refs) and not k.endswith(deferred):
                repo[k] = remote_refs[k]

        if update_after_clone:
            # we want to checkout HEAD
            repo["HEAD"] = remote_refs["HEAD"]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo["HEAD"].tree)
310
319
    # TODO: this is quite complex, check if that can be simplified
    @reraise_safe_exceptions
    def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
        """
        Create a commit on `branch`, starting from tree `commit_tree`.

        `updated` is a list of node dicts (keys seen here: 'path',
        'node_path', 'content', 'mode') that are added/changed; `removed`
        is a list of paths to delete. `commit_data` attributes are set
        verbatim on the dulwich Commit object. Returns the new commit sha.
        """
        repo = self._factory.repo(wire)
        object_store = repo.object_store

        # Create tree and populates it with blobs
        commit_tree = commit_tree and repo[commit_tree] or objects.Tree()

        for node in updated:
            # Compute subdirs if needed
            dirpath, nodename = vcspath.split(node['path'])
            # NOTE: py2 map() returns a list; .pop(0)/.insert(0, ..) below
            # rely on that
            dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
            parent = commit_tree
            ancestors = [('', parent)]

            # Tries to dig for the deepest existing tree
            while dirnames:
                curdir = dirnames.pop(0)
                try:
                    dir_id = parent[curdir][1]
                except KeyError:
                    # put curdir back into dirnames and stops
                    dirnames.insert(0, curdir)
                    break
                else:
                    # If found, updates parent
                    parent = repo[dir_id]
                    ancestors.append((curdir, parent))
            # Now parent is deepest existing tree and we need to create
            # subtrees for dirnames (in reverse order)
            # [this only applies for nodes from added]
            new_trees = []

            blob = objects.Blob.from_string(node['content'])

            if dirnames:
                # If there are trees which should be created we need to build
                # them now (in reverse order)
                reversed_dirnames = list(reversed(dirnames))
                curtree = objects.Tree()
                curtree[node['node_path']] = node['mode'], blob.id
                new_trees.append(curtree)
                for dirname in reversed_dirnames[:-1]:
                    newtree = objects.Tree()
                    newtree[dirname] = (DIR_STAT, curtree.id)
                    new_trees.append(newtree)
                    curtree = newtree
                parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
            else:
                parent.add(
                    name=node['node_path'], mode=node['mode'], hexsha=blob.id)

            new_trees.append(parent)
            # Update ancestors
            reversed_ancestors = reversed(
                [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
            for parent, tree, path in reversed_ancestors:
                parent[path] = (DIR_STAT, tree.id)
                object_store.add_object(tree)

            object_store.add_object(blob)
            for tree in new_trees:
                object_store.add_object(tree)

        for node_path in removed:
            paths = node_path.split('/')
            tree = commit_tree
            trees = [tree]
            # Traverse deep into the forest...
            for path in paths:
                try:
                    obj = repo[tree[path][1]]
                    if isinstance(obj, objects.Tree):
                        trees.append(obj)
                        tree = obj
                except KeyError:
                    break
            # Cut down the blob and all rotten trees on the way back...
            for path, tree in reversed(zip(paths, trees)):
                del tree[path]
                if tree:
                    # This tree still has elements - don't remove it or any
                    # of it's parents
                    break

        object_store.add_object(commit_tree)

        # Create commit
        commit = objects.Commit()
        commit.tree = commit_tree.id
        for k, v in commit_data.iteritems():
            setattr(commit, k, v)
        object_store.add_object(commit)

        ref = 'refs/heads/%s' % branch
        repo.refs[ref] = commit.id

        return commit.id
410
419
    @reraise_safe_exceptions
    def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
        """
        Fetch from `url` via a dulwich client (local path or HTTP).

        When `refs` is given, only those refs are wanted and the returned
        mapping is filtered to them. With `apply_refs` the fetched refs
        are written into the local repo; with `update_after` HEAD is
        checked out afterwards. Returns the (possibly filtered) remote
        refs dict.
        """
        if url != 'default' and '://' not in url:
            client = LocalGitClient(url)
        else:
            url_obj = url_parser(url)
            o = self._build_opener(url)
            # strip credentials from the url used for fetching
            url, _ = url_obj.authinfo()
            client = HttpGitClient(base_url=url, opener=o)
        repo = self._factory.repo(wire)

        determine_wants = repo.object_store.determine_wants_all
        if refs:
            def determine_wants_requested(references):
                return [references[r] for r in references if r in refs]
            determine_wants = determine_wants_requested

        try:
            remote_refs = client.fetch(
                path=url, target=repo, determine_wants=determine_wants)
        except NotGitRepository as e:
            log.warning(
                'Trying to fetch from "%s" failed, not a Git repository.', url)
            # Exception can contain unicode which we convert
            raise exceptions.AbortException(e)(repr(e))

        # mikhail: client.fetch() returns all the remote refs, but fetches only
        # refs filtered by `determine_wants` function. We need to filter result
        # as well
        if refs:
            remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}

        if apply_refs:
            # TODO: johbo: Needs proper test coverage with a git repository
            # that contains a tag object, so that we would end up with
            # a peeled ref at this point.
            for k in remote_refs:
                if k.endswith(self.peeled_ref_marker):
                    log.debug("Skipping peeled reference %s", k)
                    continue
                repo[k] = remote_refs[k]

            if refs and not update_after:
                # mikhail: explicitly set the head to the last ref.
                repo['HEAD'] = remote_refs[refs[-1]]

        if update_after:
            # we want to checkout HEAD
            repo["HEAD"] = remote_refs["HEAD"]
            index.build_index_from_tree(repo.path, repo.index_path(),
                                        repo.object_store, repo["HEAD"].tree)
        return remote_refs
463
472
    @reraise_safe_exceptions
    def sync_fetch(self, wire, url, refs=None):
        """
        Fetch refs from `url` using the git binary (ls-remote + fetch).

        When `refs` is given (ref name or list of shas to match), only
        matching refs are fetched; peeled refs and HEAD are always
        skipped. Returns an OrderedDict of all remote refs (name -> sha),
        regardless of what was actually fetched.
        """
        repo = self._factory.repo(wire)
        if refs and not isinstance(refs, (list, tuple)):
            refs = [refs]
        config = self._wire_to_config(wire)
        # get all remote refs we'll use to fetch later
        output, __ = self.run_git_command(
            wire, ['ls-remote', url], fail_on_stderr=False,
            _copts=self._remote_conf(config),
            extra_env={'GIT_TERMINAL_PROMPT': '0'})

        remote_refs = collections.OrderedDict()
        fetch_refs = []

        for ref_line in output.splitlines():
            sha, ref = ref_line.split('\t')
            sha = sha.strip()
            if ref in remote_refs:
                # duplicate, skip
                continue
            if ref.endswith(self.peeled_ref_marker):
                log.debug("Skipping peeled reference %s", ref)
                continue
            # don't sync HEAD
            if ref in ['HEAD']:
                continue

            remote_refs[ref] = sha

            if refs and sha in refs:
                # we filter fetch using our specified refs
                fetch_refs.append('{}:{}'.format(ref, ref))
            elif not refs:
                fetch_refs.append('{}:{}'.format(ref, ref))
        log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
        if fetch_refs:
            # fetch in chunks to keep the command line within OS limits
            for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
                fetch_refs_chunks = list(chunk)
                log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
                _out, _err = self.run_git_command(
                    wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
                    fail_on_stderr=False,
                    _copts=self._remote_conf(config),
                    extra_env={'GIT_TERMINAL_PROMPT': '0'})

        return remote_refs
511
520
512 @reraise_safe_exceptions
521 @reraise_safe_exceptions
513 def sync_push(self, wire, url, refs=None):
522 def sync_push(self, wire, url, refs=None):
514 if not self.check_url(url, wire):
523 if not self.check_url(url, wire):
515 return
524 return
516 config = self._wire_to_config(wire)
525 config = self._wire_to_config(wire)
517 repo = self._factory.repo(wire)
526 repo = self._factory.repo(wire)
518 self.run_git_command(
527 self.run_git_command(
519 wire, ['push', url, '--mirror'], fail_on_stderr=False,
528 wire, ['push', url, '--mirror'], fail_on_stderr=False,
520 _copts=self._remote_conf(config),
529 _copts=self._remote_conf(config),
521 extra_env={'GIT_TERMINAL_PROMPT': '0'})
530 extra_env={'GIT_TERMINAL_PROMPT': '0'})
522
531
523 @reraise_safe_exceptions
532 @reraise_safe_exceptions
524 def get_remote_refs(self, wire, url):
533 def get_remote_refs(self, wire, url):
525 repo = Repo(url)
534 repo = Repo(url)
526 return repo.get_refs()
535 return repo.get_refs()
527
536
528 @reraise_safe_exceptions
537 @reraise_safe_exceptions
529 def get_description(self, wire):
538 def get_description(self, wire):
530 repo = self._factory.repo(wire)
539 repo = self._factory.repo(wire)
531 return repo.get_description()
540 return repo.get_description()
532
541
533 @reraise_safe_exceptions
542 @reraise_safe_exceptions
534 def get_missing_revs(self, wire, rev1, rev2, path2):
543 def get_missing_revs(self, wire, rev1, rev2, path2):
535 repo = self._factory.repo(wire)
544 repo = self._factory.repo(wire)
536 LocalGitClient(thin_packs=False).fetch(path2, repo)
545 LocalGitClient(thin_packs=False).fetch(path2, repo)
537
546
538 wire_remote = wire.copy()
547 wire_remote = wire.copy()
539 wire_remote['path'] = path2
548 wire_remote['path'] = path2
540 repo_remote = self._factory.repo(wire_remote)
549 repo_remote = self._factory.repo(wire_remote)
541 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
550 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
542
551
543 revs = [
552 revs = [
544 x.commit.id
553 x.commit.id
545 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
554 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
546 return revs
555 return revs
547
556
548 @reraise_safe_exceptions
557 @reraise_safe_exceptions
549 def get_object(self, wire, sha):
558 def get_object(self, wire, sha):
550 repo = self._factory.repo(wire)
559 repo = self._factory.repo(wire)
551 obj = repo.get_object(sha)
560 obj = repo.get_object(sha)
552 commit_id = obj.id
561 commit_id = obj.id
553
562
554 if isinstance(obj, Tag):
563 if isinstance(obj, Tag):
555 commit_id = obj.object[1]
564 commit_id = obj.object[1]
556
565
557 return {
566 return {
558 'id': obj.id,
567 'id': obj.id,
559 'type': obj.type_name,
568 'type': obj.type_name,
560 'commit_id': commit_id
569 'commit_id': commit_id
561 }
570 }
562
571
563 @reraise_safe_exceptions
572 @reraise_safe_exceptions
564 def get_object_attrs(self, wire, sha, *attrs):
573 def get_object_attrs(self, wire, sha, *attrs):
565 repo = self._factory.repo(wire)
574 repo = self._factory.repo(wire)
566 obj = repo.get_object(sha)
575 obj = repo.get_object(sha)
567 return list(getattr(obj, a) for a in attrs)
576 return list(getattr(obj, a) for a in attrs)
568
577
569 @reraise_safe_exceptions
578 @reraise_safe_exceptions
570 def get_refs(self, wire):
579 def get_refs(self, wire):
571 repo = self._factory.repo(wire)
580 repo = self._factory.repo(wire)
572 result = {}
581 result = {}
573 for ref, sha in repo.refs.as_dict().items():
582 for ref, sha in repo.refs.as_dict().items():
574 peeled_sha = repo.get_peeled(ref)
583 peeled_sha = repo.get_peeled(ref)
575 result[ref] = peeled_sha
584 result[ref] = peeled_sha
576 return result
585 return result
577