##// END OF EJS Templates
git: handle properly the .git bare repo clones.
marcink -
r202:0ed081fc default
parent child Browse files
Show More
@@ -1,375 +1,381 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2017 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Handles the Git smart protocol."""
19 19
20 20 import os
21 21 import socket
22 22 import logging
23 23
24 24 import simplejson as json
25 25 import dulwich.protocol
26 26 from webob import Request, Response, exc
27 27
28 28 from vcsserver import hooks, subprocessio
29 29
30 30
31 31 log = logging.getLogger(__name__)
32 32
33 33
class FileWrapper(object):
    """File wrapper that caps how much data can be read from ``fd``.

    At most ``content_length`` bytes are ever handed out, regardless of how
    much the underlying stream could deliver.

    :param fd: object exposing a ``read(size)`` method.
    :param content_length: total number of bytes that may be read.
    """

    def __init__(self, fd, content_length):
        self.fd = fd
        self.content_length = content_length
        # Bytes that may still be handed out to the caller.
        self.remain = content_length

    def read(self, size=-1):
        """
        Read up to ``size`` bytes, never exceeding the remaining budget.

        :param size: maximum number of bytes to return; ``None`` or a
            negative value means "everything that is still allowed".
        :returns: the data read, or ``None`` once the budget is exhausted.
        :raises IOError: when the underlying read fails with a socket error.
        """
        if not self.remain:
            return None

        if size is None or size < 0:
            size = self.remain

        try:
            data = self.fd.read(min(size, self.remain))
        except socket.error:
            raise IOError(self)

        # Account for what was actually delivered: the underlying stream may
        # return fewer bytes than requested, and subtracting the requested
        # size would cut the tail of the body off.
        if data:
            self.remain -= len(data)
        return data

    def __repr__(self):
        return '<FileWrapper %s len: %s, read: %s>' % (
            self.fd, self.content_length, self.content_length - self.remain
        )
60 60
61 61
class GitRepository(object):
    """WSGI app for handling Git smart protocol endpoints."""

    # Lower-cased directory entries that must exist in ``content_path``.
    git_folder_signature = frozenset(
        ('config', 'head', 'info', 'objects', 'refs'))
    # The only service commands this app is willing to run.
    commands = frozenset(('git-upload-pack', 'git-receive-pack'))
    valid_accepts = frozenset(('application/x-%s-result' %
                               c for c in commands))

    # The last bytes are the SHA1 of the first 12 bytes.
    EMPTY_PACK = (
        'PACK\x00\x00\x00\x02\x00\x00\x00\x00' +
        '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
    )
    SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k'))

    def __init__(self, repo_name, content_path, git_path, update_server_info,
                 extras):
        """
        :param repo_name: repository name as it appears in the request path.
        :param content_path: filesystem path of the git directory.
        :param git_path: git executable used to run the service commands.
        :param update_server_info: when true, ``update-server-info`` is run
            after every ``git-receive-pack``.
        :param extras: data serialized as JSON into the ``RC_SCM_DATA``
            environment variable and passed to the pull hooks.
        :raises OSError: when ``content_path`` cannot be listed or does not
            contain the entries listed in ``git_folder_signature``.
        """
        files = frozenset(f.lower() for f in os.listdir(content_path))
        valid_dir_signature = self.git_folder_signature.issubset(files)

        if not valid_dir_signature:
            raise OSError('%s missing git signature' % content_path)

        self.content_path = content_path
        self.repo_name = repo_name
        self.extras = extras
        self.git_path = git_path
        self.update_server_info = update_server_info

    def _get_fixedpath(self, path):
        """
        Small fix for repo_path

        Returns the part of ``path`` that follows the first occurrence of
        the repository name, without surrounding slashes.

        :param path:
        """
        path = path.split(self.repo_name, 1)[-1]
        if path == '.git' or path.startswith('.git/'):
            # for bare repos we still get the .git prefix inside, we skip it
            # here, and remove from the service command. Only a complete
            # ``.git`` segment is dropped, so that e.g. ``.gitinfo/refs`` is
            # not mistaken for ``info/refs``.
            path = path[4:]

        return path.strip('/')

    def inforefs(self, request, unused_environ):
        """
        WSGI Response producer for HTTP GET Git Smart
        HTTP /info/refs request.

        :returns: a response streaming the ref advertisement, or
            ``HTTPForbidden`` when the requested service is not allowed.
        :raises exc.HTTPExpectationFailed: when the git process cannot be
            started.
        """

        git_command = request.GET.get('service')
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        # please, resist the urge to add '\n' to git capture and increment
        # line count by 1.
        # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
        # a part of protocol.
        # The code in Git client not only does NOT need '\n', but actually
        # blows up if you sprinkle "flush" (0000) as "0001\n".
        # It reads binary, per number of bytes specified.
        # if you do add '\n' as part of data, count it.
        server_advert = '# service=%s\n' % git_command
        # pkt-line length prefix: 4 lower-case hex digits covering the
        # payload plus the 4 bytes of the prefix itself.
        packet_len = '%04x' % (len(server_advert) + 4)
        try:
            gitenv = dict(os.environ)
            # forget all configs
            # NOTE(review): unlike backend(), GIT_CONFIG_NOGLOBAL is not set
            # here - confirm whether that is intentional.
            gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
            command = [self.git_path, git_command[4:], '--stateless-rpc',
                       '--advertise-refs', self.content_path]
            out = subprocessio.SubprocessIOChunker(
                command,
                env=gitenv,
                starting_values=[packet_len + server_advert + '0000'],
                shell=False
            )
        except EnvironmentError:
            log.exception('Error processing command')
            raise exc.HTTPExpectationFailed()

        resp = Response()
        resp.content_type = 'application/x-%s-advertisement' % str(git_command)
        resp.charset = None
        resp.app_iter = out

        return resp

    def _get_want_capabilities(self, request):
        """Read the capabilities found in the first want line of the request."""
        # Peek at the first line and restore the position, so the body can
        # still be streamed to git afterwards.
        pos = request.body_file_seekable.tell()
        first_line = request.body_file_seekable.readline()
        request.body_file_seekable.seek(pos)

        return frozenset(
            dulwich.protocol.extract_want_line_capabilities(first_line)[1])

    def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
        """
        Construct a response with an empty PACK file.

        We use an empty PACK file, as that would trigger the failure of the pull
        or clone command.

        We also print in the error output a message explaining why the command
        was aborted.

        If additionally, the user is accepting messages we send them the output
        of the pre-pull hook.

        Note that for clients not supporting side-band we just send them the
        empty PACK file.
        """
        if self.SIDE_BAND_CAPS.intersection(capabilities):
            response = []
            proto = dulwich.protocol.Protocol(None, response.append)
            proto.write_pkt_line('NAK\n')
            self._write_sideband_to_proto(pre_pull_messages, proto,
                                          capabilities)
            # N.B.(skreft): Do not change the sideband channel to 3, as that
            # produces a fatal error in the client:
            #   fatal: error in sideband demultiplexer
            proto.write_sideband(2, 'Pre pull hook failed: aborting\n')
            proto.write_sideband(1, self.EMPTY_PACK)

            # writes 0000
            proto.write_pkt_line(None)

            return response
        else:
            return [self.EMPTY_PACK]

    def _write_sideband_to_proto(self, data, proto, capabilities):
        """
        Write the data to the proto's sideband number 2.

        We do not use dulwich's write_sideband directly as it only supports
        side-band-64k.

        Nothing is written when ``data`` is empty or when none of the
        side-band capabilities is present.
        """
        if not data:
            return

        # N.B.(skreft): The values below are explained in the pack protocol
        # documentation, section Packfile Data.
        # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
        if 'side-band-64k' in capabilities:
            chunk_size = 65515
        elif 'side-band' in capabilities:
            chunk_size = 995
        else:
            return

        # ``range`` instead of ``xrange``: same iteration, but the name
        # exists on both Python 2 and Python 3.
        for start in range(0, len(data), chunk_size):
            proto.write_sideband(2, data[start:start + chunk_size])

    def _get_messages(self, data, capabilities):
        """Return a list with packets for sending data in sideband number 2."""
        response = []
        proto = dulwich.protocol.Protocol(None, response.append)

        self._write_sideband_to_proto(data, proto, capabilities)

        return response

    def _inject_messages_to_response(self, response, capabilities,
                                     start_messages, end_messages):
        """
        Given a list response we inject the pre/post-pull messages.

        We only inject the messages if the client supports sideband, and the
        response has the format:
            0008NAK\n...0000

        Note that we do not check the no-progress capability as by default, git
        sends it, which effectively would block all messages.

        In every other case, including an empty response, the response is
        returned unchanged.
        """
        if not response:
            # Nothing came back from git; there is nothing to wrap and
            # indexing below would fail.
            return response

        if not self.SIDE_BAND_CAPS.intersection(capabilities):
            return response

        if (not response[0].startswith('0008NAK\n') or
                not response[-1].endswith('0000')):
            return response

        if not start_messages and not end_messages:
            return response

        new_response = ['0008NAK\n']
        new_response.extend(self._get_messages(start_messages, capabilities))
        if len(response) == 1:
            # Single chunk: strip both the NAK header and the flush packet.
            new_response.append(response[0][8:-4])
        else:
            new_response.append(response[0][8:])
            new_response.extend(response[1:-1])
            new_response.append(response[-1][:-4])
        new_response.extend(self._get_messages(end_messages, capabilities))
        new_response.append('0000')

        return new_response

    def backend(self, request, environ):
        """
        WSGI Response producer for HTTP POST Git Smart HTTP requests.
        Reads commands and data from HTTP POST's body.
        returns an iterator obj with contents of git command's
        response to stdout
        """
        # TODO(skreft): think how we could detect an HTTPLockedException, as
        # we probably want to have the same mechanism used by mercurial and
        # simplevcs.
        # For that we would need to parse the output of the command looking for
        # some signs of the HTTPLockedError, parse the data and reraise it in
        # pygrack. However, that would interfere with the streaming.
        #
        # Now the output of a blocked push is:
        # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # POST git-receive-pack (1047 bytes)
        # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
        # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
        # ! [remote rejected] master -> master (pre-receive hook declined)
        # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'

        git_command = self._get_fixedpath(request.path_info)
        if git_command not in self.commands:
            log.debug('command %s not allowed', git_command)
            return exc.HTTPForbidden()

        capabilities = None
        if git_command == 'git-upload-pack':
            capabilities = self._get_want_capabilities(request)

        if 'CONTENT_LENGTH' in environ:
            # Never read past the declared length of the request body.
            inputstream = FileWrapper(request.body_file_seekable,
                                      request.content_length)
        else:
            inputstream = request.body_file_seekable

        resp = Response()
        resp.content_type = ('application/x-%s-result' %
                             git_command.encode('utf8'))
        resp.charset = None

        if git_command == 'git-upload-pack':
            status, pre_pull_messages = hooks.git_pre_pull(self.extras)
            if status != 0:
                resp.app_iter = self._build_failed_pre_pull_response(
                    capabilities, pre_pull_messages)
                return resp

        gitenv = dict(os.environ)
        # forget all configs
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
        gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
        cmd = [self.git_path, git_command[4:], '--stateless-rpc',
               self.content_path]
        log.debug('handling cmd %s', cmd)

        out = subprocessio.SubprocessIOChunker(
            cmd,
            inputstream=inputstream,
            env=gitenv,
            cwd=self.content_path,
            shell=False,
            fail_on_stderr=False,
            fail_on_return_code=False
        )

        if self.update_server_info and git_command == 'git-receive-pack':
            # We need to fully consume the iterator here, as the
            # update-server-info command needs to be run after the push.
            out = list(out)

            # Updating refs manually after each push.
            # This is required as some clients are exposing Git repos internally
            # with the dumb protocol.
            cmd = [self.git_path, 'update-server-info']
            log.debug('handling cmd %s', cmd)
            # NOTE(review): the request body was already fed to the command
            # above, so this stream is presumably exhausted - confirm.
            output = subprocessio.SubprocessIOChunker(
                cmd,
                inputstream=inputstream,
                env=gitenv,
                cwd=self.content_path,
                shell=False,
                fail_on_stderr=False,
                fail_on_return_code=False
            )
            # Consume all the output so the subprocess finishes
            for _ in output:
                pass

        if git_command == 'git-upload-pack':
            # The whole output is needed to wrap it with the hook messages.
            out = list(out)
            unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
            resp.app_iter = self._inject_messages_to_response(
                out, capabilities, pre_pull_messages, post_pull_messages)
        else:
            resp.app_iter = out

        return resp

    def __call__(self, environ, start_response):
        """
        WSGI entry point: route ``info/refs`` requests to :meth:`inforefs`
        and everything else to :meth:`backend`.

        HTTP exceptions raised by the handler are used as the response; any
        other exception is logged and answered with a 500.
        """
        request = Request(environ)
        _path = self._get_fixedpath(request.path_info)
        if _path.startswith('info/refs'):
            app = self.inforefs
        else:
            app = self.backend

        try:
            resp = app(request, environ)
        except exc.HTTPException as error:
            log.exception('HTTP Error')
            resp = error
        except Exception:
            log.exception('Unknown error')
            resp = exc.HTTPInternalServerError()

        return resp(environ, start_response)
General Comments 0
You need to be logged in to leave comments. Login now