##// END OF EJS Templates
git: make sure we don't break streaming in case of empty pull messages....
marcink -
r277:90be8c49 stable
parent child Browse files
Show More
@@ -1,381 +1,386 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2017 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Handles the Git smart protocol."""
19 19
20 20 import os
21 21 import socket
22 22 import logging
23 23
24 24 import simplejson as json
25 25 import dulwich.protocol
26 26 from webob import Request, Response, exc
27 27
28 28 from vcsserver import hooks, subprocessio
29 29
30 30
31 31 log = logging.getLogger(__name__)
32 32
33 33
class FileWrapper(object):
    """
    File wrapper that caps how much data can be read from the wrapped file.

    At most ``content_length`` bytes are ever requested from ``fd``; once
    that budget is used up, :meth:`read` returns ``None``.

    :param fd: file-like object exposing ``read(size)``.
    :param content_length: maximum number of bytes to hand out.
    """

    def __init__(self, fd, content_length):
        self.fd = fd
        self.content_length = content_length
        # Number of bytes that may still be read from ``fd``.
        self.remain = content_length

    def read(self, size=-1):
        """
        Read up to ``size`` bytes, never exceeding the remaining budget.

        :param size: maximum number of bytes to return. ``None`` or a
            negative value means "everything that is still allowed".
        :return: the data read, or ``None`` once the budget is exhausted.
        :raises IOError: when the underlying read fails with a socket error.
        """
        if not self.remain:
            return None

        # Clamp the request so we never read past ``content_length``.
        if size is None or size < 0 or size > self.remain:
            size = self.remain

        try:
            data = self.fd.read(size)
        except socket.error:
            raise IOError(self)

        # Account for what was actually returned: a short read must not
        # consume budget for bytes that never arrived.
        if data:
            self.remain -= len(data)
        return data

    def __repr__(self):
        return '<FileWrapper %s len: %s, read: %s>' % (
            self.fd, self.content_length, self.content_length - self.remain
        )
60 60
61 61
62 62 class GitRepository(object):
63 63 """WSGI app for handling Git smart protocol endpoints."""
64 64
65 65 git_folder_signature = frozenset(
66 66 ('config', 'head', 'info', 'objects', 'refs'))
67 67 commands = frozenset(('git-upload-pack', 'git-receive-pack'))
68 68 valid_accepts = frozenset(('application/x-%s-result' %
69 69 c for c in commands))
70 70
71 71 # The last bytes are the SHA1 of the first 12 bytes.
72 72 EMPTY_PACK = (
73 73 'PACK\x00\x00\x00\x02\x00\x00\x00\x00' +
74 74 '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
75 75 )
76 76 SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k'))
77 77
78 78 def __init__(self, repo_name, content_path, git_path, update_server_info,
79 79 extras):
80 80 files = frozenset(f.lower() for f in os.listdir(content_path))
81 81 valid_dir_signature = self.git_folder_signature.issubset(files)
82 82
83 83 if not valid_dir_signature:
84 84 raise OSError('%s missing git signature' % content_path)
85 85
86 86 self.content_path = content_path
87 87 self.repo_name = repo_name
88 88 self.extras = extras
89 89 self.git_path = git_path
90 90 self.update_server_info = update_server_info
91 91
92 92 def _get_fixedpath(self, path):
93 93 """
94 94 Small fix for repo_path
95 95
96 96 :param path:
97 97 """
98 98 path = path.split(self.repo_name, 1)[-1]
99 99 if path.startswith('.git'):
100 100 # for bare repos we still get the .git prefix inside, we skip it
101 101 # here, and remove from the service command
102 102 path = path[4:]
103 103
104 104 return path.strip('/')
105 105
106 106 def inforefs(self, request, unused_environ):
107 107 """
108 108 WSGI Response producer for HTTP GET Git Smart
109 109 HTTP /info/refs request.
110 110 """
111 111
112 112 git_command = request.GET.get('service')
113 113 if git_command not in self.commands:
114 114 log.debug('command %s not allowed', git_command)
115 115 return exc.HTTPForbidden()
116 116
117 117 # please, resist the urge to add '\n' to git capture and increment
118 118 # line count by 1.
119 119 # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
120 120 # a part of protocol.
121 121 # The code in Git client not only does NOT need '\n', but actually
122 122 # blows up if you sprinkle "flush" (0000) as "0001\n".
123 123 # It reads binary, per number of bytes specified.
124 124 # if you do add '\n' as part of data, count it.
125 125 server_advert = '# service=%s\n' % git_command
126 126 packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
127 127 try:
128 128 gitenv = dict(os.environ)
129 129 # forget all configs
130 130 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
131 131 command = [self.git_path, git_command[4:], '--stateless-rpc',
132 132 '--advertise-refs', self.content_path]
133 133 out = subprocessio.SubprocessIOChunker(
134 134 command,
135 135 env=gitenv,
136 136 starting_values=[packet_len + server_advert + '0000'],
137 137 shell=False
138 138 )
139 139 except EnvironmentError:
140 140 log.exception('Error processing command')
141 141 raise exc.HTTPExpectationFailed()
142 142
143 143 resp = Response()
144 144 resp.content_type = 'application/x-%s-advertisement' % str(git_command)
145 145 resp.charset = None
146 146 resp.app_iter = out
147 147
148 148 return resp
149 149
150 150 def _get_want_capabilities(self, request):
151 151 """Read the capabilities found in the first want line of the request."""
152 152 pos = request.body_file_seekable.tell()
153 153 first_line = request.body_file_seekable.readline()
154 154 request.body_file_seekable.seek(pos)
155 155
156 156 return frozenset(
157 157 dulwich.protocol.extract_want_line_capabilities(first_line)[1])
158 158
159 159 def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
160 160 """
161 161 Construct a response with an empty PACK file.
162 162
163 163 We use an empty PACK file, as that would trigger the failure of the pull
164 164 or clone command.
165 165
166 166 We also print in the error output a message explaining why the command
167 167 was aborted.
168 168
169 169 If aditionally, the user is accepting messages we send them the output
170 170 of the pre-pull hook.
171 171
172 172 Note that for clients not supporting side-band we just send them the
173 173 emtpy PACK file.
174 174 """
175 175 if self.SIDE_BAND_CAPS.intersection(capabilities):
176 176 response = []
177 177 proto = dulwich.protocol.Protocol(None, response.append)
178 178 proto.write_pkt_line('NAK\n')
179 179 self._write_sideband_to_proto(pre_pull_messages, proto,
180 180 capabilities)
181 181 # N.B.(skreft): Do not change the sideband channel to 3, as that
182 182 # produces a fatal error in the client:
183 183 # fatal: error in sideband demultiplexer
184 184 proto.write_sideband(2, 'Pre pull hook failed: aborting\n')
185 185 proto.write_sideband(1, self.EMPTY_PACK)
186 186
187 187 # writes 0000
188 188 proto.write_pkt_line(None)
189 189
190 190 return response
191 191 else:
192 192 return [self.EMPTY_PACK]
193 193
194 194 def _write_sideband_to_proto(self, data, proto, capabilities):
195 195 """
196 196 Write the data to the proto's sideband number 2.
197 197
198 198 We do not use dulwich's write_sideband directly as it only supports
199 199 side-band-64k.
200 200 """
201 201 if not data:
202 202 return
203 203
204 204 # N.B.(skreft): The values below are explained in the pack protocol
205 205 # documentation, section Packfile Data.
206 206 # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
207 207 if 'side-band-64k' in capabilities:
208 208 chunk_size = 65515
209 209 elif 'side-band' in capabilities:
210 210 chunk_size = 995
211 211 else:
212 212 return
213 213
214 214 chunker = (
215 215 data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size))
216 216
217 217 for chunk in chunker:
218 218 proto.write_sideband(2, chunk)
219 219
220 220 def _get_messages(self, data, capabilities):
221 221 """Return a list with packets for sending data in sideband number 2."""
222 222 response = []
223 223 proto = dulwich.protocol.Protocol(None, response.append)
224 224
225 225 self._write_sideband_to_proto(data, proto, capabilities)
226 226
227 227 return response
228 228
229 229 def _inject_messages_to_response(self, response, capabilities,
230 230 start_messages, end_messages):
231 231 """
232 Given a list reponse we inject the pre/post-pull messages.
232 Given a list response we inject the pre/post-pull messages.
233 233
234 234 We only inject the messages if the client supports sideband, and the
235 235 response has the format:
236 236 0008NAK\n...0000
237 237
238 238 Note that we do not check the no-progress capability as by default, git
239 239 sends it, which effectively would block all messages.
240 240 """
241 241 if not self.SIDE_BAND_CAPS.intersection(capabilities):
242 242 return response
243 243
244 if not start_messages and not end_messages:
245 return response
246
247 # make a list out of response if it's an iterator
248 # so we can investigate it for message injection.
249 if hasattr(response, '__iter__'):
250 response = list(response)
251
244 252 if (not response[0].startswith('0008NAK\n') or
245 253 not response[-1].endswith('0000')):
246 254 return response
247 255
248 if not start_messages and not end_messages:
249 return response
250
251 256 new_response = ['0008NAK\n']
252 257 new_response.extend(self._get_messages(start_messages, capabilities))
253 258 if len(response) == 1:
254 259 new_response.append(response[0][8:-4])
255 260 else:
256 261 new_response.append(response[0][8:])
257 262 new_response.extend(response[1:-1])
258 263 new_response.append(response[-1][:-4])
259 264 new_response.extend(self._get_messages(end_messages, capabilities))
260 265 new_response.append('0000')
261 266
262 267 return new_response
263 268
264 269 def backend(self, request, environ):
265 270 """
266 271 WSGI Response producer for HTTP POST Git Smart HTTP requests.
267 272 Reads commands and data from HTTP POST's body.
268 273 returns an iterator obj with contents of git command's
269 274 response to stdout
270 275 """
271 276 # TODO(skreft): think how we could detect an HTTPLockedException, as
272 277 # we probably want to have the same mechanism used by mercurial and
273 278 # simplevcs.
274 279 # For that we would need to parse the output of the command looking for
275 280 # some signs of the HTTPLockedError, parse the data and reraise it in
276 281 # pygrack. However, that would interfere with the streaming.
277 282 #
278 283 # Now the output of a blocked push is:
279 284 # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
280 285 # POST git-receive-pack (1047 bytes)
281 286 # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
282 287 # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
283 288 # ! [remote rejected] master -> master (pre-receive hook declined)
284 289 # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
285 290
286 291 git_command = self._get_fixedpath(request.path_info)
287 292 if git_command not in self.commands:
288 293 log.debug('command %s not allowed', git_command)
289 294 return exc.HTTPForbidden()
290 295
291 296 capabilities = None
292 297 if git_command == 'git-upload-pack':
293 298 capabilities = self._get_want_capabilities(request)
294 299
295 300 if 'CONTENT_LENGTH' in environ:
296 301 inputstream = FileWrapper(request.body_file_seekable,
297 302 request.content_length)
298 303 else:
299 304 inputstream = request.body_file_seekable
300 305
301 306 resp = Response()
302 307 resp.content_type = ('application/x-%s-result' %
303 308 git_command.encode('utf8'))
304 309 resp.charset = None
305 310
311 pre_pull_messages = ''
306 312 if git_command == 'git-upload-pack':
307 313 status, pre_pull_messages = hooks.git_pre_pull(self.extras)
308 314 if status != 0:
309 315 resp.app_iter = self._build_failed_pre_pull_response(
310 316 capabilities, pre_pull_messages)
311 317 return resp
312 318
313 319 gitenv = dict(os.environ)
314 320 # forget all configs
315 321 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
316 322 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
317 323 cmd = [self.git_path, git_command[4:], '--stateless-rpc',
318 324 self.content_path]
319 325 log.debug('handling cmd %s', cmd)
320 326
321 327 out = subprocessio.SubprocessIOChunker(
322 328 cmd,
323 329 inputstream=inputstream,
324 330 env=gitenv,
325 331 cwd=self.content_path,
326 332 shell=False,
327 333 fail_on_stderr=False,
328 334 fail_on_return_code=False
329 335 )
330 336
331 337 if self.update_server_info and git_command == 'git-receive-pack':
332 338 # We need to fully consume the iterator here, as the
333 339 # update-server-info command needs to be run after the push.
334 340 out = list(out)
335 341
336 342 # Updating refs manually after each push.
337 343 # This is required as some clients are exposing Git repos internally
338 344 # with the dumb protocol.
339 345 cmd = [self.git_path, 'update-server-info']
340 346 log.debug('handling cmd %s', cmd)
341 347 output = subprocessio.SubprocessIOChunker(
342 348 cmd,
343 349 inputstream=inputstream,
344 350 env=gitenv,
345 351 cwd=self.content_path,
346 352 shell=False,
347 353 fail_on_stderr=False,
348 354 fail_on_return_code=False
349 355 )
350 356 # Consume all the output so the subprocess finishes
351 357 for _ in output:
352 358 pass
353 359
354 360 if git_command == 'git-upload-pack':
355 out = list(out)
356 361 unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
357 362 resp.app_iter = self._inject_messages_to_response(
358 363 out, capabilities, pre_pull_messages, post_pull_messages)
359 364 else:
360 365 resp.app_iter = out
361 366
362 367 return resp
363 368
364 369 def __call__(self, environ, start_response):
365 370 request = Request(environ)
366 371 _path = self._get_fixedpath(request.path_info)
367 372 if _path.startswith('info/refs'):
368 373 app = self.inforefs
369 374 else:
370 375 app = self.backend
371 376
372 377 try:
373 378 resp = app(request, environ)
374 379 except exc.HTTPException as error:
375 380 log.exception('HTTP Error')
376 381 resp = error
377 382 except Exception:
378 383 log.exception('Unknown error')
379 384 resp = exc.HTTPInternalServerError()
380 385
381 386 return resp(environ, start_response)
General Comments 0
You need to be logged in to leave comments. Login now