Show More
@@ -1,471 +1,472 b'' | |||||
1 | # This library is free software; you can redistribute it and/or |
|
1 | # This library is free software; you can redistribute it and/or | |
2 | # modify it under the terms of the GNU Lesser General Public |
|
2 | # modify it under the terms of the GNU Lesser General Public | |
3 | # License as published by the Free Software Foundation; either |
|
3 | # License as published by the Free Software Foundation; either | |
4 | # version 2.1 of the License, or (at your option) any later version. |
|
4 | # version 2.1 of the License, or (at your option) any later version. | |
5 | # |
|
5 | # | |
6 | # This library is distributed in the hope that it will be useful, |
|
6 | # This library is distributed in the hope that it will be useful, | |
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
9 | # Lesser General Public License for more details. |
|
9 | # Lesser General Public License for more details. | |
10 | # |
|
10 | # | |
11 | # You should have received a copy of the GNU Lesser General Public |
|
11 | # You should have received a copy of the GNU Lesser General Public | |
12 | # License along with this library; if not, see |
|
12 | # License along with this library; if not, see | |
13 | # <http://www.gnu.org/licenses/>. |
|
13 | # <http://www.gnu.org/licenses/>. | |
14 |
|
14 | |||
15 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber |
|
15 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | |
16 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko |
|
16 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | |
17 |
|
17 | |||
18 | # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $ |
|
18 | # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $ | |
19 |
|
19 | |||
20 | from __future__ import absolute_import |
|
20 | from __future__ import absolute_import | |
21 |
|
21 | |||
22 | import email |
|
22 | import email | |
23 | import ftplib |
|
23 | import ftplib | |
24 | import mimetypes |
|
24 | import mimetypes | |
25 | import os |
|
25 | import os | |
26 | import re |
|
26 | import re | |
27 | import socket |
|
27 | import socket | |
28 | import stat |
|
28 | import stat | |
29 |
|
29 | |||
30 | from . import ( |
|
30 | from . import ( | |
|
31 | urllibcompat, | |||
31 | util, |
|
32 | util, | |
32 | ) |
|
33 | ) | |
33 |
|
34 | |||
34 | urlerr = util.urlerr |
|
35 | urlerr = util.urlerr | |
35 | urlreq = util.urlreq |
|
36 | urlreq = util.urlreq | |
36 |
|
37 | |||
37 | addclosehook = urlreq.addclosehook |
|
38 | addclosehook = urlreq.addclosehook | |
38 | addinfourl = urlreq.addinfourl |
|
39 | addinfourl = urlreq.addinfourl | |
39 | splitattr = urlreq.splitattr |
|
40 | splitattr = urlreq.splitattr | |
40 | splitpasswd = urlreq.splitpasswd |
|
41 | splitpasswd = urlreq.splitpasswd | |
41 | splitport = urlreq.splitport |
|
42 | splitport = urlreq.splitport | |
42 | splituser = urlreq.splituser |
|
43 | splituser = urlreq.splituser | |
43 | unquote = urlreq.unquote |
|
44 | unquote = urlreq.unquote | |
44 |
|
45 | |||
45 | class RangeError(IOError): |
|
46 | class RangeError(IOError): | |
46 | """Error raised when an unsatisfiable range is requested.""" |
|
47 | """Error raised when an unsatisfiable range is requested.""" | |
47 |
|
48 | |||
48 | class HTTPRangeHandler(urlreq.basehandler): |
|
49 | class HTTPRangeHandler(urlreq.basehandler): | |
49 | """Handler that enables HTTP Range headers. |
|
50 | """Handler that enables HTTP Range headers. | |
50 |
|
51 | |||
51 | This was extremely simple. The Range header is a HTTP feature to |
|
52 | This was extremely simple. The Range header is a HTTP feature to | |
52 | begin with so all this class does is tell urllib2 that the |
|
53 | begin with so all this class does is tell urllib2 that the | |
53 | "206 Partial Content" response from the HTTP server is what we |
|
54 | "206 Partial Content" response from the HTTP server is what we | |
54 | expected. |
|
55 | expected. | |
55 |
|
56 | |||
56 | Example: |
|
57 | Example: | |
57 | import urllib2 |
|
58 | import urllib2 | |
58 | import byterange |
|
59 | import byterange | |
59 |
|
60 | |||
60 | range_handler = range.HTTPRangeHandler() |
|
61 | range_handler = range.HTTPRangeHandler() | |
61 | opener = urlreq.buildopener(range_handler) |
|
62 | opener = urlreq.buildopener(range_handler) | |
62 |
|
63 | |||
63 | # install it |
|
64 | # install it | |
64 | urlreq.installopener(opener) |
|
65 | urlreq.installopener(opener) | |
65 |
|
66 | |||
66 | # create Request and set Range header |
|
67 | # create Request and set Range header | |
67 | req = urlreq.request('http://www.python.org/') |
|
68 | req = urlreq.request('http://www.python.org/') | |
68 | req.header['Range'] = 'bytes=30-50' |
|
69 | req.header['Range'] = 'bytes=30-50' | |
69 | f = urlreq.urlopen(req) |
|
70 | f = urlreq.urlopen(req) | |
70 | """ |
|
71 | """ | |
71 |
|
72 | |||
72 | def http_error_206(self, req, fp, code, msg, hdrs): |
|
73 | def http_error_206(self, req, fp, code, msg, hdrs): | |
73 | # 206 Partial Content Response |
|
74 | # 206 Partial Content Response | |
74 | r = urlreq.addinfourl(fp, hdrs, req.get_full_url()) |
|
75 | r = urlreq.addinfourl(fp, hdrs, req.get_full_url()) | |
75 | r.code = code |
|
76 | r.code = code | |
76 | r.msg = msg |
|
77 | r.msg = msg | |
77 | return r |
|
78 | return r | |
78 |
|
79 | |||
79 | def http_error_416(self, req, fp, code, msg, hdrs): |
|
80 | def http_error_416(self, req, fp, code, msg, hdrs): | |
80 | # HTTP's Range Not Satisfiable error |
|
81 | # HTTP's Range Not Satisfiable error | |
81 | raise RangeError('Requested Range Not Satisfiable') |
|
82 | raise RangeError('Requested Range Not Satisfiable') | |
82 |
|
83 | |||
83 | class RangeableFileObject(object): |
|
84 | class RangeableFileObject(object): | |
84 | """File object wrapper to enable raw range handling. |
|
85 | """File object wrapper to enable raw range handling. | |
85 | This was implemented primarily for handling range |
|
86 | This was implemented primarily for handling range | |
86 | specifications for file:// urls. This object effectively makes |
|
87 | specifications for file:// urls. This object effectively makes | |
87 | a file object look like it consists only of a range of bytes in |
|
88 | a file object look like it consists only of a range of bytes in | |
88 | the stream. |
|
89 | the stream. | |
89 |
|
90 | |||
90 | Examples: |
|
91 | Examples: | |
91 | # expose 10 bytes, starting at byte position 20, from |
|
92 | # expose 10 bytes, starting at byte position 20, from | |
92 | # /etc/aliases. |
|
93 | # /etc/aliases. | |
93 | >>> fo = RangeableFileObject(file(b'/etc/passwd', b'r'), (20,30)) |
|
94 | >>> fo = RangeableFileObject(file(b'/etc/passwd', b'r'), (20,30)) | |
94 | # seek seeks within the range (to position 23 in this case) |
|
95 | # seek seeks within the range (to position 23 in this case) | |
95 | >>> fo.seek(3) |
|
96 | >>> fo.seek(3) | |
96 | # tell tells where your at _within the range_ (position 3 in |
|
97 | # tell tells where your at _within the range_ (position 3 in | |
97 | # this case) |
|
98 | # this case) | |
98 | >>> fo.tell() |
|
99 | >>> fo.tell() | |
99 | # read EOFs if an attempt is made to read past the last |
|
100 | # read EOFs if an attempt is made to read past the last | |
100 | # byte in the range. the following will return only 7 bytes. |
|
101 | # byte in the range. the following will return only 7 bytes. | |
101 | >>> fo.read(30) |
|
102 | >>> fo.read(30) | |
102 | """ |
|
103 | """ | |
103 |
|
104 | |||
104 | def __init__(self, fo, rangetup): |
|
105 | def __init__(self, fo, rangetup): | |
105 | """Create a RangeableFileObject. |
|
106 | """Create a RangeableFileObject. | |
106 | fo -- a file like object. only the read() method need be |
|
107 | fo -- a file like object. only the read() method need be | |
107 | supported but supporting an optimized seek() is |
|
108 | supported but supporting an optimized seek() is | |
108 | preferable. |
|
109 | preferable. | |
109 | rangetup -- a (firstbyte,lastbyte) tuple specifying the range |
|
110 | rangetup -- a (firstbyte,lastbyte) tuple specifying the range | |
110 | to work over. |
|
111 | to work over. | |
111 | The file object provided is assumed to be at byte offset 0. |
|
112 | The file object provided is assumed to be at byte offset 0. | |
112 | """ |
|
113 | """ | |
113 | self.fo = fo |
|
114 | self.fo = fo | |
114 | (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) |
|
115 | (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) | |
115 | self.realpos = 0 |
|
116 | self.realpos = 0 | |
116 | self._do_seek(self.firstbyte) |
|
117 | self._do_seek(self.firstbyte) | |
117 |
|
118 | |||
118 | def __getattr__(self, name): |
|
119 | def __getattr__(self, name): | |
119 | """This effectively allows us to wrap at the instance level. |
|
120 | """This effectively allows us to wrap at the instance level. | |
120 | Any attribute not found in _this_ object will be searched for |
|
121 | Any attribute not found in _this_ object will be searched for | |
121 | in self.fo. This includes methods.""" |
|
122 | in self.fo. This includes methods.""" | |
122 | return getattr(self.fo, name) |
|
123 | return getattr(self.fo, name) | |
123 |
|
124 | |||
124 | def tell(self): |
|
125 | def tell(self): | |
125 | """Return the position within the range. |
|
126 | """Return the position within the range. | |
126 | This is different from fo.seek in that position 0 is the |
|
127 | This is different from fo.seek in that position 0 is the | |
127 | first byte position of the range tuple. For example, if |
|
128 | first byte position of the range tuple. For example, if | |
128 | this object was created with a range tuple of (500,899), |
|
129 | this object was created with a range tuple of (500,899), | |
129 | tell() will return 0 when at byte position 500 of the file. |
|
130 | tell() will return 0 when at byte position 500 of the file. | |
130 | """ |
|
131 | """ | |
131 | return (self.realpos - self.firstbyte) |
|
132 | return (self.realpos - self.firstbyte) | |
132 |
|
133 | |||
133 | def seek(self, offset, whence=0): |
|
134 | def seek(self, offset, whence=0): | |
134 | """Seek within the byte range. |
|
135 | """Seek within the byte range. | |
135 | Positioning is identical to that described under tell(). |
|
136 | Positioning is identical to that described under tell(). | |
136 | """ |
|
137 | """ | |
137 | assert whence in (0, 1, 2) |
|
138 | assert whence in (0, 1, 2) | |
138 | if whence == 0: # absolute seek |
|
139 | if whence == 0: # absolute seek | |
139 | realoffset = self.firstbyte + offset |
|
140 | realoffset = self.firstbyte + offset | |
140 | elif whence == 1: # relative seek |
|
141 | elif whence == 1: # relative seek | |
141 | realoffset = self.realpos + offset |
|
142 | realoffset = self.realpos + offset | |
142 | elif whence == 2: # absolute from end of file |
|
143 | elif whence == 2: # absolute from end of file | |
143 | # XXX: are we raising the right Error here? |
|
144 | # XXX: are we raising the right Error here? | |
144 | raise IOError('seek from end of file not supported.') |
|
145 | raise IOError('seek from end of file not supported.') | |
145 |
|
146 | |||
146 | # do not allow seek past lastbyte in range |
|
147 | # do not allow seek past lastbyte in range | |
147 | if self.lastbyte and (realoffset >= self.lastbyte): |
|
148 | if self.lastbyte and (realoffset >= self.lastbyte): | |
148 | realoffset = self.lastbyte |
|
149 | realoffset = self.lastbyte | |
149 |
|
150 | |||
150 | self._do_seek(realoffset - self.realpos) |
|
151 | self._do_seek(realoffset - self.realpos) | |
151 |
|
152 | |||
152 | def read(self, size=-1): |
|
153 | def read(self, size=-1): | |
153 | """Read within the range. |
|
154 | """Read within the range. | |
154 | This method will limit the size read based on the range. |
|
155 | This method will limit the size read based on the range. | |
155 | """ |
|
156 | """ | |
156 | size = self._calc_read_size(size) |
|
157 | size = self._calc_read_size(size) | |
157 | rslt = self.fo.read(size) |
|
158 | rslt = self.fo.read(size) | |
158 | self.realpos += len(rslt) |
|
159 | self.realpos += len(rslt) | |
159 | return rslt |
|
160 | return rslt | |
160 |
|
161 | |||
161 | def readline(self, size=-1): |
|
162 | def readline(self, size=-1): | |
162 | """Read lines within the range. |
|
163 | """Read lines within the range. | |
163 | This method will limit the size read based on the range. |
|
164 | This method will limit the size read based on the range. | |
164 | """ |
|
165 | """ | |
165 | size = self._calc_read_size(size) |
|
166 | size = self._calc_read_size(size) | |
166 | rslt = self.fo.readline(size) |
|
167 | rslt = self.fo.readline(size) | |
167 | self.realpos += len(rslt) |
|
168 | self.realpos += len(rslt) | |
168 | return rslt |
|
169 | return rslt | |
169 |
|
170 | |||
170 | def _calc_read_size(self, size): |
|
171 | def _calc_read_size(self, size): | |
171 | """Handles calculating the amount of data to read based on |
|
172 | """Handles calculating the amount of data to read based on | |
172 | the range. |
|
173 | the range. | |
173 | """ |
|
174 | """ | |
174 | if self.lastbyte: |
|
175 | if self.lastbyte: | |
175 | if size > -1: |
|
176 | if size > -1: | |
176 | if ((self.realpos + size) >= self.lastbyte): |
|
177 | if ((self.realpos + size) >= self.lastbyte): | |
177 | size = (self.lastbyte - self.realpos) |
|
178 | size = (self.lastbyte - self.realpos) | |
178 | else: |
|
179 | else: | |
179 | size = (self.lastbyte - self.realpos) |
|
180 | size = (self.lastbyte - self.realpos) | |
180 | return size |
|
181 | return size | |
181 |
|
182 | |||
182 | def _do_seek(self, offset): |
|
183 | def _do_seek(self, offset): | |
183 | """Seek based on whether wrapped object supports seek(). |
|
184 | """Seek based on whether wrapped object supports seek(). | |
184 | offset is relative to the current position (self.realpos). |
|
185 | offset is relative to the current position (self.realpos). | |
185 | """ |
|
186 | """ | |
186 | assert offset >= 0 |
|
187 | assert offset >= 0 | |
187 | seek = getattr(self.fo, 'seek', self._poor_mans_seek) |
|
188 | seek = getattr(self.fo, 'seek', self._poor_mans_seek) | |
188 | seek(self.realpos + offset) |
|
189 | seek(self.realpos + offset) | |
189 | self.realpos += offset |
|
190 | self.realpos += offset | |
190 |
|
191 | |||
191 | def _poor_mans_seek(self, offset): |
|
192 | def _poor_mans_seek(self, offset): | |
192 | """Seek by calling the wrapped file objects read() method. |
|
193 | """Seek by calling the wrapped file objects read() method. | |
193 | This is used for file like objects that do not have native |
|
194 | This is used for file like objects that do not have native | |
194 | seek support. The wrapped objects read() method is called |
|
195 | seek support. The wrapped objects read() method is called | |
195 | to manually seek to the desired position. |
|
196 | to manually seek to the desired position. | |
196 | offset -- read this number of bytes from the wrapped |
|
197 | offset -- read this number of bytes from the wrapped | |
197 | file object. |
|
198 | file object. | |
198 | raise RangeError if we encounter EOF before reaching the |
|
199 | raise RangeError if we encounter EOF before reaching the | |
199 | specified offset. |
|
200 | specified offset. | |
200 | """ |
|
201 | """ | |
201 | pos = 0 |
|
202 | pos = 0 | |
202 | bufsize = 1024 |
|
203 | bufsize = 1024 | |
203 | while pos < offset: |
|
204 | while pos < offset: | |
204 | if (pos + bufsize) > offset: |
|
205 | if (pos + bufsize) > offset: | |
205 | bufsize = offset - pos |
|
206 | bufsize = offset - pos | |
206 | buf = self.fo.read(bufsize) |
|
207 | buf = self.fo.read(bufsize) | |
207 | if len(buf) != bufsize: |
|
208 | if len(buf) != bufsize: | |
208 | raise RangeError('Requested Range Not Satisfiable') |
|
209 | raise RangeError('Requested Range Not Satisfiable') | |
209 | pos += bufsize |
|
210 | pos += bufsize | |
210 |
|
211 | |||
211 | class FileRangeHandler(urlreq.filehandler): |
|
212 | class FileRangeHandler(urlreq.filehandler): | |
212 | """FileHandler subclass that adds Range support. |
|
213 | """FileHandler subclass that adds Range support. | |
213 | This class handles Range headers exactly like an HTTP |
|
214 | This class handles Range headers exactly like an HTTP | |
214 | server would. |
|
215 | server would. | |
215 | """ |
|
216 | """ | |
216 | def open_local_file(self, req): |
|
217 | def open_local_file(self, req): | |
217 |
host = req |
|
218 | host = urllibcompat.gethost(req) | |
218 |
file = req |
|
219 | file = urllibcompat.getselector(req) | |
219 | localfile = urlreq.url2pathname(file) |
|
220 | localfile = urlreq.url2pathname(file) | |
220 | stats = os.stat(localfile) |
|
221 | stats = os.stat(localfile) | |
221 | size = stats[stat.ST_SIZE] |
|
222 | size = stats[stat.ST_SIZE] | |
222 | modified = email.Utils.formatdate(stats[stat.ST_MTIME]) |
|
223 | modified = email.Utils.formatdate(stats[stat.ST_MTIME]) | |
223 | mtype = mimetypes.guess_type(file)[0] |
|
224 | mtype = mimetypes.guess_type(file)[0] | |
224 | if host: |
|
225 | if host: | |
225 | host, port = urlreq.splitport(host) |
|
226 | host, port = urlreq.splitport(host) | |
226 | if port or socket.gethostbyname(host) not in self.get_names(): |
|
227 | if port or socket.gethostbyname(host) not in self.get_names(): | |
227 | raise urlerr.urlerror('file not on local host') |
|
228 | raise urlerr.urlerror('file not on local host') | |
228 | fo = open(localfile,'rb') |
|
229 | fo = open(localfile,'rb') | |
229 | brange = req.headers.get('Range', None) |
|
230 | brange = req.headers.get('Range', None) | |
230 | brange = range_header_to_tuple(brange) |
|
231 | brange = range_header_to_tuple(brange) | |
231 | assert brange != () |
|
232 | assert brange != () | |
232 | if brange: |
|
233 | if brange: | |
233 | (fb, lb) = brange |
|
234 | (fb, lb) = brange | |
234 | if lb == '': |
|
235 | if lb == '': | |
235 | lb = size |
|
236 | lb = size | |
236 | if fb < 0 or fb > size or lb > size: |
|
237 | if fb < 0 or fb > size or lb > size: | |
237 | raise RangeError('Requested Range Not Satisfiable') |
|
238 | raise RangeError('Requested Range Not Satisfiable') | |
238 | size = (lb - fb) |
|
239 | size = (lb - fb) | |
239 | fo = RangeableFileObject(fo, (fb, lb)) |
|
240 | fo = RangeableFileObject(fo, (fb, lb)) | |
240 | headers = email.message_from_string( |
|
241 | headers = email.message_from_string( | |
241 | 'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' % |
|
242 | 'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' % | |
242 | (mtype or 'text/plain', size, modified)) |
|
243 | (mtype or 'text/plain', size, modified)) | |
243 | return urlreq.addinfourl(fo, headers, 'file:'+file) |
|
244 | return urlreq.addinfourl(fo, headers, 'file:'+file) | |
244 |
|
245 | |||
245 |
|
246 | |||
246 | # FTP Range Support |
|
247 | # FTP Range Support | |
247 | # Unfortunately, a large amount of base FTP code had to be copied |
|
248 | # Unfortunately, a large amount of base FTP code had to be copied | |
248 | # from urllib and urllib2 in order to insert the FTP REST command. |
|
249 | # from urllib and urllib2 in order to insert the FTP REST command. | |
249 | # Code modifications for range support have been commented as |
|
250 | # Code modifications for range support have been commented as | |
250 | # follows: |
|
251 | # follows: | |
251 | # -- range support modifications start/end here |
|
252 | # -- range support modifications start/end here | |
252 |
|
253 | |||
253 | class FTPRangeHandler(urlreq.ftphandler): |
|
254 | class FTPRangeHandler(urlreq.ftphandler): | |
254 | def ftp_open(self, req): |
|
255 | def ftp_open(self, req): | |
255 |
host = req |
|
256 | host = urllibcompat.gethost(req) | |
256 | if not host: |
|
257 | if not host: | |
257 | raise IOError('ftp error', 'no host given') |
|
258 | raise IOError('ftp error', 'no host given') | |
258 | host, port = splitport(host) |
|
259 | host, port = splitport(host) | |
259 | if port is None: |
|
260 | if port is None: | |
260 | port = ftplib.FTP_PORT |
|
261 | port = ftplib.FTP_PORT | |
261 | else: |
|
262 | else: | |
262 | port = int(port) |
|
263 | port = int(port) | |
263 |
|
264 | |||
264 | # username/password handling |
|
265 | # username/password handling | |
265 | user, host = splituser(host) |
|
266 | user, host = splituser(host) | |
266 | if user: |
|
267 | if user: | |
267 | user, passwd = splitpasswd(user) |
|
268 | user, passwd = splitpasswd(user) | |
268 | else: |
|
269 | else: | |
269 | passwd = None |
|
270 | passwd = None | |
270 | host = unquote(host) |
|
271 | host = unquote(host) | |
271 | user = unquote(user or '') |
|
272 | user = unquote(user or '') | |
272 | passwd = unquote(passwd or '') |
|
273 | passwd = unquote(passwd or '') | |
273 |
|
274 | |||
274 | try: |
|
275 | try: | |
275 | host = socket.gethostbyname(host) |
|
276 | host = socket.gethostbyname(host) | |
276 | except socket.error as msg: |
|
277 | except socket.error as msg: | |
277 | raise urlerr.urlerror(msg) |
|
278 | raise urlerr.urlerror(msg) | |
278 | path, attrs = splitattr(req.get_selector()) |
|
279 | path, attrs = splitattr(req.get_selector()) | |
279 | dirs = path.split('/') |
|
280 | dirs = path.split('/') | |
280 | dirs = map(unquote, dirs) |
|
281 | dirs = map(unquote, dirs) | |
281 | dirs, file = dirs[:-1], dirs[-1] |
|
282 | dirs, file = dirs[:-1], dirs[-1] | |
282 | if dirs and not dirs[0]: |
|
283 | if dirs and not dirs[0]: | |
283 | dirs = dirs[1:] |
|
284 | dirs = dirs[1:] | |
284 | try: |
|
285 | try: | |
285 | fw = self.connect_ftp(user, passwd, host, port, dirs) |
|
286 | fw = self.connect_ftp(user, passwd, host, port, dirs) | |
286 | if file: |
|
287 | if file: | |
287 | type = 'I' |
|
288 | type = 'I' | |
288 | else: |
|
289 | else: | |
289 | type = 'D' |
|
290 | type = 'D' | |
290 |
|
291 | |||
291 | for attr in attrs: |
|
292 | for attr in attrs: | |
292 | attr, value = splitattr(attr) |
|
293 | attr, value = splitattr(attr) | |
293 | if attr.lower() == 'type' and \ |
|
294 | if attr.lower() == 'type' and \ | |
294 | value in ('a', 'A', 'i', 'I', 'd', 'D'): |
|
295 | value in ('a', 'A', 'i', 'I', 'd', 'D'): | |
295 | type = value.upper() |
|
296 | type = value.upper() | |
296 |
|
297 | |||
297 | # -- range support modifications start here |
|
298 | # -- range support modifications start here | |
298 | rest = None |
|
299 | rest = None | |
299 | range_tup = range_header_to_tuple(req.headers.get('Range', None)) |
|
300 | range_tup = range_header_to_tuple(req.headers.get('Range', None)) | |
300 | assert range_tup != () |
|
301 | assert range_tup != () | |
301 | if range_tup: |
|
302 | if range_tup: | |
302 | (fb, lb) = range_tup |
|
303 | (fb, lb) = range_tup | |
303 | if fb > 0: |
|
304 | if fb > 0: | |
304 | rest = fb |
|
305 | rest = fb | |
305 | # -- range support modifications end here |
|
306 | # -- range support modifications end here | |
306 |
|
307 | |||
307 | fp, retrlen = fw.retrfile(file, type, rest) |
|
308 | fp, retrlen = fw.retrfile(file, type, rest) | |
308 |
|
309 | |||
309 | # -- range support modifications start here |
|
310 | # -- range support modifications start here | |
310 | if range_tup: |
|
311 | if range_tup: | |
311 | (fb, lb) = range_tup |
|
312 | (fb, lb) = range_tup | |
312 | if lb == '': |
|
313 | if lb == '': | |
313 | if retrlen is None or retrlen == 0: |
|
314 | if retrlen is None or retrlen == 0: | |
314 | raise RangeError('Requested Range Not Satisfiable due' |
|
315 | raise RangeError('Requested Range Not Satisfiable due' | |
315 | ' to unobtainable file length.') |
|
316 | ' to unobtainable file length.') | |
316 | lb = retrlen |
|
317 | lb = retrlen | |
317 | retrlen = lb - fb |
|
318 | retrlen = lb - fb | |
318 | if retrlen < 0: |
|
319 | if retrlen < 0: | |
319 | # beginning of range is larger than file |
|
320 | # beginning of range is larger than file | |
320 | raise RangeError('Requested Range Not Satisfiable') |
|
321 | raise RangeError('Requested Range Not Satisfiable') | |
321 | else: |
|
322 | else: | |
322 | retrlen = lb - fb |
|
323 | retrlen = lb - fb | |
323 | fp = RangeableFileObject(fp, (0, retrlen)) |
|
324 | fp = RangeableFileObject(fp, (0, retrlen)) | |
324 | # -- range support modifications end here |
|
325 | # -- range support modifications end here | |
325 |
|
326 | |||
326 | headers = "" |
|
327 | headers = "" | |
327 | mtype = mimetypes.guess_type(req.get_full_url())[0] |
|
328 | mtype = mimetypes.guess_type(req.get_full_url())[0] | |
328 | if mtype: |
|
329 | if mtype: | |
329 | headers += "Content-Type: %s\n" % mtype |
|
330 | headers += "Content-Type: %s\n" % mtype | |
330 | if retrlen is not None and retrlen >= 0: |
|
331 | if retrlen is not None and retrlen >= 0: | |
331 | headers += "Content-Length: %d\n" % retrlen |
|
332 | headers += "Content-Length: %d\n" % retrlen | |
332 | headers = email.message_from_string(headers) |
|
333 | headers = email.message_from_string(headers) | |
333 | return addinfourl(fp, headers, req.get_full_url()) |
|
334 | return addinfourl(fp, headers, req.get_full_url()) | |
334 | except ftplib.all_errors as msg: |
|
335 | except ftplib.all_errors as msg: | |
335 | raise IOError('ftp error', msg) |
|
336 | raise IOError('ftp error', msg) | |
336 |
|
337 | |||
337 | def connect_ftp(self, user, passwd, host, port, dirs): |
|
338 | def connect_ftp(self, user, passwd, host, port, dirs): | |
338 | fw = ftpwrapper(user, passwd, host, port, dirs) |
|
339 | fw = ftpwrapper(user, passwd, host, port, dirs) | |
339 | return fw |
|
340 | return fw | |
340 |
|
341 | |||
341 | class ftpwrapper(urlreq.ftpwrapper): |
|
342 | class ftpwrapper(urlreq.ftpwrapper): | |
342 | # range support note: |
|
343 | # range support note: | |
343 | # this ftpwrapper code is copied directly from |
|
344 | # this ftpwrapper code is copied directly from | |
344 | # urllib. The only enhancement is to add the rest |
|
345 | # urllib. The only enhancement is to add the rest | |
345 | # argument and pass it on to ftp.ntransfercmd |
|
346 | # argument and pass it on to ftp.ntransfercmd | |
346 | def retrfile(self, file, type, rest=None): |
|
347 | def retrfile(self, file, type, rest=None): | |
347 | self.endtransfer() |
|
348 | self.endtransfer() | |
348 | if type in ('d', 'D'): |
|
349 | if type in ('d', 'D'): | |
349 | cmd = 'TYPE A' |
|
350 | cmd = 'TYPE A' | |
350 | isdir = 1 |
|
351 | isdir = 1 | |
351 | else: |
|
352 | else: | |
352 | cmd = 'TYPE ' + type |
|
353 | cmd = 'TYPE ' + type | |
353 | isdir = 0 |
|
354 | isdir = 0 | |
354 | try: |
|
355 | try: | |
355 | self.ftp.voidcmd(cmd) |
|
356 | self.ftp.voidcmd(cmd) | |
356 | except ftplib.all_errors: |
|
357 | except ftplib.all_errors: | |
357 | self.init() |
|
358 | self.init() | |
358 | self.ftp.voidcmd(cmd) |
|
359 | self.ftp.voidcmd(cmd) | |
359 | conn = None |
|
360 | conn = None | |
360 | if file and not isdir: |
|
361 | if file and not isdir: | |
361 | # Use nlst to see if the file exists at all |
|
362 | # Use nlst to see if the file exists at all | |
362 | try: |
|
363 | try: | |
363 | self.ftp.nlst(file) |
|
364 | self.ftp.nlst(file) | |
364 | except ftplib.error_perm as reason: |
|
365 | except ftplib.error_perm as reason: | |
365 | raise IOError('ftp error', reason) |
|
366 | raise IOError('ftp error', reason) | |
366 | # Restore the transfer mode! |
|
367 | # Restore the transfer mode! | |
367 | self.ftp.voidcmd(cmd) |
|
368 | self.ftp.voidcmd(cmd) | |
368 | # Try to retrieve as a file |
|
369 | # Try to retrieve as a file | |
369 | try: |
|
370 | try: | |
370 | cmd = 'RETR ' + file |
|
371 | cmd = 'RETR ' + file | |
371 | conn = self.ftp.ntransfercmd(cmd, rest) |
|
372 | conn = self.ftp.ntransfercmd(cmd, rest) | |
372 | except ftplib.error_perm as reason: |
|
373 | except ftplib.error_perm as reason: | |
373 | if str(reason).startswith('501'): |
|
374 | if str(reason).startswith('501'): | |
374 | # workaround for REST not supported error |
|
375 | # workaround for REST not supported error | |
375 | fp, retrlen = self.retrfile(file, type) |
|
376 | fp, retrlen = self.retrfile(file, type) | |
376 | fp = RangeableFileObject(fp, (rest,'')) |
|
377 | fp = RangeableFileObject(fp, (rest,'')) | |
377 | return (fp, retrlen) |
|
378 | return (fp, retrlen) | |
378 | elif not str(reason).startswith('550'): |
|
379 | elif not str(reason).startswith('550'): | |
379 | raise IOError('ftp error', reason) |
|
380 | raise IOError('ftp error', reason) | |
380 | if not conn: |
|
381 | if not conn: | |
381 | # Set transfer mode to ASCII! |
|
382 | # Set transfer mode to ASCII! | |
382 | self.ftp.voidcmd('TYPE A') |
|
383 | self.ftp.voidcmd('TYPE A') | |
383 | # Try a directory listing |
|
384 | # Try a directory listing | |
384 | if file: |
|
385 | if file: | |
385 | cmd = 'LIST ' + file |
|
386 | cmd = 'LIST ' + file | |
386 | else: |
|
387 | else: | |
387 | cmd = 'LIST' |
|
388 | cmd = 'LIST' | |
388 | conn = self.ftp.ntransfercmd(cmd) |
|
389 | conn = self.ftp.ntransfercmd(cmd) | |
389 | self.busy = 1 |
|
390 | self.busy = 1 | |
390 | # Pass back both a suitably decorated object and a retrieval length |
|
391 | # Pass back both a suitably decorated object and a retrieval length | |
391 | return (addclosehook(conn[0].makefile('rb'), |
|
392 | return (addclosehook(conn[0].makefile('rb'), | |
392 | self.endtransfer), conn[1]) |
|
393 | self.endtransfer), conn[1]) | |
393 |
|
394 | |||
394 |
|
395 | |||
395 | #################################################################### |
|
396 | #################################################################### | |
396 | # Range Tuple Functions |
|
397 | # Range Tuple Functions | |
397 | # XXX: These range tuple functions might go better in a class. |
|
398 | # XXX: These range tuple functions might go better in a class. | |
398 |
|
399 | |||
399 | _rangere = None |
|
400 | _rangere = None | |
400 | def range_header_to_tuple(range_header): |
|
401 | def range_header_to_tuple(range_header): | |
401 | """Get a (firstbyte,lastbyte) tuple from a Range header value. |
|
402 | """Get a (firstbyte,lastbyte) tuple from a Range header value. | |
402 |
|
403 | |||
403 | Range headers have the form "bytes=<firstbyte>-<lastbyte>". This |
|
404 | Range headers have the form "bytes=<firstbyte>-<lastbyte>". This | |
404 | function pulls the firstbyte and lastbyte values and returns |
|
405 | function pulls the firstbyte and lastbyte values and returns | |
405 | a (firstbyte,lastbyte) tuple. If lastbyte is not specified in |
|
406 | a (firstbyte,lastbyte) tuple. If lastbyte is not specified in | |
406 | the header value, it is returned as an empty string in the |
|
407 | the header value, it is returned as an empty string in the | |
407 | tuple. |
|
408 | tuple. | |
408 |
|
409 | |||
409 | Return None if range_header is None |
|
410 | Return None if range_header is None | |
410 | Return () if range_header does not conform to the range spec |
|
411 | Return () if range_header does not conform to the range spec | |
411 | pattern. |
|
412 | pattern. | |
412 |
|
413 | |||
413 | """ |
|
414 | """ | |
414 | global _rangere |
|
415 | global _rangere | |
415 | if range_header is None: |
|
416 | if range_header is None: | |
416 | return None |
|
417 | return None | |
417 | if _rangere is None: |
|
418 | if _rangere is None: | |
418 | _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') |
|
419 | _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') | |
419 | match = _rangere.match(range_header) |
|
420 | match = _rangere.match(range_header) | |
420 | if match: |
|
421 | if match: | |
421 | tup = range_tuple_normalize(match.group(1, 2)) |
|
422 | tup = range_tuple_normalize(match.group(1, 2)) | |
422 | if tup and tup[1]: |
|
423 | if tup and tup[1]: | |
423 | tup = (tup[0], tup[1]+1) |
|
424 | tup = (tup[0], tup[1]+1) | |
424 | return tup |
|
425 | return tup | |
425 | return () |
|
426 | return () | |
426 |
|
427 | |||
427 | def range_tuple_to_header(range_tup): |
|
428 | def range_tuple_to_header(range_tup): | |
428 | """Convert a range tuple to a Range header value. |
|
429 | """Convert a range tuple to a Range header value. | |
429 | Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None |
|
430 | Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None | |
430 | if no range is needed. |
|
431 | if no range is needed. | |
431 | """ |
|
432 | """ | |
432 | if range_tup is None: |
|
433 | if range_tup is None: | |
433 | return None |
|
434 | return None | |
434 | range_tup = range_tuple_normalize(range_tup) |
|
435 | range_tup = range_tuple_normalize(range_tup) | |
435 | if range_tup: |
|
436 | if range_tup: | |
436 | if range_tup[1]: |
|
437 | if range_tup[1]: | |
437 | range_tup = (range_tup[0], range_tup[1] - 1) |
|
438 | range_tup = (range_tup[0], range_tup[1] - 1) | |
438 | return 'bytes=%s-%s' % range_tup |
|
439 | return 'bytes=%s-%s' % range_tup | |
439 |
|
440 | |||
440 | def range_tuple_normalize(range_tup): |
|
441 | def range_tuple_normalize(range_tup): | |
441 | """Normalize a (first_byte,last_byte) range tuple. |
|
442 | """Normalize a (first_byte,last_byte) range tuple. | |
442 | Return a tuple whose first element is guaranteed to be an int |
|
443 | Return a tuple whose first element is guaranteed to be an int | |
443 | and whose second element will be '' (meaning: the last byte) or |
|
444 | and whose second element will be '' (meaning: the last byte) or | |
444 | an int. Finally, return None if the normalized tuple == (0,'') |
|
445 | an int. Finally, return None if the normalized tuple == (0,'') | |
445 | as that is equivalent to retrieving the entire file. |
|
446 | as that is equivalent to retrieving the entire file. | |
446 | """ |
|
447 | """ | |
447 | if range_tup is None: |
|
448 | if range_tup is None: | |
448 | return None |
|
449 | return None | |
449 | # handle first byte |
|
450 | # handle first byte | |
450 | fb = range_tup[0] |
|
451 | fb = range_tup[0] | |
451 | if fb in (None, ''): |
|
452 | if fb in (None, ''): | |
452 | fb = 0 |
|
453 | fb = 0 | |
453 | else: |
|
454 | else: | |
454 | fb = int(fb) |
|
455 | fb = int(fb) | |
455 | # handle last byte |
|
456 | # handle last byte | |
456 | try: |
|
457 | try: | |
457 | lb = range_tup[1] |
|
458 | lb = range_tup[1] | |
458 | except IndexError: |
|
459 | except IndexError: | |
459 | lb = '' |
|
460 | lb = '' | |
460 | else: |
|
461 | else: | |
461 | if lb is None: |
|
462 | if lb is None: | |
462 | lb = '' |
|
463 | lb = '' | |
463 | elif lb != '': |
|
464 | elif lb != '': | |
464 | lb = int(lb) |
|
465 | lb = int(lb) | |
465 | # check if range is over the entire file |
|
466 | # check if range is over the entire file | |
466 | if (fb, lb) == (0, ''): |
|
467 | if (fb, lb) == (0, ''): | |
467 | return None |
|
468 | return None | |
468 | # check that the range is valid |
|
469 | # check that the range is valid | |
469 | if lb < fb: |
|
470 | if lb < fb: | |
470 | raise RangeError('Invalid byte range: %s-%s' % (fb, lb)) |
|
471 | raise RangeError('Invalid byte range: %s-%s' % (fb, lb)) | |
471 | return (fb, lb) |
|
472 | return (fb, lb) |
@@ -1,297 +1,299 b'' | |||||
1 | # httpconnection.py - urllib2 handler for new http support |
|
1 | # httpconnection.py - urllib2 handler for new http support | |
2 | # |
|
2 | # | |
3 | # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com> |
|
3 | # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com> | |
4 | # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br> |
|
4 | # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br> | |
5 | # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> |
|
5 | # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> | |
6 | # Copyright 2011 Google, Inc. |
|
6 | # Copyright 2011 Google, Inc. | |
7 | # |
|
7 | # | |
8 | # This software may be used and distributed according to the terms of the |
|
8 | # This software may be used and distributed according to the terms of the | |
9 | # GNU General Public License version 2 or any later version. |
|
9 | # GNU General Public License version 2 or any later version. | |
10 |
|
10 | |||
11 | from __future__ import absolute_import |
|
11 | from __future__ import absolute_import | |
12 |
|
12 | |||
13 | import logging |
|
13 | import logging | |
14 | import os |
|
14 | import os | |
15 | import socket |
|
15 | import socket | |
16 |
|
16 | |||
17 | from .i18n import _ |
|
17 | from .i18n import _ | |
18 | from . import ( |
|
18 | from . import ( | |
19 | httpclient, |
|
19 | httpclient, | |
20 | sslutil, |
|
20 | sslutil, | |
|
21 | urllibcompat, | |||
21 | util, |
|
22 | util, | |
22 | ) |
|
23 | ) | |
23 |
|
24 | |||
24 | urlerr = util.urlerr |
|
25 | urlerr = util.urlerr | |
25 | urlreq = util.urlreq |
|
26 | urlreq = util.urlreq | |
26 |
|
27 | |||
27 | # moved here from url.py to avoid a cycle |
|
28 | # moved here from url.py to avoid a cycle | |
class httpsendfile(object):
    """Wrap a file object (as returned by ``open``) for sending over HTTP.

    It deliberately does NOT define ``__len__``: the underlying file may
    be larger than Py_ssize_t can represent.
    """

    def __init__(self, ui, *args, **kwargs):
        self.ui = ui
        self._data = open(*args, **kwargs)
        # Delegate the file-like pieces straight to the wrapped object.
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        self.length = os.fstat(self._data.fileno()).st_size
        self._pos = 0
        # Doubled on purpose: the bundle may be sent twice when the
        # server demands authentication (see read() below).
        self._total = self.length // 1024 * 2

    def read(self, *args, **kwargs):
        chunk = self._data.read(*args, **kwargs)
        if not chunk:
            # EOF: clear the progress indicator.
            self.ui.progress(_('sending'), None)
        else:
            self._pos += len(chunk)
            # We pass double the max for total because we currently have
            # to send the bundle twice in the case of a server that
            # requires authentication. Since we can't know until we try
            # once whether authentication will be required, just lie to
            # the user and maybe the push succeeds suddenly at 50%.
            self.ui.progress(_('sending'), self._pos // 1024,
                             unit=_('kb'), total=self._total)
        return chunk

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
|
67 | |||
67 | # moved here from url.py to avoid a cycle |
|
68 | # moved here from url.py to avoid a cycle | |
def readauthforuri(ui, uri, user):
    """Pick the best-matching [auth] configuration group for *uri*.

    Returns a ``(groupname, settingsdict)`` pair, or None when no entry
    in the [auth] section applies. When *user* is given, only entries
    whose username is unset or equal to it are considered.
    """
    # Collect the [auth] section into {group: {setting: value}}.
    groups = {}
    for key, val in ui.configitems('auth'):
        if key in ('cookiefile',):
            continue

        if '.' not in key:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
            continue
        group, setting = key.rsplit('.', 1)
        entry = groups.setdefault(group, {})
        if setting in ('username', 'cert', 'key'):
            val = util.expandpath(val)
        entry[setting] = val

    # Longest-prefix match against the URI; on a tie, prefer entries
    # that carry an explicit username.
    scheme, hostpath = uri.split('://', 1)
    bestuser = None
    bestlen = 0
    bestauth = None
    for group, auth in groups.iteritems():
        if user and user != auth.get('username', user):
            # A username given in the URI must either match the entry's
            # username or the entry must leave it unset.
            continue
        prefix = auth.get('prefix')
        if not prefix:
            continue
        pieces = prefix.split('://', 1)
        if len(pieces) > 1:
            schemes, prefix = [pieces[0]], pieces[1]
        else:
            schemes = (auth.get('schemes') or 'https').split()
        matches = prefix == '*' or hostpath.startswith(prefix)
        better = (len(prefix) > bestlen or
                  (len(prefix) == bestlen and not bestuser
                   and 'username' in auth))
        if matches and better and scheme in schemes:
            bestlen = len(prefix)
            bestauth = group, auth
            bestuser = auth.get('username')
            if user and not bestuser:
                auth['username'] = user
    return bestauth
112 |
|
113 | |||
113 | # Mercurial (at least until we can remove the old codepath) requires |
|
114 | # Mercurial (at least until we can remove the old codepath) requires | |
114 | # that the http response object be sufficiently file-like, so we |
|
115 | # that the http response object be sufficiently file-like, so we | |
115 | # provide a close() method here. |
|
116 | # provide a close() method here. | |
class HTTPResponse(httpclient.HTTPResponse):
    """Response class whose close() is a no-op.

    Mercurial (at least until we can remove the old codepath) requires
    that the http response object be sufficiently file-like, which
    includes having a close() method.
    """
    def close(self):
        # Intentionally do nothing so callers may treat us as file-like.
        pass
119 |
|
120 | |||
class HTTPConnection(httpclient.HTTPConnection):
    # Hand out our no-op-close response objects.
    response_class = HTTPResponse

    def request(self, method, uri, body=None, headers=None):
        """Issue a request, rewinding an httpsendfile body first."""
        if headers is None:
            headers = {}
        if isinstance(body, httpsendfile):
            # The body may already have been partially consumed (e.g. by
            # an earlier authentication round-trip); start over.
            body.seek(0)
        httpclient.HTTPConnection.request(self, method, uri, body=body,
                                          headers=headers)
129 |
|
130 | |||
130 |
|
131 | |||
# True once the 'mercurial.httpclient' debug logger has been configured;
# keeps http2handler.__init__ from adding duplicate log handlers when
# several handler instances are created in one process.
_configuredlogging = False
# Record format for http2 debug log output.
LOGFMT = '%(levelname)s:%(name)s:%(lineno)d:%(message)s'
133 | # Subclass BOTH of these because otherwise urllib2 "helpfully" |
|
134 | # Subclass BOTH of these because otherwise urllib2 "helpfully" | |
134 | # reinserts them since it notices we don't include any subclasses of |
|
135 | # reinserts them since it notices we don't include any subclasses of | |
135 | # them. |
|
136 | # them. | |
class http2handler(urlreq.httphandler, urlreq.httpshandler):
    """urllib2-style handler that serves requests via the new http library.

    Connections are pooled per (use_ssl, host, proxy) key and reused
    whenever an existing connection is not busy.
    """

    def __init__(self, ui, pwmgr):
        global _configuredlogging
        urlreq.abstracthttphandler.__init__(self)
        self.ui = ui
        self.pwmgr = pwmgr
        # [auth] entry matching the current https request; normally set
        # by https_open() before a connection is made. Initialized here
        # so _makesslconnection() never sees an undefined attribute.
        self.auth = None
        self._connections = {}
        # Configure debug logging for the http client library at most
        # once per process, no matter how many handlers are created.
        # developer config: ui.http2debuglevel
        loglevel = ui.config('ui', 'http2debuglevel')
        if loglevel and not _configuredlogging:
            _configuredlogging = True
            logger = logging.getLogger('mercurial.httpclient')
            logger.setLevel(getattr(logging, loglevel.upper()))
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(LOGFMT))
            logger.addHandler(handler)

    def close_all(self):
        """Close and remove all connection objects being kept for reuse."""
        for openconns in self._connections.values():
            for conn in openconns:
                conn.close()
        self._connections = {}

    # shamelessly borrowed from urllib2.AbstractHTTPHandler
    def do_open(self, http_class, req, use_ssl):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object. It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        # If using a proxy, the host returned by get_host() is
        # actually the proxy. On Python 2.6.1, the real destination
        # hostname is encoded in the URI in the urllib2 request
        # object. On Python 2.6.5, it's stored in the _tunnel_host
        # attribute which has no accessor.
        tunhost = getattr(req, '_tunnel_host', None)
        host = urllibcompat.gethost(req)
        if tunhost:
            proxyhost = host
            host = tunhost
        elif req.has_proxy():
            proxyhost = urllibcompat.gethost(req)
            host = urllibcompat.getselector(
                req).split('://', 1)[1].split('/', 1)[0]
        else:
            proxyhost = None

        if proxyhost:
            if ':' in proxyhost:
                # Note: this means we'll explode if we try and use an
                # IPv6 http proxy. This isn't a regression, so we
                # won't worry about it for now.
                proxyhost, proxyport = proxyhost.rsplit(':', 1)
            else:
                proxyport = 3128 # squid default
            proxy = (proxyhost, proxyport)
        else:
            proxy = None

        if not host:
            raise urlerr.urlerror('no host given')

        # Reuse an idle pooled connection if one exists for this key.
        connkey = use_ssl, host, proxy
        allconns = self._connections.get(connkey, [])
        conns = [c for c in allconns if not c.busy()]
        if conns:
            h = conns[0]
        else:
            if allconns:
                self.ui.debug('all connections for %s busy, making a new '
                              'one\n' % host)
            timeout = None
            if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                timeout = req.timeout
            h = http_class(host, timeout=timeout, proxy_hostport=proxy)
            self._connections.setdefault(connkey, []).append(h)

        # Merge ordinary and unredirected headers, Title-Casing names.
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            path = urllibcompat.getselector(req)
            if '://' in path:
                path = path.split('://', 1)[1].split('/', 1)[1]
            if path[0] != '/':
                path = '/' + path
            h.request(req.get_method(), path, req.data, headers)
            r = h.getresponse()
        except socket.error as err: # XXX what error?
            raise urlerr.urlerror(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.
        r.recv = r.read

        resp = urlreq.addinfourl(r, r.headers, urllibcompat.getfullurl(req))
        resp.code = r.status
        resp.msg = r.reason
        return resp

    # httplib always uses the given host/port as the socket connect
    # target, and then allows full URIs in the request path, which it
    # then observes and treats as a signal to do proxying instead.
    def http_open(self, req):
        if urllibcompat.getfullurl(req).startswith('https'):
            return self.https_open(req)
        def makehttpcon(*args, **kwargs):
            k2 = dict(kwargs)
            k2['use_ssl'] = False
            return HTTPConnection(*args, **k2)
        return self.do_open(makehttpcon, req, False)

    def https_open(self, req):
        # urllibcompat.getfullurl(req) does not contain credentials and we may
        # need them to match the certificates.
        url = urllibcompat.getfullurl(req)
        user, password = self.pwmgr.find_stored_password(url)
        res = readauthforuri(self.ui, url, user)
        if res:
            group, auth = res
            self.auth = auth
            self.ui.debug("using auth.%s.* for authentication\n" % group)
        else:
            self.auth = None
        return self.do_open(self._makesslconnection, req, True)

    def _makesslconnection(self, host, port=443, *args, **kwargs):
        """Build an ssl-wrapped HTTPConnection for *host*:*port*.

        Optional positional args are (key_file, cert_file); values from
        the matching [auth] section, when present, take precedence.
        """
        keyfile = None
        certfile = None

        # *args arrives as a tuple, which has no pop(); convert to a
        # list so the positional key_file/cert_file can be consumed.
        args = list(args)
        if args: # key_file
            keyfile = args.pop(0)
        if args: # cert_file
            certfile = args.pop(0)

        # if the user has specified different key/cert files in
        # hgrc, we prefer these
        if self.auth and 'key' in self.auth and 'cert' in self.auth:
            keyfile = self.auth['key']
            certfile = self.auth['cert']

        # let host port take precedence
        if ':' in host and '[' not in host or ']:' in host:
            host, port = host.rsplit(':', 1)
            port = int(port)
        if '[' in host:
            # strip the brackets from an IPv6 literal
            host = host[1:-1]

        kwargs['keyfile'] = keyfile
        kwargs['certfile'] = certfile

        con = HTTPConnection(host, port, use_ssl=True,
                             ssl_wrap_socket=sslutil.wrapsocket,
                             ssl_validator=sslutil.validatesocket,
                             ui=self.ui,
                             **kwargs)
        return con
@@ -1,723 +1,726 b'' | |||||
1 | # This library is free software; you can redistribute it and/or |
|
1 | # This library is free software; you can redistribute it and/or | |
2 | # modify it under the terms of the GNU Lesser General Public |
|
2 | # modify it under the terms of the GNU Lesser General Public | |
3 | # License as published by the Free Software Foundation; either |
|
3 | # License as published by the Free Software Foundation; either | |
4 | # version 2.1 of the License, or (at your option) any later version. |
|
4 | # version 2.1 of the License, or (at your option) any later version. | |
5 | # |
|
5 | # | |
6 | # This library is distributed in the hope that it will be useful, |
|
6 | # This library is distributed in the hope that it will be useful, | |
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
9 | # Lesser General Public License for more details. |
|
9 | # Lesser General Public License for more details. | |
10 | # |
|
10 | # | |
11 | # You should have received a copy of the GNU Lesser General Public |
|
11 | # You should have received a copy of the GNU Lesser General Public | |
12 | # License along with this library; if not, see |
|
12 | # License along with this library; if not, see | |
13 | # <http://www.gnu.org/licenses/>. |
|
13 | # <http://www.gnu.org/licenses/>. | |
14 |
|
14 | |||
15 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber |
|
15 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | |
16 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko |
|
16 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | |
17 |
|
17 | |||
18 | # Modified by Benoit Boissinot: |
|
18 | # Modified by Benoit Boissinot: | |
19 | # - fix for digest auth (inspired from urllib2.py @ Python v2.4) |
|
19 | # - fix for digest auth (inspired from urllib2.py @ Python v2.4) | |
20 | # Modified by Dirkjan Ochtman: |
|
20 | # Modified by Dirkjan Ochtman: | |
21 | # - import md5 function from a local util module |
|
21 | # - import md5 function from a local util module | |
22 | # Modified by Augie Fackler: |
|
22 | # Modified by Augie Fackler: | |
23 | # - add safesend method and use it to prevent broken pipe errors |
|
23 | # - add safesend method and use it to prevent broken pipe errors | |
24 | # on large POST requests |
|
24 | # on large POST requests | |
25 |
|
25 | |||
26 | """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive. |
|
26 | """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive. | |
27 |
|
27 | |||
28 | >>> import urllib2 |
|
28 | >>> import urllib2 | |
29 | >>> from keepalive import HTTPHandler |
|
29 | >>> from keepalive import HTTPHandler | |
30 | >>> keepalive_handler = HTTPHandler() |
|
30 | >>> keepalive_handler = HTTPHandler() | |
31 | >>> opener = urlreq.buildopener(keepalive_handler) |
|
31 | >>> opener = urlreq.buildopener(keepalive_handler) | |
32 | >>> urlreq.installopener(opener) |
|
32 | >>> urlreq.installopener(opener) | |
33 | >>> |
|
33 | >>> | |
34 | >>> fo = urlreq.urlopen('http://www.python.org') |
|
34 | >>> fo = urlreq.urlopen('http://www.python.org') | |
35 |
|
35 | |||
36 | If a connection to a given host is requested, and all of the existing |
|
36 | If a connection to a given host is requested, and all of the existing | |
37 | connections are still in use, another connection will be opened. If |
|
37 | connections are still in use, another connection will be opened. If | |
38 | the handler tries to use an existing connection but it fails in some |
|
38 | the handler tries to use an existing connection but it fails in some | |
39 | way, it will be closed and removed from the pool. |
|
39 | way, it will be closed and removed from the pool. | |
40 |
|
40 | |||
41 | To remove the handler, simply re-run build_opener with no arguments, and |
|
41 | To remove the handler, simply re-run build_opener with no arguments, and | |
42 | install that opener. |
|
42 | install that opener. | |
43 |
|
43 | |||
44 | You can explicitly close connections by using the close_connection() |
|
44 | You can explicitly close connections by using the close_connection() | |
45 | method of the returned file-like object (described below) or you can |
|
45 | method of the returned file-like object (described below) or you can | |
46 | use the handler methods: |
|
46 | use the handler methods: | |
47 |
|
47 | |||
48 | close_connection(host) |
|
48 | close_connection(host) | |
49 | close_all() |
|
49 | close_all() | |
50 | open_connections() |
|
50 | open_connections() | |
51 |
|
51 | |||
52 | NOTE: using the close_connection and close_all methods of the handler |
|
52 | NOTE: using the close_connection and close_all methods of the handler | |
53 | should be done with care when using multiple threads. |
|
53 | should be done with care when using multiple threads. | |
54 | * there is nothing that prevents another thread from creating new |
|
54 | * there is nothing that prevents another thread from creating new | |
55 | connections immediately after connections are closed |
|
55 | connections immediately after connections are closed | |
56 | * no checks are done to prevent in-use connections from being closed |
|
56 | * no checks are done to prevent in-use connections from being closed | |
57 |
|
57 | |||
58 | >>> keepalive_handler.close_all() |
|
58 | >>> keepalive_handler.close_all() | |
59 |
|
59 | |||
60 | EXTRA ATTRIBUTES AND METHODS |
|
60 | EXTRA ATTRIBUTES AND METHODS | |
61 |
|
61 | |||
62 | Upon a status of 200, the object returned has a few additional |
|
62 | Upon a status of 200, the object returned has a few additional | |
63 | attributes and methods, which should not be used if you want to |
|
63 | attributes and methods, which should not be used if you want to | |
64 | remain consistent with the normal urllib2-returned objects: |
|
64 | remain consistent with the normal urllib2-returned objects: | |
65 |
|
65 | |||
66 | close_connection() - close the connection to the host |
|
66 | close_connection() - close the connection to the host | |
67 | readlines() - you know, readlines() |
|
67 | readlines() - you know, readlines() | |
68 | status - the return status (i.e. 404) |
|
68 | status - the return status (i.e. 404) | |
69 | reason - english translation of status (i.e. 'File not found') |
|
69 | reason - english translation of status (i.e. 'File not found') | |
70 |
|
70 | |||
71 | If you want the best of both worlds, use this inside an |
|
71 | If you want the best of both worlds, use this inside an | |
72 | AttributeError-catching try: |
|
72 | AttributeError-catching try: | |
73 |
|
73 | |||
74 | >>> try: status = fo.status |
|
74 | >>> try: status = fo.status | |
75 | >>> except AttributeError: status = None |
|
75 | >>> except AttributeError: status = None | |
76 |
|
76 | |||
77 | Unfortunately, these are ONLY there if status == 200, so it's not |
|
77 | Unfortunately, these are ONLY there if status == 200, so it's not | |
78 | easy to distinguish between non-200 responses. The reason is that |
|
78 | easy to distinguish between non-200 responses. The reason is that | |
79 | urllib2 tries to do clever things with error codes 301, 302, 401, |
|
79 | urllib2 tries to do clever things with error codes 301, 302, 401, | |
80 | and 407, and it wraps the object upon return. |
|
80 | and 407, and it wraps the object upon return. | |
81 | """ |
|
81 | """ | |
82 |
|
82 | |||
83 | # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $ |
|
83 | # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $ | |
84 |
|
84 | |||
85 | from __future__ import absolute_import, print_function |
|
85 | from __future__ import absolute_import, print_function | |
86 |
|
86 | |||
87 | import errno |
|
87 | import errno | |
88 | import hashlib |
|
88 | import hashlib | |
89 | import socket |
|
89 | import socket | |
90 | import sys |
|
90 | import sys | |
91 | import threading |
|
91 | import threading | |
92 |
|
92 | |||
93 | from .i18n import _ |
|
93 | from .i18n import _ | |
94 | from . import ( |
|
94 | from . import ( | |
95 | pycompat, |
|
95 | pycompat, | |
|
96 | urllibcompat, | |||
96 | util, |
|
97 | util, | |
97 | ) |
|
98 | ) | |
98 |
|
99 | |||
99 | httplib = util.httplib |
|
100 | httplib = util.httplib | |
100 | urlerr = util.urlerr |
|
101 | urlerr = util.urlerr | |
101 | urlreq = util.urlreq |
|
102 | urlreq = util.urlreq | |
102 |
|
103 | |||
# Module-level debug hook; None leaves keepalive debug output disabled.
# NOTE(review): the consumers of DEBUG are outside this excerpt — confirm
# the expected interface against the rest of keepalive.py.
DEBUG = None
104 |
|
105 | |||
105 | class ConnectionManager(object): |
|
106 | class ConnectionManager(object): | |
106 | """ |
|
107 | """ | |
107 | The connection manager must be able to: |
|
108 | The connection manager must be able to: | |
108 | * keep track of all existing |
|
109 | * keep track of all existing | |
109 | """ |
|
110 | """ | |
110 | def __init__(self): |
|
111 | def __init__(self): | |
111 | self._lock = threading.Lock() |
|
112 | self._lock = threading.Lock() | |
112 | self._hostmap = {} # map hosts to a list of connections |
|
113 | self._hostmap = {} # map hosts to a list of connections | |
113 | self._connmap = {} # map connections to host |
|
114 | self._connmap = {} # map connections to host | |
114 | self._readymap = {} # map connection to ready state |
|
115 | self._readymap = {} # map connection to ready state | |
115 |
|
116 | |||
116 | def add(self, host, connection, ready): |
|
117 | def add(self, host, connection, ready): | |
117 | self._lock.acquire() |
|
118 | self._lock.acquire() | |
118 | try: |
|
119 | try: | |
119 | if host not in self._hostmap: |
|
120 | if host not in self._hostmap: | |
120 | self._hostmap[host] = [] |
|
121 | self._hostmap[host] = [] | |
121 | self._hostmap[host].append(connection) |
|
122 | self._hostmap[host].append(connection) | |
122 | self._connmap[connection] = host |
|
123 | self._connmap[connection] = host | |
123 | self._readymap[connection] = ready |
|
124 | self._readymap[connection] = ready | |
124 | finally: |
|
125 | finally: | |
125 | self._lock.release() |
|
126 | self._lock.release() | |
126 |
|
127 | |||
127 | def remove(self, connection): |
|
128 | def remove(self, connection): | |
128 | self._lock.acquire() |
|
129 | self._lock.acquire() | |
129 | try: |
|
130 | try: | |
130 | try: |
|
131 | try: | |
131 | host = self._connmap[connection] |
|
132 | host = self._connmap[connection] | |
132 | except KeyError: |
|
133 | except KeyError: | |
133 | pass |
|
134 | pass | |
134 | else: |
|
135 | else: | |
135 | del self._connmap[connection] |
|
136 | del self._connmap[connection] | |
136 | del self._readymap[connection] |
|
137 | del self._readymap[connection] | |
137 | self._hostmap[host].remove(connection) |
|
138 | self._hostmap[host].remove(connection) | |
138 | if not self._hostmap[host]: |
|
139 | if not self._hostmap[host]: | |
139 | del self._hostmap[host] |
|
140 | del self._hostmap[host] | |
140 | finally: |
|
141 | finally: | |
141 | self._lock.release() |
|
142 | self._lock.release() | |
142 |
|
143 | |||
143 | def set_ready(self, connection, ready): |
|
144 | def set_ready(self, connection, ready): | |
144 | try: |
|
145 | try: | |
145 | self._readymap[connection] = ready |
|
146 | self._readymap[connection] = ready | |
146 | except KeyError: |
|
147 | except KeyError: | |
147 | pass |
|
148 | pass | |
148 |
|
149 | |||
149 | def get_ready_conn(self, host): |
|
150 | def get_ready_conn(self, host): | |
150 | conn = None |
|
151 | conn = None | |
151 | self._lock.acquire() |
|
152 | self._lock.acquire() | |
152 | try: |
|
153 | try: | |
153 | if host in self._hostmap: |
|
154 | if host in self._hostmap: | |
154 | for c in self._hostmap[host]: |
|
155 | for c in self._hostmap[host]: | |
155 | if self._readymap[c]: |
|
156 | if self._readymap[c]: | |
156 | self._readymap[c] = 0 |
|
157 | self._readymap[c] = 0 | |
157 | conn = c |
|
158 | conn = c | |
158 | break |
|
159 | break | |
159 | finally: |
|
160 | finally: | |
160 | self._lock.release() |
|
161 | self._lock.release() | |
161 | return conn |
|
162 | return conn | |
162 |
|
163 | |||
163 | def get_all(self, host=None): |
|
164 | def get_all(self, host=None): | |
164 | if host: |
|
165 | if host: | |
165 | return list(self._hostmap.get(host, [])) |
|
166 | return list(self._hostmap.get(host, [])) | |
166 | else: |
|
167 | else: | |
167 | return dict(self._hostmap) |
|
168 | return dict(self._hostmap) | |
168 |
|
169 | |||
169 | class KeepAliveHandler(object): |
|
170 | class KeepAliveHandler(object): | |
170 | def __init__(self): |
|
171 | def __init__(self): | |
171 | self._cm = ConnectionManager() |
|
172 | self._cm = ConnectionManager() | |
172 |
|
173 | |||
173 | #### Connection Management |
|
174 | #### Connection Management | |
174 | def open_connections(self): |
|
175 | def open_connections(self): | |
175 | """return a list of connected hosts and the number of connections |
|
176 | """return a list of connected hosts and the number of connections | |
176 | to each. [('foo.com:80', 2), ('bar.org', 1)]""" |
|
177 | to each. [('foo.com:80', 2), ('bar.org', 1)]""" | |
177 | return [(host, len(li)) for (host, li) in self._cm.get_all().items()] |
|
178 | return [(host, len(li)) for (host, li) in self._cm.get_all().items()] | |
178 |
|
179 | |||
179 | def close_connection(self, host): |
|
180 | def close_connection(self, host): | |
180 | """close connection(s) to <host> |
|
181 | """close connection(s) to <host> | |
181 | host is the host:port spec, as in 'www.cnn.com:8080' as passed in. |
|
182 | host is the host:port spec, as in 'www.cnn.com:8080' as passed in. | |
182 | no error occurs if there is no connection to that host.""" |
|
183 | no error occurs if there is no connection to that host.""" | |
183 | for h in self._cm.get_all(host): |
|
184 | for h in self._cm.get_all(host): | |
184 | self._cm.remove(h) |
|
185 | self._cm.remove(h) | |
185 | h.close() |
|
186 | h.close() | |
186 |
|
187 | |||
187 | def close_all(self): |
|
188 | def close_all(self): | |
188 | """close all open connections""" |
|
189 | """close all open connections""" | |
189 | for host, conns in self._cm.get_all().iteritems(): |
|
190 | for host, conns in self._cm.get_all().iteritems(): | |
190 | for h in conns: |
|
191 | for h in conns: | |
191 | self._cm.remove(h) |
|
192 | self._cm.remove(h) | |
192 | h.close() |
|
193 | h.close() | |
193 |
|
194 | |||
194 | def _request_closed(self, request, host, connection): |
|
195 | def _request_closed(self, request, host, connection): | |
195 | """tells us that this request is now closed and that the |
|
196 | """tells us that this request is now closed and that the | |
196 | connection is ready for another request""" |
|
197 | connection is ready for another request""" | |
197 | self._cm.set_ready(connection, 1) |
|
198 | self._cm.set_ready(connection, 1) | |
198 |
|
199 | |||
199 | def _remove_connection(self, host, connection, close=0): |
|
200 | def _remove_connection(self, host, connection, close=0): | |
200 | if close: |
|
201 | if close: | |
201 | connection.close() |
|
202 | connection.close() | |
202 | self._cm.remove(connection) |
|
203 | self._cm.remove(connection) | |
203 |
|
204 | |||
204 | #### Transaction Execution |
|
205 | #### Transaction Execution | |
205 | def http_open(self, req): |
|
206 | def http_open(self, req): | |
206 | return self.do_open(HTTPConnection, req) |
|
207 | return self.do_open(HTTPConnection, req) | |
207 |
|
208 | |||
208 | def do_open(self, http_class, req): |
|
209 | def do_open(self, http_class, req): | |
209 |
host = req |
|
210 | host = urllibcompat.gethost(req) | |
210 | if not host: |
|
211 | if not host: | |
211 | raise urlerr.urlerror('no host given') |
|
212 | raise urlerr.urlerror('no host given') | |
212 |
|
213 | |||
213 | try: |
|
214 | try: | |
214 | h = self._cm.get_ready_conn(host) |
|
215 | h = self._cm.get_ready_conn(host) | |
215 | while h: |
|
216 | while h: | |
216 | r = self._reuse_connection(h, req, host) |
|
217 | r = self._reuse_connection(h, req, host) | |
217 |
|
218 | |||
218 | # if this response is non-None, then it worked and we're |
|
219 | # if this response is non-None, then it worked and we're | |
219 | # done. Break out, skipping the else block. |
|
220 | # done. Break out, skipping the else block. | |
220 | if r: |
|
221 | if r: | |
221 | break |
|
222 | break | |
222 |
|
223 | |||
223 | # connection is bad - possibly closed by server |
|
224 | # connection is bad - possibly closed by server | |
224 | # discard it and ask for the next free connection |
|
225 | # discard it and ask for the next free connection | |
225 | h.close() |
|
226 | h.close() | |
226 | self._cm.remove(h) |
|
227 | self._cm.remove(h) | |
227 | h = self._cm.get_ready_conn(host) |
|
228 | h = self._cm.get_ready_conn(host) | |
228 | else: |
|
229 | else: | |
229 | # no (working) free connections were found. Create a new one. |
|
230 | # no (working) free connections were found. Create a new one. | |
230 | h = http_class(host) |
|
231 | h = http_class(host) | |
231 | if DEBUG: |
|
232 | if DEBUG: | |
232 | DEBUG.info("creating new connection to %s (%d)", |
|
233 | DEBUG.info("creating new connection to %s (%d)", | |
233 | host, id(h)) |
|
234 | host, id(h)) | |
234 | self._cm.add(host, h, 0) |
|
235 | self._cm.add(host, h, 0) | |
235 | self._start_transaction(h, req) |
|
236 | self._start_transaction(h, req) | |
236 | r = h.getresponse() |
|
237 | r = h.getresponse() | |
237 | # The string form of BadStatusLine is the status line. Add some context |
|
238 | # The string form of BadStatusLine is the status line. Add some context | |
238 | # to make the error message slightly more useful. |
|
239 | # to make the error message slightly more useful. | |
239 | except httplib.BadStatusLine as err: |
|
240 | except httplib.BadStatusLine as err: | |
240 | raise urlerr.urlerror( |
|
241 | raise urlerr.urlerror( | |
241 | _('bad HTTP status line: %s') % pycompat.sysbytes(err.line)) |
|
242 | _('bad HTTP status line: %s') % pycompat.sysbytes(err.line)) | |
242 | except (socket.error, httplib.HTTPException) as err: |
|
243 | except (socket.error, httplib.HTTPException) as err: | |
243 | raise urlerr.urlerror(err) |
|
244 | raise urlerr.urlerror(err) | |
244 |
|
245 | |||
245 | # if not a persistent connection, don't try to reuse it |
|
246 | # if not a persistent connection, don't try to reuse it | |
246 | if r.will_close: |
|
247 | if r.will_close: | |
247 | self._cm.remove(h) |
|
248 | self._cm.remove(h) | |
248 |
|
249 | |||
249 | if DEBUG: |
|
250 | if DEBUG: | |
250 | DEBUG.info("STATUS: %s, %s", r.status, r.reason) |
|
251 | DEBUG.info("STATUS: %s, %s", r.status, r.reason) | |
251 | r._handler = self |
|
252 | r._handler = self | |
252 | r._host = host |
|
253 | r._host = host | |
253 | r._url = req.get_full_url() |
|
254 | r._url = req.get_full_url() | |
254 | r._connection = h |
|
255 | r._connection = h | |
255 | r.code = r.status |
|
256 | r.code = r.status | |
256 | r.headers = r.msg |
|
257 | r.headers = r.msg | |
257 | r.msg = r.reason |
|
258 | r.msg = r.reason | |
258 |
|
259 | |||
259 | return r |
|
260 | return r | |
260 |
|
261 | |||
261 | def _reuse_connection(self, h, req, host): |
|
262 | def _reuse_connection(self, h, req, host): | |
262 | """start the transaction with a re-used connection |
|
263 | """start the transaction with a re-used connection | |
263 | return a response object (r) upon success or None on failure. |
|
264 | return a response object (r) upon success or None on failure. | |
264 | This DOES not close or remove bad connections in cases where |
|
265 | This DOES not close or remove bad connections in cases where | |
265 | it returns. However, if an unexpected exception occurs, it |
|
266 | it returns. However, if an unexpected exception occurs, it | |
266 | will close and remove the connection before re-raising. |
|
267 | will close and remove the connection before re-raising. | |
267 | """ |
|
268 | """ | |
268 | try: |
|
269 | try: | |
269 | self._start_transaction(h, req) |
|
270 | self._start_transaction(h, req) | |
270 | r = h.getresponse() |
|
271 | r = h.getresponse() | |
271 | # note: just because we got something back doesn't mean it |
|
272 | # note: just because we got something back doesn't mean it | |
272 | # worked. We'll check the version below, too. |
|
273 | # worked. We'll check the version below, too. | |
273 | except (socket.error, httplib.HTTPException): |
|
274 | except (socket.error, httplib.HTTPException): | |
274 | r = None |
|
275 | r = None | |
275 | except: # re-raises |
|
276 | except: # re-raises | |
276 | # adding this block just in case we've missed |
|
277 | # adding this block just in case we've missed | |
277 | # something we will still raise the exception, but |
|
278 | # something we will still raise the exception, but | |
278 | # lets try and close the connection and remove it |
|
279 | # lets try and close the connection and remove it | |
279 | # first. We previously got into a nasty loop |
|
280 | # first. We previously got into a nasty loop | |
280 | # where an exception was uncaught, and so the |
|
281 | # where an exception was uncaught, and so the | |
281 | # connection stayed open. On the next try, the |
|
282 | # connection stayed open. On the next try, the | |
282 | # same exception was raised, etc. The trade-off is |
|
283 | # same exception was raised, etc. The trade-off is | |
283 | # that it's now possible this call will raise |
|
284 | # that it's now possible this call will raise | |
284 | # a DIFFERENT exception |
|
285 | # a DIFFERENT exception | |
285 | if DEBUG: |
|
286 | if DEBUG: | |
286 | DEBUG.error("unexpected exception - closing " |
|
287 | DEBUG.error("unexpected exception - closing " | |
287 | "connection to %s (%d)", host, id(h)) |
|
288 | "connection to %s (%d)", host, id(h)) | |
288 | self._cm.remove(h) |
|
289 | self._cm.remove(h) | |
289 | h.close() |
|
290 | h.close() | |
290 | raise |
|
291 | raise | |
291 |
|
292 | |||
292 | if r is None or r.version == 9: |
|
293 | if r is None or r.version == 9: | |
293 | # httplib falls back to assuming HTTP 0.9 if it gets a |
|
294 | # httplib falls back to assuming HTTP 0.9 if it gets a | |
294 | # bad header back. This is most likely to happen if |
|
295 | # bad header back. This is most likely to happen if | |
295 | # the socket has been closed by the server since we |
|
296 | # the socket has been closed by the server since we | |
296 | # last used the connection. |
|
297 | # last used the connection. | |
297 | if DEBUG: |
|
298 | if DEBUG: | |
298 | DEBUG.info("failed to re-use connection to %s (%d)", |
|
299 | DEBUG.info("failed to re-use connection to %s (%d)", | |
299 | host, id(h)) |
|
300 | host, id(h)) | |
300 | r = None |
|
301 | r = None | |
301 | else: |
|
302 | else: | |
302 | if DEBUG: |
|
303 | if DEBUG: | |
303 | DEBUG.info("re-using connection to %s (%d)", host, id(h)) |
|
304 | DEBUG.info("re-using connection to %s (%d)", host, id(h)) | |
304 |
|
305 | |||
305 | return r |
|
306 | return r | |
306 |
|
307 | |||
307 | def _start_transaction(self, h, req): |
|
308 | def _start_transaction(self, h, req): | |
308 | # What follows mostly reimplements HTTPConnection.request() |
|
309 | # What follows mostly reimplements HTTPConnection.request() | |
309 | # except it adds self.parent.addheaders in the mix and sends headers |
|
310 | # except it adds self.parent.addheaders in the mix and sends headers | |
310 | # in a deterministic order (to make testing easier). |
|
311 | # in a deterministic order (to make testing easier). | |
311 | headers = util.sortdict(self.parent.addheaders) |
|
312 | headers = util.sortdict(self.parent.addheaders) | |
312 | headers.update(sorted(req.headers.items())) |
|
313 | headers.update(sorted(req.headers.items())) | |
313 | headers.update(sorted(req.unredirected_hdrs.items())) |
|
314 | headers.update(sorted(req.unredirected_hdrs.items())) | |
314 | headers = util.sortdict((n.lower(), v) for n, v in headers.items()) |
|
315 | headers = util.sortdict((n.lower(), v) for n, v in headers.items()) | |
315 | skipheaders = {} |
|
316 | skipheaders = {} | |
316 | for n in ('host', 'accept-encoding'): |
|
317 | for n in ('host', 'accept-encoding'): | |
317 | if n in headers: |
|
318 | if n in headers: | |
318 | skipheaders['skip_' + n.replace('-', '_')] = 1 |
|
319 | skipheaders['skip_' + n.replace('-', '_')] = 1 | |
319 | try: |
|
320 | try: | |
320 |
if req |
|
321 | if urllibcompat.hasdata(req): | |
321 |
data = req |
|
322 | data = urllibcompat.getdata(req) | |
322 | h.putrequest( |
|
323 | h.putrequest( | |
323 |
req.get_method(), req |
|
324 | req.get_method(), urllibcompat.getselector(req), | |
|
325 | **skipheaders) | |||
324 | if 'content-type' not in headers: |
|
326 | if 'content-type' not in headers: | |
325 | h.putheader('Content-type', |
|
327 | h.putheader('Content-type', | |
326 | 'application/x-www-form-urlencoded') |
|
328 | 'application/x-www-form-urlencoded') | |
327 | if 'content-length' not in headers: |
|
329 | if 'content-length' not in headers: | |
328 | h.putheader('Content-length', '%d' % len(data)) |
|
330 | h.putheader('Content-length', '%d' % len(data)) | |
329 | else: |
|
331 | else: | |
330 | h.putrequest( |
|
332 | h.putrequest( | |
331 |
req.get_method(), req |
|
333 | req.get_method(), urllibcompat.getselector(req), | |
|
334 | **skipheaders) | |||
332 | except socket.error as err: |
|
335 | except socket.error as err: | |
333 | raise urlerr.urlerror(err) |
|
336 | raise urlerr.urlerror(err) | |
334 | for k, v in headers.items(): |
|
337 | for k, v in headers.items(): | |
335 | h.putheader(k, v) |
|
338 | h.putheader(k, v) | |
336 | h.endheaders() |
|
339 | h.endheaders() | |
337 |
if req |
|
340 | if urllibcompat.hasdata(req): | |
338 | h.send(data) |
|
341 | h.send(data) | |
339 |
|
342 | |||
340 | class HTTPHandler(KeepAliveHandler, urlreq.httphandler): |
|
343 | class HTTPHandler(KeepAliveHandler, urlreq.httphandler): | |
341 | pass |
|
344 | pass | |
342 |
|
345 | |||
343 | class HTTPResponse(httplib.HTTPResponse): |
|
346 | class HTTPResponse(httplib.HTTPResponse): | |
344 | # we need to subclass HTTPResponse in order to |
|
347 | # we need to subclass HTTPResponse in order to | |
345 | # 1) add readline() and readlines() methods |
|
348 | # 1) add readline() and readlines() methods | |
346 | # 2) add close_connection() methods |
|
349 | # 2) add close_connection() methods | |
347 | # 3) add info() and geturl() methods |
|
350 | # 3) add info() and geturl() methods | |
348 |
|
351 | |||
349 | # in order to add readline(), read must be modified to deal with a |
|
352 | # in order to add readline(), read must be modified to deal with a | |
350 | # buffer. example: readline must read a buffer and then spit back |
|
353 | # buffer. example: readline must read a buffer and then spit back | |
351 | # one line at a time. The only real alternative is to read one |
|
354 | # one line at a time. The only real alternative is to read one | |
352 | # BYTE at a time (ick). Once something has been read, it can't be |
|
355 | # BYTE at a time (ick). Once something has been read, it can't be | |
353 | # put back (ok, maybe it can, but that's even uglier than this), |
|
356 | # put back (ok, maybe it can, but that's even uglier than this), | |
354 | # so if you THEN do a normal read, you must first take stuff from |
|
357 | # so if you THEN do a normal read, you must first take stuff from | |
355 | # the buffer. |
|
358 | # the buffer. | |
356 |
|
359 | |||
357 | # the read method wraps the original to accommodate buffering, |
|
360 | # the read method wraps the original to accommodate buffering, | |
358 | # although read() never adds to the buffer. |
|
361 | # although read() never adds to the buffer. | |
359 | # Both readline and readlines have been stolen with almost no |
|
362 | # Both readline and readlines have been stolen with almost no | |
360 | # modification from socket.py |
|
363 | # modification from socket.py | |
361 |
|
364 | |||
362 |
|
365 | |||
363 | def __init__(self, sock, debuglevel=0, strict=0, method=None): |
|
366 | def __init__(self, sock, debuglevel=0, strict=0, method=None): | |
364 | extrakw = {} |
|
367 | extrakw = {} | |
365 | if not pycompat.ispy3: |
|
368 | if not pycompat.ispy3: | |
366 | extrakw['strict'] = True |
|
369 | extrakw['strict'] = True | |
367 | extrakw['buffering'] = True |
|
370 | extrakw['buffering'] = True | |
368 | httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel, |
|
371 | httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel, | |
369 | method=method, **extrakw) |
|
372 | method=method, **extrakw) | |
370 | self.fileno = sock.fileno |
|
373 | self.fileno = sock.fileno | |
371 | self.code = None |
|
374 | self.code = None | |
372 | self._rbuf = '' |
|
375 | self._rbuf = '' | |
373 | self._rbufsize = 8096 |
|
376 | self._rbufsize = 8096 | |
374 | self._handler = None # inserted by the handler later |
|
377 | self._handler = None # inserted by the handler later | |
375 | self._host = None # (same) |
|
378 | self._host = None # (same) | |
376 | self._url = None # (same) |
|
379 | self._url = None # (same) | |
377 | self._connection = None # (same) |
|
380 | self._connection = None # (same) | |
378 |
|
381 | |||
379 | _raw_read = httplib.HTTPResponse.read |
|
382 | _raw_read = httplib.HTTPResponse.read | |
380 |
|
383 | |||
381 | def close(self): |
|
384 | def close(self): | |
382 | if self.fp: |
|
385 | if self.fp: | |
383 | self.fp.close() |
|
386 | self.fp.close() | |
384 | self.fp = None |
|
387 | self.fp = None | |
385 | if self._handler: |
|
388 | if self._handler: | |
386 | self._handler._request_closed(self, self._host, |
|
389 | self._handler._request_closed(self, self._host, | |
387 | self._connection) |
|
390 | self._connection) | |
388 |
|
391 | |||
389 | def close_connection(self): |
|
392 | def close_connection(self): | |
390 | self._handler._remove_connection(self._host, self._connection, close=1) |
|
393 | self._handler._remove_connection(self._host, self._connection, close=1) | |
391 | self.close() |
|
394 | self.close() | |
392 |
|
395 | |||
393 | def info(self): |
|
396 | def info(self): | |
394 | return self.headers |
|
397 | return self.headers | |
395 |
|
398 | |||
396 | def geturl(self): |
|
399 | def geturl(self): | |
397 | return self._url |
|
400 | return self._url | |
398 |
|
401 | |||
399 | def read(self, amt=None): |
|
402 | def read(self, amt=None): | |
400 | # the _rbuf test is only in this first if for speed. It's not |
|
403 | # the _rbuf test is only in this first if for speed. It's not | |
401 | # logically necessary |
|
404 | # logically necessary | |
402 | if self._rbuf and amt is not None: |
|
405 | if self._rbuf and amt is not None: | |
403 | L = len(self._rbuf) |
|
406 | L = len(self._rbuf) | |
404 | if amt > L: |
|
407 | if amt > L: | |
405 | amt -= L |
|
408 | amt -= L | |
406 | else: |
|
409 | else: | |
407 | s = self._rbuf[:amt] |
|
410 | s = self._rbuf[:amt] | |
408 | self._rbuf = self._rbuf[amt:] |
|
411 | self._rbuf = self._rbuf[amt:] | |
409 | return s |
|
412 | return s | |
410 |
|
413 | |||
411 | s = self._rbuf + self._raw_read(amt) |
|
414 | s = self._rbuf + self._raw_read(amt) | |
412 | self._rbuf = '' |
|
415 | self._rbuf = '' | |
413 | return s |
|
416 | return s | |
414 |
|
417 | |||
415 | # stolen from Python SVN #68532 to fix issue1088 |
|
418 | # stolen from Python SVN #68532 to fix issue1088 | |
416 | def _read_chunked(self, amt): |
|
419 | def _read_chunked(self, amt): | |
417 | chunk_left = self.chunk_left |
|
420 | chunk_left = self.chunk_left | |
418 | parts = [] |
|
421 | parts = [] | |
419 |
|
422 | |||
420 | while True: |
|
423 | while True: | |
421 | if chunk_left is None: |
|
424 | if chunk_left is None: | |
422 | line = self.fp.readline() |
|
425 | line = self.fp.readline() | |
423 | i = line.find(';') |
|
426 | i = line.find(';') | |
424 | if i >= 0: |
|
427 | if i >= 0: | |
425 | line = line[:i] # strip chunk-extensions |
|
428 | line = line[:i] # strip chunk-extensions | |
426 | try: |
|
429 | try: | |
427 | chunk_left = int(line, 16) |
|
430 | chunk_left = int(line, 16) | |
428 | except ValueError: |
|
431 | except ValueError: | |
429 | # close the connection as protocol synchronization is |
|
432 | # close the connection as protocol synchronization is | |
430 | # probably lost |
|
433 | # probably lost | |
431 | self.close() |
|
434 | self.close() | |
432 | raise httplib.IncompleteRead(''.join(parts)) |
|
435 | raise httplib.IncompleteRead(''.join(parts)) | |
433 | if chunk_left == 0: |
|
436 | if chunk_left == 0: | |
434 | break |
|
437 | break | |
435 | if amt is None: |
|
438 | if amt is None: | |
436 | parts.append(self._safe_read(chunk_left)) |
|
439 | parts.append(self._safe_read(chunk_left)) | |
437 | elif amt < chunk_left: |
|
440 | elif amt < chunk_left: | |
438 | parts.append(self._safe_read(amt)) |
|
441 | parts.append(self._safe_read(amt)) | |
439 | self.chunk_left = chunk_left - amt |
|
442 | self.chunk_left = chunk_left - amt | |
440 | return ''.join(parts) |
|
443 | return ''.join(parts) | |
441 | elif amt == chunk_left: |
|
444 | elif amt == chunk_left: | |
442 | parts.append(self._safe_read(amt)) |
|
445 | parts.append(self._safe_read(amt)) | |
443 | self._safe_read(2) # toss the CRLF at the end of the chunk |
|
446 | self._safe_read(2) # toss the CRLF at the end of the chunk | |
444 | self.chunk_left = None |
|
447 | self.chunk_left = None | |
445 | return ''.join(parts) |
|
448 | return ''.join(parts) | |
446 | else: |
|
449 | else: | |
447 | parts.append(self._safe_read(chunk_left)) |
|
450 | parts.append(self._safe_read(chunk_left)) | |
448 | amt -= chunk_left |
|
451 | amt -= chunk_left | |
449 |
|
452 | |||
450 | # we read the whole chunk, get another |
|
453 | # we read the whole chunk, get another | |
451 | self._safe_read(2) # toss the CRLF at the end of the chunk |
|
454 | self._safe_read(2) # toss the CRLF at the end of the chunk | |
452 | chunk_left = None |
|
455 | chunk_left = None | |
453 |
|
456 | |||
454 | # read and discard trailer up to the CRLF terminator |
|
457 | # read and discard trailer up to the CRLF terminator | |
455 | ### note: we shouldn't have any trailers! |
|
458 | ### note: we shouldn't have any trailers! | |
456 | while True: |
|
459 | while True: | |
457 | line = self.fp.readline() |
|
460 | line = self.fp.readline() | |
458 | if not line: |
|
461 | if not line: | |
459 | # a vanishingly small number of sites EOF without |
|
462 | # a vanishingly small number of sites EOF without | |
460 | # sending the trailer |
|
463 | # sending the trailer | |
461 | break |
|
464 | break | |
462 | if line == '\r\n': |
|
465 | if line == '\r\n': | |
463 | break |
|
466 | break | |
464 |
|
467 | |||
465 | # we read everything; close the "file" |
|
468 | # we read everything; close the "file" | |
466 | self.close() |
|
469 | self.close() | |
467 |
|
470 | |||
468 | return ''.join(parts) |
|
471 | return ''.join(parts) | |
469 |
|
472 | |||
470 | def readline(self): |
|
473 | def readline(self): | |
471 | # Fast path for a line is already available in read buffer. |
|
474 | # Fast path for a line is already available in read buffer. | |
472 | i = self._rbuf.find('\n') |
|
475 | i = self._rbuf.find('\n') | |
473 | if i >= 0: |
|
476 | if i >= 0: | |
474 | i += 1 |
|
477 | i += 1 | |
475 | line = self._rbuf[:i] |
|
478 | line = self._rbuf[:i] | |
476 | self._rbuf = self._rbuf[i:] |
|
479 | self._rbuf = self._rbuf[i:] | |
477 | return line |
|
480 | return line | |
478 |
|
481 | |||
479 | # No newline in local buffer. Read until we find one. |
|
482 | # No newline in local buffer. Read until we find one. | |
480 | chunks = [self._rbuf] |
|
483 | chunks = [self._rbuf] | |
481 | i = -1 |
|
484 | i = -1 | |
482 | readsize = self._rbufsize |
|
485 | readsize = self._rbufsize | |
483 | while True: |
|
486 | while True: | |
484 | new = self._raw_read(readsize) |
|
487 | new = self._raw_read(readsize) | |
485 | if not new: |
|
488 | if not new: | |
486 | break |
|
489 | break | |
487 |
|
490 | |||
488 | chunks.append(new) |
|
491 | chunks.append(new) | |
489 | i = new.find('\n') |
|
492 | i = new.find('\n') | |
490 | if i >= 0: |
|
493 | if i >= 0: | |
491 | break |
|
494 | break | |
492 |
|
495 | |||
493 | # We either have exhausted the stream or have a newline in chunks[-1]. |
|
496 | # We either have exhausted the stream or have a newline in chunks[-1]. | |
494 |
|
497 | |||
495 | # EOF |
|
498 | # EOF | |
496 | if i == -1: |
|
499 | if i == -1: | |
497 | self._rbuf = '' |
|
500 | self._rbuf = '' | |
498 | return ''.join(chunks) |
|
501 | return ''.join(chunks) | |
499 |
|
502 | |||
500 | i += 1 |
|
503 | i += 1 | |
501 | self._rbuf = chunks[-1][i:] |
|
504 | self._rbuf = chunks[-1][i:] | |
502 | chunks[-1] = chunks[-1][:i] |
|
505 | chunks[-1] = chunks[-1][:i] | |
503 | return ''.join(chunks) |
|
506 | return ''.join(chunks) | |
504 |
|
507 | |||
505 | def readlines(self, sizehint=0): |
|
508 | def readlines(self, sizehint=0): | |
506 | total = 0 |
|
509 | total = 0 | |
507 | list = [] |
|
510 | list = [] | |
508 | while True: |
|
511 | while True: | |
509 | line = self.readline() |
|
512 | line = self.readline() | |
510 | if not line: |
|
513 | if not line: | |
511 | break |
|
514 | break | |
512 | list.append(line) |
|
515 | list.append(line) | |
513 | total += len(line) |
|
516 | total += len(line) | |
514 | if sizehint and total >= sizehint: |
|
517 | if sizehint and total >= sizehint: | |
515 | break |
|
518 | break | |
516 | return list |
|
519 | return list | |
517 |
|
520 | |||
518 | def safesend(self, str): |
|
521 | def safesend(self, str): | |
519 | """Send `str' to the server. |
|
522 | """Send `str' to the server. | |
520 |
|
523 | |||
521 | Shamelessly ripped off from httplib to patch a bad behavior. |
|
524 | Shamelessly ripped off from httplib to patch a bad behavior. | |
522 | """ |
|
525 | """ | |
523 | # _broken_pipe_resp is an attribute we set in this function |
|
526 | # _broken_pipe_resp is an attribute we set in this function | |
524 | # if the socket is closed while we're sending data but |
|
527 | # if the socket is closed while we're sending data but | |
525 | # the server sent us a response before hanging up. |
|
528 | # the server sent us a response before hanging up. | |
526 | # In that case, we want to pretend to send the rest of the |
|
529 | # In that case, we want to pretend to send the rest of the | |
527 | # outgoing data, and then let the user use getresponse() |
|
530 | # outgoing data, and then let the user use getresponse() | |
528 | # (which we wrap) to get this last response before |
|
531 | # (which we wrap) to get this last response before | |
529 | # opening a new socket. |
|
532 | # opening a new socket. | |
530 | if getattr(self, '_broken_pipe_resp', None) is not None: |
|
533 | if getattr(self, '_broken_pipe_resp', None) is not None: | |
531 | return |
|
534 | return | |
532 |
|
535 | |||
533 | if self.sock is None: |
|
536 | if self.sock is None: | |
534 | if self.auto_open: |
|
537 | if self.auto_open: | |
535 | self.connect() |
|
538 | self.connect() | |
536 | else: |
|
539 | else: | |
537 | raise httplib.NotConnected |
|
540 | raise httplib.NotConnected | |
538 |
|
541 | |||
539 | # send the data to the server. if we get a broken pipe, then close |
|
542 | # send the data to the server. if we get a broken pipe, then close | |
540 | # the socket. we want to reconnect when somebody tries to send again. |
|
543 | # the socket. we want to reconnect when somebody tries to send again. | |
541 | # |
|
544 | # | |
542 | # NOTE: we DO propagate the error, though, because we cannot simply |
|
545 | # NOTE: we DO propagate the error, though, because we cannot simply | |
543 | # ignore the error... the caller will know if they can retry. |
|
546 | # ignore the error... the caller will know if they can retry. | |
544 | if self.debuglevel > 0: |
|
547 | if self.debuglevel > 0: | |
545 | print("send:", repr(str)) |
|
548 | print("send:", repr(str)) | |
546 | try: |
|
549 | try: | |
547 | blocksize = 8192 |
|
550 | blocksize = 8192 | |
548 | read = getattr(str, 'read', None) |
|
551 | read = getattr(str, 'read', None) | |
549 | if read is not None: |
|
552 | if read is not None: | |
550 | if self.debuglevel > 0: |
|
553 | if self.debuglevel > 0: | |
551 | print("sending a read()able") |
|
554 | print("sending a read()able") | |
552 | data = read(blocksize) |
|
555 | data = read(blocksize) | |
553 | while data: |
|
556 | while data: | |
554 | self.sock.sendall(data) |
|
557 | self.sock.sendall(data) | |
555 | data = read(blocksize) |
|
558 | data = read(blocksize) | |
556 | else: |
|
559 | else: | |
557 | self.sock.sendall(str) |
|
560 | self.sock.sendall(str) | |
558 | except socket.error as v: |
|
561 | except socket.error as v: | |
559 | reraise = True |
|
562 | reraise = True | |
560 | if v[0] == errno.EPIPE: # Broken pipe |
|
563 | if v[0] == errno.EPIPE: # Broken pipe | |
561 | if self._HTTPConnection__state == httplib._CS_REQ_SENT: |
|
564 | if self._HTTPConnection__state == httplib._CS_REQ_SENT: | |
562 | self._broken_pipe_resp = None |
|
565 | self._broken_pipe_resp = None | |
563 | self._broken_pipe_resp = self.getresponse() |
|
566 | self._broken_pipe_resp = self.getresponse() | |
564 | reraise = False |
|
567 | reraise = False | |
565 | self.close() |
|
568 | self.close() | |
566 | if reraise: |
|
569 | if reraise: | |
567 | raise |
|
570 | raise | |
568 |
|
571 | |||
569 | def wrapgetresponse(cls): |
|
572 | def wrapgetresponse(cls): | |
570 | """Wraps getresponse in cls with a broken-pipe sane version. |
|
573 | """Wraps getresponse in cls with a broken-pipe sane version. | |
571 | """ |
|
574 | """ | |
572 | def safegetresponse(self): |
|
575 | def safegetresponse(self): | |
573 | # In safesend() we might set the _broken_pipe_resp |
|
576 | # In safesend() we might set the _broken_pipe_resp | |
574 | # attribute, in which case the socket has already |
|
577 | # attribute, in which case the socket has already | |
575 | # been closed and we just need to give them the response |
|
578 | # been closed and we just need to give them the response | |
576 | # back. Otherwise, we use the normal response path. |
|
579 | # back. Otherwise, we use the normal response path. | |
577 | r = getattr(self, '_broken_pipe_resp', None) |
|
580 | r = getattr(self, '_broken_pipe_resp', None) | |
578 | if r is not None: |
|
581 | if r is not None: | |
579 | return r |
|
582 | return r | |
580 | return cls.getresponse(self) |
|
583 | return cls.getresponse(self) | |
581 | safegetresponse.__doc__ = cls.getresponse.__doc__ |
|
584 | safegetresponse.__doc__ = cls.getresponse.__doc__ | |
582 | return safegetresponse |
|
585 | return safegetresponse | |
583 |
|
586 | |||
584 | class HTTPConnection(httplib.HTTPConnection): |
|
587 | class HTTPConnection(httplib.HTTPConnection): | |
585 | # use the modified response class |
|
588 | # use the modified response class | |
586 | response_class = HTTPResponse |
|
589 | response_class = HTTPResponse | |
587 | send = safesend |
|
590 | send = safesend | |
588 | getresponse = wrapgetresponse(httplib.HTTPConnection) |
|
591 | getresponse = wrapgetresponse(httplib.HTTPConnection) | |
589 |
|
592 | |||
590 |
|
593 | |||
591 | ######################################################################### |
|
594 | ######################################################################### | |
592 | ##### TEST FUNCTIONS |
|
595 | ##### TEST FUNCTIONS | |
593 | ######################################################################### |
|
596 | ######################################################################### | |
594 |
|
597 | |||
595 |
|
598 | |||
596 | def continuity(url): |
|
599 | def continuity(url): | |
597 | md5 = hashlib.md5 |
|
600 | md5 = hashlib.md5 | |
598 | format = '%25s: %s' |
|
601 | format = '%25s: %s' | |
599 |
|
602 | |||
600 | # first fetch the file with the normal http handler |
|
603 | # first fetch the file with the normal http handler | |
601 | opener = urlreq.buildopener() |
|
604 | opener = urlreq.buildopener() | |
602 | urlreq.installopener(opener) |
|
605 | urlreq.installopener(opener) | |
603 | fo = urlreq.urlopen(url) |
|
606 | fo = urlreq.urlopen(url) | |
604 | foo = fo.read() |
|
607 | foo = fo.read() | |
605 | fo.close() |
|
608 | fo.close() | |
606 | m = md5(foo) |
|
609 | m = md5(foo) | |
607 | print(format % ('normal urllib', m.hexdigest())) |
|
610 | print(format % ('normal urllib', m.hexdigest())) | |
608 |
|
611 | |||
609 | # now install the keepalive handler and try again |
|
612 | # now install the keepalive handler and try again | |
610 | opener = urlreq.buildopener(HTTPHandler()) |
|
613 | opener = urlreq.buildopener(HTTPHandler()) | |
611 | urlreq.installopener(opener) |
|
614 | urlreq.installopener(opener) | |
612 |
|
615 | |||
613 | fo = urlreq.urlopen(url) |
|
616 | fo = urlreq.urlopen(url) | |
614 | foo = fo.read() |
|
617 | foo = fo.read() | |
615 | fo.close() |
|
618 | fo.close() | |
616 | m = md5(foo) |
|
619 | m = md5(foo) | |
617 | print(format % ('keepalive read', m.hexdigest())) |
|
620 | print(format % ('keepalive read', m.hexdigest())) | |
618 |
|
621 | |||
619 | fo = urlreq.urlopen(url) |
|
622 | fo = urlreq.urlopen(url) | |
620 | foo = '' |
|
623 | foo = '' | |
621 | while True: |
|
624 | while True: | |
622 | f = fo.readline() |
|
625 | f = fo.readline() | |
623 | if f: |
|
626 | if f: | |
624 | foo = foo + f |
|
627 | foo = foo + f | |
625 | else: |
|
628 | else: | |
626 | break |
|
629 | break | |
627 | fo.close() |
|
630 | fo.close() | |
628 | m = md5(foo) |
|
631 | m = md5(foo) | |
629 | print(format % ('keepalive readline', m.hexdigest())) |
|
632 | print(format % ('keepalive readline', m.hexdigest())) | |
630 |
|
633 | |||
631 | def comp(N, url): |
|
634 | def comp(N, url): | |
632 | print(' making %i connections to:\n %s' % (N, url)) |
|
635 | print(' making %i connections to:\n %s' % (N, url)) | |
633 |
|
636 | |||
634 | util.stdout.write(' first using the normal urllib handlers') |
|
637 | util.stdout.write(' first using the normal urllib handlers') | |
635 | # first use normal opener |
|
638 | # first use normal opener | |
636 | opener = urlreq.buildopener() |
|
639 | opener = urlreq.buildopener() | |
637 | urlreq.installopener(opener) |
|
640 | urlreq.installopener(opener) | |
638 | t1 = fetch(N, url) |
|
641 | t1 = fetch(N, url) | |
639 | print(' TIME: %.3f s' % t1) |
|
642 | print(' TIME: %.3f s' % t1) | |
640 |
|
643 | |||
641 | util.stdout.write(' now using the keepalive handler ') |
|
644 | util.stdout.write(' now using the keepalive handler ') | |
642 | # now install the keepalive handler and try again |
|
645 | # now install the keepalive handler and try again | |
643 | opener = urlreq.buildopener(HTTPHandler()) |
|
646 | opener = urlreq.buildopener(HTTPHandler()) | |
644 | urlreq.installopener(opener) |
|
647 | urlreq.installopener(opener) | |
645 | t2 = fetch(N, url) |
|
648 | t2 = fetch(N, url) | |
646 | print(' TIME: %.3f s' % t2) |
|
649 | print(' TIME: %.3f s' % t2) | |
647 | print(' improvement factor: %.2f' % (t1 / t2)) |
|
650 | print(' improvement factor: %.2f' % (t1 / t2)) | |
648 |
|
651 | |||
649 | def fetch(N, url, delay=0): |
|
652 | def fetch(N, url, delay=0): | |
650 | import time |
|
653 | import time | |
651 | lens = [] |
|
654 | lens = [] | |
652 | starttime = time.time() |
|
655 | starttime = time.time() | |
653 | for i in range(N): |
|
656 | for i in range(N): | |
654 | if delay and i > 0: |
|
657 | if delay and i > 0: | |
655 | time.sleep(delay) |
|
658 | time.sleep(delay) | |
656 | fo = urlreq.urlopen(url) |
|
659 | fo = urlreq.urlopen(url) | |
657 | foo = fo.read() |
|
660 | foo = fo.read() | |
658 | fo.close() |
|
661 | fo.close() | |
659 | lens.append(len(foo)) |
|
662 | lens.append(len(foo)) | |
660 | diff = time.time() - starttime |
|
663 | diff = time.time() - starttime | |
661 |
|
664 | |||
662 | j = 0 |
|
665 | j = 0 | |
663 | for i in lens[1:]: |
|
666 | for i in lens[1:]: | |
664 | j = j + 1 |
|
667 | j = j + 1 | |
665 | if not i == lens[0]: |
|
668 | if not i == lens[0]: | |
666 | print("WARNING: inconsistent length on read %i: %i" % (j, i)) |
|
669 | print("WARNING: inconsistent length on read %i: %i" % (j, i)) | |
667 |
|
670 | |||
668 | return diff |
|
671 | return diff | |
669 |
|
672 | |||
670 | def test_timeout(url): |
|
673 | def test_timeout(url): | |
671 | global DEBUG |
|
674 | global DEBUG | |
672 | dbbackup = DEBUG |
|
675 | dbbackup = DEBUG | |
673 | class FakeLogger(object): |
|
676 | class FakeLogger(object): | |
674 | def debug(self, msg, *args): |
|
677 | def debug(self, msg, *args): | |
675 | print(msg % args) |
|
678 | print(msg % args) | |
676 | info = warning = error = debug |
|
679 | info = warning = error = debug | |
677 | DEBUG = FakeLogger() |
|
680 | DEBUG = FakeLogger() | |
678 | print(" fetching the file to establish a connection") |
|
681 | print(" fetching the file to establish a connection") | |
679 | fo = urlreq.urlopen(url) |
|
682 | fo = urlreq.urlopen(url) | |
680 | data1 = fo.read() |
|
683 | data1 = fo.read() | |
681 | fo.close() |
|
684 | fo.close() | |
682 |
|
685 | |||
683 | i = 20 |
|
686 | i = 20 | |
684 | print(" waiting %i seconds for the server to close the connection" % i) |
|
687 | print(" waiting %i seconds for the server to close the connection" % i) | |
685 | while i > 0: |
|
688 | while i > 0: | |
686 | util.stdout.write('\r %2i' % i) |
|
689 | util.stdout.write('\r %2i' % i) | |
687 | util.stdout.flush() |
|
690 | util.stdout.flush() | |
688 | time.sleep(1) |
|
691 | time.sleep(1) | |
689 | i -= 1 |
|
692 | i -= 1 | |
690 | util.stderr.write('\r') |
|
693 | util.stderr.write('\r') | |
691 |
|
694 | |||
692 | print(" fetching the file a second time") |
|
695 | print(" fetching the file a second time") | |
693 | fo = urlreq.urlopen(url) |
|
696 | fo = urlreq.urlopen(url) | |
694 | data2 = fo.read() |
|
697 | data2 = fo.read() | |
695 | fo.close() |
|
698 | fo.close() | |
696 |
|
699 | |||
697 | if data1 == data2: |
|
700 | if data1 == data2: | |
698 | print(' data are identical') |
|
701 | print(' data are identical') | |
699 | else: |
|
702 | else: | |
700 | print(' ERROR: DATA DIFFER') |
|
703 | print(' ERROR: DATA DIFFER') | |
701 |
|
704 | |||
702 | DEBUG = dbbackup |
|
705 | DEBUG = dbbackup | |
703 |
|
706 | |||
704 |
|
707 | |||
705 | def test(url, N=10): |
|
708 | def test(url, N=10): | |
706 | print("performing continuity test (making sure stuff isn't corrupted)") |
|
709 | print("performing continuity test (making sure stuff isn't corrupted)") | |
707 | continuity(url) |
|
710 | continuity(url) | |
708 | print('') |
|
711 | print('') | |
709 | print("performing speed comparison") |
|
712 | print("performing speed comparison") | |
710 | comp(N, url) |
|
713 | comp(N, url) | |
711 | print('') |
|
714 | print('') | |
712 | print("performing dropped-connection check") |
|
715 | print("performing dropped-connection check") | |
713 | test_timeout(url) |
|
716 | test_timeout(url) | |
714 |
|
717 | |||
715 | if __name__ == '__main__': |
|
718 | if __name__ == '__main__': | |
716 | import time |
|
719 | import time | |
717 | try: |
|
720 | try: | |
718 | N = int(sys.argv[1]) |
|
721 | N = int(sys.argv[1]) | |
719 | url = sys.argv[2] |
|
722 | url = sys.argv[2] | |
720 | except (IndexError, ValueError): |
|
723 | except (IndexError, ValueError): | |
721 | print("%s <integer> <url>" % sys.argv[0]) |
|
724 | print("%s <integer> <url>" % sys.argv[0]) | |
722 | else: |
|
725 | else: | |
723 | test(url, N) |
|
726 | test(url, N) |
@@ -1,517 +1,519 b'' | |||||
1 | # url.py - HTTP handling for mercurial |
|
1 | # url.py - HTTP handling for mercurial | |
2 | # |
|
2 | # | |
3 | # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com> |
|
3 | # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com> | |
4 | # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br> |
|
4 | # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br> | |
5 | # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> |
|
5 | # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> | |
6 | # |
|
6 | # | |
7 | # This software may be used and distributed according to the terms of the |
|
7 | # This software may be used and distributed according to the terms of the | |
8 | # GNU General Public License version 2 or any later version. |
|
8 | # GNU General Public License version 2 or any later version. | |
9 |
|
9 | |||
10 | from __future__ import absolute_import |
|
10 | from __future__ import absolute_import | |
11 |
|
11 | |||
12 | import base64 |
|
12 | import base64 | |
13 | import os |
|
13 | import os | |
14 | import socket |
|
14 | import socket | |
15 |
|
15 | |||
16 | from .i18n import _ |
|
16 | from .i18n import _ | |
17 | from . import ( |
|
17 | from . import ( | |
18 | encoding, |
|
18 | encoding, | |
19 | error, |
|
19 | error, | |
20 | httpconnection as httpconnectionmod, |
|
20 | httpconnection as httpconnectionmod, | |
21 | keepalive, |
|
21 | keepalive, | |
22 | pycompat, |
|
22 | pycompat, | |
23 | sslutil, |
|
23 | sslutil, | |
|
24 | urllibcompat, | |||
24 | util, |
|
25 | util, | |
25 | ) |
|
26 | ) | |
26 |
|
27 | |||
27 | httplib = util.httplib |
|
28 | httplib = util.httplib | |
28 | stringio = util.stringio |
|
29 | stringio = util.stringio | |
29 | urlerr = util.urlerr |
|
30 | urlerr = util.urlerr | |
30 | urlreq = util.urlreq |
|
31 | urlreq = util.urlreq | |
31 |
|
32 | |||
32 | class passwordmgr(object): |
|
33 | class passwordmgr(object): | |
33 | def __init__(self, ui, passwddb): |
|
34 | def __init__(self, ui, passwddb): | |
34 | self.ui = ui |
|
35 | self.ui = ui | |
35 | self.passwddb = passwddb |
|
36 | self.passwddb = passwddb | |
36 |
|
37 | |||
37 | def add_password(self, realm, uri, user, passwd): |
|
38 | def add_password(self, realm, uri, user, passwd): | |
38 | return self.passwddb.add_password(realm, uri, user, passwd) |
|
39 | return self.passwddb.add_password(realm, uri, user, passwd) | |
39 |
|
40 | |||
40 | def find_user_password(self, realm, authuri): |
|
41 | def find_user_password(self, realm, authuri): | |
41 | authinfo = self.passwddb.find_user_password(realm, authuri) |
|
42 | authinfo = self.passwddb.find_user_password(realm, authuri) | |
42 | user, passwd = authinfo |
|
43 | user, passwd = authinfo | |
43 | if user and passwd: |
|
44 | if user and passwd: | |
44 | self._writedebug(user, passwd) |
|
45 | self._writedebug(user, passwd) | |
45 | return (user, passwd) |
|
46 | return (user, passwd) | |
46 |
|
47 | |||
47 | if not user or not passwd: |
|
48 | if not user or not passwd: | |
48 | res = httpconnectionmod.readauthforuri(self.ui, authuri, user) |
|
49 | res = httpconnectionmod.readauthforuri(self.ui, authuri, user) | |
49 | if res: |
|
50 | if res: | |
50 | group, auth = res |
|
51 | group, auth = res | |
51 | user, passwd = auth.get('username'), auth.get('password') |
|
52 | user, passwd = auth.get('username'), auth.get('password') | |
52 | self.ui.debug("using auth.%s.* for authentication\n" % group) |
|
53 | self.ui.debug("using auth.%s.* for authentication\n" % group) | |
53 | if not user or not passwd: |
|
54 | if not user or not passwd: | |
54 | u = util.url(authuri) |
|
55 | u = util.url(authuri) | |
55 | u.query = None |
|
56 | u.query = None | |
56 | if not self.ui.interactive(): |
|
57 | if not self.ui.interactive(): | |
57 | raise error.Abort(_('http authorization required for %s') % |
|
58 | raise error.Abort(_('http authorization required for %s') % | |
58 | util.hidepassword(str(u))) |
|
59 | util.hidepassword(str(u))) | |
59 |
|
60 | |||
60 | self.ui.write(_("http authorization required for %s\n") % |
|
61 | self.ui.write(_("http authorization required for %s\n") % | |
61 | util.hidepassword(str(u))) |
|
62 | util.hidepassword(str(u))) | |
62 | self.ui.write(_("realm: %s\n") % realm) |
|
63 | self.ui.write(_("realm: %s\n") % realm) | |
63 | if user: |
|
64 | if user: | |
64 | self.ui.write(_("user: %s\n") % user) |
|
65 | self.ui.write(_("user: %s\n") % user) | |
65 | else: |
|
66 | else: | |
66 | user = self.ui.prompt(_("user:"), default=None) |
|
67 | user = self.ui.prompt(_("user:"), default=None) | |
67 |
|
68 | |||
68 | if not passwd: |
|
69 | if not passwd: | |
69 | passwd = self.ui.getpass() |
|
70 | passwd = self.ui.getpass() | |
70 |
|
71 | |||
71 | self.passwddb.add_password(realm, authuri, user, passwd) |
|
72 | self.passwddb.add_password(realm, authuri, user, passwd) | |
72 | self._writedebug(user, passwd) |
|
73 | self._writedebug(user, passwd) | |
73 | return (user, passwd) |
|
74 | return (user, passwd) | |
74 |
|
75 | |||
75 | def _writedebug(self, user, passwd): |
|
76 | def _writedebug(self, user, passwd): | |
76 | msg = _('http auth: user %s, password %s\n') |
|
77 | msg = _('http auth: user %s, password %s\n') | |
77 | self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set')) |
|
78 | self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set')) | |
78 |
|
79 | |||
79 | def find_stored_password(self, authuri): |
|
80 | def find_stored_password(self, authuri): | |
80 | return self.passwddb.find_user_password(None, authuri) |
|
81 | return self.passwddb.find_user_password(None, authuri) | |
81 |
|
82 | |||
82 | class proxyhandler(urlreq.proxyhandler): |
|
83 | class proxyhandler(urlreq.proxyhandler): | |
83 | def __init__(self, ui): |
|
84 | def __init__(self, ui): | |
84 | proxyurl = (ui.config("http_proxy", "host") or |
|
85 | proxyurl = (ui.config("http_proxy", "host") or | |
85 | encoding.environ.get('http_proxy')) |
|
86 | encoding.environ.get('http_proxy')) | |
86 | # XXX proxyauthinfo = None |
|
87 | # XXX proxyauthinfo = None | |
87 |
|
88 | |||
88 | if proxyurl: |
|
89 | if proxyurl: | |
89 | # proxy can be proper url or host[:port] |
|
90 | # proxy can be proper url or host[:port] | |
90 | if not (proxyurl.startswith('http:') or |
|
91 | if not (proxyurl.startswith('http:') or | |
91 | proxyurl.startswith('https:')): |
|
92 | proxyurl.startswith('https:')): | |
92 | proxyurl = 'http://' + proxyurl + '/' |
|
93 | proxyurl = 'http://' + proxyurl + '/' | |
93 | proxy = util.url(proxyurl) |
|
94 | proxy = util.url(proxyurl) | |
94 | if not proxy.user: |
|
95 | if not proxy.user: | |
95 | proxy.user = ui.config("http_proxy", "user") |
|
96 | proxy.user = ui.config("http_proxy", "user") | |
96 | proxy.passwd = ui.config("http_proxy", "passwd") |
|
97 | proxy.passwd = ui.config("http_proxy", "passwd") | |
97 |
|
98 | |||
98 | # see if we should use a proxy for this url |
|
99 | # see if we should use a proxy for this url | |
99 | no_list = ["localhost", "127.0.0.1"] |
|
100 | no_list = ["localhost", "127.0.0.1"] | |
100 | no_list.extend([p.lower() for |
|
101 | no_list.extend([p.lower() for | |
101 | p in ui.configlist("http_proxy", "no")]) |
|
102 | p in ui.configlist("http_proxy", "no")]) | |
102 | no_list.extend([p.strip().lower() for |
|
103 | no_list.extend([p.strip().lower() for | |
103 | p in encoding.environ.get("no_proxy", '').split(',') |
|
104 | p in encoding.environ.get("no_proxy", '').split(',') | |
104 | if p.strip()]) |
|
105 | if p.strip()]) | |
105 | # "http_proxy.always" config is for running tests on localhost |
|
106 | # "http_proxy.always" config is for running tests on localhost | |
106 | if ui.configbool("http_proxy", "always"): |
|
107 | if ui.configbool("http_proxy", "always"): | |
107 | self.no_list = [] |
|
108 | self.no_list = [] | |
108 | else: |
|
109 | else: | |
109 | self.no_list = no_list |
|
110 | self.no_list = no_list | |
110 |
|
111 | |||
111 | proxyurl = str(proxy) |
|
112 | proxyurl = str(proxy) | |
112 | proxies = {'http': proxyurl, 'https': proxyurl} |
|
113 | proxies = {'http': proxyurl, 'https': proxyurl} | |
113 | ui.debug('proxying through http://%s:%s\n' % |
|
114 | ui.debug('proxying through http://%s:%s\n' % | |
114 | (proxy.host, proxy.port)) |
|
115 | (proxy.host, proxy.port)) | |
115 | else: |
|
116 | else: | |
116 | proxies = {} |
|
117 | proxies = {} | |
117 |
|
118 | |||
118 | urlreq.proxyhandler.__init__(self, proxies) |
|
119 | urlreq.proxyhandler.__init__(self, proxies) | |
119 | self.ui = ui |
|
120 | self.ui = ui | |
120 |
|
121 | |||
121 | def proxy_open(self, req, proxy, type_): |
|
122 | def proxy_open(self, req, proxy, type_): | |
122 |
host = req |
|
123 | host = urllibcompat.gethost(req).split(':')[0] | |
123 | for e in self.no_list: |
|
124 | for e in self.no_list: | |
124 | if host == e: |
|
125 | if host == e: | |
125 | return None |
|
126 | return None | |
126 | if e.startswith('*.') and host.endswith(e[2:]): |
|
127 | if e.startswith('*.') and host.endswith(e[2:]): | |
127 | return None |
|
128 | return None | |
128 | if e.startswith('.') and host.endswith(e[1:]): |
|
129 | if e.startswith('.') and host.endswith(e[1:]): | |
129 | return None |
|
130 | return None | |
130 |
|
131 | |||
131 | return urlreq.proxyhandler.proxy_open(self, req, proxy, type_) |
|
132 | return urlreq.proxyhandler.proxy_open(self, req, proxy, type_) | |
132 |
|
133 | |||
133 | def _gen_sendfile(orgsend): |
|
134 | def _gen_sendfile(orgsend): | |
134 | def _sendfile(self, data): |
|
135 | def _sendfile(self, data): | |
135 | # send a file |
|
136 | # send a file | |
136 | if isinstance(data, httpconnectionmod.httpsendfile): |
|
137 | if isinstance(data, httpconnectionmod.httpsendfile): | |
137 | # if auth required, some data sent twice, so rewind here |
|
138 | # if auth required, some data sent twice, so rewind here | |
138 | data.seek(0) |
|
139 | data.seek(0) | |
139 | for chunk in util.filechunkiter(data): |
|
140 | for chunk in util.filechunkiter(data): | |
140 | orgsend(self, chunk) |
|
141 | orgsend(self, chunk) | |
141 | else: |
|
142 | else: | |
142 | orgsend(self, data) |
|
143 | orgsend(self, data) | |
143 | return _sendfile |
|
144 | return _sendfile | |
144 |
|
145 | |||
145 | has_https = util.safehasattr(urlreq, 'httpshandler') |
|
146 | has_https = util.safehasattr(urlreq, 'httpshandler') | |
146 |
|
147 | |||
147 | class httpconnection(keepalive.HTTPConnection): |
|
148 | class httpconnection(keepalive.HTTPConnection): | |
148 | # must be able to send big bundle as stream. |
|
149 | # must be able to send big bundle as stream. | |
149 | send = _gen_sendfile(keepalive.HTTPConnection.send) |
|
150 | send = _gen_sendfile(keepalive.HTTPConnection.send) | |
150 |
|
151 | |||
151 | def getresponse(self): |
|
152 | def getresponse(self): | |
152 | proxyres = getattr(self, 'proxyres', None) |
|
153 | proxyres = getattr(self, 'proxyres', None) | |
153 | if proxyres: |
|
154 | if proxyres: | |
154 | if proxyres.will_close: |
|
155 | if proxyres.will_close: | |
155 | self.close() |
|
156 | self.close() | |
156 | self.proxyres = None |
|
157 | self.proxyres = None | |
157 | return proxyres |
|
158 | return proxyres | |
158 | return keepalive.HTTPConnection.getresponse(self) |
|
159 | return keepalive.HTTPConnection.getresponse(self) | |
159 |
|
160 | |||
160 | # general transaction handler to support different ways to handle |
|
161 | # general transaction handler to support different ways to handle | |
161 | # HTTPS proxying before and after Python 2.6.3. |
|
162 | # HTTPS proxying before and after Python 2.6.3. | |
162 | def _generic_start_transaction(handler, h, req): |
|
163 | def _generic_start_transaction(handler, h, req): | |
163 | tunnel_host = getattr(req, '_tunnel_host', None) |
|
164 | tunnel_host = getattr(req, '_tunnel_host', None) | |
164 | if tunnel_host: |
|
165 | if tunnel_host: | |
165 | if tunnel_host[:7] not in ['http://', 'https:/']: |
|
166 | if tunnel_host[:7] not in ['http://', 'https:/']: | |
166 | tunnel_host = 'https://' + tunnel_host |
|
167 | tunnel_host = 'https://' + tunnel_host | |
167 | new_tunnel = True |
|
168 | new_tunnel = True | |
168 | else: |
|
169 | else: | |
169 |
tunnel_host = req |
|
170 | tunnel_host = urllibcompat.getselector(req) | |
170 | new_tunnel = False |
|
171 | new_tunnel = False | |
171 |
|
172 | |||
172 |
if new_tunnel or tunnel_host == req |
|
173 | if new_tunnel or tunnel_host == urllibcompat.getfullurl(req): # has proxy | |
173 | u = util.url(tunnel_host) |
|
174 | u = util.url(tunnel_host) | |
174 | if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS |
|
175 | if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS | |
175 | h.realhostport = ':'.join([u.host, (u.port or '443')]) |
|
176 | h.realhostport = ':'.join([u.host, (u.port or '443')]) | |
176 | h.headers = req.headers.copy() |
|
177 | h.headers = req.headers.copy() | |
177 | h.headers.update(handler.parent.addheaders) |
|
178 | h.headers.update(handler.parent.addheaders) | |
178 | return |
|
179 | return | |
179 |
|
180 | |||
180 | h.realhostport = None |
|
181 | h.realhostport = None | |
181 | h.headers = None |
|
182 | h.headers = None | |
182 |
|
183 | |||
183 | def _generic_proxytunnel(self): |
|
184 | def _generic_proxytunnel(self): | |
184 | proxyheaders = dict( |
|
185 | proxyheaders = dict( | |
185 | [(x, self.headers[x]) for x in self.headers |
|
186 | [(x, self.headers[x]) for x in self.headers | |
186 | if x.lower().startswith('proxy-')]) |
|
187 | if x.lower().startswith('proxy-')]) | |
187 | self.send('CONNECT %s HTTP/1.0\r\n' % self.realhostport) |
|
188 | self.send('CONNECT %s HTTP/1.0\r\n' % self.realhostport) | |
188 | for header in proxyheaders.iteritems(): |
|
189 | for header in proxyheaders.iteritems(): | |
189 | self.send('%s: %s\r\n' % header) |
|
190 | self.send('%s: %s\r\n' % header) | |
190 | self.send('\r\n') |
|
191 | self.send('\r\n') | |
191 |
|
192 | |||
192 | # majority of the following code is duplicated from |
|
193 | # majority of the following code is duplicated from | |
193 | # httplib.HTTPConnection as there are no adequate places to |
|
194 | # httplib.HTTPConnection as there are no adequate places to | |
194 | # override functions to provide the needed functionality |
|
195 | # override functions to provide the needed functionality | |
195 | res = self.response_class(self.sock, |
|
196 | res = self.response_class(self.sock, | |
196 | strict=self.strict, |
|
197 | strict=self.strict, | |
197 | method=self._method) |
|
198 | method=self._method) | |
198 |
|
199 | |||
199 | while True: |
|
200 | while True: | |
200 | version, status, reason = res._read_status() |
|
201 | version, status, reason = res._read_status() | |
201 | if status != httplib.CONTINUE: |
|
202 | if status != httplib.CONTINUE: | |
202 | break |
|
203 | break | |
203 | # skip lines that are all whitespace |
|
204 | # skip lines that are all whitespace | |
204 | list(iter(lambda: res.fp.readline().strip(), '')) |
|
205 | list(iter(lambda: res.fp.readline().strip(), '')) | |
205 | res.status = status |
|
206 | res.status = status | |
206 | res.reason = reason.strip() |
|
207 | res.reason = reason.strip() | |
207 |
|
208 | |||
208 | if res.status == 200: |
|
209 | if res.status == 200: | |
209 | # skip lines until we find a blank line |
|
210 | # skip lines until we find a blank line | |
210 | list(iter(res.fp.readline, '\r\n')) |
|
211 | list(iter(res.fp.readline, '\r\n')) | |
211 | return True |
|
212 | return True | |
212 |
|
213 | |||
213 | if version == 'HTTP/1.0': |
|
214 | if version == 'HTTP/1.0': | |
214 | res.version = 10 |
|
215 | res.version = 10 | |
215 | elif version.startswith('HTTP/1.'): |
|
216 | elif version.startswith('HTTP/1.'): | |
216 | res.version = 11 |
|
217 | res.version = 11 | |
217 | elif version == 'HTTP/0.9': |
|
218 | elif version == 'HTTP/0.9': | |
218 | res.version = 9 |
|
219 | res.version = 9 | |
219 | else: |
|
220 | else: | |
220 | raise httplib.UnknownProtocol(version) |
|
221 | raise httplib.UnknownProtocol(version) | |
221 |
|
222 | |||
222 | if res.version == 9: |
|
223 | if res.version == 9: | |
223 | res.length = None |
|
224 | res.length = None | |
224 | res.chunked = 0 |
|
225 | res.chunked = 0 | |
225 | res.will_close = 1 |
|
226 | res.will_close = 1 | |
226 | res.msg = httplib.HTTPMessage(stringio()) |
|
227 | res.msg = httplib.HTTPMessage(stringio()) | |
227 | return False |
|
228 | return False | |
228 |
|
229 | |||
229 | res.msg = httplib.HTTPMessage(res.fp) |
|
230 | res.msg = httplib.HTTPMessage(res.fp) | |
230 | res.msg.fp = None |
|
231 | res.msg.fp = None | |
231 |
|
232 | |||
232 | # are we using the chunked-style of transfer encoding? |
|
233 | # are we using the chunked-style of transfer encoding? | |
233 | trenc = res.msg.getheader('transfer-encoding') |
|
234 | trenc = res.msg.getheader('transfer-encoding') | |
234 | if trenc and trenc.lower() == "chunked": |
|
235 | if trenc and trenc.lower() == "chunked": | |
235 | res.chunked = 1 |
|
236 | res.chunked = 1 | |
236 | res.chunk_left = None |
|
237 | res.chunk_left = None | |
237 | else: |
|
238 | else: | |
238 | res.chunked = 0 |
|
239 | res.chunked = 0 | |
239 |
|
240 | |||
240 | # will the connection close at the end of the response? |
|
241 | # will the connection close at the end of the response? | |
241 | res.will_close = res._check_close() |
|
242 | res.will_close = res._check_close() | |
242 |
|
243 | |||
243 | # do we have a Content-Length? |
|
244 | # do we have a Content-Length? | |
244 | # NOTE: RFC 2616, section 4.4, #3 says we ignore this if |
|
245 | # NOTE: RFC 2616, section 4.4, #3 says we ignore this if | |
245 | # transfer-encoding is "chunked" |
|
246 | # transfer-encoding is "chunked" | |
246 | length = res.msg.getheader('content-length') |
|
247 | length = res.msg.getheader('content-length') | |
247 | if length and not res.chunked: |
|
248 | if length and not res.chunked: | |
248 | try: |
|
249 | try: | |
249 | res.length = int(length) |
|
250 | res.length = int(length) | |
250 | except ValueError: |
|
251 | except ValueError: | |
251 | res.length = None |
|
252 | res.length = None | |
252 | else: |
|
253 | else: | |
253 | if res.length < 0: # ignore nonsensical negative lengths |
|
254 | if res.length < 0: # ignore nonsensical negative lengths | |
254 | res.length = None |
|
255 | res.length = None | |
255 | else: |
|
256 | else: | |
256 | res.length = None |
|
257 | res.length = None | |
257 |
|
258 | |||
258 | # does the body have a fixed length? (of zero) |
|
259 | # does the body have a fixed length? (of zero) | |
259 | if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or |
|
260 | if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or | |
260 | 100 <= status < 200 or # 1xx codes |
|
261 | 100 <= status < 200 or # 1xx codes | |
261 | res._method == 'HEAD'): |
|
262 | res._method == 'HEAD'): | |
262 | res.length = 0 |
|
263 | res.length = 0 | |
263 |
|
264 | |||
264 | # if the connection remains open, and we aren't using chunked, and |
|
265 | # if the connection remains open, and we aren't using chunked, and | |
265 | # a content-length was not provided, then assume that the connection |
|
266 | # a content-length was not provided, then assume that the connection | |
266 | # WILL close. |
|
267 | # WILL close. | |
267 | if (not res.will_close and |
|
268 | if (not res.will_close and | |
268 | not res.chunked and |
|
269 | not res.chunked and | |
269 | res.length is None): |
|
270 | res.length is None): | |
270 | res.will_close = 1 |
|
271 | res.will_close = 1 | |
271 |
|
272 | |||
272 | self.proxyres = res |
|
273 | self.proxyres = res | |
273 |
|
274 | |||
274 | return False |
|
275 | return False | |
275 |
|
276 | |||
276 | class httphandler(keepalive.HTTPHandler): |
|
277 | class httphandler(keepalive.HTTPHandler): | |
277 | def http_open(self, req): |
|
278 | def http_open(self, req): | |
278 | return self.do_open(httpconnection, req) |
|
279 | return self.do_open(httpconnection, req) | |
279 |
|
280 | |||
280 | def _start_transaction(self, h, req): |
|
281 | def _start_transaction(self, h, req): | |
281 | _generic_start_transaction(self, h, req) |
|
282 | _generic_start_transaction(self, h, req) | |
282 | return keepalive.HTTPHandler._start_transaction(self, h, req) |
|
283 | return keepalive.HTTPHandler._start_transaction(self, h, req) | |
283 |
|
284 | |||
284 | if has_https: |
|
285 | if has_https: | |
285 | class httpsconnection(httplib.HTTPConnection): |
|
286 | class httpsconnection(httplib.HTTPConnection): | |
286 | response_class = keepalive.HTTPResponse |
|
287 | response_class = keepalive.HTTPResponse | |
287 | default_port = httplib.HTTPS_PORT |
|
288 | default_port = httplib.HTTPS_PORT | |
288 | # must be able to send big bundle as stream. |
|
289 | # must be able to send big bundle as stream. | |
289 | send = _gen_sendfile(keepalive.safesend) |
|
290 | send = _gen_sendfile(keepalive.safesend) | |
290 | getresponse = keepalive.wrapgetresponse(httplib.HTTPConnection) |
|
291 | getresponse = keepalive.wrapgetresponse(httplib.HTTPConnection) | |
291 |
|
292 | |||
292 | def __init__(self, host, port=None, key_file=None, cert_file=None, |
|
293 | def __init__(self, host, port=None, key_file=None, cert_file=None, | |
293 | *args, **kwargs): |
|
294 | *args, **kwargs): | |
294 | httplib.HTTPConnection.__init__(self, host, port, *args, **kwargs) |
|
295 | httplib.HTTPConnection.__init__(self, host, port, *args, **kwargs) | |
295 | self.key_file = key_file |
|
296 | self.key_file = key_file | |
296 | self.cert_file = cert_file |
|
297 | self.cert_file = cert_file | |
297 |
|
298 | |||
298 | def connect(self): |
|
299 | def connect(self): | |
299 | self.sock = socket.create_connection((self.host, self.port)) |
|
300 | self.sock = socket.create_connection((self.host, self.port)) | |
300 |
|
301 | |||
301 | host = self.host |
|
302 | host = self.host | |
302 | if self.realhostport: # use CONNECT proxy |
|
303 | if self.realhostport: # use CONNECT proxy | |
303 | _generic_proxytunnel(self) |
|
304 | _generic_proxytunnel(self) | |
304 | host = self.realhostport.rsplit(':', 1)[0] |
|
305 | host = self.realhostport.rsplit(':', 1)[0] | |
305 | self.sock = sslutil.wrapsocket( |
|
306 | self.sock = sslutil.wrapsocket( | |
306 | self.sock, self.key_file, self.cert_file, ui=self.ui, |
|
307 | self.sock, self.key_file, self.cert_file, ui=self.ui, | |
307 | serverhostname=host) |
|
308 | serverhostname=host) | |
308 | sslutil.validatesocket(self.sock) |
|
309 | sslutil.validatesocket(self.sock) | |
309 |
|
310 | |||
310 | class httpshandler(keepalive.KeepAliveHandler, urlreq.httpshandler): |
|
311 | class httpshandler(keepalive.KeepAliveHandler, urlreq.httpshandler): | |
311 | def __init__(self, ui): |
|
312 | def __init__(self, ui): | |
312 | keepalive.KeepAliveHandler.__init__(self) |
|
313 | keepalive.KeepAliveHandler.__init__(self) | |
313 | urlreq.httpshandler.__init__(self) |
|
314 | urlreq.httpshandler.__init__(self) | |
314 | self.ui = ui |
|
315 | self.ui = ui | |
315 | self.pwmgr = passwordmgr(self.ui, |
|
316 | self.pwmgr = passwordmgr(self.ui, | |
316 | self.ui.httppasswordmgrdb) |
|
317 | self.ui.httppasswordmgrdb) | |
317 |
|
318 | |||
318 | def _start_transaction(self, h, req): |
|
319 | def _start_transaction(self, h, req): | |
319 | _generic_start_transaction(self, h, req) |
|
320 | _generic_start_transaction(self, h, req) | |
320 | return keepalive.KeepAliveHandler._start_transaction(self, h, req) |
|
321 | return keepalive.KeepAliveHandler._start_transaction(self, h, req) | |
321 |
|
322 | |||
322 | def https_open(self, req): |
|
323 | def https_open(self, req): | |
323 |
# |
|
324 | # urllibcompat.getfullurl() does not contain credentials | |
324 | # need them to match the certificates. |
|
325 | # and we may need them to match the certificates. | |
325 |
url = req |
|
326 | url = urllibcompat.getfullurl(req) | |
326 | user, password = self.pwmgr.find_stored_password(url) |
|
327 | user, password = self.pwmgr.find_stored_password(url) | |
327 | res = httpconnectionmod.readauthforuri(self.ui, url, user) |
|
328 | res = httpconnectionmod.readauthforuri(self.ui, url, user) | |
328 | if res: |
|
329 | if res: | |
329 | group, auth = res |
|
330 | group, auth = res | |
330 | self.auth = auth |
|
331 | self.auth = auth | |
331 | self.ui.debug("using auth.%s.* for authentication\n" % group) |
|
332 | self.ui.debug("using auth.%s.* for authentication\n" % group) | |
332 | else: |
|
333 | else: | |
333 | self.auth = None |
|
334 | self.auth = None | |
334 | return self.do_open(self._makeconnection, req) |
|
335 | return self.do_open(self._makeconnection, req) | |
335 |
|
336 | |||
336 | def _makeconnection(self, host, port=None, *args, **kwargs): |
|
337 | def _makeconnection(self, host, port=None, *args, **kwargs): | |
337 | keyfile = None |
|
338 | keyfile = None | |
338 | certfile = None |
|
339 | certfile = None | |
339 |
|
340 | |||
340 | if len(args) >= 1: # key_file |
|
341 | if len(args) >= 1: # key_file | |
341 | keyfile = args[0] |
|
342 | keyfile = args[0] | |
342 | if len(args) >= 2: # cert_file |
|
343 | if len(args) >= 2: # cert_file | |
343 | certfile = args[1] |
|
344 | certfile = args[1] | |
344 | args = args[2:] |
|
345 | args = args[2:] | |
345 |
|
346 | |||
346 | # if the user has specified different key/cert files in |
|
347 | # if the user has specified different key/cert files in | |
347 | # hgrc, we prefer these |
|
348 | # hgrc, we prefer these | |
348 | if self.auth and 'key' in self.auth and 'cert' in self.auth: |
|
349 | if self.auth and 'key' in self.auth and 'cert' in self.auth: | |
349 | keyfile = self.auth['key'] |
|
350 | keyfile = self.auth['key'] | |
350 | certfile = self.auth['cert'] |
|
351 | certfile = self.auth['cert'] | |
351 |
|
352 | |||
352 | conn = httpsconnection(host, port, keyfile, certfile, *args, |
|
353 | conn = httpsconnection(host, port, keyfile, certfile, *args, | |
353 | **kwargs) |
|
354 | **kwargs) | |
354 | conn.ui = self.ui |
|
355 | conn.ui = self.ui | |
355 | return conn |
|
356 | return conn | |
356 |
|
357 | |||
357 | class httpdigestauthhandler(urlreq.httpdigestauthhandler): |
|
358 | class httpdigestauthhandler(urlreq.httpdigestauthhandler): | |
358 | def __init__(self, *args, **kwargs): |
|
359 | def __init__(self, *args, **kwargs): | |
359 | urlreq.httpdigestauthhandler.__init__(self, *args, **kwargs) |
|
360 | urlreq.httpdigestauthhandler.__init__(self, *args, **kwargs) | |
360 | self.retried_req = None |
|
361 | self.retried_req = None | |
361 |
|
362 | |||
362 | def reset_retry_count(self): |
|
363 | def reset_retry_count(self): | |
363 | # Python 2.6.5 will call this on 401 or 407 errors and thus loop |
|
364 | # Python 2.6.5 will call this on 401 or 407 errors and thus loop | |
364 | # forever. We disable reset_retry_count completely and reset in |
|
365 | # forever. We disable reset_retry_count completely and reset in | |
365 | # http_error_auth_reqed instead. |
|
366 | # http_error_auth_reqed instead. | |
366 | pass |
|
367 | pass | |
367 |
|
368 | |||
368 | def http_error_auth_reqed(self, auth_header, host, req, headers): |
|
369 | def http_error_auth_reqed(self, auth_header, host, req, headers): | |
369 | # Reset the retry counter once for each request. |
|
370 | # Reset the retry counter once for each request. | |
370 | if req is not self.retried_req: |
|
371 | if req is not self.retried_req: | |
371 | self.retried_req = req |
|
372 | self.retried_req = req | |
372 | self.retried = 0 |
|
373 | self.retried = 0 | |
373 | return urlreq.httpdigestauthhandler.http_error_auth_reqed( |
|
374 | return urlreq.httpdigestauthhandler.http_error_auth_reqed( | |
374 | self, auth_header, host, req, headers) |
|
375 | self, auth_header, host, req, headers) | |
375 |
|
376 | |||
376 | class httpbasicauthhandler(urlreq.httpbasicauthhandler): |
|
377 | class httpbasicauthhandler(urlreq.httpbasicauthhandler): | |
377 | def __init__(self, *args, **kwargs): |
|
378 | def __init__(self, *args, **kwargs): | |
378 | self.auth = None |
|
379 | self.auth = None | |
379 | urlreq.httpbasicauthhandler.__init__(self, *args, **kwargs) |
|
380 | urlreq.httpbasicauthhandler.__init__(self, *args, **kwargs) | |
380 | self.retried_req = None |
|
381 | self.retried_req = None | |
381 |
|
382 | |||
382 | def http_request(self, request): |
|
383 | def http_request(self, request): | |
383 | if self.auth: |
|
384 | if self.auth: | |
384 | request.add_unredirected_header(self.auth_header, self.auth) |
|
385 | request.add_unredirected_header(self.auth_header, self.auth) | |
385 |
|
386 | |||
386 | return request |
|
387 | return request | |
387 |
|
388 | |||
388 | def https_request(self, request): |
|
389 | def https_request(self, request): | |
389 | if self.auth: |
|
390 | if self.auth: | |
390 | request.add_unredirected_header(self.auth_header, self.auth) |
|
391 | request.add_unredirected_header(self.auth_header, self.auth) | |
391 |
|
392 | |||
392 | return request |
|
393 | return request | |
393 |
|
394 | |||
394 | def reset_retry_count(self): |
|
395 | def reset_retry_count(self): | |
395 | # Python 2.6.5 will call this on 401 or 407 errors and thus loop |
|
396 | # Python 2.6.5 will call this on 401 or 407 errors and thus loop | |
396 | # forever. We disable reset_retry_count completely and reset in |
|
397 | # forever. We disable reset_retry_count completely and reset in | |
397 | # http_error_auth_reqed instead. |
|
398 | # http_error_auth_reqed instead. | |
398 | pass |
|
399 | pass | |
399 |
|
400 | |||
400 | def http_error_auth_reqed(self, auth_header, host, req, headers): |
|
401 | def http_error_auth_reqed(self, auth_header, host, req, headers): | |
401 | # Reset the retry counter once for each request. |
|
402 | # Reset the retry counter once for each request. | |
402 | if req is not self.retried_req: |
|
403 | if req is not self.retried_req: | |
403 | self.retried_req = req |
|
404 | self.retried_req = req | |
404 | self.retried = 0 |
|
405 | self.retried = 0 | |
405 | return urlreq.httpbasicauthhandler.http_error_auth_reqed( |
|
406 | return urlreq.httpbasicauthhandler.http_error_auth_reqed( | |
406 | self, auth_header, host, req, headers) |
|
407 | self, auth_header, host, req, headers) | |
407 |
|
408 | |||
408 | def retry_http_basic_auth(self, host, req, realm): |
|
409 | def retry_http_basic_auth(self, host, req, realm): | |
409 |
user, pw = self.passwd.find_user_password( |
|
410 | user, pw = self.passwd.find_user_password( | |
|
411 | realm, urllibcompat.getfullurl(req)) | |||
410 | if pw is not None: |
|
412 | if pw is not None: | |
411 | raw = "%s:%s" % (user, pw) |
|
413 | raw = "%s:%s" % (user, pw) | |
412 | auth = 'Basic %s' % base64.b64encode(raw).strip() |
|
414 | auth = 'Basic %s' % base64.b64encode(raw).strip() | |
413 | if req.get_header(self.auth_header, None) == auth: |
|
415 | if req.get_header(self.auth_header, None) == auth: | |
414 | return None |
|
416 | return None | |
415 | self.auth = auth |
|
417 | self.auth = auth | |
416 | req.add_unredirected_header(self.auth_header, auth) |
|
418 | req.add_unredirected_header(self.auth_header, auth) | |
417 | return self.parent.open(req) |
|
419 | return self.parent.open(req) | |
418 | else: |
|
420 | else: | |
419 | return None |
|
421 | return None | |
420 |
|
422 | |||
421 | class cookiehandler(urlreq.basehandler): |
|
423 | class cookiehandler(urlreq.basehandler): | |
422 | def __init__(self, ui): |
|
424 | def __init__(self, ui): | |
423 | self.cookiejar = None |
|
425 | self.cookiejar = None | |
424 |
|
426 | |||
425 | cookiefile = ui.config('auth', 'cookiefile') |
|
427 | cookiefile = ui.config('auth', 'cookiefile') | |
426 | if not cookiefile: |
|
428 | if not cookiefile: | |
427 | return |
|
429 | return | |
428 |
|
430 | |||
429 | cookiefile = util.expandpath(cookiefile) |
|
431 | cookiefile = util.expandpath(cookiefile) | |
430 | try: |
|
432 | try: | |
431 | cookiejar = util.cookielib.MozillaCookieJar(cookiefile) |
|
433 | cookiejar = util.cookielib.MozillaCookieJar(cookiefile) | |
432 | cookiejar.load() |
|
434 | cookiejar.load() | |
433 | self.cookiejar = cookiejar |
|
435 | self.cookiejar = cookiejar | |
434 | except util.cookielib.LoadError as e: |
|
436 | except util.cookielib.LoadError as e: | |
435 | ui.warn(_('(error loading cookie file %s: %s; continuing without ' |
|
437 | ui.warn(_('(error loading cookie file %s: %s; continuing without ' | |
436 | 'cookies)\n') % (cookiefile, str(e))) |
|
438 | 'cookies)\n') % (cookiefile, str(e))) | |
437 |
|
439 | |||
438 | def http_request(self, request): |
|
440 | def http_request(self, request): | |
439 | if self.cookiejar: |
|
441 | if self.cookiejar: | |
440 | self.cookiejar.add_cookie_header(request) |
|
442 | self.cookiejar.add_cookie_header(request) | |
441 |
|
443 | |||
442 | return request |
|
444 | return request | |
443 |
|
445 | |||
444 | def https_request(self, request): |
|
446 | def https_request(self, request): | |
445 | if self.cookiejar: |
|
447 | if self.cookiejar: | |
446 | self.cookiejar.add_cookie_header(request) |
|
448 | self.cookiejar.add_cookie_header(request) | |
447 |
|
449 | |||
448 | return request |
|
450 | return request | |
449 |
|
451 | |||
450 | handlerfuncs = [] |
|
452 | handlerfuncs = [] | |
451 |
|
453 | |||
452 | def opener(ui, authinfo=None): |
|
454 | def opener(ui, authinfo=None): | |
453 | ''' |
|
455 | ''' | |
454 | construct an opener suitable for urllib2 |
|
456 | construct an opener suitable for urllib2 | |
455 | authinfo will be added to the password manager |
|
457 | authinfo will be added to the password manager | |
456 | ''' |
|
458 | ''' | |
457 | # experimental config: ui.usehttp2 |
|
459 | # experimental config: ui.usehttp2 | |
458 | if ui.configbool('ui', 'usehttp2'): |
|
460 | if ui.configbool('ui', 'usehttp2'): | |
459 | handlers = [ |
|
461 | handlers = [ | |
460 | httpconnectionmod.http2handler( |
|
462 | httpconnectionmod.http2handler( | |
461 | ui, |
|
463 | ui, | |
462 | passwordmgr(ui, ui.httppasswordmgrdb)) |
|
464 | passwordmgr(ui, ui.httppasswordmgrdb)) | |
463 | ] |
|
465 | ] | |
464 | else: |
|
466 | else: | |
465 | handlers = [httphandler()] |
|
467 | handlers = [httphandler()] | |
466 | if has_https: |
|
468 | if has_https: | |
467 | handlers.append(httpshandler(ui)) |
|
469 | handlers.append(httpshandler(ui)) | |
468 |
|
470 | |||
469 | handlers.append(proxyhandler(ui)) |
|
471 | handlers.append(proxyhandler(ui)) | |
470 |
|
472 | |||
471 | passmgr = passwordmgr(ui, ui.httppasswordmgrdb) |
|
473 | passmgr = passwordmgr(ui, ui.httppasswordmgrdb) | |
472 | if authinfo is not None: |
|
474 | if authinfo is not None: | |
473 | realm, uris, user, passwd = authinfo |
|
475 | realm, uris, user, passwd = authinfo | |
474 | saveduser, savedpass = passmgr.find_stored_password(uris[0]) |
|
476 | saveduser, savedpass = passmgr.find_stored_password(uris[0]) | |
475 | if user != saveduser or passwd: |
|
477 | if user != saveduser or passwd: | |
476 | passmgr.add_password(realm, uris, user, passwd) |
|
478 | passmgr.add_password(realm, uris, user, passwd) | |
477 | ui.debug('http auth: user %s, password %s\n' % |
|
479 | ui.debug('http auth: user %s, password %s\n' % | |
478 | (user, passwd and '*' * len(passwd) or 'not set')) |
|
480 | (user, passwd and '*' * len(passwd) or 'not set')) | |
479 |
|
481 | |||
480 | handlers.extend((httpbasicauthhandler(passmgr), |
|
482 | handlers.extend((httpbasicauthhandler(passmgr), | |
481 | httpdigestauthhandler(passmgr))) |
|
483 | httpdigestauthhandler(passmgr))) | |
482 | handlers.extend([h(ui, passmgr) for h in handlerfuncs]) |
|
484 | handlers.extend([h(ui, passmgr) for h in handlerfuncs]) | |
483 | handlers.append(cookiehandler(ui)) |
|
485 | handlers.append(cookiehandler(ui)) | |
484 | opener = urlreq.buildopener(*handlers) |
|
486 | opener = urlreq.buildopener(*handlers) | |
485 |
|
487 | |||
486 | # The user agent should should *NOT* be used by servers for e.g. |
|
488 | # The user agent should should *NOT* be used by servers for e.g. | |
487 | # protocol detection or feature negotiation: there are other |
|
489 | # protocol detection or feature negotiation: there are other | |
488 | # facilities for that. |
|
490 | # facilities for that. | |
489 | # |
|
491 | # | |
490 | # "mercurial/proto-1.0" was the original user agent string and |
|
492 | # "mercurial/proto-1.0" was the original user agent string and | |
491 | # exists for backwards compatibility reasons. |
|
493 | # exists for backwards compatibility reasons. | |
492 | # |
|
494 | # | |
493 | # The "(Mercurial %s)" string contains the distribution |
|
495 | # The "(Mercurial %s)" string contains the distribution | |
494 | # name and version. Other client implementations should choose their |
|
496 | # name and version. Other client implementations should choose their | |
495 | # own distribution name. Since servers should not be using the user |
|
497 | # own distribution name. Since servers should not be using the user | |
496 | # agent string for anything, clients should be able to define whatever |
|
498 | # agent string for anything, clients should be able to define whatever | |
497 | # user agent they deem appropriate. |
|
499 | # user agent they deem appropriate. | |
498 | agent = 'mercurial/proto-1.0 (Mercurial %s)' % util.version() |
|
500 | agent = 'mercurial/proto-1.0 (Mercurial %s)' % util.version() | |
499 | opener.addheaders = [(r'User-agent', pycompat.sysstr(agent))] |
|
501 | opener.addheaders = [(r'User-agent', pycompat.sysstr(agent))] | |
500 |
|
502 | |||
501 | # This header should only be needed by wire protocol requests. But it has |
|
503 | # This header should only be needed by wire protocol requests. But it has | |
502 | # been sent on all requests since forever. We keep sending it for backwards |
|
504 | # been sent on all requests since forever. We keep sending it for backwards | |
503 | # compatibility reasons. Modern versions of the wire protocol use |
|
505 | # compatibility reasons. Modern versions of the wire protocol use | |
504 | # X-HgProto-<N> for advertising client support. |
|
506 | # X-HgProto-<N> for advertising client support. | |
505 | opener.addheaders.append((r'Accept', r'application/mercurial-0.1')) |
|
507 | opener.addheaders.append((r'Accept', r'application/mercurial-0.1')) | |
506 | return opener |
|
508 | return opener | |
507 |
|
509 | |||
508 | def open(ui, url_, data=None): |
|
510 | def open(ui, url_, data=None): | |
509 | u = util.url(url_) |
|
511 | u = util.url(url_) | |
510 | if u.scheme: |
|
512 | if u.scheme: | |
511 | u.scheme = u.scheme.lower() |
|
513 | u.scheme = u.scheme.lower() | |
512 | url_, authinfo = u.authinfo() |
|
514 | url_, authinfo = u.authinfo() | |
513 | else: |
|
515 | else: | |
514 | path = util.normpath(os.path.abspath(url_)) |
|
516 | path = util.normpath(os.path.abspath(url_)) | |
515 | url_ = 'file://' + urlreq.pathname2url(path) |
|
517 | url_ = 'file://' + urlreq.pathname2url(path) | |
516 | authinfo = None |
|
518 | authinfo = None | |
517 | return opener(ui, authinfo).open(url_, data) |
|
519 | return opener(ui, authinfo).open(url_, data) |
General Comments 0
You need to be logged in to leave comments.
Login now