style: always use `x is not None` instead of `not x is None`...
Alex Gaynor
r34332:53133250 default
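The change is purely stylistic: because `is` binds more tightly than `not`, `not x is None` already parses as `not (x is None)`, so both spellings behave identically; PEP 8 simply prefers the explicit `is not` operator. A minimal sketch of the equivalence (illustration only, not part of the commit):

# `not x is None` parses as `not (x is None)`, so the two forms are
# semantically identical; `is not` just reads as a single comparison.
for x in (None, 42):
    assert (not x is None) == (x is not None)

# the spelling this commit standardizes on:
x = 42
if x is not None:
    print(x)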
@@ -1,716 +1,716 @@
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see
# <http://www.gnu.org/licenses/>.

# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko

# Modified by Benoit Boissinot:
#  - fix for digest auth (inspired from urllib2.py @ Python v2.4)
# Modified by Dirkjan Ochtman:
#  - import md5 function from a local util module
# Modified by Augie Fackler:
#  - add safesend method and use it to prevent broken pipe errors
#    on large POST requests

"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.

>>> import urllib2
>>> from keepalive import HTTPHandler
>>> keepalive_handler = HTTPHandler()
>>> opener = urlreq.buildopener(keepalive_handler)
>>> urlreq.installopener(opener)
>>>
>>> fo = urlreq.urlopen('http://www.python.org')

If a connection to a given host is requested, and all of the existing
connections are still in use, another connection will be opened. If
the handler tries to use an existing connection but it fails in some
way, it will be closed and removed from the pool.

To remove the handler, simply re-run build_opener with no arguments, and
install that opener.

You can explicitly close connections by using the close_connection()
method of the returned file-like object (described below) or you can
use the handler methods:

  close_connection(host)
  close_all()
  open_connections()

NOTE: using the close_connection and close_all methods of the handler
should be done with care when using multiple threads.
  * there is nothing that prevents another thread from creating new
    connections immediately after connections are closed
  * no checks are done to prevent in-use connections from being closed

>>> keepalive_handler.close_all()

EXTRA ATTRIBUTES AND METHODS

  Upon a status of 200, the object returned has a few additional
  attributes and methods, which should not be used if you want to
  remain consistent with the normal urllib2-returned objects:

    close_connection()  -  close the connection to the host
    readlines()         -  you know, readlines()
    status              -  the return status (i.e. 404)
    reason              -  english translation of status (i.e. 'File not found')

  If you want the best of both worlds, use this inside an
  AttributeError-catching try:

  >>> try: status = fo.status
  >>> except AttributeError: status = None

  Unfortunately, these are ONLY there if status == 200, so it's not
  easy to distinguish between non-200 responses. The reason is that
  urllib2 tries to do clever things with error codes 301, 302, 401,
  and 407, and it wraps the object upon return.
"""

# $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $

from __future__ import absolute_import, print_function

import errno
import hashlib
import socket
import sys
import threading

from .i18n import _
from . import (
    util,
)

httplib = util.httplib
urlerr = util.urlerr
urlreq = util.urlreq

DEBUG = None

class ConnectionManager(object):
    """
    The connection manager must be able to:
      * keep track of all existing
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        self._lock.acquire()
        try:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]: del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        conn = None
        self._lock.acquire()
        try:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)

class KeepAliveHandler(object):
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().iteritems():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        host = req.get_host()
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(_('bad HTTP status line: %s') % err.line)
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest(
                    req.get_method(), req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(), req.get_selector(), **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)

class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    pass

class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      strict=True, method=method,
                                      buffering=True)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    _raw_read = httplib.HTTPResponse.read

    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
-        if self._rbuf and not amt is None:
+        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(''.join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2) # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2) # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def readline(self):
        # Fast path for a line is already available in read buffer.
        i = self._rbuf.find('\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        chunks = [self._rbuf]
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            chunks.append(new)
            i = new.find('\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = ''
            return ''.join(chunks)

        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return ''.join(chunks)

    def readlines(self, sizehint=0):
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        reraise = True
        if v[0] == errno.EPIPE: # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise

def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.
    """
    def safegetresponse(self):
        # In safesend() we might set the _broken_pipe_resp
        # attribute, in which case the socket has already
        # been closed and we just need to give them the response
        # back. Otherwise, we use the normal response path.
        r = getattr(self, '_broken_pipe_resp', None)
        if r is not None:
            return r
        return cls.getresponse(self)
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse

class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse
    send = safesend
    getresponse = wrapgetresponse(httplib.HTTPConnection)


#########################################################################
#####   TEST FUNCTIONS
#########################################################################


def continuity(url):
    md5 = hashlib.md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('normal urllib', m.hexdigest()))

    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('keepalive read', m.hexdigest()))

    fo = urlreq.urlopen(url)
    foo = ''
    while True:
        f = fo.readline()
        if f:
            foo = foo + f
        else: break
    fo.close()
    m = md5(foo)
    print(format % ('keepalive readline', m.hexdigest()))

def comp(N, url):
    print('  making %i connections to:\n  %s' % (N, url))

    util.stdout.write('  first using the normal urllib handlers')
    # first use normal opener
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    t1 = fetch(N, url)
    print('  TIME: %.3f s' % t1)

    util.stdout.write('  now using the keepalive handler       ')
    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)
    t2 = fetch(N, url)
    print('  TIME: %.3f s' % t2)
    print('  improvement factor: %.2f' % (t1 / t2))

def fetch(N, url, delay=0):
    import time
    lens = []
    starttime = time.time()
    for i in range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    j = 0
    for i in lens[1:]:
        j = j + 1
        if not i == lens[0]:
            print("WARNING: inconsistent length on read %i: %i" % (j, i))

    return diff

def test_timeout(url):
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    print("  fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print("  waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        util.stdout.write('\r  %2i' % i)
        util.stdout.flush()
        time.sleep(1)
        i -= 1
    util.stderr.write('\r')

    print("  fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print('  data are identical')
    else:
        print('  ERROR: DATA DIFFER')

    DEBUG = dbbackup


def test(url, N=10):
    print("performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print('')
    print("performing speed comparison")
    comp(N, url)
    print('')
    print("performing dropped-connection check")
    test_timeout(url)

if __name__ == '__main__':
    import time
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)
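For readers unfamiliar with this module, here is a short usage sketch distilled from the docstring at the top of the file. It assumes a Python 2 environment in which the mercurial package is importable and the example host is reachable; the `urlreq` aliases come from Mercurial's `util` module.

# A minimal sketch, assuming Python 2 and an importable mercurial package.
from mercurial import keepalive, util

urlreq = util.urlreq

handler = keepalive.HTTPHandler()
opener = urlreq.buildopener(handler)    # opener that pools connections
urlreq.installopener(opener)

fo = urlreq.urlopen('http://www.python.org')
data = fo.read()
fo.close()                              # marks the connection ready for reuse

print(handler.open_connections())       # e.g. [('www.python.org:80', 1)]
handler.close_all()                     # explicitly drop pooled connections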
@@ -1,179 +1,179 @@
# parsers.py - Python implementation of parsers.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct
import zlib

from ..node import nullid
from .. import pycompat
stringio = pycompat.stringio


_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# Some code below makes tuples directly because it's more convenient. However,
# code outside this module should always use dirstatetuple.
def dirstatetuple(*x):
    # x is a tuple
    return x

indexformatng = ">Qiiiiii20s12x"
indexfirst = struct.calcsize('Q')
sizeint = struct.calcsize('i')
indexsize = struct.calcsize(indexformatng)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return int(int(offset) << 16 | type)

class BaseIndexObject(object):
    def __len__(self):
        return self._lgt + len(self._extra) + 1

    def insert(self, i, tup):
        assert i == -1
        self._extra.append(tup)

    def _fix_index(self, i):
        if not isinstance(i, int):
            raise TypeError("expecting int indexes")
        if i < 0:
            i = len(self) + i
        if i < 0 or i >= len(self):
            raise IndexError
        return i

    def __getitem__(self, i):
        i = self._fix_index(i)
        if i == len(self) - 1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        if i >= self._lgt:
            return self._extra[i - self._lgt]
        index = self._calculate_index(i)
        r = struct.unpack(indexformatng, self._data[index:index + indexsize])
        if i == 0:
            e = list(r)
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            return tuple(e)
        return r

class IndexObject(BaseIndexObject):
    def __init__(self, data):
        assert len(data) % indexsize == 0
        self._data = data
        self._lgt = len(data) // indexsize
        self._extra = []

    def _calculate_index(self, i):
        return i * indexsize

    def __delitem__(self, i):
-        if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
+        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError("deleting slices only supports a:-1 with step 1")
        i = self._fix_index(i.start)
        if i < self._lgt:
            self._data = self._data[:i * indexsize]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[:i - self._lgt]

class InlinedIndexObject(BaseIndexObject):
    def __init__(self, data, inline=0):
        self._data = data
        self._lgt = self._inline_scan(None)
        self._inline_scan(self._lgt)
        self._extra = []

    def _inline_scan(self, lgt):
        off = 0
        if lgt is not None:
            self._offsets = [0] * lgt
        count = 0
        while off <= len(self._data) - indexsize:
            s, = struct.unpack('>i',
                self._data[off + indexfirst:off + sizeint + indexfirst])
            if lgt is not None:
                self._offsets[count] = off
            count += 1
            off += indexsize + s
        if off != len(self._data):
            raise ValueError("corrupted data")
        return count

    def __delitem__(self, i):
-        if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
+        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError("deleting slices only supports a:-1 with step 1")
        i = self._fix_index(i.start)
        if i < self._lgt:
            self._offsets = self._offsets[:i]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[:i - self._lgt]

    def _calculate_index(self, i):
        return self._offsets[i]

def parse_index2(data, inline):
    if not inline:
        return IndexObject(data), None
    return InlinedIndexObject(data, inline), (0, data)

def parse_dirstate(dmap, copymap, st):
    parents = [st[:20], st[20: 40]]
    # dereference fields so they will be local in loop
    format = ">cllll"
    e_size = struct.calcsize(format)
    pos1 = 40
    l = len(st)

    # the inner loop
    while pos1 < l:
        pos2 = pos1 + e_size
        e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
        pos1 = pos2 + e[4]
        f = st[pos2:pos1]
        if '\0' in f:
            f, c = f.split('\0')
            copymap[f] = c
        dmap[f] = e[:4]
    return parents

def pack_dirstate(dmap, copymap, pl, now):
    now = int(now)
    cs = stringio()
    write = cs.write
    write("".join(pl))
    for f, e in dmap.iteritems():
        if e[0] == 'n' and e[3] == now:
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            e = dirstatetuple(e[0], e[1], e[2], -1)
            dmap[f] = e

        if f in copymap:
            f = "%s\0%s" % (f, copymap[f])
        e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
        write(e)
        write(f)
    return cs.getvalue()
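The two `__delitem__` lines this hunk touches both guard the same narrow contract: only `del index[start:-1]` with the default step is accepted. A standalone sketch of that guard, using a hypothetical `Demo` class rather than Mercurial code:

# Hypothetical illustration of the slice contract guarded above: the
# stop must be -1 and the step must be left implicit.
class Demo(object):
    def __init__(self, items):
        self._items = items

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError("deleting slices only supports a:-1 with step 1")
        # mirror the index objects: truncate everything from i.start on
        del self._items[i.start:]

d = Demo([1, 2, 3, 4])
del d[2:-1]                # accepted: truncates d._items to [1, 2]
try:
    del d[0:2]             # rejected: stop is not -1
except ValueError as err:
    print(err)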