style: always use `x is not None` instead of `not x is None`...
Alex Gaynor
r34332:53133250 default
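For context on the change itself: the two spellings are equivalent at runtime, because `is` binds tighter than `not`, so `not x is None` parses as `not (x is None)`. But `x is not None` uses Python's dedicated `is not` operator and cannot be misread as `(not x) is None`. A minimal sketch (the variable `x` is invented for illustration):

x = 0

# `is` binds tighter than `not`, so the old spelling already means this:
assert (not x is None) == (not (x is None))

# The preferred spelling is the same test, written with the single
# `is not` comparison operator:
assert (x is not None) == (not (x is None))

# The readability hazard: the old form invites misparsing as
# `(not x) is None`, which is a different expression entirely.
assert ((not x) is None) is False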
@@ -1,716 +1,716 @@
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> import urllib2
29 29 >>> from keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68 status - the return status (e.g. 404)
69 69 reason - English translation of status (e.g. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 >>> except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85 from __future__ import absolute_import, print_function
86 86
87 87 import errno
88 88 import hashlib
89 89 import socket
90 90 import sys
91 91 import threading
92 92
93 93 from .i18n import _
94 94 from . import (
95 95 util,
96 96 )
97 97
98 98 httplib = util.httplib
99 99 urlerr = util.urlerr
100 100 urlreq = util.urlreq
101 101
102 102 DEBUG = None
103 103
104 104 class ConnectionManager(object):
105 105 """
106 106 The connection manager must be able to:
107 107 * keep track of all existing connections
108 108 """
109 109 def __init__(self):
110 110 self._lock = threading.Lock()
111 111 self._hostmap = {} # map hosts to a list of connections
112 112 self._connmap = {} # map connections to host
113 113 self._readymap = {} # map connection to ready state
114 114
115 115 def add(self, host, connection, ready):
116 116 self._lock.acquire()
117 117 try:
118 118 if host not in self._hostmap:
119 119 self._hostmap[host] = []
120 120 self._hostmap[host].append(connection)
121 121 self._connmap[connection] = host
122 122 self._readymap[connection] = ready
123 123 finally:
124 124 self._lock.release()
125 125
126 126 def remove(self, connection):
127 127 self._lock.acquire()
128 128 try:
129 129 try:
130 130 host = self._connmap[connection]
131 131 except KeyError:
132 132 pass
133 133 else:
134 134 del self._connmap[connection]
135 135 del self._readymap[connection]
136 136 self._hostmap[host].remove(connection)
137 137 if not self._hostmap[host]: del self._hostmap[host]
138 138 finally:
139 139 self._lock.release()
140 140
141 141 def set_ready(self, connection, ready):
142 142 try:
143 143 self._readymap[connection] = ready
144 144 except KeyError:
145 145 pass
146 146
147 147 def get_ready_conn(self, host):
148 148 conn = None
149 149 self._lock.acquire()
150 150 try:
151 151 if host in self._hostmap:
152 152 for c in self._hostmap[host]:
153 153 if self._readymap[c]:
154 154 self._readymap[c] = 0
155 155 conn = c
156 156 break
157 157 finally:
158 158 self._lock.release()
159 159 return conn
160 160
161 161 def get_all(self, host=None):
162 162 if host:
163 163 return list(self._hostmap.get(host, []))
164 164 else:
165 165 return dict(self._hostmap)
166 166
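As a reading aid (not part of the commit), a hedged sketch of how the three maps above cooperate: every connection is tracked under its host, and the ready flag gates reuse.

# Illustrative only: plain object() instances stand in for sockets.
cm = ConnectionManager()
c1, c2 = object(), object()
cm.add('example.com:80', c1, ready=1)
cm.add('example.com:80', c2, ready=0)

# Only a ready connection is handed out, and it is marked busy
# (ready -> 0) under the manager's lock before being returned.
assert cm.get_ready_conn('example.com:80') is c1
assert cm.get_ready_conn('example.com:80') is None

cm.set_ready(c1, 1)    # request finished; connection is reusable again
cm.remove(c2)          # drop a dead connection from all three maps
assert cm.get_all('example.com:80') == [c1]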
167 167 class KeepAliveHandler(object):
168 168 def __init__(self):
169 169 self._cm = ConnectionManager()
170 170
171 171 #### Connection Management
172 172 def open_connections(self):
173 173 """return a list of connected hosts and the number of connections
174 174 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
175 175 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
176 176
177 177 def close_connection(self, host):
178 178 """close connection(s) to <host>
179 179 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
180 180 no error occurs if there is no connection to that host."""
181 181 for h in self._cm.get_all(host):
182 182 self._cm.remove(h)
183 183 h.close()
184 184
185 185 def close_all(self):
186 186 """close all open connections"""
187 187 for host, conns in self._cm.get_all().iteritems():
188 188 for h in conns:
189 189 self._cm.remove(h)
190 190 h.close()
191 191
192 192 def _request_closed(self, request, host, connection):
193 193 """tells us that this request is now closed and that the
194 194 connection is ready for another request"""
195 195 self._cm.set_ready(connection, 1)
196 196
197 197 def _remove_connection(self, host, connection, close=0):
198 198 if close:
199 199 connection.close()
200 200 self._cm.remove(connection)
201 201
202 202 #### Transaction Execution
203 203 def http_open(self, req):
204 204 return self.do_open(HTTPConnection, req)
205 205
206 206 def do_open(self, http_class, req):
207 207 host = req.get_host()
208 208 if not host:
209 209 raise urlerr.urlerror('no host given')
210 210
211 211 try:
212 212 h = self._cm.get_ready_conn(host)
213 213 while h:
214 214 r = self._reuse_connection(h, req, host)
215 215
216 216 # if this response is non-None, then it worked and we're
217 217 # done. Break out, skipping the else block.
218 218 if r:
219 219 break
220 220
221 221 # connection is bad - possibly closed by server
222 222 # discard it and ask for the next free connection
223 223 h.close()
224 224 self._cm.remove(h)
225 225 h = self._cm.get_ready_conn(host)
226 226 else:
227 227 # no (working) free connections were found. Create a new one.
228 228 h = http_class(host)
229 229 if DEBUG:
230 230 DEBUG.info("creating new connection to %s (%d)",
231 231 host, id(h))
232 232 self._cm.add(host, h, 0)
233 233 self._start_transaction(h, req)
234 234 r = h.getresponse()
235 235 # The string form of BadStatusLine is the status line. Add some context
236 236 # to make the error message slightly more useful.
237 237 except httplib.BadStatusLine as err:
238 238 raise urlerr.urlerror(_('bad HTTP status line: %s') % err.line)
239 239 except (socket.error, httplib.HTTPException) as err:
240 240 raise urlerr.urlerror(err)
241 241
242 242 # if not a persistent connection, don't try to reuse it
243 243 if r.will_close:
244 244 self._cm.remove(h)
245 245
246 246 if DEBUG:
247 247 DEBUG.info("STATUS: %s, %s", r.status, r.reason)
248 248 r._handler = self
249 249 r._host = host
250 250 r._url = req.get_full_url()
251 251 r._connection = h
252 252 r.code = r.status
253 253 r.headers = r.msg
254 254 r.msg = r.reason
255 255
256 256 return r
257 257
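The reuse loop above leans on Python's while/else: the else clause runs only when the loop condition becomes false (the pool ran out of ready connections), never when the loop exits via break (a reused connection worked). In miniature:

# Illustrative only: while/else mirrors the "no working free
# connection" branch of do_open.
h = None           # stand-in for an empty connection pool
while h:
    break          # (a reused connection would be handled here)
else:
    h = 'new connection'    # reached only because the pool ran dry
assert h == 'new connection'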
258 258 def _reuse_connection(self, h, req, host):
259 259 """start the transaction with a re-used connection
260 260 return a response object (r) upon success or None on failure.
261 261 This DOES not close or remove bad connections in cases where
262 262 it returns. However, if an unexpected exception occurs, it
263 263 will close and remove the connection before re-raising.
264 264 """
265 265 try:
266 266 self._start_transaction(h, req)
267 267 r = h.getresponse()
268 268 # note: just because we got something back doesn't mean it
269 269 # worked. We'll check the version below, too.
270 270 except (socket.error, httplib.HTTPException):
271 271 r = None
272 272 except: # re-raises
273 273 # adding this block just in case we've missed
274 274 # something we will still raise the exception, but
275 275 # lets try and close the connection and remove it
276 276 # first. We previously got into a nasty loop
277 277 # where an exception was uncaught, and so the
278 278 # connection stayed open. On the next try, the
279 279 # same exception was raised, etc. The trade-off is
280 280 # that it's now possible this call will raise
281 281 # a DIFFERENT exception
282 282 if DEBUG:
283 283 DEBUG.error("unexpected exception - closing "
284 284 "connection to %s (%d)", host, id(h))
285 285 self._cm.remove(h)
286 286 h.close()
287 287 raise
288 288
289 289 if r is None or r.version == 9:
290 290 # httplib falls back to assuming HTTP 0.9 if it gets a
291 291 # bad header back. This is most likely to happen if
292 292 # the socket has been closed by the server since we
293 293 # last used the connection.
294 294 if DEBUG:
295 295 DEBUG.info("failed to re-use connection to %s (%d)",
296 296 host, id(h))
297 297 r = None
298 298 else:
299 299 if DEBUG:
300 300 DEBUG.info("re-using connection to %s (%d)", host, id(h))
301 301
302 302 return r
303 303
304 304 def _start_transaction(self, h, req):
305 305 # What follows mostly reimplements HTTPConnection.request()
306 306 # except it adds self.parent.addheaders in the mix and sends headers
307 307 # in a deterministic order (to make testing easier).
308 308 headers = util.sortdict(self.parent.addheaders)
309 309 headers.update(sorted(req.headers.items()))
310 310 headers.update(sorted(req.unredirected_hdrs.items()))
311 311 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
312 312 skipheaders = {}
313 313 for n in ('host', 'accept-encoding'):
314 314 if n in headers:
315 315 skipheaders['skip_' + n.replace('-', '_')] = 1
316 316 try:
317 317 if req.has_data():
318 318 data = req.get_data()
319 319 h.putrequest(
320 320 req.get_method(), req.get_selector(), **skipheaders)
321 321 if 'content-type' not in headers:
322 322 h.putheader('Content-type',
323 323 'application/x-www-form-urlencoded')
324 324 if 'content-length' not in headers:
325 325 h.putheader('Content-length', '%d' % len(data))
326 326 else:
327 327 h.putrequest(
328 328 req.get_method(), req.get_selector(), **skipheaders)
329 329 except socket.error as err:
330 330 raise urlerr.urlerror(err)
331 331 for k, v in headers.items():
332 332 h.putheader(k, v)
333 333 h.endheaders()
334 334 if req.has_data():
335 335 h.send(data)
336 336
337 337 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
338 338 pass
339 339
340 340 class HTTPResponse(httplib.HTTPResponse):
341 341 # we need to subclass HTTPResponse in order to
342 342 # 1) add readline() and readlines() methods
343 343 # 2) add close_connection() methods
344 344 # 3) add info() and geturl() methods
345 345
346 346 # in order to add readline(), read must be modified to deal with a
347 347 # buffer. example: readline must read a buffer and then spit back
348 348 # one line at a time. The only real alternative is to read one
349 349 # BYTE at a time (ick). Once something has been read, it can't be
350 350 # put back (ok, maybe it can, but that's even uglier than this),
351 351 # so if you THEN do a normal read, you must first take stuff from
352 352 # the buffer.
353 353
354 354 # the read method wraps the original to accommodate buffering,
355 355 # although read() never adds to the buffer.
356 356 # Both readline and readlines have been stolen with almost no
357 357 # modification from socket.py
358 358
359 359
360 360 def __init__(self, sock, debuglevel=0, strict=0, method=None):
361 361 httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
362 362 strict=True, method=method,
363 363 buffering=True)
364 364 self.fileno = sock.fileno
365 365 self.code = None
366 366 self._rbuf = ''
367 367 self._rbufsize = 8096
368 368 self._handler = None # inserted by the handler later
369 369 self._host = None # (same)
370 370 self._url = None # (same)
371 371 self._connection = None # (same)
372 372
373 373 _raw_read = httplib.HTTPResponse.read
374 374
375 375 def close(self):
376 376 if self.fp:
377 377 self.fp.close()
378 378 self.fp = None
379 379 if self._handler:
380 380 self._handler._request_closed(self, self._host,
381 381 self._connection)
382 382
383 383 def close_connection(self):
384 384 self._handler._remove_connection(self._host, self._connection, close=1)
385 385 self.close()
386 386
387 387 def info(self):
388 388 return self.headers
389 389
390 390 def geturl(self):
391 391 return self._url
392 392
393 393 def read(self, amt=None):
394 394 # the _rbuf test is only in this first if for speed. It's not
395 395 # logically necessary
396 if self._rbuf and not amt is None:
396 if self._rbuf and amt is not None:
397 397 L = len(self._rbuf)
398 398 if amt > L:
399 399 amt -= L
400 400 else:
401 401 s = self._rbuf[:amt]
402 402 self._rbuf = self._rbuf[amt:]
403 403 return s
404 404
405 405 s = self._rbuf + self._raw_read(amt)
406 406 self._rbuf = ''
407 407 return s
408 408
409 409 # stolen from Python SVN #68532 to fix issue1088
410 410 def _read_chunked(self, amt):
411 411 chunk_left = self.chunk_left
412 412 parts = []
413 413
414 414 while True:
415 415 if chunk_left is None:
416 416 line = self.fp.readline()
417 417 i = line.find(';')
418 418 if i >= 0:
419 419 line = line[:i] # strip chunk-extensions
420 420 try:
421 421 chunk_left = int(line, 16)
422 422 except ValueError:
423 423 # close the connection as protocol synchronization is
424 424 # probably lost
425 425 self.close()
426 426 raise httplib.IncompleteRead(''.join(parts))
427 427 if chunk_left == 0:
428 428 break
429 429 if amt is None:
430 430 parts.append(self._safe_read(chunk_left))
431 431 elif amt < chunk_left:
432 432 parts.append(self._safe_read(amt))
433 433 self.chunk_left = chunk_left - amt
434 434 return ''.join(parts)
435 435 elif amt == chunk_left:
436 436 parts.append(self._safe_read(amt))
437 437 self._safe_read(2) # toss the CRLF at the end of the chunk
438 438 self.chunk_left = None
439 439 return ''.join(parts)
440 440 else:
441 441 parts.append(self._safe_read(chunk_left))
442 442 amt -= chunk_left
443 443
444 444 # we read the whole chunk, get another
445 445 self._safe_read(2) # toss the CRLF at the end of the chunk
446 446 chunk_left = None
447 447
448 448 # read and discard trailer up to the CRLF terminator
449 449 ### note: we shouldn't have any trailers!
450 450 while True:
451 451 line = self.fp.readline()
452 452 if not line:
453 453 # a vanishingly small number of sites EOF without
454 454 # sending the trailer
455 455 break
456 456 if line == '\r\n':
457 457 break
458 458
459 459 # we read everything; close the "file"
460 460 self.close()
461 461
462 462 return ''.join(parts)
463 463
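For readers unfamiliar with chunked transfer coding: each chunk is a hex size line (optionally carrying `;`-prefixed extensions), the payload, and a trailing CRLF, terminated by a zero-size chunk. A self-contained sketch of the same framing logic, independent of this class:

# Illustrative only: decode a chunked body the way _read_chunked does,
# with an in-memory buffer standing in for the response socket.
from io import BytesIO

raw = BytesIO(b'4\r\nWiki\r\n5;ext=1\r\npedia\r\n0\r\n\r\n')
parts = []
while True:
    line = raw.readline().split(b';')[0]    # strip chunk-extensions
    chunk_left = int(line.strip().decode('ascii'), 16)
    if chunk_left == 0:
        break
    parts.append(raw.read(chunk_left))
    raw.read(2)                             # toss the CRLF after the chunk
assert b''.join(parts) == b'Wikipedia'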
464 464 def readline(self):
465 465 # Fast path for a line is already available in read buffer.
466 466 i = self._rbuf.find('\n')
467 467 if i >= 0:
468 468 i += 1
469 469 line = self._rbuf[:i]
470 470 self._rbuf = self._rbuf[i:]
471 471 return line
472 472
473 473 # No newline in local buffer. Read until we find one.
474 474 chunks = [self._rbuf]
475 475 i = -1
476 476 readsize = self._rbufsize
477 477 while True:
478 478 new = self._raw_read(readsize)
479 479 if not new:
480 480 break
481 481
482 482 chunks.append(new)
483 483 i = new.find('\n')
484 484 if i >= 0:
485 485 break
486 486
487 487 # We either have exhausted the stream or have a newline in chunks[-1].
488 488
489 489 # EOF
490 490 if i == -1:
491 491 self._rbuf = ''
492 492 return ''.join(chunks)
493 493
494 494 i += 1
495 495 self._rbuf = chunks[-1][i:]
496 496 chunks[-1] = chunks[-1][:i]
497 497 return ''.join(chunks)
498 498
499 499 def readlines(self, sizehint=0):
500 500 total = 0
501 501 list = []
502 502 while True:
503 503 line = self.readline()
504 504 if not line:
505 505 break
506 506 list.append(line)
507 507 total += len(line)
508 508 if sizehint and total >= sizehint:
509 509 break
510 510 return list
511 511
512 512 def safesend(self, str):
513 513 """Send `str' to the server.
514 514
515 515 Shamelessly ripped off from httplib to patch a bad behavior.
516 516 """
517 517 # _broken_pipe_resp is an attribute we set in this function
518 518 # if the socket is closed while we're sending data but
519 519 # the server sent us a response before hanging up.
520 520 # In that case, we want to pretend to send the rest of the
521 521 # outgoing data, and then let the user use getresponse()
522 522 # (which we wrap) to get this last response before
523 523 # opening a new socket.
524 524 if getattr(self, '_broken_pipe_resp', None) is not None:
525 525 return
526 526
527 527 if self.sock is None:
528 528 if self.auto_open:
529 529 self.connect()
530 530 else:
531 531 raise httplib.NotConnected
532 532
533 533 # send the data to the server. if we get a broken pipe, then close
534 534 # the socket. we want to reconnect when somebody tries to send again.
535 535 #
536 536 # NOTE: we DO propagate the error, though, because we cannot simply
537 537 # ignore the error... the caller will know if they can retry.
538 538 if self.debuglevel > 0:
539 539 print("send:", repr(str))
540 540 try:
541 541 blocksize = 8192
542 542 read = getattr(str, 'read', None)
543 543 if read is not None:
544 544 if self.debuglevel > 0:
545 545 print("sending a read()able")
546 546 data = read(blocksize)
547 547 while data:
548 548 self.sock.sendall(data)
549 549 data = read(blocksize)
550 550 else:
551 551 self.sock.sendall(str)
552 552 except socket.error as v:
553 553 reraise = True
554 554 if v[0] == errno.EPIPE: # Broken pipe
555 555 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
556 556 self._broken_pipe_resp = None
557 557 self._broken_pipe_resp = self.getresponse()
558 558 reraise = False
559 559 self.close()
560 560 if reraise:
561 561 raise
562 562
563 563 def wrapgetresponse(cls):
564 564 """Wraps getresponse in cls with a broken-pipe sane version.
565 565 """
566 566 def safegetresponse(self):
567 567 # In safesend() we might set the _broken_pipe_resp
568 568 # attribute, in which case the socket has already
569 569 # been closed and we just need to give them the response
570 570 # back. Otherwise, we use the normal response path.
571 571 r = getattr(self, '_broken_pipe_resp', None)
572 572 if r is not None:
573 573 return r
574 574 return cls.getresponse(self)
575 575 safegetresponse.__doc__ = cls.getresponse.__doc__
576 576 return safegetresponse
577 577
578 578 class HTTPConnection(httplib.HTTPConnection):
579 579 # use the modified response class
580 580 response_class = HTTPResponse
581 581 send = safesend
582 582 getresponse = wrapgetresponse(httplib.HTTPConnection)
583 583
584 584
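Note the override technique used here: safesend and the wrapped getresponse are installed as plain class attributes, so HTTPConnection changes httplib's behavior without monkey-patching the library itself. The same pattern on a toy class:

# Illustrative only: assign a module-level function as a class
# attribute to override a method, as HTTPConnection does above.
class Base(object):
    def greet(self):
        return 'hello'

def wrapgreet(cls):
    def safegreet(self):
        return cls.greet(self).upper()    # delegate, then post-process
    return safegreet

class Loud(Base):
    greet = wrapgreet(Base)               # same shape as wrapgetresponse

assert Loud().greet() == 'HELLO'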
585 585 #########################################################################
586 586 ##### TEST FUNCTIONS
587 587 #########################################################################
588 588
589 589
590 590 def continuity(url):
591 591 md5 = hashlib.md5
592 592 format = '%25s: %s'
593 593
594 594 # first fetch the file with the normal http handler
595 595 opener = urlreq.buildopener()
596 596 urlreq.installopener(opener)
597 597 fo = urlreq.urlopen(url)
598 598 foo = fo.read()
599 599 fo.close()
600 600 m = md5(foo)
601 601 print(format % ('normal urllib', m.hexdigest()))
602 602
603 603 # now install the keepalive handler and try again
604 604 opener = urlreq.buildopener(HTTPHandler())
605 605 urlreq.installopener(opener)
606 606
607 607 fo = urlreq.urlopen(url)
608 608 foo = fo.read()
609 609 fo.close()
610 610 m = md5(foo)
611 611 print(format % ('keepalive read', m.hexdigest()))
612 612
613 613 fo = urlreq.urlopen(url)
614 614 foo = ''
615 615 while True:
616 616 f = fo.readline()
617 617 if f:
618 618 foo = foo + f
619 619 else: break
620 620 fo.close()
621 621 m = md5(foo)
622 622 print(format % ('keepalive readline', m.hexdigest()))
623 623
624 624 def comp(N, url):
625 625 print(' making %i connections to:\n %s' % (N, url))
626 626
627 627 util.stdout.write(' first using the normal urllib handlers')
628 628 # first use normal opener
629 629 opener = urlreq.buildopener()
630 630 urlreq.installopener(opener)
631 631 t1 = fetch(N, url)
632 632 print(' TIME: %.3f s' % t1)
633 633
634 634 util.stdout.write(' now using the keepalive handler ')
635 635 # now install the keepalive handler and try again
636 636 opener = urlreq.buildopener(HTTPHandler())
637 637 urlreq.installopener(opener)
638 638 t2 = fetch(N, url)
639 639 print(' TIME: %.3f s' % t2)
640 640 print(' improvement factor: %.2f' % (t1 / t2))
641 641
642 642 def fetch(N, url, delay=0):
643 643 import time
644 644 lens = []
645 645 starttime = time.time()
646 646 for i in range(N):
647 647 if delay and i > 0:
648 648 time.sleep(delay)
649 649 fo = urlreq.urlopen(url)
650 650 foo = fo.read()
651 651 fo.close()
652 652 lens.append(len(foo))
653 653 diff = time.time() - starttime
654 654
655 655 j = 0
656 656 for i in lens[1:]:
657 657 j = j + 1
658 658 if not i == lens[0]:
659 659 print("WARNING: inconsistent length on read %i: %i" % (j, i))
660 660
661 661 return diff
662 662
663 663 def test_timeout(url):
664 664 global DEBUG
665 665 dbbackup = DEBUG
666 666 class FakeLogger(object):
667 667 def debug(self, msg, *args):
668 668 print(msg % args)
669 669 info = warning = error = debug
670 670 DEBUG = FakeLogger()
671 671 print(" fetching the file to establish a connection")
672 672 fo = urlreq.urlopen(url)
673 673 data1 = fo.read()
674 674 fo.close()
675 675
676 676 i = 20
677 677 print(" waiting %i seconds for the server to close the connection" % i)
678 678 while i > 0:
679 679 util.stdout.write('\r %2i' % i)
680 680 util.stdout.flush()
681 681 time.sleep(1)
682 682 i -= 1
683 683 util.stderr.write('\r')
684 684
685 685 print(" fetching the file a second time")
686 686 fo = urlreq.urlopen(url)
687 687 data2 = fo.read()
688 688 fo.close()
689 689
690 690 if data1 == data2:
691 691 print(' data are identical')
692 692 else:
693 693 print(' ERROR: DATA DIFFER')
694 694
695 695 DEBUG = dbbackup
696 696
697 697
698 698 def test(url, N=10):
699 699 print("performing continuity test (making sure stuff isn't corrupted)")
700 700 continuity(url)
701 701 print('')
702 702 print("performing speed comparison")
703 703 comp(N, url)
704 704 print('')
705 705 print("performing dropped-connection check")
706 706 test_timeout(url)
707 707
708 708 if __name__ == '__main__':
709 709 import time
710 710 try:
711 711 N = int(sys.argv[1])
712 712 url = sys.argv[2]
713 713 except (IndexError, ValueError):
714 714 print("%s <integer> <url>" % sys.argv[0])
715 715 else:
716 716 test(url, N)
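Putting the pieces together, typical use of this module in its Python 2 era mirrors the docstring at the top. A hedged sketch, assuming a reachable URL:

# Illustrative only: install the keepalive handler globally, fetch
# twice so the second request can reuse the pooled connection, then
# inspect and tear down the pool.
handler = HTTPHandler()
urlreq.installopener(urlreq.buildopener(handler))

fo = urlreq.urlopen('http://www.python.org')
fo.read()
fo.close()        # marks the pooled connection ready for reuse

fo = urlreq.urlopen('http://www.python.org')    # may reuse the socket
fo.close()

print(handler.open_connections())   # e.g. [('www.python.org', 1)]
handler.close_all()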
@@ -1,179 +1,179 @@
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid
14 14 from .. import pycompat
15 15 stringio = pycompat.stringio
16 16
17 17
18 18 _pack = struct.pack
19 19 _unpack = struct.unpack
20 20 _compress = zlib.compress
21 21 _decompress = zlib.decompress
22 22
23 23 # Some code below makes tuples directly because it's more convenient. However,
24 24 # code outside this module should always use dirstatetuple.
25 25 def dirstatetuple(*x):
26 26 # x is a tuple
27 27 return x
28 28
29 29 indexformatng = ">Qiiiiii20s12x"
30 30 indexfirst = struct.calcsize('Q')
31 31 sizeint = struct.calcsize('i')
32 32 indexsize = struct.calcsize(indexformatng)
33 33
34 34 def gettype(q):
35 35 return int(q & 0xFFFF)
36 36
37 37 def offset_type(offset, type):
38 38 return int(int(offset) << 16 | type)
39 39
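The first field of an index entry packs the revlog flag bits into the low 16 bits and the byte offset into the remaining high bits; gettype and offset_type are inverses over those halves. A quick worked check (values invented):

packed = offset_type(12345, 3)    # byte offset 12345, flag bits 0x0003
assert packed == (12345 << 16) | 3
assert gettype(packed) == 3       # low 16 bits: the flags
assert packed >> 16 == 12345      # high bits: the byte offset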
40 40 class BaseIndexObject(object):
41 41 def __len__(self):
42 42 return self._lgt + len(self._extra) + 1
43 43
44 44 def insert(self, i, tup):
45 45 assert i == -1
46 46 self._extra.append(tup)
47 47
48 48 def _fix_index(self, i):
49 49 if not isinstance(i, int):
50 50 raise TypeError("expecting int indexes")
51 51 if i < 0:
52 52 i = len(self) + i
53 53 if i < 0 or i >= len(self):
54 54 raise IndexError
55 55 return i
56 56
57 57 def __getitem__(self, i):
58 58 i = self._fix_index(i)
59 59 if i == len(self) - 1:
60 60 return (0, 0, 0, -1, -1, -1, -1, nullid)
61 61 if i >= self._lgt:
62 62 return self._extra[i - self._lgt]
63 63 index = self._calculate_index(i)
64 64 r = struct.unpack(indexformatng, self._data[index:index + indexsize])
65 65 if i == 0:
66 66 e = list(r)
67 67 type = gettype(e[0])
68 68 e[0] = offset_type(0, type)
69 69 return tuple(e)
70 70 return r
71 71
72 72 class IndexObject(BaseIndexObject):
73 73 def __init__(self, data):
74 74 assert len(data) % indexsize == 0
75 75 self._data = data
76 76 self._lgt = len(data) // indexsize
77 77 self._extra = []
78 78
79 79 def _calculate_index(self, i):
80 80 return i * indexsize
81 81
82 82 def __delitem__(self, i):
83 if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
83 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
84 84 raise ValueError("deleting slices only supports a:-1 with step 1")
85 85 i = self._fix_index(i.start)
86 86 if i < self._lgt:
87 87 self._data = self._data[:i * indexsize]
88 88 self._lgt = i
89 89 self._extra = []
90 90 else:
91 91 self._extra = self._extra[:i - self._lgt]
92 92
93 93 class InlinedIndexObject(BaseIndexObject):
94 94 def __init__(self, data, inline=0):
95 95 self._data = data
96 96 self._lgt = self._inline_scan(None)
97 97 self._inline_scan(self._lgt)
98 98 self._extra = []
99 99
100 100 def _inline_scan(self, lgt):
101 101 off = 0
102 102 if lgt is not None:
103 103 self._offsets = [0] * lgt
104 104 count = 0
105 105 while off <= len(self._data) - indexsize:
106 106 s, = struct.unpack('>i',
107 107 self._data[off + indexfirst:off + sizeint + indexfirst])
108 108 if lgt is not None:
109 109 self._offsets[count] = off
110 110 count += 1
111 111 off += indexsize + s
112 112 if off != len(self._data):
113 113 raise ValueError("corrupted data")
114 114 return count
115 115
116 116 def __delitem__(self, i):
117 if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
117 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
118 118 raise ValueError("deleting slices only supports a:-1 with step 1")
119 119 i = self._fix_index(i.start)
120 120 if i < self._lgt:
121 121 self._offsets = self._offsets[:i]
122 122 self._lgt = i
123 123 self._extra = []
124 124 else:
125 125 self._extra = self._extra[:i - self._lgt]
126 126
127 127 def _calculate_index(self, i):
128 128 return self._offsets[i]
129 129
130 130 def parse_index2(data, inline):
131 131 if not inline:
132 132 return IndexObject(data), None
133 133 return InlinedIndexObject(data, inline), (0, data)
134 134
135 135 def parse_dirstate(dmap, copymap, st):
136 136 parents = [st[:20], st[20: 40]]
137 137 # dereference fields so they will be local in loop
138 138 format = ">cllll"
139 139 e_size = struct.calcsize(format)
140 140 pos1 = 40
141 141 l = len(st)
142 142
143 143 # the inner loop
144 144 while pos1 < l:
145 145 pos2 = pos1 + e_size
146 146 e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
147 147 pos1 = pos2 + e[4]
148 148 f = st[pos2:pos1]
149 149 if '\0' in f:
150 150 f, c = f.split('\0')
151 151 copymap[f] = c
152 152 dmap[f] = e[:4]
153 153 return parents
154 154
155 155 def pack_dirstate(dmap, copymap, pl, now):
156 156 now = int(now)
157 157 cs = stringio()
158 158 write = cs.write
159 159 write("".join(pl))
160 160 for f, e in dmap.iteritems():
161 161 if e[0] == 'n' and e[3] == now:
162 162 # The file was last modified "simultaneously" with the current
163 163 # write to dirstate (i.e. within the same second for file-
164 164 # systems with a granularity of 1 sec). This commonly happens
165 165 # for at least a couple of files on 'update'.
166 166 # The user could change the file without changing its size
167 167 # within the same second. Invalidate the file's mtime in
168 168 # dirstate, forcing future 'status' calls to compare the
169 169 # contents of the file if the size is the same. This prevents
170 170 # mistakenly treating such files as clean.
171 171 e = dirstatetuple(e[0], e[1], e[2], -1)
172 172 dmap[f] = e
173 173
174 174 if f in copymap:
175 175 f = "%s\0%s" % (f, copymap[f])
176 176 e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
177 177 write(e)
178 178 write(f)
179 179 return cs.getvalue()
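To make the dirstate wire format concrete: 40 bytes of parent node ids, then for each file a `>cllll` header (state, mode, size, mtime, entry length) followed by the entry, where a copy source is appended after a NUL. A hedged round-trip sketch against parse_dirstate above (all values invented; Python 2, like the module):

import struct

parents = '\x11' * 20 + '\x22' * 20    # two fake 20-byte node ids
entry = 'copy\x00orig'                 # 'copy' was copied from 'orig'
header = struct.pack('>cllll', 'n', 0o644, 5, 1500000000, len(entry))

dmap, copymap = {}, {}
p1p2 = parse_dirstate(dmap, copymap, parents + header + entry)

assert p1p2 == [parents[:20], parents[20:]]
assert dmap['copy'] == ('n', 0o644, 5, 1500000000)
assert copymap['copy'] == 'orig'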