##// END OF EJS Templates
util: remove md5...
Martin Geisler -
r8296:908c5906 default
parent child Browse files
Show More
@@ -1,661 +1,671 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, write to the
13 13 # Free Software Foundation, Inc.,
14 14 # 59 Temple Place, Suite 330,
15 15 # Boston, MA 02111-1307 USA
16 16
17 17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19 19
20 20 # Modified by Benoit Boissinot:
21 21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
22 22 # Modified by Dirkjan Ochtman:
23 23 # - import md5 function from a local util module
24 # Modified by Martin Geisler:
25 # - moved md5 function from local util module to this module
24 26
25 27 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26 28
27 29 >>> import urllib2
28 30 >>> from keepalive import HTTPHandler
29 31 >>> keepalive_handler = HTTPHandler()
30 32 >>> opener = urllib2.build_opener(keepalive_handler)
31 33 >>> urllib2.install_opener(opener)
32 34 >>>
33 35 >>> fo = urllib2.urlopen('http://www.python.org')
34 36
35 37 If a connection to a given host is requested, and all of the existing
36 38 connections are still in use, another connection will be opened. If
37 39 the handler tries to use an existing connection but it fails in some
38 40 way, it will be closed and removed from the pool.
39 41
40 42 To remove the handler, simply re-run build_opener with no arguments, and
41 43 install that opener.
42 44
43 45 You can explicitly close connections by using the close_connection()
44 46 method of the returned file-like object (described below) or you can
45 47 use the handler methods:
46 48
47 49 close_connection(host)
48 50 close_all()
49 51 open_connections()
50 52
51 53 NOTE: using the close_connection and close_all methods of the handler
52 54 should be done with care when using multiple threads.
53 55 * there is nothing that prevents another thread from creating new
54 56 connections immediately after connections are closed
55 57 * no checks are done to prevent in-use connections from being closed
56 58
57 59 >>> keepalive_handler.close_all()
58 60
59 61 EXTRA ATTRIBUTES AND METHODS
60 62
61 63 Upon a status of 200, the object returned has a few additional
62 64 attributes and methods, which should not be used if you want to
63 65 remain consistent with the normal urllib2-returned objects:
64 66
65 67 close_connection() - close the connection to the host
66 68 readlines() - you know, readlines()
67 69 status - the return status (ie 404)
68 70 reason - english translation of status (ie 'File not found')
69 71
70 72 If you want the best of both worlds, use this inside an
71 73 AttributeError-catching try:
72 74
73 75 >>> try: status = fo.status
74 76 >>> except AttributeError: status = None
75 77
76 78 Unfortunately, these are ONLY there if status == 200, so it's not
77 79 easy to distinguish between non-200 responses. The reason is that
78 80 urllib2 tries to do clever things with error codes 301, 302, 401,
79 81 and 407, and it wraps the object upon return.
80 82
81 83 For python versions earlier than 2.4, you can avoid this fancy error
82 84 handling by setting the module-level global HANDLE_ERRORS to zero.
83 85 You see, prior to 2.4, it's the HTTP Handler's job to determine what
84 86 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
85 87 means "pass everything up". In python 2.4, however, this job no
86 88 longer belongs to the HTTP Handler and is now done by a NEW handler,
87 89 HTTPErrorProcessor. Here's the bottom line:
88 90
89 91 python version < 2.4
90 92 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
91 93 errors
92 94 HANDLE_ERRORS == 0 pass everything up, error processing is
93 95 left to the calling code
94 96 python version >= 2.4
95 97 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
96 98 HANDLE_ERRORS == 0 (default) pass everything up, let the
97 99 other handlers (specifically,
98 100 HTTPErrorProcessor) decide what to do
99 101
100 102 In practice, setting the variable either way makes little difference
101 103 in python 2.4, so for the most consistent behavior across versions,
102 104 you probably just want to use the defaults, which will give you
103 105 exceptions on errors.
104 106
105 107 """
106 108
107 109 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
108 110
109 111 import urllib2
110 112 import httplib
111 113 import socket
112 114 import thread
113 115
# Set to a logger-like object (anything with info()/error(), e.g. the
# FakeLogger in test_timeout below) to trace connection creation and reuse.
DEBUG = None

import sys
# python < 2.4: the handler itself must treat non-200 responses as errors;
# from 2.4 on urllib2's HTTPErrorProcessor does that job (see module docstring)
if sys.version_info < (2, 4): HANDLE_ERRORS = 1
else: HANDLE_ERRORS = 0
119 121
class ConnectionManager:
    """Bookkeeping for the keep-alive connection pool.

    Tracks every pooled connection, which host it belongs to, and
    whether it is currently free ("ready") for re-use.
    """
    def __init__(self):
        self._lock = thread.allocate_lock()
        self._hostmap = {}   # host -> list of connections
        self._connmap = {}   # connection -> host
        self._readymap = {}  # connection -> ready state

    def add(self, host, connection, ready):
        """Register connection under host with the given ready state."""
        self._lock.acquire()
        try:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        """Forget connection entirely; unknown connections are ignored."""
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                # never registered (or already removed) - nothing to do
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        """Mark connection free (truthy) or busy (falsy)."""
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        """Return a free connection to host, marking it busy; else None."""
        found = None
        self._lock.acquire()
        try:
            for candidate in self._hostmap.get(host, []):
                if self._readymap[candidate]:
                    self._readymap[candidate] = 0
                    found = candidate
                    break
        finally:
            self._lock.release()
        return found

    def get_all(self, host=None):
        """Return a copy of host's connection list, or of the whole map."""
        if host:
            return list(self._hostmap.get(host, []))
        return dict(self._hostmap)
179 181
class KeepAliveHandler:
    """Mixin implementing HTTP 1.1 keep-alive for a urllib2 handler.

    Maintains a ConnectionManager pool; do_open() first tries to reuse a
    ready pooled connection to the host and only opens a new one when no
    pooled connection works.  Relies on self.parent (set by urllib2's
    opener machinery) for addheaders and error dispatch.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().iteritems():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and the the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        # drop connection from the pool, optionally closing the socket too
        if close: connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        # urllib2 entry point: open req over a (possibly pooled) connection
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Issue req, reusing a pooled connection when possible.

        Returns the response on status 200 (or always, when HANDLE_ERRORS
        is off); otherwise hands the response to the parent's error chain.
        """
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        try:
            # keep trying pooled connections until one works or none remain;
            # the while/else runs only when the pool is exhausted (h is None)
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r: break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG: DEBUG.info("creating new connection to %s (%d)",
                                     host, id(h))
                # ready=0: busy until _request_closed marks it free again
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        except (socket.error, httplib.HTTPException), err:
            raise urllib2.URLError(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close: self._cm.remove(h)

        if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        # decorate the response so close()/close_connection() can notify us
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        # mimic the attribute layout urllib2 callers expect
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r,
                                     r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except:
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The tradeoff is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG: DEBUG.error("unexpected exception - closing " + \
                                  "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
                                 host, id(h))
            r = None
        else:
            if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix.
        headers = req.headers.copy()
        if sys.version_info >= (2, 4):
            headers.update(req.unredirected_hdrs)
        headers.update(self.parent.addheaders)
        # normalize header names so the duplicate checks below are reliable
        headers = dict((n.lower(), v) for n,v in headers.items())
        skipheaders = {}
        # tell putrequest not to auto-generate headers we already carry
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector(), **skipheaders)
        except (socket.error), err:
            raise urllib2.URLError(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)
339 341
class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    # concrete urllib2 handler: http_open() comes from KeepAliveHandler,
    # the rest of the handler plumbing from urllib2.HTTPHandler
    pass
342 344
class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accomodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        # NOTE: the 'strict' argument is accepted for signature
        # compatibility but never forwarded to httplib
        if method: # the httplib in python 2.3 uses the method arg
            httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
        else: # 2.2 doesn't
            httplib.HTTPResponse.__init__(self, sock, debuglevel)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''        # readline() buffer of already-received bytes
        self._rbufsize = 8096  # how much to pull per raw read in readline()
        self._handler = None # inserted by the handler later
        self._host = None   # (same)
        self._url = None    # (same)
        self._connection = None # (same)

    # keep a reference to the unbuffered read so read()/readline() below
    # can wrap it
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        # close the response body and, if a handler installed itself,
        # hand the connection back to the pool as "ready"
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        # unlike close(), this also drops the underlying connection
        # from the pool and closes its socket
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        # urllib2-compatible accessor for the response headers
        return self.headers

    def geturl(self):
        # urllib2-compatible accessor for the originally requested URL
        return self._url

    def read(self, amt=None):
        # serve from the readline() buffer first, then fall through
        # to the real read for the remainder
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and not amt is None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                # the buffer alone satisfies the request
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        chunk_left = self.chunk_left
        value = ''

        # XXX This accumulates chunks by repeated string concatenation,
        # which is not efficient as the number or size of chunks gets big.
        while True:
            if chunk_left is None:
                # at a chunk boundary: parse the next chunk-size line
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(value)
                if chunk_left == 0:
                    # size 0 marks the last chunk; fall through to trailers
                    break
            if amt is None:
                value += self._safe_read(chunk_left)
            elif amt < chunk_left:
                # caller wants less than this chunk holds: remember the rest
                value += self._safe_read(amt)
                self.chunk_left = chunk_left - amt
                return value
            elif amt == chunk_left:
                value += self._safe_read(amt)
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return value
            else:
                value += self._safe_read(chunk_left)
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return value

    def readline(self, limit=-1):
        # buffered line read; refill _rbuf until it holds a newline,
        # the stream ends, or the limit is reached
        i = self._rbuf.find('\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new: break
            i = new.find('\n')
            if i >= 0: i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0: i = len(self._rbuf)
        else: i = i+1
        if 0 <= limit < len(self._rbuf): i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint = 0):
        # collect lines until EOF, or until at least sizehint bytes
        # have been accumulated (when sizehint is non-zero)
        total = 0
        list = []
        while 1:
            line = self.readline()
            if not line: break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list
495 497
496 498
class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class, so responses can be buffered for
    # readline() and can hand their connection back to the pool on close()
    response_class = HTTPResponse
500 502
501 503 #########################################################################
502 504 ##### TEST FUNCTIONS
503 505 #########################################################################
504 506
def error_handler(url):
    """Manual test: fetch url twice, once with HANDLE_ERRORS on and once
    off, printing the resulting status/reason; restores HANDLE_ERRORS and
    reports any connections still open afterwards."""
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = urllib2.build_opener(keepalive_handler)
    urllib2.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    for i in (0, 1):
        print "  fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
        HANDLE_ERRORS = i
        try:
            fo = urllib2.urlopen(url)
            fo.read()
            fo.close()
            # status/reason only exist on our decorated 200 responses
            try: status, reason = fo.status, fo.reason
            except AttributeError: status, reason = None, None
        except IOError, e:
            print "  EXCEPTION: %s" % e
            raise
        else:
            print "  status = %s, reason = %s" % (status, reason)
    HANDLE_ERRORS = orig
    hosts = keepalive_handler.open_connections()
    print "open connections:", hosts
    keepalive_handler.close_all()
530 532
def md5(s):
    """Return an md5 hash object for s, preferring hashlib.

    The first call resolves the right constructor and rebinds the module
    global 'md5' to it, so subsequent calls skip the import logic.
    """
    try:
        import hashlib
        _md5 = hashlib.md5
    except ImportError:
        from md5 import md5 as _md5
    global md5
    md5 = _md5
    return _md5(s)
541
531 542 def continuity(url):
532 from util import md5
533 543 format = '%25s: %s'
534 544
535 545 # first fetch the file with the normal http handler
536 546 opener = urllib2.build_opener()
537 547 urllib2.install_opener(opener)
538 548 fo = urllib2.urlopen(url)
539 549 foo = fo.read()
540 550 fo.close()
541 551 m = md5.new(foo)
542 552 print format % ('normal urllib', m.hexdigest())
543 553
544 554 # now install the keepalive handler and try again
545 555 opener = urllib2.build_opener(HTTPHandler())
546 556 urllib2.install_opener(opener)
547 557
548 558 fo = urllib2.urlopen(url)
549 559 foo = fo.read()
550 560 fo.close()
551 561 m = md5.new(foo)
552 562 print format % ('keepalive read', m.hexdigest())
553 563
554 564 fo = urllib2.urlopen(url)
555 565 foo = ''
556 566 while 1:
557 567 f = fo.readline()
558 568 if f: foo = foo + f
559 569 else: break
560 570 fo.close()
561 571 m = md5.new(foo)
562 572 print format % ('keepalive readline', m.hexdigest())
563 573
def comp(N, url):
    """Manual test: time N fetches of url with the stock handlers and then
    with the keepalive handler, printing the speed-up factor."""
    print '  making %i connections to:\n  %s' % (N, url)

    sys.stdout.write('  first using the normal urllib handlers')
    # first use normal opener
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    t1 = fetch(N, url)
    print '  TIME: %.3f s' % t1

    sys.stdout.write('  now using the keepalive handler ')
    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)
    t2 = fetch(N, url)
    print '  TIME: %.3f s' % t2
    print '  improvement factor: %.2f' % (t1/t2, )
581 591
def fetch(N, url, delay=0):
    """Fetch url N times in a row and return the elapsed wall-clock time.

    delay, when non-zero, sleeps that many seconds before every fetch
    after the first.  Warns if the fetched bodies differ in length
    between reads (a sign of a corrupted reused connection).
    """
    import time
    lens = []
    starttime = time.time()
    for i in range(N):
        if delay and i > 0: time.sleep(delay)
        fo = urllib2.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    j = 0
    for i in lens[1:]:
        j = j + 1
        if not i == lens[0]:
            print "WARNING: inconsistent length on read %i: %i" % (j, i)

    return diff
601 611
602 612 def test_timeout(url):
603 613 global DEBUG
604 614 dbbackup = DEBUG
605 615 class FakeLogger:
606 616 def debug(self, msg, *args): print msg % args
607 617 info = warning = error = debug
608 618 DEBUG = FakeLogger()
609 619 print " fetching the file to establish a connection"
610 620 fo = urllib2.urlopen(url)
611 621 data1 = fo.read()
612 622 fo.close()
613 623
614 624 i = 20
615 625 print " waiting %i seconds for the server to close the connection" % i
616 626 while i > 0:
617 627 sys.stdout.write('\r %2i' % i)
618 628 sys.stdout.flush()
619 629 time.sleep(1)
620 630 i -= 1
621 631 sys.stderr.write('\r')
622 632
623 633 print " fetching the file a second time"
624 634 fo = urllib2.urlopen(url)
625 635 data2 = fo.read()
626 636 fo.close()
627 637
628 638 if data1 == data2:
629 639 print ' data are identical'
630 640 else:
631 641 print ' ERROR: DATA DIFFER'
632 642
633 643 DEBUG = dbbackup
634 644
635 645
636 646 def test(url, N=10):
637 647 print "checking error hander (do this on a non-200)"
638 648 try: error_handler(url)
639 649 except IOError:
640 650 print "exiting - exception will prevent further tests"
641 651 sys.exit()
642 652 print
643 653 print "performing continuity test (making sure stuff isn't corrupted)"
644 654 continuity(url)
645 655 print
646 656 print "performing speed comparison"
647 657 comp(N, url)
648 658 print
649 659 print "performing dropped-connection check"
650 660 test_timeout(url)
651 661
652 662 if __name__ == '__main__':
653 663 import time
654 664 import sys
655 665 try:
656 666 N = int(sys.argv[1])
657 667 url = sys.argv[2]
658 668 except:
659 669 print "%s <integer> <url>" % sys.argv[0]
660 670 else:
661 671 test(url, N)
@@ -1,1481 +1,1471 b''
1 1 # util.py - Mercurial utility functions and platform specfic implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2, incorporated herein by reference.
9 9
10 10 """Mercurial utility functions and platform specfic implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from i18n import _
17 17 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error
18 18 import os, stat, threading, time, calendar, glob, osutil, random
19 19 import imp
20 20
21 21 # Python compatibility
22 22
23 def md5(s):
24 try:
25 import hashlib
26 _md5 = hashlib.md5
27 except ImportError:
28 from md5 import md5 as _md5
29 global md5
30 md5 = _md5
31 return _md5(s)
32
def sha1(s):
    """Return a sha1 hash object for s, preferring hashlib.

    The first call rebinds the module global 'sha1' to the real
    constructor, so the import fallback runs at most once.
    """
    try:
        from hashlib import sha1 as _sha1
    except ImportError:
        from sha import sha as _sha1
    global sha1
    sha1 = _sha1
    return _sha1(s)
42 32
import subprocess
# leaking inherited file descriptors is only safe to prevent on POSIX
closefds = os.name == 'posix'

def popen2(cmd, mode='t', bufsize=-1):
    """Run cmd through the shell; return its (stdin, stdout) pipes."""
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    return proc.stdin, proc.stdout

def popen3(cmd, mode='t', bufsize=-1):
    """Run cmd through the shell; return (stdin, stdout, stderr) pipes."""
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    return proc.stdin, proc.stdout, proc.stderr

def Popen3(cmd, capturestderr=False, bufsize=-1):
    """popen2.Popen3 look-alike built on subprocess.Popen."""
    if capturestderr:
        stderr = subprocess.PIPE
    else:
        stderr = None
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            stderr=stderr)
    # legacy popen2-style attribute aliases
    proc.fromchild = proc.stdout
    proc.tochild = proc.stdin
    proc.childerr = proc.stderr
    return proc
66 56
def version():
    """Return version information if available."""
    try:
        import __version__
    except ImportError:
        # no generated version module: not an installed build
        return 'unknown'
    return __version__.version
74 64
# used by parsedate
defaultdateformats = (
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p', # was garbled as '%I:%M:%SP'; every 12-hour form ends in %p
    '%H:%M',
    '%I:%M%p',
    )

# coarser formats additionally accepted where more latitude is allowed
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
    )
109 99
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    memo = {}
    if func.func_code.co_argcount == 1:
        # single-argument fast path: no tuple packing/unpacking needed
        def f(arg):
            try:
                return memo[arg]
            except KeyError:
                memo[arg] = func(arg)
                return memo[arg]
    else:
        def f(*args):
            try:
                return memo[args]
            except KeyError:
                memo[args] = func(*args)
                return memo[args]
    return f
128 118
class propertycache(object):
    """Descriptor that computes a value once per instance and caches it.

    On first access the wrapped function runs and its result is stored on
    the instance under the same name, shadowing this descriptor so later
    lookups hit the instance dict directly.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        setattr(obj, self.name, value)
        return value
137 127
138 128 def pipefilter(s, cmd):
139 129 '''filter string S through command CMD, returning its output'''
140 130 (pin, pout) = popen2(cmd, 'b')
141 131 def writer():
142 132 try:
143 133 pin.write(s)
144 134 pin.close()
145 135 except IOError, inst:
146 136 if inst.errno != errno.EPIPE:
147 137 raise
148 138
149 139 # we should use select instead on UNIX, but this will work on most
150 140 # systems, including Windows
151 141 w = threading.Thread(target=writer)
152 142 w.start()
153 143 f = pout.read()
154 144 pout.close()
155 145 w.join()
156 146 return f
157 147
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname = None
    outname = None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        infile = os.fdopen(infd, 'wb')
        infile.write(s)
        infile.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if sys.platform == 'OpenVMS' and code & 1:
            # on OpenVMS, odd status values mean success
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explain_exit(code)))
        return open(outname, 'rb').read()
    finally:
        # best-effort cleanup; never let an unlink failure mask the result
        try:
            if inname: os.unlink(inname)
        except: pass
        try:
            if outname: os.unlink(outname)
        except: pass
186 176
# maps a command-string prefix to the filter implementation handling it;
# consulted by filter() below
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
191 181
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, fn in filtertable.iteritems():
        if cmd.startswith(prefix):
            # strip the prefix and surrounding whitespace, then dispatch
            return fn(s, cmd[len(prefix):].lstrip())
    # no recognized prefix: default to the pipe filter
    return pipefilter(s, cmd)
198 188
def binary(s):
    """return true if a string is binary data"""
    # empty (or None) input is never considered binary
    if not s:
        return False
    return '\0' in s
202 192
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def _floorlog2(x):
        # floor(log2(x)) for x > 0; 0 when x == 0
        if not x:
            return 0
        bits = -1
        while x:
            x >>= 1
            bits += 1
        return bits

    pending = []
    size = 0
    for piece in source:
        pending.append(piece)
        size += len(piece)
        if size >= min:
            if min < max:
                # grow the threshold: at least double it, jumping further
                # when the chunk just emitted was already bigger than that
                min = min << 1
                nmin = 1 << _floorlog2(size)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(pending)
            size = 0
            pending = []
    if pending:
        # flush whatever is left, even if smaller than min
        yield ''.join(pending)
233 223
# convenience alias: util.Abort is the same class as error.Abort
Abort = error.Abort
235 225
def always(fn):
    """Predicate that is true for any input."""
    return True

def never(fn):
    """Predicate that is false for any input."""
    return False
238 228
def patkind(name, default):
    """Split a string into an optional pattern kind prefix and the
    actual pattern.

    Returns a (kind, pattern) pair; kind falls back to default when name
    carries no recognized 'kind:' prefix.
    """
    for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
        if name.startswith(prefix + ':'):
            # return a tuple here as well; the old code returned a list in
            # this branch and a tuple in the other, which was inconsistent
            kind, pat = name.split(':', 1)
            return kind, pat
    return default, name
245 235
def globre(pat, head='^', tail='$'):
    "convert a glob pattern into a regexp"
    i, n = 0, len(pat)
    res = ''
    group = 0  # depth of open {...} alternation groups
    # closure reads the *current* i, so it always peeks past the last
    # character consumed below
    def peek(): return i < n and pat[i]
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            if peek() == '*':
                # '**' crosses directory boundaries
                i += 1
                res += '.*'
            else:
                # single '*' stops at '/'
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # scan for the matching ']'; a leading '!' or ']' is part of
            # the class and must not terminate the scan
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # a literal leading '^' must be escaped
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a {...} group
            res += '|'
        elif c == '\\':
            # backslash escapes the next character, if any
            p = peek()
            if p:
                i += 1
                res += re.escape(p)
            else:
                res += re.escape(c)
        else:
            res += re.escape(c)
    return head + res + tail
297 287
# characters whose presence marks a pattern component as a glob
# (consulted by contains_glob inside matcher below)
_globchars = {'[': 1, '{': 1, '*': 1, '?': 1}
299 289
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1: return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives (windows): no relative path exists,
            # fall back to an absolute path under root
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    # strip the common leading components, then climb out of what is
    # left of n1 with '..' and descend into the remainder of n2
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return os.sep.join((['..'] * len(a)) + b) or '.'
324 314
def canonpath(root, cwd, myname):
    """return the canonical path of myname, given cwd and root

    The result is relative to root, uses '/' separators, and has been
    vetted by path_auditor.  Raises Abort when myname lies outside root.
    """
    # normalize root so it always ends with exactly one os.sep
    if root == os.sep:
        rootsep = os.sep
    elif endswithsep(root):
        rootsep = root
    else:
        rootsep = root + os.sep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    audit_path = path_auditor(root)
    if name != rootsep and name.startswith(rootsep):
        # fast path: name is textually under root
        name = name[len(rootsep):]
        audit_path(name)
        return pconvert(name)
    elif name == root:
        return ''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). For each
        # `name', compare dev/inode numbers. If they match, the list `rel'
        # holds the reversed list of components making up the relative file
        # name we want.
        root_st = os.stat(root)
        rel = []
        while True:
            try:
                name_st = os.stat(name)
            except OSError:
                break
            if samestat(name_st, root_st):
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return ''
                rel.reverse()
                name = os.path.join(*rel)
                audit_path(name)
                return pconvert(name)
            dirname, basename = os.path.split(name)
            rel.append(basename)
            if dirname == name:
                # reached the filesystem root without meeting `root'
                break
            name = dirname

        raise Abort('%s not under root' % myname)
373 363
def matcher(canonroot, cwd='', names=[], inc=[], exc=[], src=None, dflt_pat='glob'):
    """build a function to match a set of file patterns

    arguments:
    canonroot - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    names - patterns to find
    inc - patterns to include
    exc - patterns to exclude
    dflt_pat - if a pattern in names has no explicit type, assume this one
    src - where these patterns came from (e.g. .hgignore)

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to canonroot
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that doesn't have to match the start of a name
    '<something>' - one of the cases above, selected by the dflt_pat argument

    returns:
    a 3-tuple containing
    - list of roots (places where one should start a recursive walk of the fs);
      this often matches the explicit non-pattern names passed in, but also
      includes the initial part of glob: patterns that has no glob characters
    - a bool match(filename) function
    - a bool indicating if any patterns were passed in
    """

    # a common case: no patterns at all
    if not names and not inc and not exc:
        return [], always, False

    def contains_glob(name):
        # True when any character of name is a glob metacharacter
        for c in name:
            if c in _globchars: return True
        return False

    def regex(kind, name, tail):
        '''convert a pattern into a regular expression'''
        if not name:
            return ''
        if kind == 're':
            return name
        elif kind == 'path':
            return '^' + re.escape(name) + '(?:/|$)'
        elif kind == 'relglob':
            return globre(name, '(?:|.*/)', tail)
        elif kind == 'relpath':
            return re.escape(name) + '(?:/|$)'
        elif kind == 'relre':
            if name.startswith('^'):
                return name
            return '.*' + name
        return globre(name, '', tail)

    def matchfn(pats, tail):
        """build a matching function from a set of patterns"""
        if not pats:
            return
        try:
            # join all patterns into one big alternation
            pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
            if len(pat) > 20000:
                raise OverflowError()
            return re.compile(pat).match
        except OverflowError:
            # We're using a Python with a tiny regex engine and we
            # made it explode, so we'll divide the pattern list in two
            # until it works
            l = len(pats)
            if l < 2:
                raise
            a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
            return lambda s: a(s) or b(s)
        except re.error:
            # recompile one pattern at a time to report which one is broken
            for k, p in pats:
                try:
                    re.compile('(?:%s)' % regex(k, p, tail))
                except re.error:
                    if src:
                        raise Abort("%s: invalid pattern (%s): %s" %
                                    (src, k, p))
                    else:
                        raise Abort("invalid pattern (%s): %s" % (k, p))
            raise Abort("invalid pattern")

    def globprefix(pat):
        '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
        root = []
        for p in pat.split('/'):
            if contains_glob(p): break
            root.append(p)
        return '/'.join(root) or '.'

    def normalizepats(names, default):
        # resolve each pattern's kind, canonicalize its path part, and
        # collect the walk roots it implies
        pats = []
        roots = []
        anypats = False
        for kind, name in [patkind(p, default) for p in names]:
            if kind in ('glob', 'relpath'):
                name = canonpath(canonroot, cwd, name)
            elif kind in ('relglob', 'path'):
                name = normpath(name)

            pats.append((kind, name))

            if kind in ('glob', 're', 'relglob', 'relre'):
                anypats = True

            if kind == 'glob':
                root = globprefix(name)
                roots.append(root)
            elif kind in ('relpath', 'path'):
                roots.append(name or '.')
            elif kind == 'relglob':
                roots.append('.')
        return roots, pats, anypats

    roots, pats, anypats = normalizepats(names, dflt_pat)

    # names must match exactly ('$'); inc/exc also match any sub-path
    patmatch = matchfn(pats, '$') or always
    incmatch = always
    if inc:
        dummy, inckinds, dummy = normalizepats(inc, 'glob')
        incmatch = matchfn(inckinds, '(?:/|$)')
    excmatch = never
    if exc:
        dummy, exckinds, dummy = normalizepats(exc, 'glob')
        excmatch = matchfn(exckinds, '(?:/|$)')

    if not names and inc and not exc:
        # common case: hgignore patterns
        match = incmatch
    else:
        match = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)

    return (roots, match, (inc or exc or anypats) and True)
512 502
# cached location of the 'hg' executable; filled in lazily by hgexecutable()
_hgexecutable = None
514 504
def main_is_frozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if hasattr(sys, "frozen"):     # new py2exe
        return True
    if hasattr(sys, "importers"):  # old py2exe
        return True
    return imp.is_frozen("__main__")  # tools/freeze
524 514
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The answer is computed once and cached in the module-level
    _hgexecutable; later calls return the cached value.
    """
    if _hgexecutable is None:
        hg = os.environ.get('HG')
        if hg:
            set_hgexecutable(hg)
        elif main_is_frozen():
            # frozen binaries: the interpreter *is* the hg executable
            set_hgexecutable(sys.executable)
        else:
            set_hgexecutable(find_exe('hg') or 'hg')
    return _hgexecutable
539 529
def set_hgexecutable(path):
    """set location of the 'hg' executable"""
    # updates the module-level cache read by hgexecutable()
    global _hgexecutable
    _hgexecutable = path
544 534
def system(cmd, environ={}, cwd=None, onerr=None, errprefix=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if command fails and onerr is None, return status. if ui object,
    print error message and return status, else raise onerr object as
    exception.

    environ is only read, never mutated; the process environment and
    working directory are restored before returning.'''
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val in (None, False):
            return '0'
        if val == True:
            return '1'
        return str(val)
    # remember the pre-call values of every variable we are about to set,
    # so the finally block can restore them exactly
    oldenv = {}
    for k in environ:
        oldenv[k] = os.environ.get(k)
    if cwd is not None:
        oldcwd = os.getcwd()
    origcmd = cmd
    if os.name == 'nt':
        # cmd.exe needs the whole command requoted
        cmd = '"%s"' % cmd
    try:
        for k, v in environ.iteritems():
            os.environ[k] = py2shell(v)
        os.environ['HG'] = hgexecutable()
        if cwd is not None and oldcwd != cwd:
            os.chdir(cwd)
        rc = os.system(cmd)
        if sys.platform == 'OpenVMS' and rc & 1:
            # OpenVMS status bit 0 set means success
            rc = 0
        if rc and onerr:
            errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
                                explain_exit(rc)[0])
            if errprefix:
                errmsg = '%s: %s' % (errprefix, errmsg)
            try:
                # onerr may be a ui object (has .warn) ...
                onerr.warn(errmsg + '\n')
            except AttributeError:
                # ... or an exception class to raise
                raise onerr(errmsg)
        return rc
    finally:
        # restore environment and cwd no matter how the command ended
        for k, v in oldenv.iteritems():
            if v is None:
                del os.environ[k]
            else:
                os.environ[k] = v
        if cwd is not None and oldcwd != cwd:
            os.chdir(oldcwd)
594 584
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # a traceback of depth one means the TypeError came from the
            # call itself (bad signature), not from inside func
            frames = traceback.extract_tb(sys.exc_info()[2])
            if len(frames) != 1:
                raise
            raise error.SignatureError
    return check
606 596
# os.path.lexists is not available on python2.3
def lexists(filename):
    """test whether a file with this name exists. does not follow symlinks"""
    try:
        os.lstat(filename)
    # narrowed from a bare except: only OS-level lookup failures mean
    # "does not exist"; KeyboardInterrupt/SystemExit and real bugs propagate
    except OSError:
        return False
    return True
615 605
def rename(src, dst):
    """forcibly rename a file, replacing dst if it already exists"""
    try:
        os.rename(src, dst)
    except OSError, err: # FIXME: check err (EEXIST ?)

        # On windows, rename to existing file is not allowed, so we
        # must delete destination first. But if a file is open, unlink
        # schedules it for delete but does not delete it. Rename
        # happens immediately even for open files, so we rename
        # destination to a temporary name, then delete that. Then
        # rename is safe to do.
        # The temporary name is chosen at random to avoid the situation
        # where a file is left lying around from a previous aborted run.
        # The usual race condition this introduces can't be avoided as
        # we need the name to rename into, and not the file itself. Due
        # to the nature of the operation however, any races will at worst
        # lead to the rename failing and the current operation aborting.

        def tempname(prefix):
            # pick an unused random suffix; give up after 10 attempts
            for tries in xrange(10):
                temp = '%s-%08x' % (prefix, random.randint(0, 0xffffffff))
                if not os.path.exists(temp):
                    return temp
            raise IOError, (errno.EEXIST, "No usable temporary filename found")

        temp = tempname(dst)
        os.rename(dst, temp)
        os.unlink(temp)
        os.rename(src, dst)
646 636
def unlink(f):
    """unlink and remove the directory if it is empty"""
    os.unlink(f)
    parent = os.path.dirname(f)
    # os.removedirs walks upward deleting empty directories and raises
    # OSError as soon as it reaches a non-empty one -- the normal stop case
    try:
        os.removedirs(parent)
    except OSError:
        pass
655 645
656 646 def copyfile(src, dest):
657 647 "copy a file, preserving mode and atime/mtime"
658 648 if os.path.islink(src):
659 649 try:
660 650 os.unlink(dest)
661 651 except:
662 652 pass
663 653 os.symlink(os.readlink(src), dest)
664 654 else:
665 655 try:
666 656 shutil.copyfile(src, dest)
667 657 shutil.copystat(src, dest)
668 658 except shutil.Error, inst:
669 659 raise Abort(str(inst))
670 660
def copyfiles(src, dst, hardlink=None):
    """Copy a directory tree using hardlinks if possible

    hardlink=None means "decide automatically": hardlink when src and
    dst live on the same device."""

    if hardlink is None:
        hardlink = (os.stat(src).st_dev ==
                    os.stat(os.path.dirname(dst)).st_dev)

    if os.path.isdir(src):
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            copyfiles(srcname, dstname, hardlink)
    else:
        if hardlink:
            try:
                os_link(src, dst)
            except (IOError, OSError):
                # linking failed once: fall back to plain copies for the
                # rest of this tree as well
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
693 683
class path_auditor(object):
    '''ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository'''

    def __init__(self, root):
        # caches of already-vetted paths/directories, to keep repeated
        # audits of the same tree cheap
        self.audited = set()
        self.auditeddir = set()
        self.root = root

    def __call__(self, path):
        if path in self.audited:
            return
        normpath = os.path.normcase(path)
        parts = splitpath(normpath)
        if (os.path.splitdrive(path)[0]
            or parts[0].lower() in ('.hg', '.hg.', '')
            or os.pardir in parts):
            raise Abort(_("path contains illegal component: %s") % path)
        if '.hg' in path.lower():
            # cheap substring test passed; now look for a real .hg component
            lparts = [p.lower() for p in parts]
            for p in '.hg', '.hg.':
                if p in lparts[1:]:
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise Abort(_('path %r is inside repo %r') % (path, base))
        def check(prefix):
            # reject prefix if it is a symlink or a nested repository root
            curpath = os.path.join(self.root, prefix)
            try:
                st = os.lstat(curpath)
            except OSError, err:
                # EINVAL can be raised as invalid path syntax under win32.
                # They must be ignored for patterns can be checked too.
                if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                    raise
            else:
                if stat.S_ISLNK(st.st_mode):
                    raise Abort(_('path %r traverses symbolic link %r') %
                                (path, prefix))
                elif (stat.S_ISDIR(st.st_mode) and
                      os.path.isdir(os.path.join(curpath, '.hg'))):
                    raise Abort(_('path %r is inside repo %r') %
                                (path, prefix))
        # check every ancestor directory of path, deepest first
        parts.pop()
        prefixes = []
        for n in range(len(parts)):
            prefix = os.sep.join(parts)
            if prefix in self.auditeddir:
                break
            check(prefix)
            prefixes.append(prefix)
            parts.pop()

        self.audited.add(path)
        # only add prefixes to the cache after checking everything: we don't
        # want to add "foo/bar/baz" before checking if there's a "foo/.hg"
        self.auditeddir.update(prefixes)
756 746
def nlinks(pathname):
    """Return number of hardlinks for the given file."""
    st = os.lstat(pathname)
    return st.st_nlink
760 750
# use the platform's real hardlink call when available ...
if hasattr(os, 'link'):
    os_link = os.link
else:
    # ... otherwise fail loudly instead of silently copying
    def os_link(src, dst):
        raise OSError(0, _("Hardlinks not supported"))
766 756
def lookup_reg(key, name=None, scope=None):
    """Registry lookup placeholder that always answers None.
    (Presumably replaced by a real implementation via the platform
    star-import below on Windows -- confirm against the windows module.)"""
    return None
769 759
# pull in the platform-specific implementations (posixfile, find_exe,
# explain_exit, pconvert, localpath, normpath, samestat, umask, ...)
if os.name == 'nt':
    from windows import *
    def expand_glob(pats):
        '''On Windows, expand the implicit globs in a list of patterns'''
        ret = []
        for p in pats:
            kind, name = patkind(p, None)
            if kind is None:
                # untyped pattern: let the filesystem expand it
                globbed = glob.glob(name)
                if globbed:
                    ret.extend(globbed)
                    continue
            # if we couldn't expand the glob, just keep it around
            ret.append(p)
        return ret
else:
    from posix import *
787 777
def makelock(info, pathname):
    """create a lock file at pathname containing info

    Prefers an atomic symlink; falls back to an exclusively-created
    regular file when symlinks are unavailable."""
    try:
        return os.symlink(info, pathname)
    except OSError, why:
        if why.errno == errno.EEXIST:
            # lock already held: propagate
            raise
    except AttributeError: # no symlink in os
        pass

    # fallback: O_EXCL guarantees we do not clobber an existing lock
    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(ld, info)
    os.close(ld)
800 790
def readlock(pathname):
    """read the info stored in a lock file created by makelock"""
    try:
        return os.readlink(pathname)
    except OSError, why:
        # EINVAL/ENOSYS: not a symlink / symlinks unsupported -- the lock
        # must then be a regular file; anything else is a real error
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    return posixfile(pathname).read()
810 800
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    fileno = getattr(fp, 'fileno', None)
    if fileno is None:
        # no file descriptor available: stat by name instead
        return os.stat(fp.name)
    return os.fstat(fileno())
817 807
818 808 # File system features
819 809
def checkcase(path):
    """
    Check whether the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.stat(path)
    d, b = os.path.split(path)
    # probe the same name with flipped case
    p2 = os.path.join(d, b.upper())
    if path == p2:
        p2 = os.path.join(d, b.lower())
    try:
        s2 = os.stat(p2)
        if s2 == s1:
            # flipped-case name resolves to the same file: insensitive
            return False
        return True
    # narrowed from a bare except: a missing flipped-case entry raises
    # OSError and means the filesystem distinguishes case
    except OSError:
        return True
839 829
840 830 _fspathcache = {}
841 831 def fspath(name, root):
842 832 '''Get name in the case stored in the filesystem
843 833
844 834 The name is either relative to root, or it is an absolute path starting
845 835 with root. Note that this function is unnecessary, and should not be
846 836 called, for case-sensitive filesystems (simply because it's expensive).
847 837 '''
848 838 # If name is absolute, make it relative
849 839 if name.lower().startswith(root.lower()):
850 840 l = len(root)
851 841 if name[l] == os.sep or name[l] == os.altsep:
852 842 l = l + 1
853 843 name = name[l:]
854 844
855 845 if not os.path.exists(os.path.join(root, name)):
856 846 return None
857 847
858 848 seps = os.sep
859 849 if os.altsep:
860 850 seps = seps + os.altsep
861 851 # Protect backslashes. This gets silly very quickly.
862 852 seps.replace('\\','\\\\')
863 853 pattern = re.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
864 854 dir = os.path.normcase(os.path.normpath(root))
865 855 result = []
866 856 for part, sep in pattern.findall(name):
867 857 if sep:
868 858 result.append(sep)
869 859 continue
870 860
871 861 if dir not in _fspathcache:
872 862 _fspathcache[dir] = os.listdir(dir)
873 863 contents = _fspathcache[dir]
874 864
875 865 lpart = part.lower()
876 866 for n in contents:
877 867 if n.lower() == lpart:
878 868 result.append(n)
879 869 break
880 870 else:
881 871 # Cannot happen, as the file exists!
882 872 result.append(part)
883 873 dir = os.path.join(dir, lpart)
884 874
885 875 return ''.join(result)
886 876
def checkexec(path):
    """
    Check whether the given path is on a filesystem with UNIX-like exec flags

    Requires a directory (like /foo/.hg)
    """

    # VFAT on some Linux versions can flip mode but it doesn't persist
    # a FS remount. Frequently we can detect it if files are created
    # with exec bit on.

    try:
        EXECFLAGS = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        fh, fn = tempfile.mkstemp("", "", path)
        try:
            os.close(fh)
            m = os.stat(fn).st_mode & 0777
            # a fresh file should not come up executable ...
            new_file_has_exec = m & EXECFLAGS
            # ... and flipping the exec bits should actually stick
            os.chmod(fn, m ^ EXECFLAGS)
            exec_flags_cannot_flip = ((os.stat(fn).st_mode & 0777) == m)
        finally:
            os.unlink(fn)
    except (IOError, OSError):
        # we don't care, the user probably won't be able to commit anyway
        return False
    return not (new_file_has_exec or exec_flags_cannot_flip)
913 903
def checklink(path):
    """check whether the given path is on a symlink-capable filesystem"""
    # mktemp is acceptable here: if another process grabs the name first,
    # os.symlink simply fails and we report False, which is safe
    probe = tempfile.mktemp(dir=path)
    try:
        os.symlink(".", probe)
        os.unlink(probe)
    except (OSError, AttributeError):
        # OSError: filesystem refused; AttributeError: no os.symlink at all
        return False
    return True
925 915
def needbinarypatch():
    """return True if patches should be applied in binary mode by default."""
    # only the Windows port needs binary-mode patching
    if os.name == 'nt':
        return True
    return False
929 919
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.

    Returns a truthy/falsy value, not necessarily a bool (None when the
    primary separator does not match and os.altsep is unset).'''
    if path.endswith(os.sep):
        return True
    return os.altsep and path.endswith(os.altsep)
933 923
def splitpath(path):
    '''Split path by os.sep.

    os.altsep is deliberately ignored: this is just a readable spelling
    of "path.split(os.sep)".  Run os.path.normpath() on the input first
    if normalization is needed.'''
    sep = os.sep
    return path.split(sep)
941 931
def gui():
    '''Are we running in a GUI?

    Truthy on Windows and classic Mac, otherwise whatever $DISPLAY holds
    (a string, or None when unset).'''
    if os.name == "nt" or os.name == "mac":
        return True
    return os.environ.get("DISPLAY")
945 935
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    try:
        st_mode = os.lstat(name).st_mode & 0777
    except OSError, inst:
        if inst.errno != errno.ENOENT:
            raise
        st_mode = createmode
        if st_mode is None:
            st_mode = ~umask
        st_mode &= 0666
    os.chmod(temp, st_mode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError, inst:
            if inst.errno == errno.ENOENT:
                # nothing to copy: the empty temp file is the right answer
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except:
        # copy failed for any reason: best-effort cleanup, then re-raise
        try: os.unlink(temp)
        except: pass
        raise
    return temp
993 983
class atomictempfile(posixfile):
    """file-like object that atomically updates a file

    All writes will be redirected to a temporary copy of the original
    file. When rename is called, the copy is renamed to the original
    name, making the changes visible.
    """
    def __init__(self, name, mode, createmode):
        # __name is name-mangled to _atomictempfile__name: the target path
        self.__name = name
        self.temp = mktempcopy(name, emptyok=('w' in mode),
                               createmode=createmode)
        posixfile.__init__(self, self.temp, mode)

    def rename(self):
        # commit: close and move the temp copy over the real name
        if not self.closed:
            posixfile.close(self)
            rename(self.temp, localpath(self.__name))

    def __del__(self):
        # never renamed: abandon the temp copy instead of committing it
        if not self.closed:
            try:
                os.unlink(self.temp)
            except: pass
            posixfile.close(self)
1018 1008
def makedirs(name, mode=None):
    """recursive directory creation with parent mode inheritance

    Like os.makedirs, but applies mode (when given) to every directory
    created, and treats an already-existing directory as success."""
    try:
        os.mkdir(name)
        if mode is not None:
            os.chmod(name, mode)
        return
    except OSError, err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT:
            raise
    # missing parent: create it first, then retry this level
    parent = os.path.abspath(os.path.dirname(name))
    makedirs(parent, mode)
    makedirs(name, mode)
1034 1024
class opener(object):
    """Open files relative to a base directory

    This class is used to hide the details of COW semantics and
    remote file access from higher level code.
    """
    def __init__(self, base, audit=True):
        self.base = base
        if audit:
            self.audit_path = path_auditor(base)
        else:
            # always() accepts anything, disabling path auditing
            self.audit_path = always
        self.createmode = None

    def __getattr__(self, name):
        # lazily probe the filesystem for symlink support on first use
        if name == '_can_symlink':
            self._can_symlink = checklink(self.base)
            return self._can_symlink
        raise AttributeError(name)

    def _fixfilemode(self, name):
        # apply the configured creation mode (minus exec bits) to name
        if self.createmode is None:
            return
        os.chmod(name, self.createmode & 0666)

    def __call__(self, path, mode="r", text=False, atomictemp=False):
        self.audit_path(path)
        f = os.path.join(self.base, path)

        if not text and "b" not in mode:
            mode += "b" # for that other OS

        # nlink: -1 unknown, 0 file was (re)created, >1 file is hardlinked
        nlink = -1
        if mode not in ("r", "rb"):
            try:
                nlink = nlinks(f)
            except OSError:
                nlink = 0
                d = os.path.dirname(f)
                if not os.path.isdir(d):
                    makedirs(d, self.createmode)
            if atomictemp:
                return atomictempfile(f, mode, self.createmode)
            if nlink > 1:
                # break the hardlink before writing (copy-on-write)
                rename(mktempcopy(f), f)
        fp = posixfile(f, mode)
        if nlink == 0:
            self._fixfilemode(f)
        return fp

    def symlink(self, src, dst):
        self.audit_path(dst)
        linkname = os.path.join(self.base, dst)
        try:
            os.unlink(linkname)
        except OSError:
            pass

        dirname = os.path.dirname(linkname)
        if not os.path.exists(dirname):
            makedirs(dirname, self.createmode)

        if self._can_symlink:
            try:
                os.symlink(src, linkname)
            except OSError, err:
                raise OSError(err.errno, _('could not symlink to %r: %s') %
                              (src, err.strerror), linkname)
        else:
            # no symlink support: store the link target as file contents
            f = self(dst, "w")
            f.write(src)
            f.close()
            self._fixfilemode(dst)
1108 1098
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.
        targetsize is how big a buffer to try to maintain."""
        self.iter = iter(in_iter)
        self.buf = ''
        self.targetsize = 2**16

    def read(self, l):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry."""
        if l > len(self.buf) and self.iter:
            # Clamp to a multiple of self.targetsize
            targetsize = max(l, self.targetsize)
            collector = cStringIO.StringIO()
            collector.write(self.buf)
            collected = len(self.buf)
            for chunk in self.iter:
                collector.write(chunk)
                collected += len(chunk)
                if collected >= targetsize:
                    break
            if collected < targetsize:
                # the source ran dry; mark it exhausted (False is falsy,
                # so later reads skip this refill branch entirely)
                self.iter = False
            self.buf = collector.getvalue()
        if len(self.buf) == l:
            s, self.buf = str(self.buf), ''
        else:
            # keep the remainder without copying it (buffer object)
            s, self.buf = self.buf[:l], buffer(self.buf, l)
        return s
1142 1132
def filechunkiter(f, size=65536, limit=None):
    """Create a generator that produces the data in the file size
    (default 65536) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        # nbytes == 0 short-circuits: never issue a zero-length read
        chunk = nbytes and f.read(nbytes)
        if not chunk:
            break
        if limit:
            limit -= len(chunk)
        yield chunk
1159 1149
def makedate():
    """Return the current local time as a (unixtime, tzoffset) pair,
    where tzoffset is seconds west of UTC."""
    now = time.localtime()
    # index 8 of the struct is tm_isdst; pick the matching zone offset
    if now[8] == 1 and time.daylight:
        offset = time.altzone
    else:
        offset = time.timezone
    return time.mktime(now), offset
1167 1157
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC. if timezone is false, do not
    append time zone to string."""
    t, tz = date or makedate()
    if "%1" in format or "%2" in format:
        # %1 expands to the signed hours, %2 to the minutes of the offset;
        # positive offsets are west of UTC and therefore display as '-'
        if tz > 0:
            sign = "-"
        else:
            sign = "+"
        minutes = abs(tz) / 60
        format = format.replace("%1", "%c%02d" % (sign, minutes / 60))
        format = format.replace("%2", "%02d" % (minutes % 60))
    return time.strftime(format, time.gmtime(float(t) - tz))
1181 1171
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into iso 8631 date."""
    # delegate to datestr with a date-only format string
    fmt = '%Y-%m-%d'
    return datestr(date, format=fmt)
1185 1175
def strdate(string, format, defaults=[]):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised.

    defaults maps strptime field groups (e.g. 'HI', 'mb') to fallback
    string values appended when the format lacks that field."""
    def timezone(string):
        # recognize a trailing numeric (+HHMM/-HHMM) or symbolic (GMT/UTC)
        # zone token; return its offset in seconds west of UTC, else None
        tz = string.split()[-1]
        if tz[0] in "+-" and len(tz) == 5 and tz[1:].isdigit():
            sign = (tz[0] == "+") and 1 or -1
            hours = int(tz[1:3])
            minutes = int(tz[3:5])
            return -sign * (hours * 60 + minutes) * 60
        if tz == "GMT" or tz == "UTC":
            return 0
        return None

    # NOTE: unixtime = localunixtime + offset
    offset, date = timezone(string), string
    # identity comparison (was `offset != None`): 0 is a valid offset
    # and must not be confused with "no timezone found"
    if offset is not None:
        date = " ".join(string.split()[:-1])

    # add missing elements from defaults
    for part in defaults:
        found = [True for p in part if ("%"+p) in format]
        if not found:
            date += "@" + defaults[part]
            format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # no explicit zone in the input: interpret in the local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
1221 1211
def parsedate(date, formats=None, defaults=None):
    """parse a localized date/time string and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.
    """
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()
    try:
        # fast path: "unixtime offset" with both fields numeric
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        if not defaults:
            defaults = {}
        now = makedate()
        # time-of-day fields default to "00", date fields to "now"
        for part in "d mb yY HI M S".split():
            if part not in defaults:
                if part[0] in "HMS":
                    defaults[part] = "00"
                else:
                    defaults[part] = datestr(now, "%" + part[0])

        # try each candidate format until one parses
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise Abort(_('invalid date: %r ') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if abs(when) > 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
1267 1257
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    """

    def lower(date):
        # earliest instant compatible with the partial date
        d = dict(mb="1", d="1")
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest instant compatible with the partial date; probe month
        # lengths from longest to shortest until one parses
        d = dict(mb="12", HI="23", M="59", S="59")
        for days in "31 30 29".split():
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Exception:
                # was a bare "except:", which also swallowed
                # KeyboardInterrupt/SystemExit; a nonexistent day
                # (e.g. Feb 30) raises ValueError or Abort, so
                # Exception is wide enough
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()
    if date[0] == "<":
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        # "-N": everything in the last N days
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # inclusive range between two dates
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        # a single date matches anywhere within its span
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
1317 1307
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop the domain part of an email address
    at = user.find('@')
    if at >= 0:
        user = user[:at]
    # keep only what follows a '<' (discard the real-name portion)
    lt = user.find('<')
    if lt >= 0:
        user = user[lt + 1:]
    # truncate at the first space, then at the first dot
    for sep in (' ', '.'):
        idx = user.find(sep)
        if idx >= 0:
            user = user[:idx]
    return user
1333 1323
def email(author):
    '''get email of author.'''
    # slice out the text between '<' and '>'; with no brackets the
    # whole string is returned unchanged
    end = author.find('>')
    if end == -1:
        end = None
    start = author.find('<') + 1
    return author[start:end]
1339 1329
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) characters."""
    if len(text) > maxlength:
        # reserve three characters for the trailing dots
        return "%s..." % text[:maxlength - 3]
    return text
1346 1336
def walkrepos(path, followsym=False, seen_dirs=None, recurse=False):
    '''yield every hg repository under path, recursively.'''
    def errhandler(err):
        # only errors on the walk root itself are fatal
        if err.filename == path:
            raise err

    if followsym and hasattr(os.path, 'samestat'):
        def _add_dir_if_not_there(dirlst, dirname):
            # record dirname's stat in dirlst; True if it was new
            samestat = os.path.samestat
            dirstat = os.stat(dirname)
            for seenstat in dirlst:
                if samestat(dirstat, seenstat):
                    return False
            dirlst.append(dirstat)
            return True
    else:
        # without samestat we cannot detect symlink cycles safely
        followsym = False

    if followsym and seen_dirs is None:
        seen_dirs = []
        _add_dir_if_not_there(seen_dirs, path)
    for root, dirs, files in os.walk(path, topdown=True, onerror=errhandler):
        if '.hg' in dirs:
            yield root  # found a repository
            qroot = os.path.join(root, '.hg', 'patches')
            if os.path.isdir(os.path.join(qroot, '.hg')):
                yield qroot  # we have a patch queue repo here
            if recurse:
                # keep walking, but never inside the .hg directory
                dirs.remove('.hg')
            else:
                dirs[:] = []  # don't descend further
        elif followsym:
            keep = []
            for d in dirs:
                fname = os.path.join(root, d)
                if _add_dir_if_not_there(seen_dirs, fname):
                    if os.path.islink(fname):
                        # walk the link target ourselves; os.walk won't
                        for hgname in walkrepos(fname, True, seen_dirs):
                            yield hgname
                    else:
                        keep.append(d)
            dirs[:] = keep
1392 1382
# cached result of rcpath(); filled in lazily
_rcpath = None

def os_rcpath():
    '''return default os-specific hgrc search path'''
    # system-wide entries first, then the user's own, all normalized
    paths = list(system_rcpath())
    paths.extend(user_rcpath())
    return [os.path.normpath(f) for f in paths]
1401 1391
def rcpath():
    '''return hgrc search path. if env var HGRCPATH is set, use it.
    for each item in path, if directory, use files ending in .rc,
    else use item.
    make HGRCPATH empty to only look in .hg/hgrc of current repo.
    if no HGRCPATH, use default os-specific path.'''
    global _rcpath
    if _rcpath is not None:
        # already computed once; reuse the cached list
        return _rcpath
    if 'HGRCPATH' not in os.environ:
        _rcpath = os_rcpath()
        return _rcpath
    _rcpath = []
    for p in os.environ['HGRCPATH'].split(os.pathsep):
        if not p:
            continue
        if os.path.isdir(p):
            # a directory contributes every *.rc file it contains
            for f, kind in osutil.listdir(p):
                if f.endswith('.rc'):
                    _rcpath.append(os.path.join(p, f))
        else:
            _rcpath.append(p)
    return _rcpath
1423 1413
def bytecount(nbytes):
    '''return byte count formatted as readable string, with units'''
    # checked largest-first; the threshold multiplier picks the
    # precision (100x a unit -> no decimals, 10x -> one, 1x -> two)
    units = (
        (100, 1 << 30, _('%.0f GB')),
        (10, 1 << 30, _('%.1f GB')),
        (1, 1 << 30, _('%.2f GB')),
        (100, 1 << 20, _('%.0f MB')),
        (10, 1 << 20, _('%.1f MB')),
        (1, 1 << 20, _('%.2f MB')),
        (100, 1 << 10, _('%.0f KB')),
        (10, 1 << 10, _('%.1f KB')),
        (1, 1 << 10, _('%.2f KB')),
        (1, 1, _('%.0f bytes')),
    )
    for threshold, divisor, fmt in units:
        if nbytes >= divisor * threshold:
            return fmt % (nbytes / float(divisor))
    # nbytes is zero (or negative): fall back to the bytes format
    return units[-1][2] % nbytes
1444 1434
def drop_scheme(scheme, path):
    """Strip a leading "scheme:" (and optional "//") from path."""
    prefix = scheme + ':'
    if not path.startswith(prefix):
        # different scheme (or none): leave the path untouched
        return path
    rest = path[len(prefix):]
    if rest.startswith('//'):
        rest = rest[2:]
    return rest
1452 1442
def uirepr(s):
    """repr() for user-facing output: collapse the doubled backslashes
    repr() produces, keeping Windows paths readable."""
    r = repr(s)
    return r.replace('\\\\', '\\')
1456 1446
def termwidth():
    """Return the terminal width in columns, falling back to 80."""
    # an explicit COLUMNS override wins when it parses as an int
    if 'COLUMNS' in os.environ:
        try:
            return int(os.environ['COLUMNS'])
        except ValueError:
            pass
    # otherwise ask the tty itself via TIOCGWINSZ
    try:
        import termios, array, fcntl
        for stream in (sys.stdout, sys.stdin):
            try:
                fd = stream.fileno()
                if not os.isatty(fd):
                    continue
                winsz = fcntl.ioctl(fd, termios.TIOCGWINSZ, '\0' * 8)
                # rows, cols, xpixel, ypixel as shorts; cols is [1]
                return array.array('h', winsz)[1]
            except ValueError:
                pass
    except ImportError:
        # no termios/fcntl (e.g. Windows)
        pass
    return 80
1477 1467
def iterlines(iterator):
    """Split every chunk produced by iterator into lines and yield
    the lines one at a time."""
    for block in iterator:
        for line in block.splitlines():
            yield line
@@ -1,118 +1,121 b''
#!/bin/sh
# Test script for "hg archive" and the hgweb archive command:
# builds a small repository, then exercises every archive type
# (tar/tgz/tbz2/zip) both locally and over HTTP.

mkdir test
cd test
hg init
echo foo>foo
hg commit -Am 1 -d '1 0'
echo bar>bar
hg commit -Am 2 -d '2 0'
mkdir baz
echo bletch>baz/bletch
hg commit -Am 3 -d '1000000000 0'
echo "[web]" >> .hg/hgrc
echo "name = test-archive" >> .hg/hgrc
cp .hg/hgrc .hg/hgrc-base

# check http return codes
test_archtype() {
    echo "allow_archive = $1" >> .hg/hgrc
    hg serve -p $HGPORT -d --pid-file=hg.pid -E errors.log
    cat hg.pid >> $DAEMON_PIDS
    echo % $1 allowed should give 200
    "$TESTDIR/get-with-headers.py" localhost:$HGPORT "/archive/tip.$2" | head -n 1
    echo % $3 and $4 disallowed should both give 403
    "$TESTDIR/get-with-headers.py" localhost:$HGPORT "/archive/tip.$3" | head -n 1
    "$TESTDIR/get-with-headers.py" localhost:$HGPORT "/archive/tip.$4" | head -n 1
    "$TESTDIR/killdaemons.py"
    cat errors.log
    cp .hg/hgrc-base .hg/hgrc
}

echo
test_archtype gz tar.gz tar.bz2 zip
test_archtype bz2 tar.bz2 zip tar.gz
test_archtype zip zip tar.gz tar.bz2

echo "allow_archive = gz bz2 zip" >> .hg/hgrc
hg serve -p $HGPORT -d --pid-file=hg.pid -E errors.log
cat hg.pid >> $DAEMON_PIDS

echo % invalid arch type should give 404
"$TESTDIR/get-with-headers.py" localhost:$HGPORT "/archive/tip.invalid" | head -n 1
echo

TIP=`hg id -v | cut -f1 -d' '`
QTIP=`hg id -q`
# helper: download an archive of the given node/type via hgweb
cat > getarchive.py <<EOF
import os, sys, urllib2
try:
    # Set stdout to binary mode for win32 platforms
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
except ImportError:
    pass

node, archive = sys.argv[1:]
f = urllib2.urlopen('http://127.0.0.1:%s/?cmd=archive;node=%s;type=%s'
                    % (os.environ['HGPORT'], node, archive))
sys.stdout.write(f.read())
EOF
http_proxy= python getarchive.py "$TIP" gz | gunzip | tar tf - 2>/dev/null | sed "s/$QTIP/TIP/"
http_proxy= python getarchive.py "$TIP" bz2 | bunzip2 | tar tf - 2>/dev/null | sed "s/$QTIP/TIP/"
http_proxy= python getarchive.py "$TIP" zip > archive.zip
unzip -t archive.zip | sed "s/$QTIP/TIP/"

"$TESTDIR/killdaemons.py"

hg archive -t tar test.tar
tar tf test.tar

hg archive -t tbz2 -X baz test.tar.bz2
bunzip2 -dc test.tar.bz2 | tar tf - 2>/dev/null

hg archive -t tgz -p %b-%h test-%h.tar.gz
gzip -dc test-$QTIP.tar.gz | tar tf - 2>/dev/null | sed "s/$QTIP/TIP/"

# helper: compare two files by md5; uses hashlib when available
# (Python >= 2.5), falling back to the old md5 module
cat > md5comp.py <<EOF
try:
    from hashlib import md5
except ImportError:
    from md5 import md5
import sys
f1, f2 = sys.argv[1:3]
h1 = md5(file(f1, 'rb').read()).hexdigest()
h2 = md5(file(f2, 'rb').read()).hexdigest()
print h1 == h2 or "md5 differ: " + repr((h1, h2))
EOF

# archive name is stored in the archive, so create similar
# archives and rename them afterwards.
hg archive -t tgz tip.tar.gz
mv tip.tar.gz tip1.tar.gz
sleep 1
hg archive -t tgz tip.tar.gz
mv tip.tar.gz tip2.tar.gz
python md5comp.py tip1.tar.gz tip2.tar.gz

hg archive -t zip -p /illegal test.zip
hg archive -t zip -p very/../bad test.zip

hg archive --config ui.archivemeta=false -t zip -r 2 test.zip
unzip -t test.zip

hg archive -t tar - | tar tf - 2>/dev/null | sed "s/$QTIP/TIP/"

hg archive -r 0 -t tar rev-%r.tar
if [ -f rev-0.tar ]; then
    echo 'rev-0.tar created'
fi

hg archive -t bogus test.bogus

echo % server errors
cat errors.log

echo '% empty repo'
hg init ../empty
cd ../empty
hg archive ../test-empty

exit 0
General Comments 0
You need to be logged in to leave comments. Login now