##// END OF EJS Templates
util.chunkbuffer: avoid extra mutations when reading partial chunks...
Gregory Szorc -
r26480:6ae14d1c default
parent child Browse files
Show More
@@ -1,2402 +1,2423
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 import i18n
17 17 _ = i18n._
18 18 import error, osutil, encoding, parsers
19 19 import errno, shutil, sys, tempfile, traceback
20 20 import re as remod
21 21 import os, time, datetime, calendar, textwrap, signal, collections
22 22 import imp, socket, urllib
23 23 import gc
24 24 import bz2
25 25 import zlib
26 26
27 27 if os.name == 'nt':
28 28 import windows as platform
29 29 else:
30 30 import posix as platform
31 31
# Re-export the platform-specific implementations (from windows.py or
# posix.py, selected above) under stable util.* names so callers never
# need to import the platform module directly.
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
# prefer the C implementation from osutil when it provides statfiles
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
termwidth = platform.termwidth
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
unlinkpath = platform.unlinkpath
username = platform.username
85 85
86 86 # Python compatibility
87 87
# private sentinel: lets getattr distinguish "attribute absent" from any
# real attribute value, including None and False
_notset = object()

def safehasattr(thing, attr):
    """Return True if `thing` has the attribute `attr`.

    Probes with getattr and a private sentinel default instead of the
    builtin hasattr, so any real value (even None/False) counts as present.
    """
    probed = getattr(thing, attr, _notset)
    return probed is not _notset
92 92
def sha1(s=''):
    '''
    Low-overhead wrapper around Python's SHA support

    >>> f = _fastsha1
    >>> a = sha1()
    >>> a = f()
    >>> a.hexdigest()
    'da39a3ee5e6b4b0d3255bfef95601890afd80709'
    '''

    # NOTE: the first call to _fastsha1 rebinds both the module-level
    # 'sha1' and '_fastsha1' names to the real hash constructor, so the
    # wrapper indirection is paid only once per process.
    return _fastsha1(s)
105 105
def _fastsha1(s=''):
    """Return a SHA-1 hash object for s, binding the fastest available
    implementation into the module namespace on first use."""
    # This function will import sha1 from hashlib or sha (whichever is
    # available) and overwrite itself with it on the first call.
    # Subsequent calls will go directly to the imported function.
    if sys.version_info >= (2, 5):
        from hashlib import sha1 as _sha1
    else:
        # pre-2.5 Pythons have no hashlib; fall back to the old sha module
        from sha import sha as _sha1
    global _fastsha1, sha1
    _fastsha1 = sha1 = _sha1
    return _sha1(s)
117 117
def md5(s=''):
    """Return an MD5 hash object for s.

    Uses the same self-replacing trick as sha1/_fastsha1 above: the first
    call rebinds the module-level 'md5' name to the imported constructor.
    """
    try:
        from hashlib import md5 as _md5
    except ImportError:
        # no hashlib (pre-2.5): use the legacy md5 module
        from md5 import md5 as _md5
    global md5
    md5 = _md5
    return _md5(s)
126 126
# Map of digest algorithm name to hash-object constructor.  md5 and sha1
# are always available; sha512 is registered below when hashlib exists.
DIGESTS = {
    'md5': md5,
    'sha1': sha1,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha1', 'md5']

try:
    import hashlib
    DIGESTS.update({
        'sha512': hashlib.sha512,
    })
    DIGESTS_BY_STRENGTH.insert(0, 'sha512')
except ImportError:
    pass

# sanity check: every advertised digest must have a registered constructor
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
145 145
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        # feed data to every tracked hash object
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            # fix: report the requested name ('key'), not 'k', which was a
            # stale loop variable leaked from module scope and produced a
            # misleading error message
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
192 192
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        # forward the read while feeding the digester and counting bytes
        data = self._fh.read(length)
        self._digester.update(data)
        self._got += len(data)
        return data

    def validate(self):
        # check the byte count first, then each expected digest value
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (k, v, self._digester[k]))
224 224
# 'buffer' compatibility shim: Python 2 has a builtin buffer(); where it
# is missing, fall back to plain slicing (py2) or a memoryview slice (py3,
# which avoids copying the underlying bytes).
try:
    buffer = buffer
except NameError:
    if sys.version_info[0] < 3:
        def buffer(sliceable, offset=0):
            return sliceable[offset:]
    else:
        def buffer(sliceable, offset=0):
            return memoryview(sliceable)[offset:]
234 234
235 235 import subprocess
236 236 closefds = os.name == 'posix'
237 237
238 238 _chunksize = 4096
239 239
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        # list of buffered chunks, oldest first; _frombuffer collapses it
        # back to at most one element after every read/readline
        self._buffer = []
        self._eof = False
        # total number of bytes currently held across all buffered chunks
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        # keep pulling from the pipe until we hold 'size' bytes or hit EOF
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi: index of the newline within the newest chunk, -1 if absent
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        # number of bytes to return: up to and including the newline
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            # coalesce all pending chunks before slicing
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        # store the (single) remainder chunk, keeping _lenbuf in sync
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        # os.read on the raw fd so unconsumed data stays visible to poll()
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
333 333
def popen2(cmd, env=None, newlines=False):
    """Run cmd through the shell; return its (stdin, stdout) pipe pair."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
344 344
def popen3(cmd, env=None, newlines=False):
    """Like popen4, but without exposing the Popen object itself."""
    return popen4(cmd, env, newlines)[:3]
348 348
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run cmd through the shell; return (stdin, stdout, stderr, proc)."""
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
357 357
def version():
    """Return version information if available."""
    try:
        import __version__
    except ImportError:
        # no generated __version__ module (e.g. running from source)
        return 'unknown'
    return __version__.version
365 365
# used by parsedate
# Formats are tried in order; more specific (date+time) forms come first.
defaultdateformats = (
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

# additional coarse-grained formats (year/month only) accepted in
# extended contexts such as date ranges
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
    )
400 400
def cachefunc(func):
    '''cache the result of function calls

    Results are memoized on positional arguments; the cache grows without
    bound, so only use this for functions with a small argument domain.
    '''
    # XXX doesn't handle keywords args
    # func.__code__ is the Python 2.6+/3.x spelling of the Python-2-only
    # func.func_code attribute, so this works on modern interpreters too.
    if func.__code__.co_argcount == 0:
        # zero-argument function: a one-element list is the whole cache
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f
426 426
class sortdict(dict):
    '''a simple sorted dictionary

    Keys iterate in insertion order; re-assigning an existing key moves it
    to the end.
    '''
    def __init__(self, data=None):
        self._list = []
        if data:
            self.update(data)
    def copy(self):
        return sortdict(self)
    def __setitem__(self, key, val):
        # move an already-present key to the end of the order
        if key in self:
            self._list.remove(key)
        self._list.append(key)
        dict.__setitem__(self, key, val)
    def __iter__(self):
        return self._list.__iter__()
    def update(self, src):
        if isinstance(src, dict):
            src = src.iteritems()
        for k, v in src:
            self[k] = v
    def clear(self):
        dict.clear(self)
        self._list = []
    def items(self):
        return [(k, self[k]) for k in self._list]
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._list.remove(key)
    def pop(self, key, *args, **kwargs):
        # fix: return the removed value (or the supplied default), matching
        # the dict.pop contract; the old code silently returned None
        value = dict.pop(self, key, *args, **kwargs)
        try:
            self._list.remove(key)
        except ValueError:
            # key was absent and a default was supplied
            pass
        return value
    def keys(self):
        return self._list
    def iterkeys(self):
        return self._list.__iter__()
    def iteritems(self):
        for k in self._list:
            yield k, self[k]
    def insert(self, index, key, val):
        # NOTE: assumes key is not already present (no dedup of _list)
        self._list.insert(index, key)
        dict.__setitem__(self, key, val)
471 471
class lrucachedict(object):
    '''cache most recent gets from or sets to this dictionary'''
    def __init__(self, maxsize):
        self._cache = {}
        self._maxsize = maxsize
        # deque of keys, least-recently-used first
        self._order = collections.deque()

    def __getitem__(self, key):
        # look up first so a missing key raises KeyError before we touch
        # the ordering
        hit = self._cache[key]
        self._order.remove(key)
        self._order.append(key)
        return hit

    def __setitem__(self, key, value):
        if key in self._cache:
            # re-assignment: just refresh the key's position
            self._order.remove(key)
        elif len(self._cache) >= self._maxsize:
            # full: evict the least-recently-used entry
            del self._cache[self._order.popleft()]
        self._cache[key] = value
        self._order.append(key)

    def __contains__(self, key):
        return key in self._cache

    def clear(self):
        self._cache.clear()
        self._order = collections.deque()
500 500
def lrucachefunc(func):
    '''cache most recent results of function calls

    Keeps at most ~21 entries, evicting the least recently used.
    '''
    cache = {}
    order = collections.deque()
    # func.__code__ is the Python 2.6+/3.x spelling of the Python-2-only
    # func.func_code attribute (kept consistent with cachefunc above).
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f
527 527
class propertycache(object):
    """Descriptor computing an attribute once and caching it on the instance.

    After the first access the computed value shadows the descriptor via
    the instance __dict__, so later reads skip the function entirely.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
540 540
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _stderr = proc.communicate(s)
    return stdout
547 547
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        # write the input to a temp file the command can read
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, 'wb')
        fp.write(s)
        fp.close()
        # reserve an output file name; the command itself writes to it
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if sys.platform == 'OpenVMS' and code & 1:
            # on OpenVMS an odd status means success
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        fp = open(outname, 'rb')
        r = fp.read()
        fp.close()
        return r
    finally:
        # best-effort cleanup of both temp files, ignoring removal errors
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
584 584
# maps a command-string prefix to the function implementing that
# filtering strategy (dispatched by filter() below)
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
589 589
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, fn in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        # strip the strategy prefix before handing the command over
        return fn(s, cmd[len(prefix):].lstrip())
    # no recognized prefix: default to a simple pipe filter
    return pipefilter(s, cmd)
596 596
def binary(s):
    """return true if a string is binary data"""
    # empty/None input is never considered binary
    if not s:
        return False
    return '\0' in s
600 600
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def _floorlog2(x):
        # floor(log2(x)) by counting significant bits; defined as 0 for x == 0
        bits = 0
        while x:
            x >>= 1
            bits += 1
        if not bits:
            return 0
        return bits - 1

    pending = []
    pendinglen = 0
    for piece in source:
        pending.append(piece)
        pendinglen += len(piece)
        if pendinglen < min:
            continue
        if min < max:
            # grow the threshold: at least double it, or jump to the
            # largest power of two not exceeding what we just emitted
            min = min << 1
            nmin = 1 << _floorlog2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        pendinglen = 0
    if pending:
        yield ''.join(pending)
631 631
# re-export so util users need not import the error module for Abort
Abort = error.Abort

def always(fn):
    """constant predicate: True for any input"""
    return True

def never(fn):
    """constant predicate: False for any input"""
    return False
639 639
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7.
    """
    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # only re-enable if the collector was running before we started
            if wasenabled:
                gc.enable()
    return wrapper
661 661
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        # different drive letters (Windows): no relative path exists,
        # fall back to an absolute path under root
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    # strip the common leading components, then climb with '..' for
    # whatever remains of n1 and descend into the rest of n2
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return os.sep.join((['..'] * len(a)) + b) or '.'
687 687
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    # each freezer leaves a different marker behind; probe them all
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen("__main__")) # tools/freeze
697 697
# the location of data files matching the source code
if mainfrozen():
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(sys.executable)
else:
    datapath = os.path.dirname(__file__)

# tell i18n where to find the translation catalogs
i18n.setdatapath(datapath)
706 706
# cached path of the 'hg' executable; filled lazily by hgexecutable()
_hgexecutable = None

def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = os.environ.get('HG')
        mainmod = sys.modules['__main__']
        if hg:
            _sethgexecutable(hg)
        elif mainfrozen():
            # frozen binaries: the interpreter is the hg executable itself
            _sethgexecutable(sys.executable)
        elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
            # running from the 'hg' script directly
            _sethgexecutable(mainmod.__file__)
        else:
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
727 727
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # updates the module-level cache read by hgexecutable()
    global _hgexecutable
    _hgexecutable = path
732 732
733 733 def _isstdout(f):
734 734 fileno = getattr(f, 'fileno', None)
735 735 return fileno and fileno() == sys.__stdout__.fileno()
736 736
def system(cmd, environ=None, cwd=None, onerr=None, errprefix=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if command fails and onerr is None, return status, else raise onerr
    object as exception.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    if environ is None:
        environ = {}
    try:
        # flush our own buffered output before the child writes to the tty
        sys.stdout.flush()
    except Exception:
        pass
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return str(val)
    origcmd = cmd
    cmd = quotecommand(cmd)
    if sys.platform == 'plan9' and (sys.version_info[0] == 2
                                    and sys.version_info[1] < 7):
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if not cwd is None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = dict(os.environ)
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
        env['HG'] = hgexecutable()
        if out is None or _isstdout(out):
            # child inherits our stdout: no capture needed
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
        else:
            # capture the child's combined stdout/stderr and forward
            # it line by line to the supplied file-like object
            proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                    env=env, cwd=cwd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            while True:
                line = proc.stdout.readline()
                if not line:
                    break
                out.write(line)
            proc.wait()
            rc = proc.returncode
    if sys.platform == 'OpenVMS' and rc & 1:
        # on OpenVMS an odd status means success
        rc = 0
    if rc and onerr:
        errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
                            explainexit(rc)[0])
        if errprefix:
            errmsg = '%s: %s' % (errprefix, errmsg)
        raise onerr(errmsg)
    return rc
795 795
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # a traceback of depth one means the TypeError came from the
            # call itself (bad arguments), not from inside func
            frames = traceback.extract_tb(sys.exc_info()[2])
            if len(frames) == 1:
                raise error.SignatureError
            raise

    return check
807 807
def copyfile(src, dest, hardlink=False):
    "copy a file, preserving mode and atime/mtime"
    if os.path.lexists(dest):
        unlink(dest)
    # hardlinks are problematic on CIFS, quietly ignore this flag
    # until we find a way to work around it cleanly (issue4546)
    # (the 'if False' deliberately disables the branch below)
    if False and hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        # recreate the symlink rather than copying its target
        os.symlink(os.readlink(src), dest)
    else:
        try:
            shutil.copyfile(src, dest)
            shutil.copymode(src, dest)
        except shutil.Error as inst:
            raise Abort(str(inst))
828 828
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    Returns (hardlink, num): whether hardlinking remained possible and the
    number of files processed.  progress is called with a topic and a
    running count (or None at the end).
    """
    num = 0

    if hardlink is None:
        # hardlinks only work within a single device
        hardlink = (os.stat(src).st_dev ==
                    os.stat(os.path.dirname(dst)).st_dev)
    if hardlink:
        topic = _('linking')
    else:
        topic = _('copying')

    if os.path.isdir(src):
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            # offset the child's progress positions by our running count
            def nprog(t, pos):
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # linking failed once: stop trying for all remaining files
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
865 865
# device names and characters that Windows refuses in file names
_winreservednames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    # validate each path component separately
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in n:
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                # control characters are invalid in Windows file names
                return _("filename contains %r, which is invalid "
                         "on Windows") % c
        # device names are reserved regardless of any extension
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1]
        # trailing '.' or ' ' is rejected, except for the special path
        # components '.' and '..' ("n not in '..'" is a substring test
        # that matches exactly those two strings)
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t
916 916
# on Windows every filename must satisfy the Windows rules; elsewhere
# defer to the platform module's own check
if os.name == 'nt':
    checkosfilename = checkwinfilename
else:
    checkosfilename = platform.checkosfilename
921 921
def makelock(info, pathname):
    """Create a lock at pathname whose content/target is info.

    Tries a symlink first; falls back to an exclusively-created regular
    file when symlinks are unsupported.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            # the lock already exists: propagate to the caller
            raise
        # other OSErrors: fall through to the file-based lock below
    except AttributeError: # no symlink in os
        pass

    # O_EXCL makes creation fail if the lock file already exists
    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(ld, info)
    os.close(ld)
934 934
def readlock(pathname):
    """Return the content of the lock at pathname (see makelock).

    Reads the symlink target when the lock is a symlink, otherwise the
    content of the regular lock file.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported — try the
        # file-based fallback in both cases
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    r = fp.read()
    fp.close()
    return r
947 947
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        # no fileno(): fall back to stat'ing by name
        return os.stat(fp.name)
954 954
955 955 # File system features
956 956
957 957 def checkcase(path):
958 958 """
959 959 Return true if the given path is on a case-sensitive filesystem
960 960
961 961 Requires a path (like /foo/.hg) ending with a foldable final
962 962 directory component.
963 963 """
964 964 s1 = os.lstat(path)
965 965 d, b = os.path.split(path)
966 966 b2 = b.upper()
967 967 if b == b2:
968 968 b2 = b.lower()
969 969 if b == b2:
970 970 return True # no evidence against case sensitivity
971 971 p2 = os.path.join(d, b2)
972 972 try:
973 973 s2 = os.lstat(p2)
974 974 if s2 == s1:
975 975 return False
976 976 return True
977 977 except OSError:
978 978 return True
979 979
# Probe for the optional re2 regexp engine.  _re2 is tri-state:
#   None  -> re2 imported but not yet validated (see _re._checkre2)
#   False -> re2 unavailable
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False
985 985
class _re(object):
    """Facade over the re module that transparently uses re2 when usable."""

    def _checkre2(self):
        # resolve the tri-state _re2 flag to a definite True/False
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            # re2 takes flags as inline pattern modifiers, not arguments
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # pattern not supported by re2: fall back to stdlib re
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

# module-level singleton used throughout Mercurial in place of stdlib re
re = _re()
1030 1030
# Cache of directory listings: {normcased dir: {normcased name: real name}}.
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = os.sep
    if os.altsep:
        seps = seps + os.altsep
    # Protect backslashes. This gets silly very quickly.
    # (str.replace returns a new string; the original code discarded the
    # result, leaving '\' unescaped inside the character classes below and
    # corrupting the pattern on Windows where os.sep is a backslash.)
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # Separator runs are passed through untouched.
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1073 1073
def checknlink(testfile):
    '''check whether hardlink count reporting works properly

    Creates a scratch file next to ``testfile``, hardlinks it, and checks
    that nlinks() observes a link count above one.  Returns False whenever
    the link cannot be created or counted.  Scratch files are always
    removed before returning.
    '''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = testfile + ".hgtmp1"
    if os.path.lexists(f1):
        return False
    try:
        posixfile(f1, 'w').close()
    except IOError:
        return False

    f2 = testfile + ".hgtmp2"
    fd = None
    try:
        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        # Always close before unlinking, then best-effort cleanup of both
        # scratch files.
        if fd is not None:
            fd.close()
        for f in (f1, f2):
            try:
                os.unlink(f)
            except OSError:
                pass
1105 1105
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    # Preserve the original truthy-but-not-necessarily-bool return value
    # (callers only test truthiness).
    primary = path.endswith(os.sep)
    alternate = os.altsep and path.endswith(os.altsep)
    return primary or alternate
1109 1109
def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need.'''
    sep = os.sep
    return path.split(sep)
1117 1117
def gui():
    '''Are we running in a GUI?'''
    if sys.platform == 'darwin':
        if 'SSH_CONNECTION' in os.environ:
            # handle SSH access to a box where the user is logged in
            return False
        elif getattr(osutil, 'isgui', None):
            # check if a CoreGraphics session is available
            return osutil.isgui()
        else:
            # pure build; use a safe default
            return True
    else:
        # Windows always counts as a GUI; elsewhere the presence of
        # $DISPLAY decides.  NOTE: may return a string (the $DISPLAY
        # value) rather than a bool; callers rely on truthiness.
        return os.name == "nt" or os.environ.get("DISPLAY")
1132 1132
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    # The temp file is created in the same directory as name, presumably so
    # a later rename over name stays on one filesystem -- TODO confirm.
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            # A missing source just yields an empty temp file.
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # Never leave a half-written temp file behind.
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp
1171 1171
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.
    '''
    def __init__(self, name, mode='w+b', createmode=None):
        self.__name = name # permanent name
        # 'w' mode truncates anyway, so skip copying the old contents.
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)

        # delegated methods
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        # Commit: atomically replace the target with the written temp file.
        if not self._fp.closed:
            self._fp.close()
            rename(self._tempname, localpath(self.__name))

    def discard(self):
        # Abort: drop the temp file, leaving the original untouched.
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()
1209 1209
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Missing ancestors are created before ``name`` itself.  An already
    existing directory is accepted silently (and left untouched: no chmod
    is applied in that case).
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        # Only a missing parent is recoverable.
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # Reached the filesystem root without being able to create
            # anything.
            raise
        makedirs(parent, mode, notindexed)
        makedir(name, notindexed)
    if mode is not None:
        os.chmod(name, mode)
1226 1226
def ensuredirs(name, mode=None, notindexed=False):
    """race-safe recursive directory creation

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    if os.path.isdir(name):
        return
    # Create ancestors first, top-down.
    parent = os.path.dirname(os.path.abspath(name))
    if parent != name:
        ensuredirs(parent, mode, notindexed)
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST and os.path.isdir(name):
            # someone else seems to have won a directory creation race
            return
        raise
    if mode is not None:
        os.chmod(name, mode)
1248 1248
def readfile(path):
    """Return the entire binary contents of the file at path."""
    with open(path, 'rb') as fp:
        return fp.read()
1255 1255
def writefile(path, text):
    """Write text to the file at path (binary mode), replacing contents."""
    with open(path, 'wb') as fp:
        fp.write(text)
1262 1262
def appendfile(path, text):
    """Append text to the file at path (binary mode)."""
    with open(path, 'ab') as fp:
        fp.write(text)
1269 1269
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.
        targetsize is how big a buffer to try to maintain."""
        def splitbig(chunks):
            # Re-yield any chunk over 1MB in 256KB pieces so read() never
            # has to slice huge strings.
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2**18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        # How many bytes of the chunk at the head of _queue have already
        # been handed out by previous read() calls.
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)
1329 1350
def filechunkiter(f, size=65536, limit=None):
    """Create a generator that produces the data in the file size
    (default 65536) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # A zero-byte request (exhausted limit) terminates without reading.
        chunk = nbytes and f.read(nbytes)
        if not chunk:
            break
        if limit:
            limit -= len(chunk)
        yield chunk
1350 1371
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        # A pre-epoch clock almost certainly means a misconfigured host.
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    # Recover the local-to-UTC offset by interpreting the same stamp in
    # both zones and differencing the results.
    utcwhen = datetime.datetime.utcfromtimestamp(timestamp)
    localwhen = datetime.datetime.fromtimestamp(timestamp)
    delta = utcwhen - localwhen
    tz = delta.days * 86400 + delta.seconds
    return timestamp, tz
1363 1384
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC. if timezone is false, do not
    append time zone to string."""
    when, tz = date or makedate()
    if when < 0:
        when = 0 # time.gmtime(lt) fails on Windows for lt < -43200
        tz = 0
    if "%1" in format or "%2" in format or "%z" in format:
        # %1/%2 (and %z) render the offset as +HHMM / -HHMM.
        sign = "-" if tz > 0 else "+"
        minutes = abs(tz) // 60
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, minutes // 60))
        format = format.replace("%2", "%02d" % (minutes % 60))
    try:
        tm = time.gmtime(float(when) - tz)
    except ValueError:
        # time was out of range
        tm = time.gmtime(sys.maxint)
    return time.strftime(format, tm)
1386 1407
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into iso 8601 date."""
    return datestr(date, format='%Y-%m-%d')
1390 1411
def parsetimezone(tz):
    """parse a timezone string and return an offset integer"""
    # Numeric form: a sign followed by exactly four digits (HHMM).
    if tz[0] in "+-" and len(tz) == 5 and tz[1:].isdigit():
        sign = 1 if tz[0] == "+" else -1
        hours = int(tz[1:3])
        minutes = int(tz[3:5])
        # Offsets are stored as seconds *west* of UTC, hence the negation.
        return -sign * (hours * 60 + minutes) * 60
    if tz in ("GMT", "UTC"):
        return 0
    # Unrecognized: let the caller decide what to do.
    return None
1401 1422
def strdate(string, format, defaults=[]):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised.

    defaults maps field groups ("S", "M", "HI", "d", "mb", "yY") to
    (biased, now) string pairs used to fill in fields missing from format.
    NOTE(review): the mutable default [] is never mutated here, and any
    call that reaches defaults[part] with the default would fail anyway --
    callers are expected to pass a dict.
    """
    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string.split()[-1]), string
    if offset is not None:
        # The last word was a timezone; strip it from the date text.
        date = " ".join(string.split()[:-1])

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        found = [True for p in part if ("%"+p) in format]
        if not found:
            # Field absent from format: append a default value to both the
            # date text and the format so strptime still sees it.
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]
        else:
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
1431 1452
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        # Already parsed; pass through unchanged.
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    # Symbolic dates ('now'/'today'/'yesterday'), also in localized form.
    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime('%b %d')
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime('%b %d')

    try:
        # Fast path: internal "unixtime offset" representation.
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                if part[0] in "HMS":
                    b = "00"
                else:
                    b = "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0])

            defaults[part] = (b, n)

        # Try each candidate format until one parses.
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise Abort(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if abs(when) > 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if when < 0:
        raise Abort(_('negative date value: %d') % when)
    if offset < -50400 or offset > 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
1510 1531
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # Earliest timestamp the (possibly partial) date can denote.
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # Latest timestamp the (possibly partial) date can denote; try
        # month lengths from longest to shortest until one is valid.
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        # '-N': within the last N days.
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_('%s must be nonnegative (see "hg help dates")')
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # Explicit inclusive range.
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        # A bare date matches anywhere within its span of uncertainty.
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
1586 1607
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # Drop the mail domain, if any.
    idx = user.find('@')
    if idx >= 0:
        user = user[:idx]
    # Drop a leading "Real Name <" prefix, if any.
    idx = user.find('<')
    if idx >= 0:
        user = user[idx + 1:]
    # Truncate at the first space, then at the first dot.
    for marker in (' ', '.'):
        idx = user.find(marker)
        if idx >= 0:
            user = user[:idx]
    return user
1602 1623
def emailuser(user):
    """Return the user portion of an email address."""
    at = user.find('@')
    if at >= 0:
        # Strip the domain.
        user = user[:at]
    lt = user.find('<')
    if lt >= 0:
        # Strip a "Real Name <" prefix.
        user = user[lt + 1:]
    return user
1612 1633
def email(author):
    '''get email of author.'''
    # Slice out the text between '<' and '>'.  find() returning -1 for a
    # missing '<' conveniently makes the slice start at index 0, so a bare
    # address is returned whole.
    close = author.find('>')
    end = None if close == -1 else close
    return author[author.find('<') + 1:end]
1619 1640
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    # encoding.trim counts display columns (wide chars etc.), which a plain
    # slice would miscount -- presumably why it is used here; see encoding.py.
    return encoding.trim(text, maxlength, ellipsis='...')
1623 1644
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    unittable is a sequence of (multiplier, divisor, format) triples tried
    in order; the first entry whose threshold (divisor * multiplier) the
    count reaches is used.  The last entry's format is the fallback.
    '''

    def go(count):
        for multiplier, divisor, fmt in unittable:
            if count >= divisor * multiplier:
                return fmt % (count / float(divisor))
        # Below every threshold: render with the smallest unit.
        return unittable[-1][2] % count

    return go
1634 1655
# Human-readable byte counts: pick the largest unit that yields at least one
# integer digit, keeping roughly three significant figures (entries are
# (multiplier, divisor, format) triples ordered GB -> bytes; see unitcountfn).
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
1647 1668
def uirepr(s):
    """repr() tuned for user display: collapse the doubled backslashes
    repr() produces for Windows paths."""
    # Avoid double backslash in Windows path repr()
    rep = repr(s)
    return rep.replace('\\\\', '\\')
1651 1672
1652 1673 # delay import of textwrap
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr so the head occupies at most space_left display
            # columns (per encoding.ucolwidth).
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # Replace this factory function with the class itself so the delayed
    # textwrap setup only happens on the first call.
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
1755 1776
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` display columns.

    initindent prefixes the first output line, hangindent all subsequent
    ones.  The text is decoded to unicode for width-aware wrapping (via
    MBTextWrapper) and re-encoded before returning.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    line = line.decode(encoding.encoding, encoding.encodingmode)
    initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
    hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(encoding.encoding)
1768 1789
def iterlines(iterator):
    """Yield the individual lines of every chunk produced by iterator
    (line terminators are dropped, per str.splitlines)."""
    for block in iterator:
        for line in block.splitlines():
            yield line
1773 1794
def expandpath(path):
    """Expand both $VAR environment references and ~user in path."""
    expanded = os.path.expandvars(path)
    return os.path.expanduser(expanded)
1776 1797
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if mainfrozen():
        # Frozen binary: the executable itself is the command.
        return [sys.executable]
    return gethgcmd()
1787 1808
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # Reap the child and remember its (pid, status).
        terminated.add(os.wait())
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # Re-check condfn() after observing the child's death: it may
            # have satisfied the condition just before exiting (e.g. after
            # daemonizing).
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # Restore the previous SIGCHLD disposition.
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
1822 1843
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    if fn is None:
        fn = lambda s: s
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        # Let a doubled prefix stand for a literal prefix character.
        patterns += '|' + prefix
        prefix_char = prefix[1:] if len(prefix) > 1 else prefix
        mapping[prefix_char] = prefix_char
    matcher = remod.compile(r'%s(%s)' % (prefix, patterns))
    # group()[1:] strips the prefix, leaving the mapping key.
    return matcher.sub(lambda m: fn(mapping[m.group()[1:]]), s)
1847 1868
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, util.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # Not numeric; fall through to a service-name lookup.
        try:
            return socket.getservbyname(port)
        except socket.error:
            raise Abort(_("no port number associated with service '%s'")
                        % port)
1864 1885
# recognized spellings for boolean configuration values
_booleans = dict.fromkeys(('1', 'yes', 'true', 'on', 'always'), True)
_booleans.update(dict.fromkeys(('0', 'no', 'false', 'off', 'never'), False))

def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
1875 1896
1876 1897 _hexdig = '0123456789ABCDEFabcdef'
1877 1898 _hextochr = dict((a + b, chr(int(a + b, 16)))
1878 1899 for a in _hexdig for b in _hexdig)
1879 1900
1880 1901 def _urlunquote(s):
1881 1902 """Decode HTTP/HTML % encoding.
1882 1903
1883 1904 >>> _urlunquote('abc%20def')
1884 1905 'abc def'
1885 1906 """
1886 1907 res = s.split('%')
1887 1908 # fastpath
1888 1909 if len(res) == 1:
1889 1910 return s
1890 1911 s = res[0]
1891 1912 for item in res[1:]:
1892 1913 try:
1893 1914 s += _hextochr[item[:2]] + item[2:]
1894 1915 except KeyError:
1895 1916 s += '%' + item
1896 1917 except UnicodeDecodeError:
1897 1918 s += unichr(int(item[:2], 16)) + item[2:]
1898 1919 return s
1899 1920
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(r'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(r'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(r'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(r'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # characters left unescaped by urllib.quote in user/passwd components
    _safechars = "!~*'()+"
    # characters left unescaped in path/fragment components
    _safepchars = "/!~*'()+:\\"
    # matcher for a leading "<scheme>:" (letters, digits, '+', '.', '-')
    _matchscheme = remod.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        # _localpath: True when the input is a plain filesystem path
        # (no scheme), which短-circuits most of the parsing below
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)
            if not path:
                path = None

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith(r'\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                # no scheme: everything (including any '?'/'#') is path
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            # rsplit: user names may themselves contain '@'
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, _urlunquote(v))

    def __repr__(self):
        # show only the components that were actually parsed
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        >>> print url(r'file:///D:\data\hg')
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                # bracketed IPv6 literal: quote() would mangle the brackets
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url, authentication-tuple-or-None).

        The returned url string has the credentials removed; the second
        element is None when no user is set, otherwise a 4-tuple suitable
        for a urllib2 password manager.
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily drop credentials so str(self) omits them
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        """Return True if this URL cannot be joined onto a base path."""
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(r'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        """Return the local filesystem path for file:/bundle:/plain URLs;
        for anything else, return the original input string unchanged."""
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
2186 2207
def hasscheme(path):
    """Report whether path parses as a URL with an explicit scheme."""
    u = url(path)
    return bool(u.scheme)
2189 2210
def hasdriveletter(path):
    """Report whether path starts with a Windows drive letter ('c:...')."""
    if not path:
        # preserve the short-circuit value ('' or None) for falsy input
        return path
    return path[1:2] == ':' and path[0:1].isalpha()
2192 2213
def urllocalpath(path):
    """Return the local filesystem path for path, parsed as a URL with
    query strings and fragments treated as part of the path."""
    u = url(path, parsequery=False, parsefragment=False)
    return u.localpath()
2195 2216
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # mask rather than drop, so the output still shows a password
        # was present
        parsed.passwd = '***'
    return str(parsed)
2202 2223
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
2208 2229
def isatty(fd):
    """Return fd.isatty(), or False when fd has no isatty method."""
    isattyfn = getattr(fd, 'isatty', None)
    if isattyfn is None:
        return False
    return isattyfn()
2214 2235
# Human-readable duration formatter. Each triple appears to be
# (threshold, divisor, format) consumed by unitcountfn (defined earlier
# in this file) to pick among s/ms/us/ns with matching precision —
# NOTE(review): confirm the triple semantics against unitcountfn.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
2230 2251
# shared indentation depth (two spaces per nesting level) so reports
# from nested @timed calls are visually indented
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        indent = 2
        begin = time.time()
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            # report even when func raised, then restore nesting depth
            duration = time.time() - begin
            _timenesting[0] -= indent
            sys.stderr.write('%s%s: %s\n'
                             % (' ' * _timenesting[0], func.__name__,
                                timecount(duration)))
    return wrapper
2257 2278
# unit suffixes, longest-match-compatible order ('kb' is tried before
# the bare 'b' would match)
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    value = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if value.endswith(suffix):
                return int(float(value[:-len(suffix)]) * multiplier)
        # no recognized suffix: plain integer byte count
        return int(value)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
2279 2300
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs, kept unsorted until invoked
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        # sort in place by source name so call order is deterministic
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
2297 2318
def debugstacktrace(msg='stacktrace', skip=0, f=sys.stderr, otherf=sys.stdout):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' last entries. By default it will flush stdout first.
    It can be used everywhere and do intentionally not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        # flush the other stream first so the trace lands after any
        # pending output
        otherf.flush()
    f.write('%s at:\n' % msg)
    # drop the last 'skip' frames plus this function's own frame
    entries = []
    for filename, lineno, funcname, _text in \
            traceback.extract_stack()[:-skip - 1]:
        entries.append(('%s:%s' % (filename, lineno), funcname))
    if entries:
        # pad file:line so the function names line up in a column
        width = max(len(site) for site, _funcname in entries)
        for site, funcname in entries:
            f.write(' %-*s in %s\n' % (width, site, funcname))
    f.flush()
2314 2335
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> reference count (see addpath/delpath for the
        # exact counting scheme; membership is the supported query)
        self._dirs = {}
        addpath = self.addpath
        # dirstate-like mappings (having iteritems) can exclude entries
        # whose first state character equals 'skip'; any other iterable
        # of paths is added wholesale
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        # finddirs yields ancestors deepest-first. Invariant: a directory
        # is present iff all of its ancestors are. So on hitting the first
        # ancestor already present we bump just that entry and stop — its
        # own ancestors were accounted for when it was first inserted.
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        # mirror image of addpath: decrement the deepest ancestor whose
        # count stays positive and stop; entries dropping to zero are
        # deleted and the walk continues up to their ancestors
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return self._dirs.iterkeys()

    def __contains__(self, d):
        return d in self._dirs
2350 2371
# prefer the C implementation of 'dirs' from the parsers extension
# module when it is available (it replaces the pure-Python class above)
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
2353 2374
def finddirs(path):
    """Yield each ancestor directory of a '/'-separated path, deepest
    first; a path with no '/' yields nothing."""
    end = path.rfind('/')
    while end >= 0:
        yield path[:end]
        end = path.rfind('/', 0, end)
2359 2380
2360 2381 # compression utility
2361 2382
class nocompress(object):
    """Pass-through compressor used when no compression is requested."""

    def compress(self, x):
        # identity: return the chunk unchanged
        return x

    def flush(self):
        # nothing is buffered, so there is never trailing output
        return ""
2367 2388
# bundle compression-type identifier -> compressor factory; calling a
# factory returns a fresh object exposing compress()/flush()
compressors = {
    None: nocompress,
    # lambda to prevent early import
    'BZ': lambda: bz2.BZ2Compressor(),
    'GZ': lambda: zlib.compressobj(),
    }
# also support the old form by courtesies
compressors['UN'] = compressors[None]
2376 2397
def _makedecompressor(decompcls):
    """Return a function that wraps a file-like object in a chunkbuffer
    yielding data decompressed by a fresh instance of decompcls."""
    def generator(f):
        decompressor = decompcls()
        for chunk in filechunkiter(f):
            yield decompressor.decompress(chunk)

    def func(fh):
        return chunkbuffer(generator(fh))

    return func
2385 2406
2386 2407 def _bz2():
2387 2408 d = bz2.BZ2Decompressor()
2388 2409 # Bzip2 stream start with BZ, but we stripped it.
2389 2410 # we put it back for good measure.
2390 2411 d.decompress('BZ')
2391 2412 return d
2392 2413
# bundle compression-type identifier -> function taking a file-like
# object and returning decompressed output (for None, the file handle is
# returned unchanged). '_truncatedBZ' handles bz2 streams whose leading
# 'BZ' magic was stripped.
decompressors = {None: lambda fh: fh,
                 '_truncatedBZ': _makedecompressor(_bz2),
                 'BZ': _makedecompressor(lambda: bz2.BZ2Decompressor()),
                 'GZ': _makedecompressor(lambda: zlib.decompressobj()),
                 }
# also support the old form by courtesies
decompressors['UN'] = decompressors[None]
2400 2421
# convenient shortcut
# short alias for interactive/debugging use of debugstacktrace
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now