##// END OF EJS Templates
util: make readfile() operate in binary mode...
Patrick Mezard -
r14250:34ec9b31 default
parent child Browse files
Show More
@@ -1,1590 +1,1590 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from i18n import _
17 17 import error, osutil, encoding
18 18 import errno, re, shutil, sys, tempfile, traceback
19 19 import os, time, calendar, textwrap, unicodedata, signal
20 20 import imp, socket, urllib
21 21
22 22 # Python compatibility
23 23
def sha1(s):
    """Return a SHA-1 hash object initialised with s.

    The first call is routed through _fastsha1(), which replaces both
    module-level names with the real constructor.
    """
    return _fastsha1(s)

def _fastsha1(s):
    """Import the SHA-1 constructor, install it globally, and hash s."""
    global _fastsha1, sha1
    # hashlib is used from Python 2.5 on; older interpreters fall back to
    # the sha module.
    if sys.version_info < (2, 5):
        from sha import sha as hashfunc
    else:
        from hashlib import sha1 as hashfunc
    # Overwrite ourselves so subsequent calls go directly to the imported
    # function.
    sha1 = hashfunc
    _fastsha1 = hashfunc
    return hashfunc(s)
38 38
39 39 import __builtin__
40 40
if sys.version_info[0] >= 3:
    def fakebuffer(sliceable, offset=0):
        """Return a memoryview of sliceable starting at offset."""
        return memoryview(sliceable)[offset:]
else:
    def fakebuffer(sliceable, offset=0):
        """Return the slice of sliceable starting at offset."""
        return sliceable[offset:]
# When the interpreter has no builtin named buffer, publish fakebuffer
# under that name in the builtin namespace.
if not hasattr(__builtin__, 'buffer'):
    __builtin__.buffer = fakebuffer
51 51
52 52 import subprocess
53 53 closefds = os.name == 'posix'
54 54
def popen2(cmd, env=None, newlines=False):
    """Run cmd through the shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    options = dict(shell=True, bufsize=-1, close_fds=closefds,
                   stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                   universal_newlines=newlines, env=env)
    proc = subprocess.Popen(cmd, **options)
    return proc.stdin, proc.stdout
65 65
def popen3(cmd, env=None, newlines=False):
    """Run cmd through the shell and return its (stdin, stdout, stderr)
    pipes."""
    pipe = subprocess.PIPE
    options = dict(shell=True, bufsize=-1, close_fds=closefds,
                   stdin=pipe, stdout=pipe, stderr=pipe,
                   universal_newlines=newlines, env=env)
    proc = subprocess.Popen(cmd, **options)
    return proc.stdin, proc.stdout, proc.stderr
74 74
def version():
    """Return the version string from the __version__ module, or
    'unknown' when that module cannot be imported."""
    try:
        import __version__
    except ImportError:
        return 'unknown'
    return __version__.version
82 82
# Formats tried, in this order, by parsedate() when the caller does not
# supply its own list.
defaultdateformats = (
    # year-month-day, with and without a time
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    # month and day, optionally with a year
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    # weekday and month names
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    # time of day only
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

# The default formats plus coarser year/month-only forms.
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
117 117
def cachefunc(func):
    '''cache the result of function calls

    Returns a wrapper that calls func at most once per distinct
    positional argument list. Results are kept forever.'''
    # XXX doesn't handle keywords args
    cache = {}
    # func_code only exists on Python 2 function objects; fall back to
    # the __code__ spelling when it is missing.
    code = getattr(func, 'func_code', None)
    if code is None:
        code = func.__code__
    if code.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg in cache:
                return cache[arg]
            result = cache[arg] = func(arg)
            return result
    else:
        def f(*args):
            if args in cache:
                return cache[args]
            result = cache[args] = func(*args)
            return result

    return f
136 136
def lrucachefunc(func):
    '''cache most recent results of function calls

    Once the cache holds more than 20 entries, the least recently used
    one is dropped before a new result is stored.'''
    cache = {}
    order = []  # keys from least to most recently used
    # func_code only exists on Python 2 function objects; fall back to
    # the __code__ spelling when it is missing.
    code = getattr(func, 'func_code', None)
    if code is None:
        code = func.__code__
    if code.co_argcount == 1:
        def f(arg):
            if arg in cache:
                order.remove(arg)
            else:
                if len(cache) > 20:
                    del cache[order.pop(0)]
                cache[arg] = func(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args in cache:
                order.remove(args)
            else:
                if len(cache) > 20:
                    del cache[order.pop(0)]
                cache[args] = func(*args)
            order.append(args)
            return cache[args]

    return f
163 163
class propertycache(object):
    """Descriptor that computes a value through func on first access and
    stores it on the instance under the function's name."""
    def __init__(self, func):
        self.name = func.__name__
        self.func = func
    def __get__(self, obj, type=None):
        value = self.func(obj)
        # store on the instance under the same name as the descriptor
        setattr(obj, self.name, value)
        return value
172 172
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # only stdout is piped; the stderr slot of the result is not used
    return proc.communicate(s)[0]
179 179
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Returns the content of the output file. Raises Abort if the command
    exits with a non-zero status. Both temporary files are removed
    afterwards.'''
    inname = outname = None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        infile = os.fdopen(infd, 'wb')
        infile.write(s)
        infile.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname).replace('OUTFILE', outname)
        code = os.system(cmd)
        # on OpenVMS a status with the low bit set is treated as success
        if sys.platform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        outfile = open(outname, 'rb')
        data = outfile.read()
        outfile.close()
        return data
    finally:
        # best-effort removal of whichever temporary files were created
        for tmpname in (inname, outname):
            try:
                if tmpname:
                    os.unlink(tmpname)
            except OSError:
                pass
216 216
# maps a command prefix to the function implementing that filter style
filtertable = {
    'pipe:': pipefilter,
    'tempfile:': tempfilter,
}

def filter(s, cmd):
    """filter a string through a command that transforms its input to
    its output

    A 'tempfile:' or 'pipe:' prefix on cmd selects the filter style;
    without a known prefix the command is run as a pipe."""
    for prefix, handler in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        return handler(s, cmd[len(prefix):].lstrip())
    return pipefilter(s, cmd)
228 228
def binary(s):
    """return true if a string is binary data

    A string counts as binary when it contains a NUL character; empty
    or None input is never binary."""
    if not s:
        return False
    return '\0' in s
232 232
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # floor of the base-2 logarithm; 0 for x in (0, 1)
        exponent = 0
        while x > 1:
            x >>= 1
            exponent += 1
        return exponent

    pending = []
    size = 0
    for piece in source:
        pending.append(piece)
        size += len(piece)
        if size < min:
            continue
        if min < max:
            # double the threshold, jump ahead to the power of two at or
            # below what was just collected if that is larger, then cap
            min = min << 1
            floor = 1 << log2(size)
            if floor > min:
                min = floor
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        size = 0
    if pending:
        # whatever is left once the source runs dry
        yield ''.join(pending)
263 263
264 264 Abort = error.Abort
265 265
def always(fn):
    '''predicate that accepts any argument'''
    return True
268 268
def never(fn):
    '''predicate that rejects any argument'''
    return False
271 271
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        # different drives: no relative path exists, answer from root
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    src = splitpath(n1)
    dst = n2.split('/')
    # skip the leading components both paths share
    common = 0
    while common < len(src) and common < len(dst) and \
            src[common] == dst[common]:
        common += 1
    ups = ['..'] * (len(src) - common)
    return os.sep.join(ups + dst[common:]) or '.'
297 297
298 298 _hgexecutable = None
299 299
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if hasattr(sys, "frozen"): # new py2exe
        return True
    if hasattr(sys, "importers"): # old py2exe
        return True
    return imp.is_frozen("__main__") # tools/freeze
309 309
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is not None:
        return _hgexecutable
    hg = os.environ.get('HG')
    if hg:
        path = hg
    elif mainfrozen():
        path = sys.executable
    else:
        path = find_exe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(path)
    return _hgexecutable
325 325
def _sethgexecutable(path):
    """set location of the 'hg' executable

    The value is stored in the module-level _hgexecutable."""
    global _hgexecutable
    _hgexecutable = path
330 330
def system(cmd, environ={}, cwd=None, onerr=None, errprefix=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if command fails and onerr is None, return status. if ui object,
    print error message and return status, else raise onerr object as
    exception.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)

    try:
        sys.stdout.flush()
    except Exception:
        pass
    shellcmd = quotecommand(cmd)
    # start from the current environment, overlay the caller's values
    env = dict(os.environ)
    for key, value in environ.iteritems():
        env[key] = py2shell(value)
    env['HG'] = hgexecutable()
    if out is not None:
        proc = subprocess.Popen(shellcmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in proc.stdout:
            out.write(line)
        proc.wait()
        rc = proc.returncode
    else:
        rc = subprocess.call(shellcmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    # on OpenVMS a status with the low bit set is treated as success
    if sys.platform == 'OpenVMS' and rc & 1:
        rc = 0
    if not rc or not onerr:
        return rc
    # the message names the program from the command as originally given
    errmsg = '%s %s' % (os.path.basename(cmd.split(None, 1)[0]),
                        explainexit(rc)[0])
    if errprefix:
        errmsg = '%s: %s' % (errprefix, errmsg)
    try:
        onerr.warn(errmsg + '\n')
    except AttributeError:
        # no warn() method: treat onerr as an exception to raise
        raise onerr(errmsg)
    return rc
380 380
def checksignature(func):
    '''wrap a function with code to check for calling errors

    A TypeError whose traceback is a single frame deep is re-raised as
    error.SignatureError; any other TypeError propagates unchanged.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            tb = sys.exc_info()[2]
            if len(traceback.extract_tb(tb)) != 1:
                raise
            raise error.SignatureError

    return check
392 392
def makedir(path, notindexed):
    '''create directory path; notindexed is accepted but not used here'''
    os.mkdir(path)
395 395
def unlinkpath(f):
    """unlink and remove the directory if it is empty"""
    os.unlink(f)
    parent = os.path.dirname(f)
    # try removing directories that might now be empty; removedirs stops
    # with OSError at the first one it cannot remove
    try:
        os.removedirs(parent)
    except OSError:
        pass
404 404
def copyfile(src, dest):
    """copy a file, preserving its permission bits

    A symlink is recreated at dest (replacing anything already there)
    instead of having its target copied. shutil.Error is reported as
    Abort."""
    if not os.path.islink(src):
        try:
            shutil.copyfile(src, dest)
            shutil.copymode(src, dest)
        except shutil.Error:
            raise Abort(str(sys.exc_info()[1]))
        return
    try:
        os.unlink(dest)
    except OSError:
        pass
    os.symlink(os.readlink(src), dest)
419 419
def copyfiles(src, dst, hardlink=None):
    """Copy a directory tree using hardlinks if possible

    Returns (hardlink, num): whether hardlinking was still in use at the
    end, and the number of files processed."""

    if hardlink is None:
        # only attempt hardlinks when both ends are on the same device
        srcdev = os.stat(src).st_dev
        dstdev = os.stat(os.path.dirname(dst)).st_dev
        hardlink = (srcdev == dstdev)

    if os.path.isdir(src):
        count = 0
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            hardlink, copied = copyfiles(os.path.join(src, name),
                                         os.path.join(dst, name), hardlink)
            count += copied
        return hardlink, count

    if hardlink:
        try:
            oslink(src, dst)
        except (IOError, OSError):
            # linking failed: stop trying and copy from now on
            hardlink = False
    if not hardlink:
        shutil.copy(src, dst)
    return hardlink, 1
447 447
_windows_reserved_filenames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_windows_reserved_chars = ':*?"<>|'
def checkwinfilename(path):
    '''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    Each component is checked for reserved characters, control
    characters, reserved device names (compared case-insensitively on
    the part before the first dot) and a trailing dot or space. The
    components "." and ".." are accepted.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    '''
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in n:
            if c in _windows_reserved_chars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % c
        base = n.split('.')[0]
        if base and base.lower() in _windows_reserved_filenames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1]
        # "." and ".." are directory references, not names ending in a dot
        if t in '. ' and n not in ('.', '..'):
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t
489 489
def lookupreg(key, name=None, scope=None):
    '''default registry lookup: always returns None'''
    return None
492 492
def hidewindow():
    """Hide current shell window.

    Used to hide the window opened when starting asynchronous
    child process under Windows, unneeded on other systems.
    This default implementation does nothing.
    """
500 500
501 501 if os.name == 'nt':
502 502 checkosfilename = checkwinfilename
503 503 from windows import *
504 504 else:
505 505 from posix import *
506 506
def makelock(info, pathname):
    '''create a lock at pathname holding info

    A symlink pointing at info is tried first. If that fails for any
    reason other than the path already existing, or os has no symlink,
    a regular file is created exclusively and info is written to it.'''
    try:
        return os.symlink(info, pathname)
    except OSError:
        # an existing path is a real failure; anything else falls
        # through to the plain-file lock below
        if sys.exc_info()[1].errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    fd = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(fd, info)
    os.close(fd)
519 519
def readlock(pathname):
    '''return the info stored in the lock at pathname

    The symlink target is returned when pathname can be read as a
    symlink; on EINVAL/ENOSYS, or when os has no readlink, the content
    of the file is returned instead.'''
    try:
        return os.readlink(pathname)
    except OSError:
        if sys.exc_info()[1].errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    lockfile = posixfile(pathname)
    info = lockfile.read()
    lockfile.close()
    return info
532 532
def fstat(fp):
    '''stat file object that may not have fileno method.

    Falls back to os.stat() on fp.name when an AttributeError occurs.'''
    try:
        fd = fp.fileno()
        return os.fstat(fd)
    except AttributeError:
        return os.stat(fp.name)
539 539
540 540 # File system features
541 541
def checkcase(path):
    """
    Check whether the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.

    The final component is case-swapped and stat'ed again: the
    filesystem is reported as case-insensitive (False) only when both
    spellings yield the same stat result.
    """
    origstat = os.stat(path)
    parent, leaf = os.path.split(path)
    folded = os.path.join(parent, leaf.upper())
    if folded == path:
        folded = os.path.join(parent, leaf.lower())
    try:
        foldedstat = os.stat(folded)
    except OSError:
        return True
    return foldedstat != origstat
561 561
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name is either relative to root, or it is an absolute path starting
    with root. Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    Returns None when the path does not exist. Directory listings are
    cached in _fspathcache.
    '''
    # If name is absolute, make it relative
    if name.lower().startswith(root.lower()):
        l = len(root)
        # slice instead of index: name may be exactly as long as root
        if name[l:l + 1] in (os.sep, os.altsep):
            l = l + 1
        name = name[l:]

    if not os.path.lexists(os.path.join(root, name)):
        return None

    seps = os.sep
    if os.altsep:
        seps = seps + os.altsep
    # Protect backslashes. This gets silly very quickly.
    # str.replace returns a new string, so the result must be kept or
    # the backslash would escape the next character in the class below.
    seps = seps.replace('\\', '\\\\')
    pattern = re.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normcase(os.path.normpath(root))
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = os.listdir(dir)
        contents = _fspathcache[dir]

        lpart = part.lower()
        lenp = len(part)
        for n in contents:
            if lenp == len(n) and n.lower() == lpart:
                result.append(n)
                break
        else:
            # Cannot happen, as the file exists!
            result.append(part)
        dir = os.path.join(dir, lpart)

    return ''.join(result)
609 609
def checknlink(testfile):
    '''check whether hardlink count reporting works properly

    Two scratch files named after testfile are created, hardlinked and
    removed again. Returns False when they cannot be created or linked.'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = testfile + ".hgtmp1"
    if os.path.lexists(f1):
        return False
    try:
        posixfile(f1, 'w').close()
    except IOError:
        return False

    f2 = testfile + ".hgtmp2"
    fd = None
    try:
        try:
            oslink(f1, f2)
        except OSError:
            return False

        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(f2)
        return nlinks(f2) > 1
    finally:
        if fd is not None:
            fd.close()
        for f in (f1, f2):
            try:
                os.unlink(f)
            except OSError:
                pass
645 645
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(os.sep):
        return True
    # falsy (possibly None) when the platform defines no altsep
    return os.altsep and path.endswith(os.altsep)
649 649
def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need.'''
    separator = os.sep
    return path.split(separator)
657 657
def gui():
    '''Are we running in a GUI?'''
    if sys.platform != 'darwin':
        # may return the DISPLAY string (or None) rather than a bool
        return os.name == "nt" or os.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in os.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
672 672
673 673 def mktempcopy(name, emptyok=False, createmode=None):
674 674 """Create a temporary file with the same contents from name
675 675
676 676 The permission bits are copied from the original file.
677 677
678 678 If the temporary file is going to be truncated immediately, you
679 679 can use emptyok=True as an optimization.
680 680
681 681 Returns the name of the temporary file.
682 682 """
683 683 d, fn = os.path.split(name)
684 684 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
685 685 os.close(fd)
686 686 # Temporary files are created with mode 0600, which is usually not
687 687 # what we want. If the original file already exists, just copy
688 688 # its mode. Otherwise, manually obey umask.
689 689 try:
690 690 st_mode = os.lstat(name).st_mode & 0777
691 691 except OSError, inst:
692 692 if inst.errno != errno.ENOENT:
693 693 raise
694 694 st_mode = createmode
695 695 if st_mode is None:
696 696 st_mode = ~umask
697 697 st_mode &= 0666
698 698 os.chmod(temp, st_mode)
699 699 if emptyok:
700 700 return temp
701 701 try:
702 702 try:
703 703 ifp = posixfile(name, "rb")
704 704 except IOError, inst:
705 705 if inst.errno == errno.ENOENT:
706 706 return temp
707 707 if not getattr(inst, 'filename', None):
708 708 inst.filename = name
709 709 raise
710 710 ofp = posixfile(temp, "wb")
711 711 for chunk in filechunkiter(ifp):
712 712 ofp.write(chunk)
713 713 ifp.close()
714 714 ofp.close()
715 715 except:
716 716 try: os.unlink(temp)
717 717 except: pass
718 718 raise
719 719 return temp
720 720
class atomictempfile(object):
    '''writeable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    rename() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes visible.

    Unlike other file-like objects, close() discards your writes by
    simply deleting the temporary file.
    '''
    def __init__(self, name, mode='w+b', createmode=None):
        self.__name = name      # permanent name
        # a mode containing 'w' needs no copy of the old content
        truncating = 'w' in mode
        self._tempname = mktempcopy(name, emptyok=truncating,
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)

        # delegated methods
        self.fileno = self._fp.fileno
        self.write = self._fp.write

    def rename(self):
        '''close the temporary file and move it over the permanent name'''
        if self._fp.closed:
            return
        self._fp.close()
        rename(self._tempname, localpath(self.__name))

    def close(self):
        '''delete the temporary file, discarding everything written'''
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if hasattr(self, '_fp'): # constructor actually did something
            self.close()
757 757
def makedirs(name, mode=None):
    """recursive directory creation with parent mode inheritance

    An already existing name is not an error. When mode is given it is
    applied with chmod to every directory that gets created."""
    parent = os.path.abspath(os.path.dirname(name))
    try:
        os.mkdir(name)
        if mode is not None:
            os.chmod(name, mode)
        return
    except OSError:
        err = sys.exc_info()[1]
        if err.errno == errno.EEXIST:
            return
        # only a missing parent is recoverable, and only while there is
        # still a parent left to create
        if err.errno != errno.ENOENT or not name or parent == name:
            raise
    makedirs(parent, mode)
    makedirs(name, mode)
773 773
def readfile(path):
    '''return the raw content of the file at path

    The file is opened in binary mode, so the data comes back without
    any newline translation.'''
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
780 780
def writefile(path, text):
    '''replace the content of the file at path with text (binary mode)'''
    out = open(path, 'wb')
    try:
        out.write(text)
    finally:
        out.close()
787 787
def appendfile(path, text):
    '''add text to the end of the file at path (binary mode)'''
    out = open(path, 'ab')
    try:
        out.write(text)
    finally:
        out.close()
794 794
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.
        Chunks longer than 2**20 are cut into pieces of 2**18."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) <= 2**20:
                    yield chunk
                    continue
                for start in range(0, len(chunk), 2 ** 18):
                    yield chunk[start:start + 2 ** 18]
        self.iter = splitbig(in_iter)
        self._queue = []

    def read(self, l):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry."""
        missing = l
        data = ''
        queue = self._queue
        while missing > 0:
            if not queue:
                # refill the queue with about 2**18 bytes worth of chunks
                budget = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    budget -= len(chunk)
                    if budget <= 0:
                        break
                if not queue:
                    # the iterator ran dry
                    break

            chunk = queue.pop(0)
            missing -= len(chunk)
            if missing >= 0:
                data += chunk
            else:
                # missing is negative here: it marks, counted from the
                # end, the tail that goes back on the queue
                queue.insert(0, chunk[missing:])
                data += chunk[:missing]

        return data
842 842
def filechunkiter(f, size=65536, limit=None):
    """Create a generator that produces the data in the file size
    (default 65536) bytes at a time, up to optional limit (default is
    to read all data).  Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size
        if limit is not None:
            nbytes = min(limit, size)
        # a zero-byte request is never issued; it ends the iteration
        s = nbytes and f.read(nbytes)
        if not s:
            return
        if limit:
            limit -= len(s)
        yield s
863 863
def makedate():
    '''return the current time as a (unixtime, offset) tuple

    offset is time.altzone while daylight saving time is in effect and
    time.timezone otherwise. Raises Abort if the timestamp is
    negative.'''
    now = time.localtime()
    # field 8 of the time tuple is the DST flag
    if now[8] == 1 and time.daylight:
        offset = time.altzone
    else:
        offset = time.timezone
    stamp = time.mktime(now)
    if stamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % stamp, hint=hint)
    return stamp, offset
875 875
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC. In format, %1 is replaced by the
    signed hours and %2 by the minutes of the offset; the current time
    is used when date is not given."""
    when, offset = date or makedate()
    if when < 0:
        # time.gmtime(lt) fails on Windows for lt < -43200
        when, offset = 0, 0
    if "%1" in format or "%2" in format:
        # offset is seconds to add to local time to reach UTC, so a
        # positive offset prints with a minus sign
        if offset > 0:
            sign = "-"
        else:
            sign = "+"
        hours, minutes = divmod(abs(offset) // 60, 60)
        format = format.replace("%1", "%c%02d" % (sign, hours))
        format = format.replace("%2", "%02d" % minutes)
    return time.strftime(format, time.gmtime(float(when) - offset))
892 892
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into an ISO 8601 date (YYYY-MM-DD)."""
    isoformat = '%Y-%m-%d'
    return datestr(date, format=isoformat)
896 896
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised.

    defaults maps each of the keys "S", "M", "HI", "d", "mb" and "yY" to
    a (biased, current) pair of strings; it supplies the elements that
    format does not mention."""
    if defaults is None:
        defaults = {}

    def timezone(string):
        fields = string.split()
        if not fields:
            # nothing to inspect; strptime below reports the bad input
            return None
        tz = fields[-1]
        if tz[0] in "+-" and len(tz) == 5 and tz[1:].isdigit():
            sign = (tz[0] == "+") and 1 or -1
            hours = int(tz[1:3])
            minutes = int(tz[3:5])
            return -sign * (hours * 60 + minutes) * 60
        if tz == "GMT" or tz == "UTC":
            return 0
        return None

    # NOTE: unixtime = localunixtime + offset
    offset, date = timezone(string), string
    if offset is not None:
        # drop the trailing time zone field that was just consumed
        date = " ".join(string.split()[:-1])

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        found = [True for p in part if ("%"+p) in format]
        if not found:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]
        else:
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
937 937
def parsedate(date, formats=None, bias={}):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    An empty date yields (0, 0). bias maps the element keys "d", "mb",
    "yY", "HI", "M" and "S" to the string used for an element that the
    date leaves out. Abort is raised for unparseable or out-of-range
    values.
    """
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()
    try:
        when, offset = map(int, date.split(' '))
    except ValueError:
        # not a "unixtime offset" pair: build the defaults used for
        # elements a format does not mention
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # first value: rounds the specific end of unknowns
            biased = bias.get(part)
            if biased is None:
                if part[0] in "HMS":
                    biased = "00"
                else:
                    biased = "0"
            # second value: matches the generic end to today's date
            current = datestr(now, "%" + part[0])
            defaults[part] = (biased, current)

        parsed = None
        for format in formats:
            try:
                parsed = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                continue
            break
        if parsed is None:
            raise Abort(_('invalid date: %r') % date)
        when, offset = parsed
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if abs(when) > 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if when < 0:
        raise Abort(_('negative date value: %d') % when)
    if offset < -50400 or offset > 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
991 991
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' within the given number of days before now

    '{date} to {date}' between the two given dates, inclusive

    The returned function takes a unixtime value and returns a boolean.
    Abort is raised when the specifier is empty or cannot be parsed.

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest instant covered by the (possibly partial) date
        d = dict(mb="1", d="1")
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest instant covered by the (possibly partial) date; the
        # last day of the month is unknown, so try the longest first
        d = dict(mb="12", HI="23", M="59", S="59")
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                # parsedate() reports an unparsable date with Abort;
                # anything else (e.g. KeyboardInterrupt) must propagate
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # split only once: a spec with several " to " separators is then
        # rejected by parsedate() with Abort instead of failing to unpack
        a, b = date.split(" to ", 1)
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
1067 1067
def shortuser(user):
    """Return a short representation of a user name or email address.

    Everything from the first '@' on is dropped, then anything up to and
    including the first '<', then everything from the first space on and
    finally everything from the first '.' on.
    """
    # drop the domain part
    user = user.split('@', 1)[0]
    # drop a leading real name ("Real Name <login")
    if '<' in user:
        user = user.split('<', 1)[1]
    # keep only the first word, then only what precedes the first dot
    user = user.split(' ', 1)[0]
    return user.split('.', 1)[0]
1083 1083
def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'. A missing
    '<' means "from the start", a missing '>' means "to the end".
    '''
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
1090 1090
1091 1091 def _ellipsis(text, maxlength):
1092 1092 if len(text) <= maxlength:
1093 1093 return text, False
1094 1094 else:
1095 1095 return "%s..." % (text[:maxlength - 3]), True
1096 1096
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) characters.

    The text is returned unchanged when it already fits.
    """
    try:
        # count and cut in unicode so that a multi-byte sequence is
        # never split in the middle
        shortened, truncated = _ellipsis(text.decode(encoding.encoding),
                                         maxlength)
        if truncated:
            return shortened.encode(encoding.encoding)
        return text
    except (UnicodeDecodeError, UnicodeEncodeError):
        # cannot round-trip through the local encoding: cut the raw text
        return _ellipsis(text, maxlength)[0]
1108 1108
def bytecount(nbytes):
    '''return byte count formatted as readable string, with units'''

    # (multiplier, divisor, format) rows ordered from largest to smallest
    # threshold; the first row with nbytes >= multiplier * divisor wins,
    # which gives larger values fewer decimals
    units = (
        (100, 1 << 30, _('%.0f GB')),
        (10, 1 << 30, _('%.1f GB')),
        (1, 1 << 30, _('%.2f GB')),
        (100, 1 << 20, _('%.0f MB')),
        (10, 1 << 20, _('%.1f MB')),
        (1, 1 << 20, _('%.2f MB')),
        (100, 1 << 10, _('%.0f KB')),
        (10, 1 << 10, _('%.1f KB')),
        (1, 1 << 10, _('%.2f KB')),
        (1, 1, _('%.0f bytes')),
        )

    for factor, unit, fmt in units:
        threshold = unit * factor
        if nbytes < threshold:
            continue
        return fmt % (nbytes / float(unit))
    # below every threshold (less than one byte): use the smallest unit
    return units[-1][2] % nbytes
1129 1129
def uirepr(s):
    # repr() doubles every backslash, which makes Windows paths hard to
    # read; collapse each doubled backslash back into a single one
    quoted = repr(s)
    return quoted.replace('\\\\', '\\')
1133 1133
# The wrapper class is only built on the first call; that call then
# rebinds the module-level name MBTextWrapper to the class itself.
def MBTextWrapper(**kwargs):
    global MBTextWrapper

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for double-width characters.

        Some Asian characters use two terminal columns instead of one.
        A good example of this behavior can be seen with u'\u65e5\u672c',
        the two Japanese characters for "Japan":
        len() returns 2, but when printed to a terminal, they eat 4 columns.

        (Note that this has nothing to do whatsoever with unicode
        representation, or encoding of the underlying string)
        """
        def __init__(self, **kwargs):
            textwrap.TextWrapper.__init__(self, **kwargs)

        def _cutdown(self, str, space_left):
            # Split str into (head, tail) where head is the longest prefix
            # that fits into space_left terminal columns.
            enc = encoding.encoding
            ucstr = unicode(str, enc)
            width_of = unicodedata.east_asian_width
            used = 0
            for pos, char in enumerate(ucstr):
                # wide, fullwidth and ambiguous characters take two columns
                if width_of(char) in 'WFA':
                    used += 2
                else:
                    used += 1
                if used > space_left:
                    return ucstr[:pos].encode(enc), ucstr[pos:].encode(enc)
            return str, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # always leave room for at least one column
            room = max(width - cur_len, 1)

            if self.break_long_words:
                head, tail = self._cutdown(reversed_chunks[-1], room)
                cur_line.append(head)
                reversed_chunks[-1] = tail
            elif not cur_line:
                # the word may not be broken: give it a line of its own
                cur_line.append(reversed_chunks.pop())

    MBTextWrapper = tw
    return tw(**kwargs)
1176 1176
def wrap(line, width, initindent='', hangindent=''):
    """Fill line to width columns using MBTextWrapper.

    initindent prefixes the first output line and hangindent every
    following one.
    """
    widest = max(len(initindent), len(hangindent))
    if widest >= width:
        # adjust for weird terminal size
        width = max(78, widest + 1)
    return MBTextWrapper(width=width,
                         initial_indent=initindent,
                         subsequent_indent=hangindent).fill(line)
1186 1186
def iterlines(iterator):
    """Yield every line of every chunk produced by iterator.

    Each chunk is split with splitlines(), so line endings are dropped.
    """
    for block in iterator:
        pieces = block.splitlines()
        for piece in pieces:
            yield piece
1191 1191
def expandpath(path):
    """Expand environment variables, then a leading '~', in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
1194 1194
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.

    The result is an argument list: [sys.executable] when mainfrozen()
    is true, otherwise whatever gethgcmd() returns.
    """
    if not mainfrozen():
        return gethgcmd()
    return [sys.executable]
1205 1205
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.

    While waiting, a SIGCHLD handler is installed where the platform
    has that signal; the previous handler is restored before returning.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair; keep only the pid so
        # that the "pid in terminated" test below can actually match
        terminated.add(os.wait()[0])
    prevhandler = None
    if hasattr(signal, 'SIGCHLD'):
        prevhandler = signal.signal(signal.SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn() is checked a second time because the child may
            # have satisfied the condition just before it went away
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
1239 1239
# any() and all() are not builtins on every supported Python version:
# re-export the builtins when they exist, otherwise define equivalents.
try:
    any, all = any, all
except NameError:
    def any(iterable):
        """Return True if at least one element of iterable is true."""
        for element in iterable:
            if element:
                return True
        return False

    def all(iterable):
        """Return True if no element of iterable is false."""
        for element in iterable:
            if not element:
                return False
        return True
1254 1254
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    mapping maps each name that may follow the prefix to its replacement
    text. The keys are inserted into a regular expression as they are.
    The mapping itself is never modified.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    alternatives = list(mapping.keys())
    prefix_char = None
    if escape_prefix:
        alternatives.append(prefix)
        # the literal character a doubled prefix stands for
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
    if not alternatives:
        # nothing can be substituted; an empty alternation would match a
        # bare prefix and then fail on the lookup of the empty name
        return s

    def replace(match):
        # the match starts with the (one character) prefix
        name = match.group()[1:]
        if escape_prefix and name == prefix_char:
            # handled here rather than through a key added to mapping,
            # so that the caller's mapping is left untouched
            return fn(prefix_char)
        return fn(mapping[name])

    r = re.compile(r'%s(%s)' % (prefix, '|'.join(alternatives)))
    return r.sub(replace, s)
1279 1279
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, util.Abort is raised.
    """
    try:
        number = int(port)
    except ValueError:
        # not numeric: treat it as a service name below
        pass
    else:
        return number

    try:
        return socket.getservbyname(port)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
1296 1296
# lower-case spellings recognized by parsebool()
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison ignores case. If s is not a valid boolean, returns None.
    """
    key = s.lower()
    return _booleans.get(key)
1307 1307
1308 1308 _hexdig = '0123456789ABCDEFabcdef'
1309 1309 _hextochr = dict((a + b, chr(int(a + b, 16)))
1310 1310 for a in _hexdig for b in _hexdig)
1311 1311
1312 1312 def _urlunquote(s):
1313 1313 """unquote('abc%20def') -> 'abc def'."""
1314 1314 res = s.split('%')
1315 1315 # fastpath
1316 1316 if len(res) == 1:
1317 1317 return s
1318 1318 s = res[0]
1319 1319 for item in res[1:]:
1320 1320 try:
1321 1321 s += _hextochr[item[:2]] + item[2:]
1322 1322 except KeyError:
1323 1323 s += '%' + item
1324 1324 except UnicodeDecodeError:
1325 1325 s += unichr(int(item[:2], 16)) + item[2:]
1326 1326 return s
1327 1327
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(r'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # characters left unquoted in user and passwd by __str__
    _safechars = "!~*'()+"
    # characters left unquoted in path, query and fragment by __str__
    _safepchars = "/!~*'()+"
    # matches a leading "<scheme>:"
    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Parse path into its components.

        Paths starting with a drive letter, 'bundle:' paths and paths
        without a scheme are kept as local paths. Everything else is
        split into the attributes listed in the class docstring, which
        are then percent-decoded. Abort is raised for a file:// URL
        naming a host other than localhost.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        # special case for Windows drive letters
        if hasdriveletter(path):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parsefragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                self.path = path
                return

            # path is None here when only a fragment followed the scheme
            if parsequery and path and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # no authority: the slash belongs to the path
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                # kept unsplit for localpath(), which looks for a drive
                # letter in it
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # percent-decode every component that was found
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, _urlunquote(v))

    def __repr__(self):
        """Return '<url ...>' listing every component that is not None."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        """
        if self._localpath:
            # local paths are emitted without any quoting
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed (IPv6) hosts are emitted without quoting
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url string without credentials, authentication info).

        The authentication info is None when there is no user. Otherwise
        it is the tuple (None, (full url string, host), user, passwd),
        with an empty string standing in for a missing password.
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily drop the credentials to render the bare URL
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        return (s, (None, (str(self), self.host),
                    self.user, self.passwd or ''))

    def localpath(self):
        """Return the local filesystem path this URL refers to.

        For any scheme other than 'file' and 'bundle' the string the
        object was built from is returned unchanged.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif self.host is not None and self.path:
                path = '/' + path
            # We also need to handle the case of file:///C:/, which
            # should return C:/, not /C:/. The drive letter has to be
            # looked for behind the leading slash; a path that already
            # starts with a drive letter is returned as it is.
            elif path.startswith('/') and hasdriveletter(path[1:]):
                # Strip leading slash from paths with drive names
                return path[1:]
            return path
        return self._origpath
1569 1569
def hasscheme(path):
    """Return True if parsing path as a url yields a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
1572 1572
def hasdriveletter(path):
    """Return True if path starts with a letter followed by a colon."""
    drive, colon = path[0:1], path[1:2]
    return colon == ':' and drive.isalpha()
1575 1575
def localpath(path):
    """Return url.localpath() for path.

    Query and fragment parsing are both disabled, so '?' and '#' stay
    part of the path.
    """
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
1578 1578
def hidepassword(u):
    '''hide user credential in a url string

    A non-empty password is replaced by '***'; the rest of the url is
    rebuilt from its parsed components.
    '''
    parsed = url(u)
    if not parsed.passwd:
        return str(parsed)
    parsed.passwd = '***'
    return str(parsed)
1585 1585
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    # clear both credentials before rebuilding the string
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
General Comments 0
You need to be logged in to leave comments. Login now