mmap: add a `is_mmap_safe` method to vfs...
marmoute
r52545:ba205f94 stable
@@ -1,3339 +1,3346
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16
17 17 import abc
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import gc
22 22 import hashlib
23 23 import io
24 24 import itertools
25 25 import locale
26 26 import mmap
27 27 import os
28 28 import pickle # provides util.pickle symbol
29 29 import re as remod
30 30 import shutil
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import warnings
36 36
37 37 from typing import (
38 38 Any,
39 39 Iterable,
40 40 Iterator,
41 41 List,
42 42 Optional,
43 43 Tuple,
44 44 )
45 45
46 46 from .node import hex
47 47 from .thirdparty import attr
48 48 from .pycompat import (
49 49 open,
50 50 )
51 51 from hgdemandimport import tracing
52 52 from . import (
53 53 encoding,
54 54 error,
55 55 i18n,
56 56 policy,
57 57 pycompat,
58 58 urllibcompat,
59 59 )
60 60 from .utils import (
61 61 compression,
62 62 hashutil,
63 63 procutil,
64 64 stringutil,
65 65 )
66 66
67 67 # keeps pyflakes happy
68 68 assert [
69 69 Iterable,
70 70 Iterator,
71 71 List,
72 72 Optional,
73 73 Tuple,
74 74 ]
75 75
76 76
77 77 base85 = policy.importmod('base85')
78 78 osutil = policy.importmod('osutil')
79 79
80 80 b85decode = base85.b85decode
81 81 b85encode = base85.b85encode
82 82
83 83 cookielib = pycompat.cookielib
84 84 httplib = pycompat.httplib
85 85 safehasattr = pycompat.safehasattr
86 86 socketserver = pycompat.socketserver
87 87 bytesio = io.BytesIO
88 88 # TODO deprecate stringio name, as it is a lie on Python 3.
89 89 stringio = bytesio
90 90 xmlrpclib = pycompat.xmlrpclib
91 91
92 92 httpserver = urllibcompat.httpserver
93 93 urlerr = urllibcompat.urlerr
94 94 urlreq = urllibcompat.urlreq
95 95
96 96 # workaround for win32mbcs
97 97 _filenamebytestr = pycompat.bytestr
98 98
99 99 if pycompat.iswindows:
100 100 from . import windows as platform
101 101 else:
102 102 from . import posix as platform
103 103
104 104 _ = i18n._
105 105
106 106 abspath = platform.abspath
107 107 bindunixsocket = platform.bindunixsocket
108 108 cachestat = platform.cachestat
109 109 checkexec = platform.checkexec
110 110 checklink = platform.checklink
111 111 copymode = platform.copymode
112 112 expandglobs = platform.expandglobs
113 113 getfsmountpoint = platform.getfsmountpoint
114 114 getfstype = platform.getfstype
115 115 get_password = platform.get_password
116 116 groupmembers = platform.groupmembers
117 117 groupname = platform.groupname
118 118 isexec = platform.isexec
119 119 isowner = platform.isowner
120 120 listdir = osutil.listdir
121 121 localpath = platform.localpath
122 122 lookupreg = platform.lookupreg
123 123 makedir = platform.makedir
124 124 nlinks = platform.nlinks
125 125 normpath = platform.normpath
126 126 normcase = platform.normcase
127 127 normcasespec = platform.normcasespec
128 128 normcasefallback = platform.normcasefallback
129 129 openhardlinks = platform.openhardlinks
130 130 oslink = platform.oslink
131 131 parsepatchoutput = platform.parsepatchoutput
132 132 pconvert = platform.pconvert
133 133 poll = platform.poll
134 134 posixfile = platform.posixfile
135 135 readlink = platform.readlink
136 136 rename = platform.rename
137 137 removedirs = platform.removedirs
138 138 samedevice = platform.samedevice
139 139 samefile = platform.samefile
140 140 samestat = platform.samestat
141 141 setflags = platform.setflags
142 142 split = platform.split
143 143 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
144 144 statisexec = platform.statisexec
145 145 statislink = platform.statislink
146 146 umask = platform.umask
147 147 unlink = platform.unlink
148 148 username = platform.username
149 149
150 150
151 151 def setumask(val: int) -> None:
152 152 '''updates the umask. used by chg server'''
153 153 if pycompat.iswindows:
154 154 return
155 155 os.umask(val)
156 156 global umask
157 157 platform.umask = umask = val & 0o777
158 158
159 159
160 160 # small compat layer
161 161 compengines = compression.compengines
162 162 SERVERROLE = compression.SERVERROLE
163 163 CLIENTROLE = compression.CLIENTROLE
164 164
165 165 # Python compatibility
166 166
167 167 _notset = object()
168 168
169 169
170 170 def bitsfrom(container):
171 171 bits = 0
172 172 for bit in container:
173 173 bits |= bit
174 174 return bits
175 175
176 176
177 177 # python 2.6 still has deprecation warnings enabled by default. We do not
178 178 # want to display anything to standard users, so detect if we are running
179 179 # tests and only use python deprecation warnings in this case.
180 180 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
181 181 if _dowarn:
182 182 # explicitly unfilter our warning for python 2.7
183 183 #
184 184 # The option of setting PYTHONWARNINGS in the test runner was investigated.
185 185 # However, the module name set through PYTHONWARNINGS is matched exactly,
186 186 # so we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'.
187 187 # This makes the whole PYTHONWARNINGS approach useless for our use case.
188 188 warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
189 189 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
190 190 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
191 191 if _dowarn:
192 192 # silence warning emitted by passing user string to re.sub()
193 193 warnings.filterwarnings(
194 194 'ignore', 'bad escape', DeprecationWarning, 'mercurial'
195 195 )
196 196 warnings.filterwarnings(
197 197 'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
198 198 )
199 199 # TODO: reinvent imp.is_frozen()
200 200 warnings.filterwarnings(
201 201 'ignore',
202 202 'the imp module is deprecated',
203 203 DeprecationWarning,
204 204 'mercurial',
205 205 )
206 206
207 207
208 208 def nouideprecwarn(msg, version, stacklevel=1):
209 209 """Issue an python native deprecation warning
210 210
211 211 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
212 212 """
213 213 if _dowarn:
214 214 msg += (
215 215 b"\n(compatibility will be dropped after Mercurial-%s,"
216 216 b" update your code.)"
217 217 ) % version
218 218 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
219 219 # on python 3 with chg, we will need to explicitly flush the output
220 220 sys.stderr.flush()
221 221
222 222
223 223 DIGESTS = {
224 224 b'md5': hashlib.md5,
225 225 b'sha1': hashutil.sha1,
226 226 b'sha512': hashlib.sha512,
227 227 }
228 228 # List of digest types from strongest to weakest
229 229 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
230 230
231 231 for k in DIGESTS_BY_STRENGTH:
232 232 assert k in DIGESTS
233 233
234 234
235 235 class digester:
236 236 """helper to compute digests.
237 237
238 238 This helper can be used to compute one or more digests given their name.
239 239
240 240 >>> d = digester([b'md5', b'sha1'])
241 241 >>> d.update(b'foo')
242 242 >>> [k for k in sorted(d)]
243 243 ['md5', 'sha1']
244 244 >>> d[b'md5']
245 245 'acbd18db4cc2f85cedef654fccc4a4d8'
246 246 >>> d[b'sha1']
247 247 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
248 248 >>> digester.preferred([b'md5', b'sha1'])
249 249 'sha1'
250 250 """
251 251
252 252 def __init__(self, digests, s=b''):
253 253 self._hashes = {}
254 254 for k in digests:
255 255 if k not in DIGESTS:
256 256 raise error.Abort(_(b'unknown digest type: %s') % k)
257 257 self._hashes[k] = DIGESTS[k]()
258 258 if s:
259 259 self.update(s)
260 260
261 261 def update(self, data):
262 262 for h in self._hashes.values():
263 263 h.update(data)
264 264
265 265 def __getitem__(self, key):
266 266 if key not in DIGESTS:
267 267 raise error.Abort(_(b'unknown digest type: %s') % key)
268 268 return hex(self._hashes[key].digest())
269 269
270 270 def __iter__(self):
271 271 return iter(self._hashes)
272 272
273 273 @staticmethod
274 274 def preferred(supported):
275 275 """returns the strongest digest type in both supported and DIGESTS."""
276 276
277 277 for k in DIGESTS_BY_STRENGTH:
278 278 if k in supported:
279 279 return k
280 280 return None
281 281
282 282
283 283 class digestchecker:
284 284 """file handle wrapper that additionally checks content against a given
285 285 size and digests.
286 286
287 287 d = digestchecker(fh, size, {'md5': '...'})
288 288
289 289 When multiple digests are given, all of them are validated.
290 290 """
291 291
292 292 def __init__(self, fh, size, digests):
293 293 self._fh = fh
294 294 self._size = size
295 295 self._got = 0
296 296 self._digests = dict(digests)
297 297 self._digester = digester(self._digests.keys())
298 298
299 299 def read(self, length=-1):
300 300 content = self._fh.read(length)
301 301 self._digester.update(content)
302 302 self._got += len(content)
303 303 return content
304 304
305 305 def validate(self):
306 306 if self._size != self._got:
307 307 raise error.Abort(
308 308 _(b'size mismatch: expected %d, got %d')
309 309 % (self._size, self._got)
310 310 )
311 311 for k, v in self._digests.items():
312 312 if v != self._digester[k]:
313 313 # i18n: first parameter is a digest name
314 314 raise error.Abort(
315 315 _(b'%s mismatch: expected %s, got %s')
316 316 % (k, v, self._digester[k])
317 317 )
318 318
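# A minimal sketch of how digestchecker is meant to be driven (illustrative;
# the sha1 value below is the digest of b'foo' shown in the digester doctest):
#
#     import io
#     fh = io.BytesIO(b'foo')
#     d = digestchecker(
#         fh, 3, {b'sha1': b'0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'}
#     )
#     d.read()       # stream the content through the wrapper
#     d.validate()   # raises error.Abort on a size or digest mismatch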
319 319
320 320 try:
321 321 buffer = buffer # pytype: disable=name-error
322 322 except NameError:
323 323
324 324 def buffer(sliceable, offset=0, length=None):
325 325 if length is not None:
326 326 return memoryview(sliceable)[offset : offset + length]
327 327 return memoryview(sliceable)[offset:]
328 328
329 329
330 330 _chunksize = 4096
331 331
332 332
333 333 class bufferedinputpipe:
334 334 """a manually buffered input pipe
335 335
336 336 Python will not let us use buffered IO and lazy reading with 'polling' at
337 337 the same time. We cannot probe the buffer state and select will not detect
338 338 that data are ready to read if they are already buffered.
339 339
340 340 This class lets us work around that by implementing its own buffering
341 341 (allowing efficient readline) while offering a way to know if the buffer is
342 342 empty from the outside (allowing collaboration of the buffer with polling).
343 343
344 344 This class lives in the 'util' module because it makes use of the 'os'
345 345 module from the python stdlib.
346 346 """
347 347
348 348 def __new__(cls, fh):
349 349 # If we receive a fileobjectproxy, we need to use a variation of this
350 350 # class that notifies observers about activity.
351 351 if isinstance(fh, fileobjectproxy):
352 352 cls = observedbufferedinputpipe
353 353
354 354 return super(bufferedinputpipe, cls).__new__(cls)
355 355
356 356 def __init__(self, input):
357 357 self._input = input
358 358 self._buffer = []
359 359 self._eof = False
360 360 self._lenbuf = 0
361 361
362 362 @property
363 363 def hasbuffer(self):
364 364 """True is any data is currently buffered
365 365
366 366 This will be used externally a pre-step for polling IO. If there is
367 367 already data then no polling should be set in place."""
368 368 return bool(self._buffer)
369 369
370 370 @property
371 371 def closed(self):
372 372 return self._input.closed
373 373
374 374 def fileno(self):
375 375 return self._input.fileno()
376 376
377 377 def close(self):
378 378 return self._input.close()
379 379
380 380 def read(self, size):
381 381 while (not self._eof) and (self._lenbuf < size):
382 382 self._fillbuffer()
383 383 return self._frombuffer(size)
384 384
385 385 def unbufferedread(self, size):
386 386 if not self._eof and self._lenbuf == 0:
387 387 self._fillbuffer(max(size, _chunksize))
388 388 return self._frombuffer(min(self._lenbuf, size))
389 389
390 390 def readline(self, *args, **kwargs):
391 391 if len(self._buffer) > 1:
392 392 # this should not happen because both read and readline end with a
393 393 # _frombuffer call that collapses it.
394 394 self._buffer = [b''.join(self._buffer)]
395 395 self._lenbuf = len(self._buffer[0])
396 396 lfi = -1
397 397 if self._buffer:
398 398 lfi = self._buffer[-1].find(b'\n')
399 399 while (not self._eof) and lfi < 0:
400 400 self._fillbuffer()
401 401 if self._buffer:
402 402 lfi = self._buffer[-1].find(b'\n')
403 403 size = lfi + 1
404 404 if lfi < 0: # end of file
405 405 size = self._lenbuf
406 406 elif len(self._buffer) > 1:
407 407 # we need to take previous chunks into account
408 408 size += self._lenbuf - len(self._buffer[-1])
409 409 return self._frombuffer(size)
410 410
411 411 def _frombuffer(self, size):
412 412 """return at most 'size' data from the buffer
413 413
414 414 The data are removed from the buffer."""
415 415 if size == 0 or not self._buffer:
416 416 return b''
417 417 buf = self._buffer[0]
418 418 if len(self._buffer) > 1:
419 419 buf = b''.join(self._buffer)
420 420
421 421 data = buf[:size]
422 422 buf = buf[len(data) :]
423 423 if buf:
424 424 self._buffer = [buf]
425 425 self._lenbuf = len(buf)
426 426 else:
427 427 self._buffer = []
428 428 self._lenbuf = 0
429 429 return data
430 430
431 431 def _fillbuffer(self, size=_chunksize):
432 432 """read data to the buffer"""
433 433 data = os.read(self._input.fileno(), size)
434 434 if not data:
435 435 self._eof = True
436 436 else:
437 437 self._lenbuf += len(data)
438 438 self._buffer.append(data)
439 439
440 440 return data
441 441
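# Sketch of the intended collaboration with polling (illustrative; assumes a
# pipe-like object `proc.stdout`): only poll when nothing is buffered.
#
#     pipe = bufferedinputpipe(proc.stdout)
#     if not pipe.hasbuffer:
#         poll([pipe.fileno()])   # safe: no data hidden in the buffer
#     line = pipe.readline()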
442 442
443 443 def mmapread(fp, size=None):
444 """Read a file content using mmap
445
446 The responsability of checking the file system is mmap safe is the
447 responsability of the caller.
448
449 In some case, a normal string might be returned.
450 """
444 451 if size == 0:
445 452 # size of 0 to mmap.mmap() means "all data"
446 453 # rather than "zero bytes", so special case that.
447 454 return b''
448 455 elif size is None:
449 456 size = 0
450 457 fd = getattr(fp, 'fileno', lambda: fp)()
451 458 try:
452 459 return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
453 460 except ValueError:
454 461 # Empty files cannot be mmapped, but mmapread should still work. Check
455 462 # if the file is empty, and if so, return an empty buffer.
456 463 if os.fstat(fd).st_size == 0:
457 464 return b''
458 465 raise
459 466
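# Illustrative use of mmapread (a sketch, not part of the change): per the
# docstring, the caller is responsible for first checking that the file
# lives on an mmap-safe filesystem.
#
#     with open(b'some-file', 'rb') as fp:
#         data = mmapread(fp)        # mmap object, or b'' for an empty file
#         header = bytes(data[:16])  # slices like a bytes object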
460 467
461 468 class fileobjectproxy:
462 469 """A proxy around file objects that tells a watcher when events occur.
463 470
464 471 This type is intended to only be used for testing purposes. Think hard
465 472 before using it in important code.
466 473 """
467 474
468 475 __slots__ = (
469 476 '_orig',
470 477 '_observer',
471 478 )
472 479
473 480 def __init__(self, fh, observer):
474 481 object.__setattr__(self, '_orig', fh)
475 482 object.__setattr__(self, '_observer', observer)
476 483
477 484 def __getattribute__(self, name):
478 485 ours = {
479 486 '_observer',
480 487 # IOBase
481 488 'close',
483 490 # closed is a property
483 490 'fileno',
484 491 'flush',
485 492 'isatty',
486 493 'readable',
487 494 'readline',
488 495 'readlines',
489 496 'seek',
490 497 'seekable',
491 498 'tell',
492 499 'truncate',
493 500 'writable',
494 501 'writelines',
495 502 # RawIOBase
496 503 'read',
497 504 'readall',
498 505 'readinto',
499 506 'write',
500 507 # BufferedIOBase
501 508 # raw is a property
502 509 'detach',
503 510 # read defined above
504 511 'read1',
505 512 # readinto defined above
506 513 # write defined above
507 514 }
508 515
509 516 # We only observe some methods.
510 517 if name in ours:
511 518 return object.__getattribute__(self, name)
512 519
513 520 return getattr(object.__getattribute__(self, '_orig'), name)
514 521
515 522 def __nonzero__(self):
516 523 return bool(object.__getattribute__(self, '_orig'))
517 524
518 525 __bool__ = __nonzero__
519 526
520 527 def __delattr__(self, name):
521 528 return delattr(object.__getattribute__(self, '_orig'), name)
522 529
523 530 def __setattr__(self, name, value):
524 531 return setattr(object.__getattribute__(self, '_orig'), name, value)
525 532
526 533 def __iter__(self):
527 534 return object.__getattribute__(self, '_orig').__iter__()
528 535
529 536 def _observedcall(self, name, *args, **kwargs):
530 537 # Call the original object.
531 538 orig = object.__getattribute__(self, '_orig')
532 539 res = getattr(orig, name)(*args, **kwargs)
533 540
534 541 # Call a method on the observer of the same name with arguments
535 542 # so it can react, log, etc.
536 543 observer = object.__getattribute__(self, '_observer')
537 544 fn = getattr(observer, name, None)
538 545 if fn:
539 546 fn(res, *args, **kwargs)
540 547
541 548 return res
542 549
543 550 def close(self, *args, **kwargs):
544 551 return object.__getattribute__(self, '_observedcall')(
545 552 'close', *args, **kwargs
546 553 )
547 554
548 555 def fileno(self, *args, **kwargs):
549 556 return object.__getattribute__(self, '_observedcall')(
550 557 'fileno', *args, **kwargs
551 558 )
552 559
553 560 def flush(self, *args, **kwargs):
554 561 return object.__getattribute__(self, '_observedcall')(
555 562 'flush', *args, **kwargs
556 563 )
557 564
558 565 def isatty(self, *args, **kwargs):
559 566 return object.__getattribute__(self, '_observedcall')(
560 567 'isatty', *args, **kwargs
561 568 )
562 569
563 570 def readable(self, *args, **kwargs):
564 571 return object.__getattribute__(self, '_observedcall')(
565 572 'readable', *args, **kwargs
566 573 )
567 574
568 575 def readline(self, *args, **kwargs):
569 576 return object.__getattribute__(self, '_observedcall')(
570 577 'readline', *args, **kwargs
571 578 )
572 579
573 580 def readlines(self, *args, **kwargs):
574 581 return object.__getattribute__(self, '_observedcall')(
575 582 'readlines', *args, **kwargs
576 583 )
577 584
578 585 def seek(self, *args, **kwargs):
579 586 return object.__getattribute__(self, '_observedcall')(
580 587 'seek', *args, **kwargs
581 588 )
582 589
583 590 def seekable(self, *args, **kwargs):
584 591 return object.__getattribute__(self, '_observedcall')(
585 592 'seekable', *args, **kwargs
586 593 )
587 594
588 595 def tell(self, *args, **kwargs):
589 596 return object.__getattribute__(self, '_observedcall')(
590 597 'tell', *args, **kwargs
591 598 )
592 599
593 600 def truncate(self, *args, **kwargs):
594 601 return object.__getattribute__(self, '_observedcall')(
595 602 'truncate', *args, **kwargs
596 603 )
597 604
598 605 def writable(self, *args, **kwargs):
599 606 return object.__getattribute__(self, '_observedcall')(
600 607 'writable', *args, **kwargs
601 608 )
602 609
603 610 def writelines(self, *args, **kwargs):
604 611 return object.__getattribute__(self, '_observedcall')(
605 612 'writelines', *args, **kwargs
606 613 )
607 614
608 615 def read(self, *args, **kwargs):
609 616 return object.__getattribute__(self, '_observedcall')(
610 617 'read', *args, **kwargs
611 618 )
612 619
613 620 def readall(self, *args, **kwargs):
614 621 return object.__getattribute__(self, '_observedcall')(
615 622 'readall', *args, **kwargs
616 623 )
617 624
618 625 def readinto(self, *args, **kwargs):
619 626 return object.__getattribute__(self, '_observedcall')(
620 627 'readinto', *args, **kwargs
621 628 )
622 629
623 630 def write(self, *args, **kwargs):
624 631 return object.__getattribute__(self, '_observedcall')(
625 632 'write', *args, **kwargs
626 633 )
627 634
628 635 def detach(self, *args, **kwargs):
629 636 return object.__getattribute__(self, '_observedcall')(
630 637 'detach', *args, **kwargs
631 638 )
632 639
633 640 def read1(self, *args, **kwargs):
634 641 return object.__getattribute__(self, '_observedcall')(
635 642 'read1', *args, **kwargs
636 643 )
637 644
638 645
639 646 class observedbufferedinputpipe(bufferedinputpipe):
640 647 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
641 648
642 649 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
643 650 bypass ``fileobjectproxy``. Because of this, we need to make
644 651 ``bufferedinputpipe`` aware of these operations.
645 652
646 653 This variation of ``bufferedinputpipe`` can notify observers about
647 654 ``os.read()`` events. It also re-publishes other events, such as
648 655 ``read()`` and ``readline()``.
649 656 """
650 657
651 658 def _fillbuffer(self, size=_chunksize):
652 659 res = super(observedbufferedinputpipe, self)._fillbuffer(size=size)
653 660
654 661 fn = getattr(self._input._observer, 'osread', None)
655 662 if fn:
656 663 fn(res, size)
657 664
658 665 return res
659 666
660 667 # We use different observer methods because the operation isn't
661 668 # performed on the actual file object but on us.
662 669 def read(self, size):
663 670 res = super(observedbufferedinputpipe, self).read(size)
664 671
665 672 fn = getattr(self._input._observer, 'bufferedread', None)
666 673 if fn:
667 674 fn(res, size)
668 675
669 676 return res
670 677
671 678 def readline(self, *args, **kwargs):
672 679 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
673 680
674 681 fn = getattr(self._input._observer, 'bufferedreadline', None)
675 682 if fn:
676 683 fn(res)
677 684
678 685 return res
679 686
680 687
681 688 PROXIED_SOCKET_METHODS = {
682 689 'makefile',
683 690 'recv',
684 691 'recvfrom',
685 692 'recvfrom_into',
686 693 'recv_into',
687 694 'send',
688 695 'sendall',
689 696 'sendto',
690 697 'setblocking',
691 698 'settimeout',
692 699 'gettimeout',
693 700 'setsockopt',
694 701 }
695 702
696 703
697 704 class socketproxy:
698 705 """A proxy around a socket that tells a watcher when events occur.
699 706
700 707 This is like ``fileobjectproxy`` except for sockets.
701 708
702 709 This type is intended to only be used for testing purposes. Think hard
703 710 before using it in important code.
704 711 """
705 712
706 713 __slots__ = (
707 714 '_orig',
708 715 '_observer',
709 716 )
710 717
711 718 def __init__(self, sock, observer):
712 719 object.__setattr__(self, '_orig', sock)
713 720 object.__setattr__(self, '_observer', observer)
714 721
715 722 def __getattribute__(self, name):
716 723 if name in PROXIED_SOCKET_METHODS:
717 724 return object.__getattribute__(self, name)
718 725
719 726 return getattr(object.__getattribute__(self, '_orig'), name)
720 727
721 728 def __delattr__(self, name):
722 729 return delattr(object.__getattribute__(self, '_orig'), name)
723 730
724 731 def __setattr__(self, name, value):
725 732 return setattr(object.__getattribute__(self, '_orig'), name, value)
726 733
727 734 def __nonzero__(self):
728 735 return bool(object.__getattribute__(self, '_orig'))
729 736
730 737 __bool__ = __nonzero__
731 738
732 739 def _observedcall(self, name, *args, **kwargs):
733 740 # Call the original object.
734 741 orig = object.__getattribute__(self, '_orig')
735 742 res = getattr(orig, name)(*args, **kwargs)
736 743
737 744 # Call a method on the observer of the same name with arguments
738 745 # so it can react, log, etc.
739 746 observer = object.__getattribute__(self, '_observer')
740 747 fn = getattr(observer, name, None)
741 748 if fn:
742 749 fn(res, *args, **kwargs)
743 750
744 751 return res
745 752
746 753 def makefile(self, *args, **kwargs):
747 754 res = object.__getattribute__(self, '_observedcall')(
748 755 'makefile', *args, **kwargs
749 756 )
750 757
751 758 # The file object may be used for I/O. So we turn it into a
752 759 # proxy using our observer.
753 760 observer = object.__getattribute__(self, '_observer')
754 761 return makeloggingfileobject(
755 762 observer.fh,
756 763 res,
757 764 observer.name,
758 765 reads=observer.reads,
759 766 writes=observer.writes,
760 767 logdata=observer.logdata,
761 768 logdataapis=observer.logdataapis,
762 769 )
763 770
764 771 def recv(self, *args, **kwargs):
765 772 return object.__getattribute__(self, '_observedcall')(
766 773 'recv', *args, **kwargs
767 774 )
768 775
769 776 def recvfrom(self, *args, **kwargs):
770 777 return object.__getattribute__(self, '_observedcall')(
771 778 'recvfrom', *args, **kwargs
772 779 )
773 780
774 781 def recvfrom_into(self, *args, **kwargs):
775 782 return object.__getattribute__(self, '_observedcall')(
776 783 'recvfrom_into', *args, **kwargs
777 784 )
778 785
779 786 def recv_into(self, *args, **kwargs):
780 787 return object.__getattribute__(self, '_observedcall')(
781 788 'recv_into', *args, **kwargs
782 789 )
783 790
784 791 def send(self, *args, **kwargs):
785 792 return object.__getattribute__(self, '_observedcall')(
786 793 'send', *args, **kwargs
787 794 )
788 795
789 796 def sendall(self, *args, **kwargs):
790 797 return object.__getattribute__(self, '_observedcall')(
791 798 'sendall', *args, **kwargs
792 799 )
793 800
794 801 def sendto(self, *args, **kwargs):
795 802 return object.__getattribute__(self, '_observedcall')(
796 803 'sendto', *args, **kwargs
797 804 )
798 805
799 806 def setblocking(self, *args, **kwargs):
800 807 return object.__getattribute__(self, '_observedcall')(
801 808 'setblocking', *args, **kwargs
802 809 )
803 810
804 811 def settimeout(self, *args, **kwargs):
805 812 return object.__getattribute__(self, '_observedcall')(
806 813 'settimeout', *args, **kwargs
807 814 )
808 815
809 816 def gettimeout(self, *args, **kwargs):
810 817 return object.__getattribute__(self, '_observedcall')(
811 818 'gettimeout', *args, **kwargs
812 819 )
813 820
814 821 def setsockopt(self, *args, **kwargs):
815 822 return object.__getattribute__(self, '_observedcall')(
816 823 'setsockopt', *args, **kwargs
817 824 )
818 825
819 826
820 827 class baseproxyobserver:
821 828 def __init__(self, fh, name, logdata, logdataapis):
822 829 self.fh = fh
823 830 self.name = name
824 831 self.logdata = logdata
825 832 self.logdataapis = logdataapis
826 833
827 834 def _writedata(self, data):
828 835 if not self.logdata:
829 836 if self.logdataapis:
830 837 self.fh.write(b'\n')
831 838 self.fh.flush()
832 839 return
833 840
834 841 # Simple case writes all data on a single line.
835 842 if b'\n' not in data:
836 843 if self.logdataapis:
837 844 self.fh.write(b': %s\n' % stringutil.escapestr(data))
838 845 else:
839 846 self.fh.write(
840 847 b'%s> %s\n' % (self.name, stringutil.escapestr(data))
841 848 )
842 849 self.fh.flush()
843 850 return
844 851
845 852 # Data with newlines is written to multiple lines.
846 853 if self.logdataapis:
847 854 self.fh.write(b':\n')
848 855
849 856 lines = data.splitlines(True)
850 857 for line in lines:
851 858 self.fh.write(
852 859 b'%s> %s\n' % (self.name, stringutil.escapestr(line))
853 860 )
854 861 self.fh.flush()
855 862
856 863
857 864 class fileobjectobserver(baseproxyobserver):
858 865 """Logs file object activity."""
859 866
860 867 def __init__(
861 868 self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
862 869 ):
863 870 super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
864 871 self.reads = reads
865 872 self.writes = writes
866 873
867 874 def read(self, res, size=-1):
868 875 if not self.reads:
869 876 return
870 877 # Python 3 can return None from reads at EOF instead of empty strings.
871 878 if res is None:
872 879 res = b''
873 880
874 881 if size == -1 and res == b'':
875 882 # Suppress pointless read(-1) calls that return
876 883 # nothing. These happen _a lot_ on Python 3, and there
877 884 # doesn't seem to be a better workaround to have matching
878 885 # Python 2 and 3 behavior. :(
879 886 return
880 887
881 888 if self.logdataapis:
882 889 self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))
883 890
884 891 self._writedata(res)
885 892
886 893 def readline(self, res, limit=-1):
887 894 if not self.reads:
888 895 return
889 896
890 897 if self.logdataapis:
891 898 self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))
892 899
893 900 self._writedata(res)
894 901
895 902 def readinto(self, res, dest):
896 903 if not self.reads:
897 904 return
898 905
899 906 if self.logdataapis:
900 907 self.fh.write(
901 908 b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
902 909 )
903 910
904 911 data = dest[0:res] if res is not None else b''
905 912
906 913 # _writedata() uses "in" operator and is confused by memoryview because
907 914 # characters are ints on Python 3.
908 915 if isinstance(data, memoryview):
909 916 data = data.tobytes()
910 917
911 918 self._writedata(data)
912 919
913 920 def write(self, res, data):
914 921 if not self.writes:
915 922 return
916 923
917 924 # Python 2 returns None from some write() calls. Python 3 (reasonably)
918 925 # returns the integer bytes written.
919 926 if res is None and data:
920 927 res = len(data)
921 928
922 929 if self.logdataapis:
923 930 self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))
924 931
925 932 self._writedata(data)
926 933
927 934 def flush(self, res):
928 935 if not self.writes:
929 936 return
930 937
931 938 self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))
932 939
933 940 # For observedbufferedinputpipe.
934 941 def bufferedread(self, res, size):
935 942 if not self.reads:
936 943 return
937 944
938 945 if self.logdataapis:
939 946 self.fh.write(
940 947 b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
941 948 )
942 949
943 950 self._writedata(res)
944 951
945 952 def bufferedreadline(self, res):
946 953 if not self.reads:
947 954 return
948 955
949 956 if self.logdataapis:
950 957 self.fh.write(
951 958 b'%s> bufferedreadline() -> %d' % (self.name, len(res))
952 959 )
953 960
954 961 self._writedata(res)
955 962
956 963
957 964 def makeloggingfileobject(
958 965 logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
959 966 ):
960 967 """Turn a file object into a logging file object."""
961 968
962 969 observer = fileobjectobserver(
963 970 logh,
964 971 name,
965 972 reads=reads,
966 973 writes=writes,
967 974 logdata=logdata,
968 975 logdataapis=logdataapis,
969 976 )
970 977 return fileobjectproxy(fh, observer)
971 978
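# Sketch: wrapping a file object so its I/O is logged (illustrative):
#
#     import io
#     log = io.BytesIO()
#     fh = makeloggingfileobject(log, io.BytesIO(b'data'), b'src')
#     fh.read(2)
#     # log.getvalue() is now b'src> read(2) -> 2\n' (default logdata=False)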
972 979
973 980 class socketobserver(baseproxyobserver):
974 981 """Logs socket activity."""
975 982
976 983 def __init__(
977 984 self,
978 985 fh,
979 986 name,
980 987 reads=True,
981 988 writes=True,
982 989 states=True,
983 990 logdata=False,
984 991 logdataapis=True,
985 992 ):
986 993 super(socketobserver, self).__init__(fh, name, logdata, logdataapis)
987 994 self.reads = reads
988 995 self.writes = writes
989 996 self.states = states
990 997
991 998 def makefile(self, res, mode=None, bufsize=None):
992 999 if not self.states:
993 1000 return
994 1001
995 1002 self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))
996 1003
997 1004 def recv(self, res, size, flags=0):
998 1005 if not self.reads:
999 1006 return
1000 1007
1001 1008 if self.logdataapis:
1002 1009 self.fh.write(
1003 1010 b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
1004 1011 )
1005 1012 self._writedata(res)
1006 1013
1007 1014 def recvfrom(self, res, size, flags=0):
1008 1015 if not self.reads:
1009 1016 return
1010 1017
1011 1018 if self.logdataapis:
1012 1019 self.fh.write(
1013 1020 b'%s> recvfrom(%d, %d) -> %d'
1014 1021 % (self.name, size, flags, len(res[0]))
1015 1022 )
1016 1023
1017 1024 self._writedata(res[0])
1018 1025
1019 1026 def recvfrom_into(self, res, buf, size, flags=0):
1020 1027 if not self.reads:
1021 1028 return
1022 1029
1023 1030 if self.logdataapis:
1024 1031 self.fh.write(
1025 1032 b'%s> recvfrom_into(%d, %d) -> %d'
1026 1033 % (self.name, size, flags, res[0])
1027 1034 )
1028 1035
1029 1036 self._writedata(buf[0 : res[0]])
1030 1037
1031 1038 def recv_into(self, res, buf, size=0, flags=0):
1032 1039 if not self.reads:
1033 1040 return
1034 1041
1035 1042 if self.logdataapis:
1036 1043 self.fh.write(
1037 1044 b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
1038 1045 )
1039 1046
1040 1047 self._writedata(buf[0:res])
1041 1048
1042 1049 def send(self, res, data, flags=0):
1043 1050 if not self.writes:
1044 1051 return
1045 1052
1046 1053 self.fh.write(
1047 1054 b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, len(res))
1048 1055 )
1049 1056 self._writedata(data)
1050 1057
1051 1058 def sendall(self, res, data, flags=0):
1052 1059 if not self.writes:
1053 1060 return
1054 1061
1055 1062 if self.logdataapis:
1056 1063 # Returns None on success. So don't bother reporting return value.
1057 1064 self.fh.write(
1058 1065 b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
1059 1066 )
1060 1067
1061 1068 self._writedata(data)
1062 1069
1063 1070 def sendto(self, res, data, flagsoraddress, address=None):
1064 1071 if not self.writes:
1065 1072 return
1066 1073
1067 1074 if address:
1068 1075 flags = flagsoraddress
1069 1076 else:
1070 1077 flags = 0
1071 1078
1072 1079 if self.logdataapis:
1073 1080 self.fh.write(
1074 1081 b'%s> sendto(%d, %d, %r) -> %d'
1075 1082 % (self.name, len(data), flags, address, res)
1076 1083 )
1077 1084
1078 1085 self._writedata(data)
1079 1086
1080 1087 def setblocking(self, res, flag):
1081 1088 if not self.states:
1082 1089 return
1083 1090
1084 1091 self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))
1085 1092
1086 1093 def settimeout(self, res, value):
1087 1094 if not self.states:
1088 1095 return
1089 1096
1090 1097 self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))
1091 1098
1092 1099 def gettimeout(self, res):
1093 1100 if not self.states:
1094 1101 return
1095 1102
1096 1103 self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))
1097 1104
1098 1105 def setsockopt(self, res, level, optname, value):
1099 1106 if not self.states:
1100 1107 return
1101 1108
1102 1109 self.fh.write(
1103 1110 b'%s> setsockopt(%r, %r, %r) -> %r\n'
1104 1111 % (self.name, level, optname, value, res)
1105 1112 )
1106 1113
1107 1114
1108 1115 def makeloggingsocket(
1109 1116 logh,
1110 1117 fh,
1111 1118 name,
1112 1119 reads=True,
1113 1120 writes=True,
1114 1121 states=True,
1115 1122 logdata=False,
1116 1123 logdataapis=True,
1117 1124 ):
1118 1125 """Turn a socket into a logging socket."""
1119 1126
1120 1127 observer = socketobserver(
1121 1128 logh,
1122 1129 name,
1123 1130 reads=reads,
1124 1131 writes=writes,
1125 1132 states=states,
1126 1133 logdata=logdata,
1127 1134 logdataapis=logdataapis,
1128 1135 )
1129 1136 return socketproxy(fh, observer)
1130 1137
1131 1138
1132 1139 def version():
1133 1140 """Return version information if available."""
1134 1141 try:
1135 1142 from . import __version__
1136 1143
1137 1144 return __version__.version
1138 1145 except ImportError:
1139 1146 return b'unknown'
1140 1147
1141 1148
1142 1149 def versiontuple(v=None, n=4):
1143 1150 """Parses a Mercurial version string into an N-tuple.
1144 1151
1145 1152 The version string to be parsed is specified with the ``v`` argument.
1146 1153 If it isn't defined, the current Mercurial version string will be parsed.
1147 1154
1148 1155 ``n`` can be 2, 3, or 4. Here is how some version strings map to
1149 1156 returned values:
1150 1157
1151 1158 >>> v = b'3.6.1+190-df9b73d2d444'
1152 1159 >>> versiontuple(v, 2)
1153 1160 (3, 6)
1154 1161 >>> versiontuple(v, 3)
1155 1162 (3, 6, 1)
1156 1163 >>> versiontuple(v, 4)
1157 1164 (3, 6, 1, '190-df9b73d2d444')
1158 1165
1159 1166 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1160 1167 (3, 6, 1, '190-df9b73d2d444+20151118')
1161 1168
1162 1169 >>> v = b'3.6'
1163 1170 >>> versiontuple(v, 2)
1164 1171 (3, 6)
1165 1172 >>> versiontuple(v, 3)
1166 1173 (3, 6, None)
1167 1174 >>> versiontuple(v, 4)
1168 1175 (3, 6, None, None)
1169 1176
1170 1177 >>> v = b'3.9-rc'
1171 1178 >>> versiontuple(v, 2)
1172 1179 (3, 9)
1173 1180 >>> versiontuple(v, 3)
1174 1181 (3, 9, None)
1175 1182 >>> versiontuple(v, 4)
1176 1183 (3, 9, None, 'rc')
1177 1184
1178 1185 >>> v = b'3.9-rc+2-02a8fea4289b'
1179 1186 >>> versiontuple(v, 2)
1180 1187 (3, 9)
1181 1188 >>> versiontuple(v, 3)
1182 1189 (3, 9, None)
1183 1190 >>> versiontuple(v, 4)
1184 1191 (3, 9, None, 'rc+2-02a8fea4289b')
1185 1192
1186 1193 >>> versiontuple(b'4.6rc0')
1187 1194 (4, 6, None, 'rc0')
1188 1195 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1189 1196 (4, 6, None, 'rc0+12-425d55e54f98')
1190 1197 >>> versiontuple(b'.1.2.3')
1191 1198 (None, None, None, '.1.2.3')
1192 1199 >>> versiontuple(b'12.34..5')
1193 1200 (12, 34, None, '..5')
1194 1201 >>> versiontuple(b'1.2.3.4.5.6')
1195 1202 (1, 2, 3, '.4.5.6')
1196 1203 """
1197 1204 if not v:
1198 1205 v = version()
1199 1206 m = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
1200 1207 if not m:
1201 1208 vparts, extra = b'', v
1202 1209 elif m.group(2):
1203 1210 vparts, extra = m.groups()
1204 1211 else:
1205 1212 vparts, extra = m.group(1), None
1206 1213
1207 1214 assert vparts is not None # help pytype
1208 1215
1209 1216 vints = []
1210 1217 for i in vparts.split(b'.'):
1211 1218 try:
1212 1219 vints.append(int(i))
1213 1220 except ValueError:
1214 1221 break
1215 1222 # (3, 6) -> (3, 6, None)
1216 1223 while len(vints) < 3:
1217 1224 vints.append(None)
1218 1225
1219 1226 if n == 2:
1220 1227 return (vints[0], vints[1])
1221 1228 if n == 3:
1222 1229 return (vints[0], vints[1], vints[2])
1223 1230 if n == 4:
1224 1231 return (vints[0], vints[1], vints[2], extra)
1225 1232
1226 1233 raise error.ProgrammingError(b"invalid version part request: %d" % n)
1227 1234
1228 1235
1229 1236 def cachefunc(func):
1230 1237 '''cache the result of function calls'''
1231 1238 # XXX doesn't handle keywords args
1232 1239 if func.__code__.co_argcount == 0:
1233 1240 listcache = []
1234 1241
1235 1242 def f():
1236 1243 if len(listcache) == 0:
1237 1244 listcache.append(func())
1238 1245 return listcache[0]
1239 1246
1240 1247 return f
1241 1248 cache = {}
1242 1249 if func.__code__.co_argcount == 1:
1243 1250 # we gain a small amount of time because
1244 1251 # we don't need to pack/unpack the list
1245 1252 def f(arg):
1246 1253 if arg not in cache:
1247 1254 cache[arg] = func(arg)
1248 1255 return cache[arg]
1249 1256
1250 1257 else:
1251 1258
1252 1259 def f(*args):
1253 1260 if args not in cache:
1254 1261 cache[args] = func(*args)
1255 1262 return cache[args]
1256 1263
1257 1264 return f
1258 1265
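# Memoization sketch (illustrative): the wrapped function body runs once per
# distinct argument.
#
#     calls = []
#     def square(x):
#         calls.append(x)
#         return x * x
#     fast = cachefunc(square)
#     fast(3); fast(3)   # returns 9 twice, but calls == [3]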
1259 1266
1260 1267 class cow:
1261 1268 """helper class to make copy-on-write easier
1262 1269
1263 1270 Call preparewrite before doing any writes.
1264 1271 """
1265 1272
1266 1273 def preparewrite(self):
1267 1274 """call this before writes, return self or a copied new object"""
1268 1275 if getattr(self, '_copied', 0):
1269 1276 self._copied -= 1
1270 1277 # Function cow.__init__ expects 1 arg(s), got 2 [wrong-arg-count]
1271 1278 return self.__class__(self) # pytype: disable=wrong-arg-count
1272 1279 return self
1273 1280
1274 1281 def copy(self):
1275 1282 """always do a cheap copy"""
1276 1283 self._copied = getattr(self, '_copied', 0) + 1
1277 1284 return self
1278 1285
1279 1286
1280 1287 class sortdict(collections.OrderedDict):
1281 1288 """a simple sorted dictionary
1282 1289
1283 1290 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1284 1291 >>> d2 = d1.copy()
1285 1292 >>> list(d2.items())
1286 1293 [('a', 0), ('b', 1)]
1287 1294 >>> d2.update([(b'a', 2)])
1288 1295 >>> list(d2.keys()) # should still be in last-set order
1289 1296 ['b', 'a']
1290 1297 >>> d1.insert(1, b'a.5', 0.5)
1291 1298 >>> list(d1.items())
1292 1299 [('a', 0), ('a.5', 0.5), ('b', 1)]
1293 1300 """
1294 1301
1295 1302 def __setitem__(self, key, value):
1296 1303 if key in self:
1297 1304 del self[key]
1298 1305 super(sortdict, self).__setitem__(key, value)
1299 1306
1300 1307 if pycompat.ispypy:
1301 1308 # __setitem__() isn't called as of PyPy 5.8.0
1302 1309 def update(self, src, **f):
1303 1310 if isinstance(src, dict):
1304 1311 src = src.items()
1305 1312 for k, v in src:
1306 1313 self[k] = v
1307 1314 for k in f:
1308 1315 self[k] = f[k]
1309 1316
1310 1317 def insert(self, position, key, value):
1311 1318 for (i, (k, v)) in enumerate(list(self.items())):
1312 1319 if i == position:
1313 1320 self[key] = value
1314 1321 if i >= position:
1315 1322 del self[k]
1316 1323 self[k] = v
1317 1324
1318 1325
1319 1326 class cowdict(cow, dict):
1320 1327 """copy-on-write dict
1321 1328
1322 1329 Be sure to call d = d.preparewrite() before writing to d.
1323 1330
1324 1331 >>> a = cowdict()
1325 1332 >>> a is a.preparewrite()
1326 1333 True
1327 1334 >>> b = a.copy()
1328 1335 >>> b is a
1329 1336 True
1330 1337 >>> c = b.copy()
1331 1338 >>> c is a
1332 1339 True
1333 1340 >>> a = a.preparewrite()
1334 1341 >>> b is a
1335 1342 False
1336 1343 >>> a is a.preparewrite()
1337 1344 True
1338 1345 >>> c = c.preparewrite()
1339 1346 >>> b is c
1340 1347 False
1341 1348 >>> b is b.preparewrite()
1342 1349 True
1343 1350 """
1344 1351
1345 1352
1346 1353 class cowsortdict(cow, sortdict):
1347 1354 """copy-on-write sortdict
1348 1355
1349 1356 Be sure to call d = d.preparewrite() before writing to d.
1350 1357 """
1351 1358
1352 1359
1353 1360 class transactional: # pytype: disable=ignored-metaclass
1354 1361 """Base class for making a transactional type into a context manager."""
1355 1362
1356 1363 __metaclass__ = abc.ABCMeta
1357 1364
1358 1365 @abc.abstractmethod
1359 1366 def close(self):
1360 1367 """Successfully closes the transaction."""
1361 1368
1362 1369 @abc.abstractmethod
1363 1370 def release(self):
1364 1371 """Marks the end of the transaction.
1365 1372
1366 1373 If the transaction has not been closed, it will be aborted.
1367 1374 """
1368 1375
1369 1376 def __enter__(self):
1370 1377 return self
1371 1378
1372 1379 def __exit__(self, exc_type, exc_val, exc_tb):
1373 1380 try:
1374 1381 if exc_type is None:
1375 1382 self.close()
1376 1383 finally:
1377 1384 self.release()
1378 1385
1379 1386
1380 1387 @contextlib.contextmanager
1381 1388 def acceptintervention(tr=None):
1382 1389 """A context manager that closes the transaction on InterventionRequired
1383 1390
1384 1391 If no transaction was provided, this simply runs the body and returns
1385 1392 """
1386 1393 if not tr:
1387 1394 yield
1388 1395 return
1389 1396 try:
1390 1397 yield
1391 1398 tr.close()
1392 1399 except error.InterventionRequired:
1393 1400 tr.close()
1394 1401 raise
1395 1402 finally:
1396 1403 tr.release()
1397 1404
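# Sketch (illustrative; `tr` is any transactional object): the transaction is
# closed rather than aborted when InterventionRequired propagates.
#
#     with acceptintervention(tr):
#         do_work()   # raising error.InterventionRequired still closes tr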
1398 1405
1399 1406 @contextlib.contextmanager
1400 1407 def nullcontextmanager(enter_result=None):
1401 1408 yield enter_result
1402 1409
1403 1410
1404 1411 class _lrucachenode:
1405 1412 """A node in a doubly linked list.
1406 1413
1407 1414 Holds a reference to nodes on either side as well as a key-value
1408 1415 pair for the dictionary entry.
1409 1416 """
1410 1417
1411 1418 __slots__ = ('next', 'prev', 'key', 'value', 'cost')
1412 1419
1413 1420 def __init__(self):
1414 1421 self.next = self
1415 1422 self.prev = self
1416 1423
1417 1424 self.key = _notset
1418 1425 self.value = None
1419 1426 self.cost = 0
1420 1427
1421 1428 def markempty(self):
1422 1429 """Mark the node as emptied."""
1423 1430 self.key = _notset
1424 1431 self.value = None
1425 1432 self.cost = 0
1426 1433
1427 1434
1428 1435 class lrucachedict:
1429 1436 """Dict that caches most recent accesses and sets.
1430 1437
1431 1438 The dict consists of an actual backing dict - indexed by original
1432 1439 key - and a doubly linked circular list defining the order of entries in
1433 1440 the cache.
1434 1441
1435 1442 The head node is the newest entry in the cache. If the cache is full,
1436 1443 we recycle head.prev and make it the new head. Cache accesses result in
1437 1444 the node being moved to before the existing head and being marked as the
1438 1445 new head node.
1439 1446
1440 1447 Items in the cache can be inserted with an optional "cost" value. This is
1441 1448 simply an integer that is specified by the caller. The cache can be queried
1442 1449 for the total cost of all items presently in the cache.
1443 1450
1444 1451 The cache can also define a maximum cost. If a cache insertion would
1445 1452 cause the total cost of the cache to go beyond the maximum cost limit,
1446 1453 nodes will be evicted to make room for the new node. This can be used
1447 1454 to e.g. set a max memory limit and associate an estimated bytes size
1448 1455 cost to each item in the cache. By default, no maximum cost is enforced.
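
A small usage sketch (illustrative): ``max`` bounds the entry count and
``maxcost`` bounds the total cost, evicting oldest entries first.

>>> d = lrucachedict(2)
>>> d[b'a'] = 1
>>> d[b'b'] = 2
>>> d[b'c'] = 3
>>> b'a' in d
False
>>> d = lrucachedict(4, maxcost=10)
>>> d.insert(b'x', 1, cost=6)
>>> d.insert(b'y', 2, cost=6)
>>> b'x' in d
False
>>> d.totalcost
6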
1449 1456 """
1450 1457
1451 1458 def __init__(self, max, maxcost=0):
1452 1459 self._cache = {}
1453 1460
1454 1461 self._head = _lrucachenode()
1455 1462 self._size = 1
1456 1463 self.capacity = max
1457 1464 self.totalcost = 0
1458 1465 self.maxcost = maxcost
1459 1466
1460 1467 def __len__(self):
1461 1468 return len(self._cache)
1462 1469
1463 1470 def __contains__(self, k):
1464 1471 return k in self._cache
1465 1472
1466 1473 def __iter__(self):
1467 1474 # We don't have to iterate in cache order, but why not.
1468 1475 n = self._head
1469 1476 for i in range(len(self._cache)):
1470 1477 yield n.key
1471 1478 n = n.next
1472 1479
1473 1480 def __getitem__(self, k):
1474 1481 node = self._cache[k]
1475 1482 self._movetohead(node)
1476 1483 return node.value
1477 1484
1478 1485 def insert(self, k, v, cost=0):
1479 1486 """Insert a new item in the cache with optional cost value."""
1480 1487 node = self._cache.get(k)
1481 1488 # Replace existing value and mark as newest.
1482 1489 if node is not None:
1483 1490 self.totalcost -= node.cost
1484 1491 node.value = v
1485 1492 node.cost = cost
1486 1493 self.totalcost += cost
1487 1494 self._movetohead(node)
1488 1495
1489 1496 if self.maxcost:
1490 1497 self._enforcecostlimit()
1491 1498
1492 1499 return
1493 1500
1494 1501 if self._size < self.capacity:
1495 1502 node = self._addcapacity()
1496 1503 else:
1497 1504 # Grab the last/oldest item.
1498 1505 node = self._head.prev
1499 1506
1500 1507 # At capacity. Kill the old entry.
1501 1508 if node.key is not _notset:
1502 1509 self.totalcost -= node.cost
1503 1510 del self._cache[node.key]
1504 1511
1505 1512 node.key = k
1506 1513 node.value = v
1507 1514 node.cost = cost
1508 1515 self.totalcost += cost
1509 1516 self._cache[k] = node
1510 1517 # And mark it as newest entry. No need to adjust order since it
1511 1518 # is already self._head.prev.
1512 1519 self._head = node
1513 1520
1514 1521 if self.maxcost:
1515 1522 self._enforcecostlimit()
1516 1523
1517 1524 def __setitem__(self, k, v):
1518 1525 self.insert(k, v)
1519 1526
1520 1527 def __delitem__(self, k):
1521 1528 self.pop(k)
1522 1529
1523 1530 def pop(self, k, default=_notset):
1524 1531 try:
1525 1532 node = self._cache.pop(k)
1526 1533 except KeyError:
1527 1534 if default is _notset:
1528 1535 raise
1529 1536 return default
1530 1537
1531 1538 value = node.value
1532 1539 self.totalcost -= node.cost
1533 1540 node.markempty()
1534 1541
1535 1542 # Temporarily mark as newest item before re-adjusting head to make
1536 1543 # this node the oldest item.
1537 1544 self._movetohead(node)
1538 1545 self._head = node.next
1539 1546
1540 1547 return value
1541 1548
1542 1549 # Additional dict methods.
1543 1550
1544 1551 def get(self, k, default=None):
1545 1552 try:
1546 1553 return self.__getitem__(k)
1547 1554 except KeyError:
1548 1555 return default
1549 1556
1550 1557 def peek(self, k, default=_notset):
1551 1558 """Get the specified item without moving it to the head
1552 1559
1553 1560 Unlike get(), this doesn't mutate the internal state. But be aware
1554 1561 that it doesn't mean peek() is thread safe.
1555 1562 """
1556 1563 try:
1557 1564 node = self._cache[k]
1558 1565 return node.value
1559 1566 except KeyError:
1560 1567 if default is _notset:
1561 1568 raise
1562 1569 return default
1563 1570
1564 1571 def clear(self):
1565 1572 n = self._head
1566 1573 while n.key is not _notset:
1567 1574 self.totalcost -= n.cost
1568 1575 n.markempty()
1569 1576 n = n.next
1570 1577
1571 1578 self._cache.clear()
1572 1579
1573 1580 def copy(self, capacity=None, maxcost=0):
1574 1581 """Create a new cache as a copy of the current one.
1575 1582
1576 1583 By default, the new cache has the same capacity as the existing one.
1577 1584 But, the cache capacity can be changed as part of performing the
1578 1585 copy.
1579 1586
1580 1587 Items in the copy have an insertion/access order matching this
1581 1588 instance.
1582 1589 """
1583 1590
1584 1591 capacity = capacity or self.capacity
1585 1592 maxcost = maxcost or self.maxcost
1586 1593 result = lrucachedict(capacity, maxcost=maxcost)
1587 1594
1588 1595 # We copy entries by iterating in oldest-to-newest order so the copy
1589 1596 # has the correct ordering.
1590 1597
1591 1598 # Find the first non-empty entry.
1592 1599 n = self._head.prev
1593 1600 while n.key is _notset and n is not self._head:
1594 1601 n = n.prev
1595 1602
1596 1603 # We could potentially skip the first N items when decreasing capacity.
1597 1604 # But let's keep it simple unless it is a performance problem.
1598 1605 for i in range(len(self._cache)):
1599 1606 result.insert(n.key, n.value, cost=n.cost)
1600 1607 n = n.prev
1601 1608
1602 1609 return result
1603 1610
1604 1611 def popoldest(self):
1605 1612 """Remove the oldest item from the cache.
1606 1613
1607 1614 Returns the (key, value) describing the removed cache entry.
1608 1615 """
1609 1616 if not self._cache:
1610 1617 return
1611 1618
1612 1619 # Walk the linked list backwards starting at tail node until we hit
1613 1620 # a non-empty node.
1614 1621 n = self._head.prev
1615 1622
1616 1623 while n.key is _notset:
1617 1624 n = n.prev
1618 1625
1619 1626 key, value = n.key, n.value
1620 1627
1621 1628 # And remove it from the cache and mark it as empty.
1622 1629 del self._cache[n.key]
1623 1630 self.totalcost -= n.cost
1624 1631 n.markempty()
1625 1632
1626 1633 return key, value
1627 1634
1628 1635 def _movetohead(self, node: _lrucachenode):
1629 1636 """Mark a node as the newest, making it the new head.
1630 1637
1631 1638 When a node is accessed, it becomes the freshest entry in the LRU
1632 1639 list, which is denoted by self._head.
1633 1640
1634 1641 Visually, let's make ``N`` the new head node (* denotes head):
1635 1642
1636 1643 previous/oldest <-> head <-> next/next newest
1637 1644
1638 1645 ----<->--- A* ---<->-----
1639 1646 | |
1640 1647 E <-> D <-> N <-> C <-> B
1641 1648
1642 1649 To:
1643 1650
1644 1651 ----<->--- N* ---<->-----
1645 1652 | |
1646 1653 E <-> D <-> C <-> B <-> A
1647 1654
1648 1655 This requires the following moves:
1649 1656
1650 1657 C.next = D (node.prev.next = node.next)
1651 1658 D.prev = C (node.next.prev = node.prev)
1652 1659 E.next = N (head.prev.next = node)
1653 1660 N.prev = E (node.prev = head.prev)
1654 1661 N.next = A (node.next = head)
1655 1662 A.prev = N (head.prev = node)
1656 1663 """
1657 1664 head = self._head
1658 1665 # C.next = D
1659 1666 node.prev.next = node.next
1660 1667 # D.prev = C
1661 1668 node.next.prev = node.prev
1662 1669 # N.prev = E
1663 1670 node.prev = head.prev
1664 1671 # N.next = A
1665 1672 # It is tempting to do just "head" here, however if node is
1666 1673 # adjacent to head, this will do bad things.
1667 1674 node.next = head.prev.next
1668 1675 # E.next = N
1669 1676 node.next.prev = node
1670 1677 # A.prev = N
1671 1678 node.prev.next = node
1672 1679
1673 1680 self._head = node
1674 1681
1675 1682 def _addcapacity(self) -> _lrucachenode:
1676 1683 """Add a node to the circular linked list.
1677 1684
1678 1685 The new node is inserted before the head node.
1679 1686 """
1680 1687 head = self._head
1681 1688 node = _lrucachenode()
1682 1689 head.prev.next = node
1683 1690 node.prev = head.prev
1684 1691 node.next = head
1685 1692 head.prev = node
1686 1693 self._size += 1
1687 1694 return node
1688 1695
1689 1696 def _enforcecostlimit(self):
1690 1697 # This should run after an insertion. It should only be called if total
1691 1698 # cost limits are being enforced.
1692 1699 # The most recently inserted node is never evicted.
1693 1700 if len(self) <= 1 or self.totalcost <= self.maxcost:
1694 1701 return
1695 1702
1696 1703 # This is logically equivalent to calling popoldest() until we
1697 1704 # free up enough cost. We don't do that since popoldest() needs
1698 1705 # to walk the linked list and doing this in a loop would be
1699 1706 # quadratic. So we find the first non-empty node and then
1700 1707 # walk nodes until we free up enough capacity.
1701 1708 #
1702 1709 # If we only removed the minimum number of nodes to free enough
1703 1710 # cost at insert time, chances are high that the next insert would
1704 1711 # also require pruning. This would effectively constitute quadratic
1705 1712 # behavior for insert-heavy workloads. To mitigate this, we set a
1706 1713 # target cost that is a percentage of the max cost. This will tend
1707 1714 # to free more nodes when the high water mark is reached, which
1708 1715 # lowers the chances of needing to prune on the subsequent insert.
1709 1716 targetcost = int(self.maxcost * 0.75)
1710 1717
1711 1718 n = self._head.prev
1712 1719 while n.key is _notset:
1713 1720 n = n.prev
1714 1721
1715 1722 while len(self) > 1 and self.totalcost > targetcost:
1716 1723 del self._cache[n.key]
1717 1724 self.totalcost -= n.cost
1718 1725 n.markempty()
1719 1726 n = n.prev
1720 1727
1721 1728
1722 1729 def lrucachefunc(func):
1723 1730 '''cache most recent results of function calls'''
1724 1731 cache = {}
1725 1732 order = collections.deque()
1726 1733 if func.__code__.co_argcount == 1:
1727 1734
1728 1735 def f(arg):
1729 1736 if arg not in cache:
1730 1737 if len(cache) > 20:
1731 1738 del cache[order.popleft()]
1732 1739 cache[arg] = func(arg)
1733 1740 else:
1734 1741 order.remove(arg)
1735 1742 order.append(arg)
1736 1743 return cache[arg]
1737 1744
1738 1745 else:
1739 1746
1740 1747 def f(*args):
1741 1748 if args not in cache:
1742 1749 if len(cache) > 20:
1743 1750 del cache[order.popleft()]
1744 1751 cache[args] = func(*args)
1745 1752 else:
1746 1753 order.remove(args)
1747 1754 order.append(args)
1748 1755 return cache[args]
1749 1756
1750 1757 return f
1751 1758
1752 1759
1753 1760 class propertycache:
1754 1761 def __init__(self, func):
1755 1762 self.func = func
1756 1763 self.name = func.__name__
1757 1764
1758 1765 def __get__(self, obj, type=None):
1759 1766 result = self.func(obj)
1760 1767 self.cachevalue(obj, result)
1761 1768 return result
1762 1769
1763 1770 def cachevalue(self, obj, value):
1764 1771 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1765 1772 obj.__dict__[self.name] = value
1766 1773
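# Usage sketch (illustrative; `computesomething` is hypothetical): the value
# is computed once, then served from the instance __dict__ on later reads.
#
#     class repoinfo:
#         @propertycache
#         def expensive(self):
#             return computesomething()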
1767 1774
1768 1775 def clearcachedproperty(obj, prop):
1769 1776 '''clear a cached property value, if one has been set'''
1770 1777 prop = pycompat.sysstr(prop)
1771 1778 if prop in obj.__dict__:
1772 1779 del obj.__dict__[prop]
1773 1780
1774 1781
1775 1782 def increasingchunks(source, min=1024, max=65536):
1776 1783 """return no less than min bytes per chunk while data remains,
1777 1784 doubling min after each chunk until it reaches max"""
1778 1785
1779 1786 def log2(x):
1780 1787 if not x:
1781 1788 return 0
1782 1789 i = 0
1783 1790 while x:
1784 1791 x >>= 1
1785 1792 i += 1
1786 1793 return i - 1
1787 1794
1788 1795 buf = []
1789 1796 blen = 0
1790 1797 for chunk in source:
1791 1798 buf.append(chunk)
1792 1799 blen += len(chunk)
1793 1800 if blen >= min:
1794 1801 if min < max:
1795 1802 min = min << 1
1796 1803 nmin = 1 << log2(blen)
1797 1804 if nmin > min:
1798 1805 min = nmin
1799 1806 if min > max:
1800 1807 min = max
1801 1808 yield b''.join(buf)
1802 1809 blen = 0
1803 1810 buf = []
1804 1811 if buf:
1805 1812 yield b''.join(buf)
1806 1813
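# Worked example (illustrative): three 600-byte chunks regroup so the first
# yielded chunk meets the 1024-byte floor; the doubled floor (2048) is not
# reached again, so the tail is flushed as-is:
#
#     [len(c) for c in increasingchunks([b'a' * 600] * 3)]  ->  [1200, 600]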
1807 1814
1808 1815 def always(fn):
1809 1816 return True
1810 1817
1811 1818
1812 1819 def never(fn):
1813 1820 return False
1814 1821
1815 1822
1816 1823 def nogc(func=None) -> Any:
1817 1824 """disable garbage collector
1818 1825
1819 1826 Python's garbage collector triggers a GC each time a certain number of
1820 1827 container objects (the number being defined by gc.get_threshold()) are
1821 1828 allocated even when marked not to be tracked by the collector. Tracking has
1822 1829 no effect on when GCs are triggered, only on what objects the GC looks
1823 1830 into. As a workaround, disable GC while building complex (huge)
1824 1831 containers.
1825 1832
1826 1833 This garbage collector issue has been fixed in 2.7, but it still affects
1827 1834 CPython's performance.
1828 1835 """
1829 1836 if func is None:
1830 1837 return _nogc_context()
1831 1838 else:
1832 1839 return _nogc_decorator(func)
1833 1840
1834 1841
1835 1842 @contextlib.contextmanager
1836 1843 def _nogc_context():
1837 1844 gcenabled = gc.isenabled()
1838 1845 gc.disable()
1839 1846 try:
1840 1847 yield
1841 1848 finally:
1842 1849 if gcenabled:
1843 1850 gc.enable()
1844 1851
1845 1852
1846 1853 def _nogc_decorator(func):
1847 1854 def wrapper(*args, **kwargs):
1848 1855 with _nogc_context():
1849 1856 return func(*args, **kwargs)
1850 1857
1851 1858 return wrapper
1852 1859
1853 1860
1854 1861 if pycompat.ispypy:
1855 1862 # PyPy runs slower with gc disabled
1856 1863 nogc = lambda x: x
1857 1864
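# Illustrative sketch, not part of the original module: nogc can be used both
# as a decorator and as a context manager. The context-manager form assumes a
# CPython build, where nogc has not been replaced by the identity lambda above.
def _nogc_example():
    @nogc
    def build_big_container():
        return [{} for _ in range(1000)]

    with nogc():
        data = build_big_container()
    return data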
1858 1865
1859 1866 def pathto(root: bytes, n1: bytes, n2: bytes) -> bytes:
1860 1867 """return the relative path from one place to another.
1861 1868 root should use os.sep to separate directories
1862 1869 n1 should use os.sep to separate directories
1863 1870 n2 should use "/" to separate directories
1864 1871 returns an os.sep-separated path.
1865 1872
1866 1873 If n1 is a relative path, it's assumed it's
1867 1874 relative to root.
1868 1875 n2 should always be relative to root.
1869 1876 """
1870 1877 if not n1:
1871 1878 return localpath(n2)
1872 1879 if os.path.isabs(n1):
1873 1880 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1874 1881 return os.path.join(root, localpath(n2))
1875 1882 n2 = b'/'.join((pconvert(root), n2))
1876 1883 a, b = splitpath(n1), n2.split(b'/')
1877 1884 a.reverse()
1878 1885 b.reverse()
1879 1886 while a and b and a[-1] == b[-1]:
1880 1887 a.pop()
1881 1888 b.pop()
1882 1889 b.reverse()
1883 1890 return pycompat.ossep.join(([b'..'] * len(a)) + b) or b'.'
1884 1891
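# Illustrative sketch, not part of the original module; the expected value
# assumes a POSIX platform where os.sep is '/'.
def _pathto_example():
    # from /repo/a/b, the repo-relative path a/c is reached via ../c
    assert pathto(b'/repo', b'/repo/a/b', b'a/c') == b'../c'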
1885 1892
1886 1893 def checksignature(func, depth=1):
1887 1894 '''wrap a function with code to check for calling errors'''
1888 1895
1889 1896 def check(*args, **kwargs):
1890 1897 try:
1891 1898 return func(*args, **kwargs)
1892 1899 except TypeError:
1893 1900 if len(traceback.extract_tb(sys.exc_info()[2])) == depth:
1894 1901 raise error.SignatureError
1895 1902 raise
1896 1903
1897 1904 return check
1898 1905
1899 1906
1900 1907 # a whitelist of known filesystems where hardlinks work reliably
1901 1908 _hardlinkfswhitelist = {
1902 1909 b'apfs',
1903 1910 b'btrfs',
1904 1911 b'ext2',
1905 1912 b'ext3',
1906 1913 b'ext4',
1907 1914 b'hfs',
1908 1915 b'jfs',
1909 1916 b'NTFS',
1910 1917 b'reiserfs',
1911 1918 b'tmpfs',
1912 1919 b'ufs',
1913 1920 b'xfs',
1914 1921 b'zfs',
1915 1922 }
1916 1923
1917 1924
1918 1925 def copyfile(
1919 1926 src,
1920 1927 dest,
1921 1928 hardlink=False,
1922 1929 copystat=False,
1923 1930 checkambig=False,
1924 1931 nb_bytes=None,
1925 1932 no_hardlink_cb=None,
1926 1933 check_fs_hardlink=True,
1927 1934 ):
1928 1935 """copy a file, preserving mode and optionally other stat info like
1929 1936 atime/mtime
1930 1937
1931 1938 checkambig argument is used with filestat, and is useful only if
1932 1939 destination file is guarded by any lock (e.g. repo.lock or
1933 1940 repo.wlock).
1934 1941
1935 1942 copystat and checkambig should be exclusive.
1936 1943
1937 1944 nb_bytes: if set only copy the first `nb_bytes` of the source file.
1938 1945 """
1939 1946 assert not (copystat and checkambig)
1940 1947 oldstat = None
1941 1948 if os.path.lexists(dest):
1942 1949 if checkambig:
1943 1950 oldstat = checkambig and filestat.frompath(dest)
1944 1951 unlink(dest)
1945 1952 if hardlink and check_fs_hardlink:
1946 1953 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1947 1954 # unless we are confident that dest is on a whitelisted filesystem.
1948 1955 try:
1949 1956 fstype = getfstype(os.path.dirname(dest))
1950 1957 except OSError:
1951 1958 fstype = None
1952 1959 if fstype not in _hardlinkfswhitelist:
1953 1960 if no_hardlink_cb is not None:
1954 1961 no_hardlink_cb()
1955 1962 hardlink = False
1956 1963 if hardlink:
1957 1964 try:
1958 1965 oslink(src, dest)
1959 1966 if nb_bytes is not None:
1960 1967 m = "the `nb_bytes` argument is incompatible with `hardlink`"
1961 1968 raise error.ProgrammingError(m)
1962 1969 return
1963 1970 except (IOError, OSError) as exc:
1964 1971 if exc.errno != errno.EEXIST and no_hardlink_cb is not None:
1965 1972 no_hardlink_cb()
1966 1973 # fall back to normal copy
1967 1974 if os.path.islink(src):
1968 1975 os.symlink(os.readlink(src), dest)
1969 1976 # copytime is ignored for symlinks, but in general copytime isn't needed
1970 1977 # for them anyway
1971 1978 if nb_bytes is not None:
1972 1979 m = "cannot use `nb_bytes` on a symlink"
1973 1980 raise error.ProgrammingError(m)
1974 1981 else:
1975 1982 try:
1976 1983 shutil.copyfile(src, dest)
1977 1984 if copystat:
1978 1985 # copystat also copies mode
1979 1986 shutil.copystat(src, dest)
1980 1987 else:
1981 1988 shutil.copymode(src, dest)
1982 1989 if oldstat and oldstat.stat:
1983 1990 newstat = filestat.frompath(dest)
1984 1991 if newstat.isambig(oldstat):
1985 1992 # stat of copied file is ambiguous to original one
1986 1993 advanced = (
1987 1994 oldstat.stat[stat.ST_MTIME] + 1
1988 1995 ) & 0x7FFFFFFF
1989 1996 os.utime(dest, (advanced, advanced))
1990 1997 # We could do something smarter using `copy_file_range` call or similar
1991 1998 if nb_bytes is not None:
1992 1999 with open(dest, mode='r+') as f:
1993 2000 f.truncate(nb_bytes)
1994 2001 except shutil.Error as inst:
1995 2002 raise error.Abort(stringutil.forcebytestr(inst))
1996 2003
1997 2004
1998 2005 def copyfiles(src, dst, hardlink=None, progress=None):
1999 2006 """Copy a directory tree using hardlinks if possible."""
2000 2007 num = 0
2001 2008
2002 2009 def settopic():
2003 2010 if progress:
2004 2011 progress.topic = _(b'linking') if hardlink else _(b'copying')
2005 2012
2006 2013 if os.path.isdir(src):
2007 2014 if hardlink is None:
2008 2015 hardlink = (
2009 2016 os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
2010 2017 )
2011 2018 settopic()
2012 2019 os.mkdir(dst)
2013 2020 for name, kind in listdir(src):
2014 2021 srcname = os.path.join(src, name)
2015 2022 dstname = os.path.join(dst, name)
2016 2023 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
2017 2024 num += n
2018 2025 else:
2019 2026 if hardlink is None:
2020 2027 hardlink = (
2021 2028 os.stat(os.path.dirname(src)).st_dev
2022 2029 == os.stat(os.path.dirname(dst)).st_dev
2023 2030 )
2024 2031 settopic()
2025 2032
2026 2033 if hardlink:
2027 2034 try:
2028 2035 oslink(src, dst)
2029 2036 except (IOError, OSError) as exc:
2030 2037 if exc.errno != errno.EEXIST:
2031 2038 hardlink = False
2032 2039 # XXX maybe try to relink if the file exist ?
2033 2040 shutil.copy(src, dst)
2034 2041 else:
2035 2042 shutil.copy(src, dst)
2036 2043 num += 1
2037 2044 if progress:
2038 2045 progress.increment()
2039 2046
2040 2047 return hardlink, num
2041 2048
2042 2049
2043 2050 _winreservednames = {
2044 2051 b'con',
2045 2052 b'prn',
2046 2053 b'aux',
2047 2054 b'nul',
2048 2055 b'com1',
2049 2056 b'com2',
2050 2057 b'com3',
2051 2058 b'com4',
2052 2059 b'com5',
2053 2060 b'com6',
2054 2061 b'com7',
2055 2062 b'com8',
2056 2063 b'com9',
2057 2064 b'lpt1',
2058 2065 b'lpt2',
2059 2066 b'lpt3',
2060 2067 b'lpt4',
2061 2068 b'lpt5',
2062 2069 b'lpt6',
2063 2070 b'lpt7',
2064 2071 b'lpt8',
2065 2072 b'lpt9',
2066 2073 }
2067 2074 _winreservedchars = b':*?"<>|'
2068 2075
2069 2076
2070 2077 def checkwinfilename(path: bytes) -> Optional[bytes]:
2071 2078 r"""Check that the base-relative path is a valid filename on Windows.
2072 2079 Returns None if the path is ok, or a UI string describing the problem.
2073 2080
2074 2081 >>> checkwinfilename(b"just/a/normal/path")
2075 2082 >>> checkwinfilename(b"foo/bar/con.xml")
2076 2083 "filename contains 'con', which is reserved on Windows"
2077 2084 >>> checkwinfilename(b"foo/con.xml/bar")
2078 2085 "filename contains 'con', which is reserved on Windows"
2079 2086 >>> checkwinfilename(b"foo/bar/xml.con")
2080 2087 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
2081 2088 "filename contains 'AUX', which is reserved on Windows"
2082 2089 >>> checkwinfilename(b"foo/bar/bla:.txt")
2083 2090 "filename contains ':', which is reserved on Windows"
2084 2091 >>> checkwinfilename(b"foo/bar/b\07la.txt")
2085 2092 "filename contains '\\x07', which is invalid on Windows"
2086 2093 >>> checkwinfilename(b"foo/bar/bla ")
2087 2094 "filename ends with ' ', which is not allowed on Windows"
2088 2095 >>> checkwinfilename(b"../bar")
2089 2096 >>> checkwinfilename(b"foo\\")
2090 2097 "filename ends with '\\', which is invalid on Windows"
2091 2098 >>> checkwinfilename(b"foo\\/bar")
2092 2099 "directory name ends with '\\', which is invalid on Windows"
2093 2100 """
2094 2101 if path.endswith(b'\\'):
2095 2102 return _(b"filename ends with '\\', which is invalid on Windows")
2096 2103 if b'\\/' in path:
2097 2104 return _(b"directory name ends with '\\', which is invalid on Windows")
2098 2105 for n in path.replace(b'\\', b'/').split(b'/'):
2099 2106 if not n:
2100 2107 continue
2101 2108 for c in _filenamebytestr(n):
2102 2109 if c in _winreservedchars:
2103 2110 return (
2104 2111 _(
2105 2112 b"filename contains '%s', which is reserved "
2106 2113 b"on Windows"
2107 2114 )
2108 2115 % c
2109 2116 )
2110 2117 if ord(c) <= 31:
2111 2118 return _(
2112 2119 b"filename contains '%s', which is invalid on Windows"
2113 2120 ) % stringutil.escapestr(c)
2114 2121 base = n.split(b'.')[0]
2115 2122 if base and base.lower() in _winreservednames:
2116 2123 return (
2117 2124 _(b"filename contains '%s', which is reserved on Windows")
2118 2125 % base
2119 2126 )
2120 2127 t = n[-1:]
2121 2128 if t in b'. ' and n not in b'..':
2122 2129 return (
2123 2130 _(
2124 2131 b"filename ends with '%s', which is not allowed "
2125 2132 b"on Windows"
2126 2133 )
2127 2134 % t
2128 2135 )
2129 2136
2130 2137
2131 2138 timer = getattr(time, "perf_counter", None)
2132 2139
2133 2140 if pycompat.iswindows:
2134 2141 checkosfilename = checkwinfilename
2135 2142 if not timer:
2136 2143 timer = time.clock # pytype: disable=module-attr
2137 2144 else:
2138 2145 # mercurial.windows doesn't have platform.checkosfilename
2139 2146 checkosfilename = platform.checkosfilename # pytype: disable=module-attr
2140 2147 if not timer:
2141 2148 timer = time.time
2142 2149
2143 2150
2144 2151 def makelock(info, pathname):
2145 2152 """Create a lock file atomically if possible
2146 2153
2147 2154 This may leave a stale lock file if symlink isn't supported and signal
2148 2155 interrupt is enabled.
2149 2156 """
2150 2157 try:
2151 2158 return os.symlink(info, pathname)
2152 2159 except OSError as why:
2153 2160 if why.errno == errno.EEXIST:
2154 2161 raise
2155 2162 except AttributeError: # no symlink in os
2156 2163 pass
2157 2164
2158 2165 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
2159 2166 ld = os.open(pathname, flags)
2160 2167 os.write(ld, info)
2161 2168 os.close(ld)
2162 2169
2163 2170
2164 2171 def readlock(pathname: bytes) -> bytes:
2165 2172 try:
2166 2173 return readlink(pathname)
2167 2174 except OSError as why:
2168 2175 if why.errno not in (errno.EINVAL, errno.ENOSYS):
2169 2176 raise
2170 2177 except AttributeError: # no symlink in os
2171 2178 pass
2172 2179 with posixfile(pathname, b'rb') as fp:
2173 2180 return fp.read()
2174 2181
2175 2182
2176 2183 def fstat(fp):
2177 2184 '''stat file object that may not have fileno method.'''
2178 2185 try:
2179 2186 return os.fstat(fp.fileno())
2180 2187 except AttributeError:
2181 2188 return os.stat(fp.name)
2182 2189
2183 2190
2184 2191 # File system features
2185 2192
2186 2193
2187 2194 def fscasesensitive(path: bytes) -> bool:
2188 2195 """
2189 2196 Return true if the given path is on a case-sensitive filesystem
2190 2197
2191 2198 Requires a path (like /foo/.hg) ending with a foldable final
2192 2199 directory component.
2193 2200 """
2194 2201 s1 = os.lstat(path)
2195 2202 d, b = os.path.split(path)
2196 2203 b2 = b.upper()
2197 2204 if b == b2:
2198 2205 b2 = b.lower()
2199 2206 if b == b2:
2200 2207 return True # no evidence against case sensitivity
2201 2208 p2 = os.path.join(d, b2)
2202 2209 try:
2203 2210 s2 = os.lstat(p2)
2204 2211 if s2 == s1:
2205 2212 return False
2206 2213 return True
2207 2214 except OSError:
2208 2215 return True
2209 2216
2210 2217
2211 2218 _re2_input = lambda x: x
2212 2219 # google-re2 will need to be told not to output errors on its own
2213 2220 _re2_options = None
2214 2221 try:
2215 2222 import re2 # pytype: disable=import-error
2216 2223
2217 2224 _re2 = None
2218 2225 except ImportError:
2219 2226 _re2 = False
2220 2227
2221 2228
2222 2229 def has_re2():
2223 2230 """return True is re2 is available, False otherwise"""
2224 2231 if _re2 is None:
2225 2232 _re._checkre2()
2226 2233 return _re2
2227 2234
2228 2235
2229 2236 class _re:
2230 2237 @staticmethod
2231 2238 def _checkre2():
2232 2239 global _re2
2233 2240 global _re2_input
2234 2241 global _re2_options
2235 2242 if _re2 is not None:
2236 2243 # we already have the answer
2237 2244 return
2238 2245
2239 2246 check_pattern = br'\[([^\[]+)\]'
2240 2247 check_input = b'[ui]'
2241 2248 try:
2242 2249 # check if match works, see issue3964
2243 2250 _re2 = bool(re2.match(check_pattern, check_input))
2244 2251 except ImportError:
2245 2252 _re2 = False
2246 2253 except TypeError:
2247 2254 # the `pyre-2` project provides a re2 module that accepts bytes
2248 2255 # the `fb-re2` project provides a re2 module that accepts sysstr
2249 2256 check_pattern = pycompat.sysstr(check_pattern)
2250 2257 check_input = pycompat.sysstr(check_input)
2251 2258 _re2 = bool(re2.match(check_pattern, check_input))
2252 2259 _re2_input = pycompat.sysstr
2253 2260 try:
2254 2261 quiet = re2.Options()
2255 2262 quiet.log_errors = False
2256 2263 _re2_options = quiet
2257 2264 except AttributeError:
2258 2265 pass
2259 2266
2260 2267 def compile(self, pat, flags=0):
2261 2268 """Compile a regular expression, using re2 if possible
2262 2269
2263 2270 For best performance, use only re2-compatible regexp features. The
2264 2271 only flags from the re module that are re2-compatible are
2265 2272 IGNORECASE and MULTILINE."""
2266 2273 if _re2 is None:
2267 2274 self._checkre2()
2268 2275 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
2269 2276 if flags & remod.IGNORECASE:
2270 2277 pat = b'(?i)' + pat
2271 2278 if flags & remod.MULTILINE:
2272 2279 pat = b'(?m)' + pat
2273 2280 try:
2274 2281 input_regex = _re2_input(pat)
2275 2282 if _re2_options is not None:
2276 2283 compiled = re2.compile(input_regex, options=_re2_options)
2277 2284 else:
2278 2285 compiled = re2.compile(input_regex)
2279 2286 return compiled
2280 2287 except re2.error:
2281 2288 pass
2282 2289 return remod.compile(pat, flags)
2283 2290
2284 2291 @propertycache
2285 2292 def escape(self):
2286 2293 """Return the version of escape corresponding to self.compile.
2287 2294
2288 2295 This is imperfect because whether re2 or re is used for a particular
2289 2296 function depends on the flags, etc, but it's the best we can do.
2290 2297 """
2291 2298 global _re2
2292 2299 if _re2 is None:
2293 2300 self._checkre2()
2294 2301 if _re2:
2295 2302 return re2.escape
2296 2303 else:
2297 2304 return remod.escape
2298 2305
2299 2306
2300 2307 re = _re()
2301 2308
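# Illustrative sketch, not part of the original module: the `re` facade above
# prefers google-re2 when it is importable and working, and silently falls
# back to the stdlib module otherwise, so callers use one API either way
# (bytes patterns, as used throughout this module).
def _re_facade_example():
    pat = re.compile(b'a(b+)c')
    assert pat.match(b'abbc') is not None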
2302 2309 _fspathcache = {}
2303 2310
2304 2311
2305 2312 def fspath(name: bytes, root: bytes) -> bytes:
2306 2313 """Get name in the case stored in the filesystem
2307 2314
2308 2315 The name should be relative to root, and be normcase-ed for efficiency.
2309 2316
2310 2317 Note that this function is unnecessary, and should not be
2311 2318 called, for case-sensitive filesystems (simply because it's expensive).
2312 2319
2313 2320 The root should be normcase-ed, too.
2314 2321 """
2315 2322
2316 2323 def _makefspathcacheentry(dir):
2317 2324 return {normcase(n): n for n in os.listdir(dir)}
2318 2325
2319 2326 seps = pycompat.ossep
2320 2327 if pycompat.osaltsep:
2321 2328 seps = seps + pycompat.osaltsep
2322 2329 # Protect backslashes. This gets silly very quickly.
2323 2330 seps.replace(b'\\', b'\\\\')
2324 2331 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
2325 2332 dir = os.path.normpath(root)
2326 2333 result = []
2327 2334 for part, sep in pattern.findall(name):
2328 2335 if sep:
2329 2336 result.append(sep)
2330 2337 continue
2331 2338
2332 2339 if dir not in _fspathcache:
2333 2340 _fspathcache[dir] = _makefspathcacheentry(dir)
2334 2341 contents = _fspathcache[dir]
2335 2342
2336 2343 found = contents.get(part)
2337 2344 if not found:
2338 2345 # retry "once per directory" per "dirstate.walk" which
2339 2346 # may take place for each patch of "hg qpush", for example
2340 2347 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2341 2348 found = contents.get(part)
2342 2349
2343 2350 result.append(found or part)
2344 2351 dir = os.path.join(dir, part)
2345 2352
2346 2353 return b''.join(result)
2347 2354
2348 2355
2349 2356 def checknlink(testfile: bytes) -> bool:
2350 2357 '''check whether hardlink count reporting works properly'''
2351 2358
2352 2359 # testfile may be open, so we need a separate file for checking to
2353 2360 # work around issue2543 (or testfile may get lost on Samba shares)
2354 2361 f1, f2, fp = None, None, None
2355 2362 try:
2356 2363 fd, f1 = pycompat.mkstemp(
2357 2364 prefix=b'.%s-' % os.path.basename(testfile),
2358 2365 suffix=b'1~',
2359 2366 dir=os.path.dirname(testfile),
2360 2367 )
2361 2368 os.close(fd)
2362 2369 f2 = b'%s2~' % f1[:-2]
2363 2370
2364 2371 oslink(f1, f2)
2365 2372 # nlinks() may behave differently for files on Windows shares if
2366 2373 # the file is open.
2367 2374 fp = posixfile(f2)
2368 2375 return nlinks(f2) > 1
2369 2376 except OSError:
2370 2377 return False
2371 2378 finally:
2372 2379 if fp is not None:
2373 2380 fp.close()
2374 2381 for f in (f1, f2):
2375 2382 try:
2376 2383 if f is not None:
2377 2384 os.unlink(f)
2378 2385 except OSError:
2379 2386 pass
2380 2387
2381 2388
2382 2389 def endswithsep(path: bytes) -> bool:
2383 2390 '''Check path ends with os.sep or os.altsep.'''
2384 2391 return bool( # help pytype
2385 2392 path.endswith(pycompat.ossep)
2386 2393 or pycompat.osaltsep
2387 2394 and path.endswith(pycompat.osaltsep)
2388 2395 )
2389 2396
2390 2397
2391 2398 def splitpath(path: bytes) -> List[bytes]:
2392 2399 """Split path by os.sep.
2393 2400 Note that this function does not use os.altsep because this is
2394 2401 an alternative of simple "xxx.split(os.sep)".
2395 2402 It is recommended to use os.path.normpath() before using this
2396 2403 function if needed."""
2397 2404 return path.split(pycompat.ossep)
2398 2405
2399 2406
2400 2407 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2401 2408 """Create a temporary file with the same contents from name
2402 2409
2403 2410 The permission bits are copied from the original file.
2404 2411
2405 2412 If the temporary file is going to be truncated immediately, you
2406 2413 can use emptyok=True as an optimization.
2407 2414
2408 2415 Returns the name of the temporary file.
2409 2416 """
2410 2417 d, fn = os.path.split(name)
2411 2418 fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
2412 2419 os.close(fd)
2413 2420 # Temporary files are created with mode 0600, which is usually not
2414 2421 # what we want. If the original file already exists, just copy
2415 2422 # its mode. Otherwise, manually obey umask.
2416 2423 copymode(name, temp, createmode, enforcewritable)
2417 2424
2418 2425 if emptyok:
2419 2426 return temp
2420 2427 try:
2421 2428 try:
2422 2429 ifp = posixfile(name, b"rb")
2423 2430 except IOError as inst:
2424 2431 if inst.errno == errno.ENOENT:
2425 2432 return temp
2426 2433 if not getattr(inst, 'filename', None):
2427 2434 inst.filename = name
2428 2435 raise
2429 2436 ofp = posixfile(temp, b"wb")
2430 2437 for chunk in filechunkiter(ifp):
2431 2438 ofp.write(chunk)
2432 2439 ifp.close()
2433 2440 ofp.close()
2434 2441 except: # re-raises
2435 2442 try:
2436 2443 os.unlink(temp)
2437 2444 except OSError:
2438 2445 pass
2439 2446 raise
2440 2447 return temp
2441 2448
2442 2449
2443 2450 class filestat:
2444 2451 """help to exactly detect change of a file
2445 2452
2446 2453 'stat' attribute is result of 'os.stat()' if specified 'path'
2447 2454 exists. Otherwise, it is None. This can avoid preparative
2448 2455 'exists()' examination on client side of this class.
2449 2456 """
2450 2457
2451 2458 def __init__(self, stat):
2452 2459 self.stat = stat
2453 2460
2454 2461 @classmethod
2455 2462 def frompath(cls, path):
2456 2463 try:
2457 2464 stat = os.stat(path)
2458 2465 except FileNotFoundError:
2459 2466 stat = None
2460 2467 return cls(stat)
2461 2468
2462 2469 @classmethod
2463 2470 def fromfp(cls, fp):
2464 2471 stat = os.fstat(fp.fileno())
2465 2472 return cls(stat)
2466 2473
2467 2474 __hash__ = object.__hash__
2468 2475
2469 2476 def __eq__(self, old):
2470 2477 try:
2471 2478 # if ambiguity between stat of new and old file is
2472 2479 # avoided, comparison of size, ctime and mtime is enough
2473 2480 # to exactly detect change of a file regardless of platform
2474 2481 return (
2475 2482 self.stat.st_size == old.stat.st_size
2476 2483 and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2477 2484 and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
2478 2485 )
2479 2486 except AttributeError:
2480 2487 pass
2481 2488 try:
2482 2489 return self.stat is None and old.stat is None
2483 2490 except AttributeError:
2484 2491 return False
2485 2492
2486 2493 def isambig(self, old):
2487 2494 """Examine whether new (= self) stat is ambiguous against old one
2488 2495
2489 2496 "S[N]" below means stat of a file at N-th change:
2490 2497
2491 2498 - S[n-1].ctime < S[n].ctime: can detect change of a file
2492 2499 - S[n-1].ctime == S[n].ctime
2493 2500 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2494 2501 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2495 2502 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2496 2503 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2497 2504
2498 2505 Case (*2) above means that a file was changed twice or more at
2499 2506 same time in sec (= S[n-1].ctime), and comparison of timestamp
2500 2507 is ambiguous.
2501 2508
2502 2509 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
2503 2510 timestamp is ambiguous".
2504 2511
2505 2512 But advancing mtime only in case (*2) doesn't work as
2506 2513 expected, because naturally advanced S[n].mtime in case (*1)
2507 2514 might be equal to manually advanced S[n-1 or earlier].mtime.
2508 2515
2509 2516 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2510 2517 treated as ambiguous regardless of mtime, to avoid overlooking
2511 2518 changes caused by collisions between such mtimes.
2512 2519
2513 2520 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2514 2521 S[n].mtime", even if size of a file isn't changed.
2515 2522 """
2516 2523 try:
2517 2524 return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2518 2525 except AttributeError:
2519 2526 return False
2520 2527
2521 2528 def avoidambig(self, path, old):
2522 2529 """Change file stat of specified path to avoid ambiguity
2523 2530
2524 2531 'old' should be previous filestat of 'path'.
2525 2532
2526 2533 This skips avoiding ambiguity, if a process doesn't have
2527 2534 appropriate privileges for 'path'. This returns False in this
2528 2535 case.
2529 2536
2530 2537 Otherwise, this returns True, as "ambiguity is avoided".
2531 2538 """
2532 2539 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2533 2540 try:
2534 2541 os.utime(path, (advanced, advanced))
2535 2542 except PermissionError:
2536 2543 # utime() on the file created by another user causes EPERM,
2537 2544 # if a process doesn't have appropriate privileges
2538 2545 return False
2539 2546 return True
2540 2547
2541 2548 def __ne__(self, other):
2542 2549 return not self == other
2543 2550
2544 2551
2545 2552 class atomictempfile:
2546 2553 """writable file object that atomically updates a file
2547 2554
2548 2555 All writes will go to a temporary copy of the original file. Call
2549 2556 close() when you are done writing, and atomictempfile will rename
2550 2557 the temporary copy to the original name, making the changes
2551 2558 visible. If the object is destroyed without being closed, all your
2552 2559 writes are discarded.
2553 2560
2554 2561 checkambig argument of constructor is used with filestat, and is
2555 2562 useful only if target file is guarded by any lock (e.g. repo.lock
2556 2563 or repo.wlock).
2557 2564 """
2558 2565
2559 2566 def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
2560 2567 self.__name = name # permanent name
2561 2568 self._tempname = mktempcopy(
2562 2569 name,
2563 2570 emptyok=(b'w' in mode),
2564 2571 createmode=createmode,
2565 2572 enforcewritable=(b'w' in mode),
2566 2573 )
2567 2574
2568 2575 self._fp = posixfile(self._tempname, mode)
2569 2576 self._checkambig = checkambig
2570 2577
2571 2578 # delegated methods
2572 2579 self.read = self._fp.read
2573 2580 self.write = self._fp.write
2574 2581 self.writelines = self._fp.writelines
2575 2582 self.seek = self._fp.seek
2576 2583 self.tell = self._fp.tell
2577 2584 self.fileno = self._fp.fileno
2578 2585
2579 2586 def close(self):
2580 2587 if not self._fp.closed:
2581 2588 self._fp.close()
2582 2589 filename = localpath(self.__name)
2583 2590 oldstat = self._checkambig and filestat.frompath(filename)
2584 2591 if oldstat and oldstat.stat:
2585 2592 rename(self._tempname, filename)
2586 2593 newstat = filestat.frompath(filename)
2587 2594 if newstat.isambig(oldstat):
2588 2595 # stat of changed file is ambiguous to original one
2589 2596 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2590 2597 os.utime(filename, (advanced, advanced))
2591 2598 else:
2592 2599 rename(self._tempname, filename)
2593 2600
2594 2601 def discard(self):
2595 2602 if not self._fp.closed:
2596 2603 try:
2597 2604 os.unlink(self._tempname)
2598 2605 except OSError:
2599 2606 pass
2600 2607 self._fp.close()
2601 2608
2602 2609 def __del__(self):
2603 2610 if hasattr(self, '_fp'): # constructor actually did something
2604 2611 self.discard()
2605 2612
2606 2613 def __enter__(self):
2607 2614 return self
2608 2615
2609 2616 def __exit__(self, exctype, excvalue, traceback):
2610 2617 if exctype is not None:
2611 2618 self.discard()
2612 2619 else:
2613 2620 self.close()
2614 2621
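# Illustrative sketch, not part of the original module; the temporary
# directory and file name are hypothetical. On success the final rename
# publishes the full content; on error the target is left untouched.
def _atomictempfile_example():
    import tempfile

    target = os.path.join(tempfile.mkdtemp().encode('utf-8'), b'data')
    with atomictempfile(target) as fp:
        fp.write(b'all or nothing\n')
    assert readfile(target) == b'all or nothing\n'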
2615 2622
2616 2623 def tryrmdir(f):
2617 2624 try:
2618 2625 removedirs(f)
2619 2626 except OSError as e:
2620 2627 if e.errno != errno.ENOENT and e.errno != errno.ENOTEMPTY:
2621 2628 raise
2622 2629
2623 2630
2624 2631 def unlinkpath(
2625 2632 f: bytes, ignoremissing: bool = False, rmdir: bool = True
2626 2633 ) -> None:
2627 2634 """unlink and remove the directory if it is empty"""
2628 2635 if ignoremissing:
2629 2636 tryunlink(f)
2630 2637 else:
2631 2638 unlink(f)
2632 2639 if rmdir:
2633 2640 # try removing directories that might now be empty
2634 2641 try:
2635 2642 removedirs(os.path.dirname(f))
2636 2643 except OSError:
2637 2644 pass
2638 2645
2639 2646
2640 2647 def tryunlink(f: bytes) -> bool:
2641 2648 """Attempt to remove a file, ignoring FileNotFoundError.
2642 2649
2643 2650 Returns False in case the file did not exist, True otherwise
2644 2651 """
2645 2652 try:
2646 2653 unlink(f)
2647 2654 return True
2648 2655 except FileNotFoundError:
2649 2656 return False
2650 2657
2651 2658
2652 2659 def makedirs(
2653 2660 name: bytes, mode: Optional[int] = None, notindexed: bool = False
2654 2661 ) -> None:
2655 2662 """recursive directory creation with parent mode inheritance
2656 2663
2657 2664 Newly created directories are marked as "not to be indexed by
2658 2665 the content indexing service", if ``notindexed`` is specified
2659 2666 for "write" mode access.
2660 2667 """
2661 2668 try:
2662 2669 makedir(name, notindexed)
2663 2670 except OSError as err:
2664 2671 if err.errno == errno.EEXIST:
2665 2672 return
2666 2673 if err.errno != errno.ENOENT or not name:
2667 2674 raise
2668 2675 parent = os.path.dirname(abspath(name))
2669 2676 if parent == name:
2670 2677 raise
2671 2678 makedirs(parent, mode, notindexed)
2672 2679 try:
2673 2680 makedir(name, notindexed)
2674 2681 except OSError as err:
2675 2682 # Catch EEXIST to handle races
2676 2683 if err.errno == errno.EEXIST:
2677 2684 return
2678 2685 raise
2679 2686 if mode is not None:
2680 2687 os.chmod(name, mode)
2681 2688
2682 2689
2683 2690 def readfile(path: bytes) -> bytes:
2684 2691 with open(path, b'rb') as fp:
2685 2692 return fp.read()
2686 2693
2687 2694
2688 2695 def writefile(path: bytes, text: bytes) -> None:
2689 2696 with open(path, b'wb') as fp:
2690 2697 fp.write(text)
2691 2698
2692 2699
2693 2700 def appendfile(path: bytes, text: bytes) -> None:
2694 2701 with open(path, b'ab') as fp:
2695 2702 fp.write(text)
2696 2703
2697 2704
2698 2705 class chunkbuffer:
2699 2706 """Allow arbitrary sized chunks of data to be efficiently read from an
2700 2707 iterator over chunks of arbitrary size."""
2701 2708
2702 2709 def __init__(self, in_iter):
2703 2710 """in_iter is the iterator that's iterating over the input chunks."""
2704 2711
2705 2712 def splitbig(chunks):
2706 2713 for chunk in chunks:
2707 2714 if len(chunk) > 2 ** 20:
2708 2715 pos = 0
2709 2716 while pos < len(chunk):
2710 2717 end = pos + 2 ** 18
2711 2718 yield chunk[pos:end]
2712 2719 pos = end
2713 2720 else:
2714 2721 yield chunk
2715 2722
2716 2723 self.iter = splitbig(in_iter)
2717 2724 self._queue = collections.deque()
2718 2725 self._chunkoffset = 0
2719 2726
2720 2727 def read(self, l=None):
2721 2728 """Read L bytes of data from the iterator of chunks of data.
2722 2729 Returns less than L bytes if the iterator runs dry.
2723 2730
2724 2731 If size parameter is omitted, read everything"""
2725 2732 if l is None:
2726 2733 return b''.join(self.iter)
2727 2734
2728 2735 left = l
2729 2736 buf = []
2730 2737 queue = self._queue
2731 2738 while left > 0:
2732 2739 # refill the queue
2733 2740 if not queue:
2734 2741 target = 2 ** 18
2735 2742 for chunk in self.iter:
2736 2743 queue.append(chunk)
2737 2744 target -= len(chunk)
2738 2745 if target <= 0:
2739 2746 break
2740 2747 if not queue:
2741 2748 break
2742 2749
2743 2750 # The easy way to do this would be to queue.popleft(), modify the
2744 2751 # chunk (if necessary), then queue.appendleft(). However, for cases
2745 2752 # where we read partial chunk content, this incurs 2 dequeue
2746 2753 # mutations and creates a new str for the remaining chunk in the
2747 2754 # queue. Our code below avoids this overhead.
2748 2755
2749 2756 chunk = queue[0]
2750 2757 chunkl = len(chunk)
2751 2758 offset = self._chunkoffset
2752 2759
2753 2760 # Use full chunk.
2754 2761 if offset == 0 and left >= chunkl:
2755 2762 left -= chunkl
2756 2763 queue.popleft()
2757 2764 buf.append(chunk)
2758 2765 # self._chunkoffset remains at 0.
2759 2766 continue
2760 2767
2761 2768 chunkremaining = chunkl - offset
2762 2769
2763 2770 # Use all of unconsumed part of chunk.
2764 2771 if left >= chunkremaining:
2765 2772 left -= chunkremaining
2766 2773 queue.popleft()
2767 2774 # offset == 0 is enabled by block above, so this won't merely
2768 2775 # copy via ``chunk[0:]``.
2769 2776 buf.append(chunk[offset:])
2770 2777 self._chunkoffset = 0
2771 2778
2772 2779 # Partial chunk needed.
2773 2780 else:
2774 2781 buf.append(chunk[offset : offset + left])
2775 2782 self._chunkoffset += left
2776 2783 left -= chunkremaining
2777 2784
2778 2785 return b''.join(buf)
2779 2786
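# Illustrative sketch, not part of the original module: reads of arbitrary
# sizes are satisfied across the underlying chunk boundaries.
def _chunkbuffer_example():
    buf = chunkbuffer(iter([b'abc', b'def', b'gh']))
    assert buf.read(4) == b'abcd'
    assert buf.read(4) == b'efgh'
    assert buf.read(4) == b''  # iterator exhausted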
2780 2787
2781 2788 def filechunkiter(f, size=131072, limit=None):
2782 2789 """Create a generator that produces the data in the file size
2783 2790 (default 131072) bytes at a time, up to optional limit (default is
2784 2791 to read all data). Chunks may be less than size bytes if the
2785 2792 chunk is the last chunk in the file, or the file is a socket or
2786 2793 some other type of file that sometimes reads less data than is
2787 2794 requested."""
2788 2795 assert size >= 0
2789 2796 assert limit is None or limit >= 0
2790 2797 while True:
2791 2798 if limit is None:
2792 2799 nbytes = size
2793 2800 else:
2794 2801 nbytes = min(limit, size)
2795 2802 s = nbytes and f.read(nbytes)
2796 2803 if not s:
2797 2804 break
2798 2805 if limit:
2799 2806 limit -= len(s)
2800 2807 yield s
2801 2808
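# Illustrative sketch, not part of the original module (io is imported at the
# top of this file): the optional limit caps the total number of bytes read.
def _filechunkiter_example():
    fp = io.BytesIO(b'abcdefghij')
    assert list(filechunkiter(fp, size=4)) == [b'abcd', b'efgh', b'ij']
    fp.seek(0)
    assert list(filechunkiter(fp, size=4, limit=6)) == [b'abcd', b'ef']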
2802 2809
2803 2810 class cappedreader:
2804 2811 """A file object proxy that allows reading up to N bytes.
2805 2812
2806 2813 Given a source file object, instances of this type allow reading up to
2807 2814 N bytes from that source file object. Attempts to read past the allowed
2808 2815 limit are treated as EOF.
2809 2816
2810 2817 It is assumed that I/O is not performed on the original file object
2811 2818 in addition to I/O that is performed by this instance. If there is,
2812 2819 state tracking will get out of sync and unexpected results will ensue.
2813 2820 """
2814 2821
2815 2822 def __init__(self, fh, limit):
2816 2823 """Allow reading up to <limit> bytes from <fh>."""
2817 2824 self._fh = fh
2818 2825 self._left = limit
2819 2826
2820 2827 def read(self, n=-1):
2821 2828 if not self._left:
2822 2829 return b''
2823 2830
2824 2831 if n < 0:
2825 2832 n = self._left
2826 2833
2827 2834 data = self._fh.read(min(n, self._left))
2828 2835 self._left -= len(data)
2829 2836 assert self._left >= 0
2830 2837
2831 2838 return data
2832 2839
2833 2840 def readinto(self, b):
2834 2841 res = self.read(len(b))
2835 2842 if res is None:
2836 2843 return None
2837 2844
2838 2845 b[0 : len(res)] = res
2839 2846 return len(res)
2840 2847
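# Illustrative sketch, not part of the original module: reads beyond the
# configured limit behave like EOF rather than touching the source again.
def _cappedreader_example():
    capped = cappedreader(io.BytesIO(b'0123456789'), 4)
    assert capped.read() == b'0123'
    assert capped.read() == b''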
2841 2848
2842 2849 def unitcountfn(*unittable):
2843 2850 '''return a function that renders a readable count of some quantity'''
2844 2851
2845 2852 def go(count):
2846 2853 for multiplier, divisor, format in unittable:
2847 2854 if abs(count) >= divisor * multiplier:
2848 2855 return format % (count / float(divisor))
2849 2856 return unittable[-1][2] % count
2850 2857
2851 2858 return go
2852 2859
2853 2860
2854 2861 def processlinerange(fromline: int, toline: int) -> Tuple[int, int]:
2855 2862 """Check that linerange <fromline>:<toline> makes sense and return a
2856 2863 0-based range.
2857 2864
2858 2865 >>> processlinerange(10, 20)
2859 2866 (9, 20)
2860 2867 >>> processlinerange(2, 1)
2861 2868 Traceback (most recent call last):
2862 2869 ...
2863 2870 ParseError: line range must be positive
2864 2871 >>> processlinerange(0, 5)
2865 2872 Traceback (most recent call last):
2866 2873 ...
2867 2874 ParseError: fromline must be strictly positive
2868 2875 """
2869 2876 if toline - fromline < 0:
2870 2877 raise error.ParseError(_(b"line range must be positive"))
2871 2878 if fromline < 1:
2872 2879 raise error.ParseError(_(b"fromline must be strictly positive"))
2873 2880 return fromline - 1, toline
2874 2881
2875 2882
2876 2883 bytecount = unitcountfn(
2877 2884 (100, 1 << 30, _(b'%.0f GB')),
2878 2885 (10, 1 << 30, _(b'%.1f GB')),
2879 2886 (1, 1 << 30, _(b'%.2f GB')),
2880 2887 (100, 1 << 20, _(b'%.0f MB')),
2881 2888 (10, 1 << 20, _(b'%.1f MB')),
2882 2889 (1, 1 << 20, _(b'%.2f MB')),
2883 2890 (100, 1 << 10, _(b'%.0f KB')),
2884 2891 (10, 1 << 10, _(b'%.1f KB')),
2885 2892 (1, 1 << 10, _(b'%.2f KB')),
2886 2893 (1, 1, _(b'%.0f bytes')),
2887 2894 )
2888 2895
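# Illustrative sketch, not part of the original module; the expected strings
# assume the default untranslated message catalog.
def _bytecount_example():
    assert bytecount(1 << 20) == b'1.00 MB'
    assert bytecount(1023) == b'1023 bytes'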
2889 2896
2890 2897 class transformingwriter:
2891 2898 """Writable file wrapper to transform data by function"""
2892 2899
2893 2900 def __init__(self, fp, encode):
2894 2901 self._fp = fp
2895 2902 self._encode = encode
2896 2903
2897 2904 def close(self):
2898 2905 self._fp.close()
2899 2906
2900 2907 def flush(self):
2901 2908 self._fp.flush()
2902 2909
2903 2910 def write(self, data):
2904 2911 return self._fp.write(self._encode(data))
2905 2912
2906 2913
2907 2914 # Matches a single EOL which can either be a CRLF where repeated CR
2908 2915 # are removed or a LF. We do not care about old Macintosh files, so a
2909 2916 # stray CR is an error.
2910 2917 _eolre = remod.compile(br'\r*\n')
2911 2918
2912 2919
2913 2920 def tolf(s: bytes) -> bytes:
2914 2921 return _eolre.sub(b'\n', s)
2915 2922
2916 2923
2917 2924 def tocrlf(s: bytes) -> bytes:
2918 2925 return _eolre.sub(b'\r\n', s)
2919 2926
2920 2927
2921 2928 def _crlfwriter(fp):
2922 2929 return transformingwriter(fp, tocrlf)
2923 2930
2924 2931
2925 2932 if pycompat.oslinesep == b'\r\n':
2926 2933 tonativeeol = tocrlf
2927 2934 fromnativeeol = tolf
2928 2935 nativeeolwriter = _crlfwriter
2929 2936 else:
2930 2937 tonativeeol = pycompat.identity
2931 2938 fromnativeeol = pycompat.identity
2932 2939 nativeeolwriter = pycompat.identity
2933 2940
2934 2941
2935 2942 # TODO delete since workaround variant for Python 2 no longer needed.
2936 2943 def iterfile(fp):
2937 2944 return fp
2938 2945
2939 2946
2940 2947 def iterlines(iterator: Iterable[bytes]) -> Iterator[bytes]:
2941 2948 for chunk in iterator:
2942 2949 for line in chunk.splitlines():
2943 2950 yield line
2944 2951
2945 2952
2946 2953 def expandpath(path: bytes) -> bytes:
2947 2954 return os.path.expanduser(os.path.expandvars(path))
2948 2955
2949 2956
2950 2957 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2951 2958 """Return the result of interpolating items in the mapping into string s.
2952 2959
2953 2960 prefix is a single character string, or a two character string with
2954 2961 a backslash as the first character if the prefix needs to be escaped in
2955 2962 a regular expression.
2956 2963
2957 2964 fn is an optional function that will be applied to the replacement text
2958 2965 just before replacement.
2959 2966
2960 2967 escape_prefix is an optional flag that allows using doubled prefix for
2961 2968 its escaping.
2962 2969 """
2963 2970 fn = fn or (lambda s: s)
2964 2971 patterns = b'|'.join(mapping.keys())
2965 2972 if escape_prefix:
2966 2973 patterns += b'|' + prefix
2967 2974 if len(prefix) > 1:
2968 2975 prefix_char = prefix[1:]
2969 2976 else:
2970 2977 prefix_char = prefix
2971 2978 mapping[prefix_char] = prefix_char
2972 2979 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2973 2980 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2974 2981
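# Illustrative sketch, not part of the original module: with a plain b'%'
# prefix (no regex escaping needed), mapping keys are substituted in place.
def _interpolate_example():
    mapping = {b'user': b'alice'}
    assert interpolate(b'%', mapping, b'hi %user') == b'hi alice'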
2975 2982
2976 2983 timecount = unitcountfn(
2977 2984 (1, 1e3, _(b'%.0f s')),
2978 2985 (100, 1, _(b'%.1f s')),
2979 2986 (10, 1, _(b'%.2f s')),
2980 2987 (1, 1, _(b'%.3f s')),
2981 2988 (100, 0.001, _(b'%.1f ms')),
2982 2989 (10, 0.001, _(b'%.2f ms')),
2983 2990 (1, 0.001, _(b'%.3f ms')),
2984 2991 (100, 0.000001, _(b'%.1f us')),
2985 2992 (10, 0.000001, _(b'%.2f us')),
2986 2993 (1, 0.000001, _(b'%.3f us')),
2987 2994 (100, 0.000000001, _(b'%.1f ns')),
2988 2995 (10, 0.000000001, _(b'%.2f ns')),
2989 2996 (1, 0.000000001, _(b'%.3f ns')),
2990 2997 )
2991 2998
2992 2999
2993 3000 @attr.s
2994 3001 class timedcmstats:
2995 3002 """Stats information produced by the timedcm context manager on entering."""
2996 3003
2997 3004 # the starting value of the timer as a float (meaning and resolution are
2998 3005 # platform dependent, see util.timer)
2999 3006 start = attr.ib(default=attr.Factory(lambda: timer()))
3000 3007 # the number of seconds as a floating point value; starts at 0, updated when
3001 3008 # the context is exited.
3002 3009 elapsed = attr.ib(default=0)
3003 3010 # the number of nested timedcm context managers.
3004 3011 level = attr.ib(default=1)
3005 3012
3006 3013 def __bytes__(self):
3007 3014 return timecount(self.elapsed) if self.elapsed else b'<unknown>'
3008 3015
3009 3016 __str__ = encoding.strmethod(__bytes__)
3010 3017
3011 3018
3012 3019 @contextlib.contextmanager
3013 3020 def timedcm(whencefmt, *whenceargs):
3014 3021 """A context manager that produces timing information for a given context.
3015 3022
3016 3023 On entering a timedcmstats instance is produced.
3017 3024
3018 3025 This context manager is reentrant.
3019 3026
3020 3027 """
3021 3028 # track nested context managers
3022 3029 timedcm._nested += 1
3023 3030 timing_stats = timedcmstats(level=timedcm._nested)
3024 3031 try:
3025 3032 with tracing.log(whencefmt, *whenceargs):
3026 3033 yield timing_stats
3027 3034 finally:
3028 3035 timing_stats.elapsed = timer() - timing_stats.start
3029 3036 timedcm._nested -= 1
3030 3037
3031 3038
3032 3039 timedcm._nested = 0
3033 3040
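# Illustrative sketch, not part of the original module: the stats object is
# available on entry and its elapsed field is filled in when the context
# exits.
def _timedcm_example():
    with timedcm(b'example sleep') as stats:
        time.sleep(0.01)
    assert stats.elapsed > 0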
3034 3041
3035 3042 def timed(func):
3036 3043 """Report the execution time of a function call to stderr.
3037 3044
3038 3045 During development, use as a decorator when you need to measure
3039 3046 the cost of a function, e.g. as follows:
3040 3047
3041 3048 @util.timed
3042 3049 def foo(a, b, c):
3043 3050 pass
3044 3051 """
3045 3052
3046 3053 def wrapper(*args, **kwargs):
3047 3054 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3048 3055 result = func(*args, **kwargs)
3049 3056 stderr = procutil.stderr
3050 3057 stderr.write(
3051 3058 b'%s%s: %s\n'
3052 3059 % (
3053 3060 b' ' * time_stats.level * 2,
3054 3061 pycompat.bytestr(func.__name__),
3055 3062 time_stats,
3056 3063 )
3057 3064 )
3058 3065 return result
3059 3066
3060 3067 return wrapper
3061 3068
3062 3069
3063 3070 _sizeunits = (
3064 3071 (b'm', 2 ** 20),
3065 3072 (b'k', 2 ** 10),
3066 3073 (b'g', 2 ** 30),
3067 3074 (b'kb', 2 ** 10),
3068 3075 (b'mb', 2 ** 20),
3069 3076 (b'gb', 2 ** 30),
3070 3077 (b'b', 1),
3071 3078 )
3072 3079
3073 3080
3074 3081 def sizetoint(s: bytes) -> int:
3075 3082 """Convert a space specifier to a byte count.
3076 3083
3077 3084 >>> sizetoint(b'30')
3078 3085 30
3079 3086 >>> sizetoint(b'2.2kb')
3080 3087 2252
3081 3088 >>> sizetoint(b'6M')
3082 3089 6291456
3083 3090 """
3084 3091 t = s.strip().lower()
3085 3092 try:
3086 3093 for k, u in _sizeunits:
3087 3094 if t.endswith(k):
3088 3095 return int(float(t[: -len(k)]) * u)
3089 3096 return int(t)
3090 3097 except ValueError:
3091 3098 raise error.ParseError(_(b"couldn't parse size: %s") % s)
3092 3099
3093 3100
3094 3101 class hooks:
3095 3102 """A collection of hook functions that can be used to extend a
3096 3103 function's behavior. Hooks are called in lexicographic order,
3097 3104 based on the names of their sources."""
3098 3105
3099 3106 def __init__(self):
3100 3107 self._hooks = []
3101 3108
3102 3109 def add(self, source, hook):
3103 3110 self._hooks.append((source, hook))
3104 3111
3105 3112 def __call__(self, *args):
3106 3113 self._hooks.sort(key=lambda x: x[0])
3107 3114 results = []
3108 3115 for source, hook in self._hooks:
3109 3116 results.append(hook(*args))
3110 3117 return results
3111 3118
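# Illustrative sketch, not part of the original module: hooks run in
# lexicographic order of their source names, not registration order.
def _hooks_example():
    h = hooks()
    h.add(b'b-second', lambda x: x * 2)
    h.add(b'a-first', lambda x: x + 1)
    assert h(3) == [4, 6]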
3112 3119
3113 3120 def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
3114 3121 """Yields lines for a nicely formatted stacktrace.
3115 3122 Skips the 'skip' last entries, then returns the last 'depth' entries.
3116 3123 Each file+linenumber is formatted according to fileline.
3117 3124 Each line is formatted according to line.
3118 3125 If line is None, it yields:
3119 3126 length of longest filepath+line number,
3120 3127 filepath+linenumber,
3121 3128 function
3122 3129
3123 3130 Not to be used in production code but very convenient while developing.
3124 3131 """
3125 3132 entries = [
3126 3133 (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3127 3134 for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
3128 3135 ][-depth:]
3129 3136 if entries:
3130 3137 fnmax = max(len(entry[0]) for entry in entries)
3131 3138 for fnln, func in entries:
3132 3139 if line is None:
3133 3140 yield (fnmax, fnln, func)
3134 3141 else:
3135 3142 yield line % (fnmax, fnln, func)
3136 3143
3137 3144
3138 3145 def debugstacktrace(
3139 3146 msg=b'stacktrace',
3140 3147 skip=0,
3141 3148 f=procutil.stderr,
3142 3149 otherf=procutil.stdout,
3143 3150 depth=0,
3144 3151 prefix=b'',
3145 3152 ):
3146 3153 """Writes a message to f (stderr) with a nicely formatted stacktrace.
3147 3154 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3148 3155 By default it will flush stdout first.
3149 3156 It can be used everywhere and intentionally does not require an ui object.
3150 3157 Not to be used in production code but very convenient while developing.
3151 3158 """
3152 3159 if otherf:
3153 3160 otherf.flush()
3154 3161 f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
3155 3162 for line in getstackframes(skip + 1, depth=depth):
3156 3163 f.write(prefix + line)
3157 3164 f.flush()
3158 3165
3159 3166
3160 3167 # convenient shortcut
3161 3168 dst = debugstacktrace
3162 3169
3163 3170
3164 3171 def safename(f, tag, ctx, others=None):
3165 3172 """
3166 3173 Generate a name that is safe to rename f to in the given context.
3167 3174
3168 3175 f: filename to rename
3169 3176 tag: a string tag that will be included in the new name
3170 3177 ctx: a context, in which the new name must not exist
3171 3178 others: a set of other filenames that the new name must not be in
3172 3179
3173 3180 Returns a file name of the form oldname~tag[~number] which does not exist
3174 3181 in the provided context and is not in the set of other names.
3175 3182 """
3176 3183 if others is None:
3177 3184 others = set()
3178 3185
3179 3186 fn = b'%s~%s' % (f, tag)
3180 3187 if fn not in ctx and fn not in others:
3181 3188 return fn
3182 3189 for n in itertools.count(1):
3183 3190 fn = b'%s~%s~%s' % (f, tag, n)
3184 3191 if fn not in ctx and fn not in others:
3185 3192 return fn
3186 3193
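# Illustrative sketch, not part of the original module; the empty tuple
# stands in for a real context object, since only `in` membership is used.
def _safename_example():
    assert safename(b'file.txt', b'orig', ()) == b'file.txt~orig'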
3187 3194
3188 3195 def readexactly(stream, n):
3189 3196 '''read n bytes from stream.read and abort if less was available'''
3190 3197 s = stream.read(n)
3191 3198 if len(s) < n:
3192 3199 raise error.Abort(
3193 3200 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
3194 3201 % (len(s), n)
3195 3202 )
3196 3203 return s
3197 3204
3198 3205
3199 3206 def uvarintencode(value):
3200 3207 """Encode an unsigned integer value to a varint.
3201 3208
3202 3209 A varint is a variable length integer of 1 or more bytes. Each byte
3203 3210 except the last has the most significant bit set. The lower 7 bits of
3204 3211 each byte store the 2's complement representation, least significant group
3205 3212 first.
3206 3213
3207 3214 >>> uvarintencode(0)
3208 3215 '\\x00'
3209 3216 >>> uvarintencode(1)
3210 3217 '\\x01'
3211 3218 >>> uvarintencode(127)
3212 3219 '\\x7f'
3213 3220 >>> uvarintencode(1337)
3214 3221 '\\xb9\\n'
3215 3222 >>> uvarintencode(65536)
3216 3223 '\\x80\\x80\\x04'
3217 3224 >>> uvarintencode(-1)
3218 3225 Traceback (most recent call last):
3219 3226 ...
3220 3227 ProgrammingError: negative value for uvarint: -1
3221 3228 """
3222 3229 if value < 0:
3223 3230 raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
3224 3231 bits = value & 0x7F
3225 3232 value >>= 7
3226 3233 bytes = []
3227 3234 while value:
3228 3235 bytes.append(pycompat.bytechr(0x80 | bits))
3229 3236 bits = value & 0x7F
3230 3237 value >>= 7
3231 3238 bytes.append(pycompat.bytechr(bits))
3232 3239
3233 3240 return b''.join(bytes)
3234 3241
3235 3242
3236 3243 def uvarintdecodestream(fh):
3237 3244 """Decode an unsigned variable length integer from a stream.
3238 3245
3239 3246 The passed argument is anything that has a ``.read(N)`` method.
3240 3247
3241 3248 >>> from io import BytesIO
3242 3249 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3243 3250 0
3244 3251 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3245 3252 1
3246 3253 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3247 3254 127
3248 3255 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3249 3256 1337
3250 3257 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3251 3258 65536
3252 3259 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3253 3260 Traceback (most recent call last):
3254 3261 ...
3255 3262 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3256 3263 """
3257 3264 result = 0
3258 3265 shift = 0
3259 3266 while True:
3260 3267 byte = ord(readexactly(fh, 1))
3261 3268 result |= (byte & 0x7F) << shift
3262 3269 if not (byte & 0x80):
3263 3270 return result
3264 3271 shift += 7
3265 3272
3266 3273
3267 3274 # Passing the '' locale means that the locale should be set according to the
3268 3275 # user settings (environment variables).
3269 3276 # Python sometimes avoids setting the global locale settings. When interfacing
3270 3277 # with C code (e.g. the curses module or the Subversion bindings), the global
3271 3278 # locale settings must be initialized correctly. Python 2 does not initialize
3272 3279 # the global locale settings on interpreter startup. Python 3 sometimes
3273 3280 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
3274 3281 # explicitly initialize it to get consistent behavior if it's not already
3275 3282 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
3276 3283 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
3277 3284 # if we can remove this code.
3278 3285 @contextlib.contextmanager
3279 3286 def with_lc_ctype():
3280 3287 oldloc = locale.setlocale(locale.LC_CTYPE, None)
3281 3288 if oldloc == 'C':
3282 3289 try:
3283 3290 try:
3284 3291 locale.setlocale(locale.LC_CTYPE, '')
3285 3292 except locale.Error:
3286 3293 # The likely case is that the locale from the environment
3287 3294 # variables is unknown.
3288 3295 pass
3289 3296 yield
3290 3297 finally:
3291 3298 locale.setlocale(locale.LC_CTYPE, oldloc)
3292 3299 else:
3293 3300 yield
3294 3301
3295 3302
3296 3303 def _estimatememory() -> Optional[int]:
3297 3304 """Provide an estimate for the available system memory in Bytes.
3298 3305
3299 3306 If no estimate can be provided on the platform, returns None.
3300 3307 """
3301 3308 if pycompat.sysplatform.startswith(b'win'):
3302 3309 # On Windows, use the GlobalMemoryStatusEx kernel function directly.
3303 3310 from ctypes import c_long as DWORD, c_ulonglong as DWORDLONG
3304 3311 from ctypes import ( # Structure etc. live in ctypes, not ctypes.wintypes
3305 3312 Structure,
3306 3313 byref,
3307 3314 sizeof,
3308 3315 windll,
3309 3316 )
3310 3317
3311 3318 class MEMORYSTATUSEX(Structure):
3312 3319 _fields_ = [
3313 3320 ('dwLength', DWORD),
3314 3321 ('dwMemoryLoad', DWORD),
3315 3322 ('ullTotalPhys', DWORDLONG),
3316 3323 ('ullAvailPhys', DWORDLONG),
3317 3324 ('ullTotalPageFile', DWORDLONG),
3318 3325 ('ullAvailPageFile', DWORDLONG),
3319 3326 ('ullTotalVirtual', DWORDLONG),
3320 3327 ('ullAvailVirtual', DWORDLONG),
3321 3328 ('ullExtendedVirtual', DWORDLONG),
3322 3329 ]
3323 3330
3324 3331 x = MEMORYSTATUSEX()
3325 3332 x.dwLength = sizeof(x)
3326 3333 windll.kernel32.GlobalMemoryStatusEx(byref(x))
3327 3334 return x.ullAvailPhys
3328 3335
3329 3336 # On newer Unix-like systems and Mac OSX, the sysconf interface
3330 3337 # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
3331 3338 # seems to be implemented on most systems.
3332 3339 try:
3333 3340 pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
3334 3341 pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
3335 3342 return pagesize * pages
3336 3343 except OSError: # sysconf can fail
3337 3344 pass
3338 3345 except KeyError: # unknown parameter
3339 3346 pass
@@ -1,823 +1,852
1 1 # vfs.py - Mercurial 'vfs' classes
2 2 #
3 3 # Copyright Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import contextlib
9 9 import os
10 10 import shutil
11 11 import stat
12 12 import threading
13 13
14 14 from typing import (
15 15 Optional,
16 16 )
17 17
18 18 from .i18n import _
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 pathutil,
23 23 pycompat,
24 24 util,
25 25 )
26 26
27 27
28 28 def _avoidambig(path: bytes, oldstat):
29 29 """Avoid file stat ambiguity forcibly
30 30
31 31 This function causes copying ``path`` file, if it is owned by
32 32 another (see issue5418 and issue5584 for detail).
33 33 """
34 34
35 35 def checkandavoid():
36 36 newstat = util.filestat.frompath(path)
37 37 # return whether file stat ambiguity is (already) avoided
38 38 return not newstat.isambig(oldstat) or newstat.avoidambig(path, oldstat)
39 39
40 40 if not checkandavoid():
41 41 # simply copy to change owner of path to get privilege to
42 42 # advance mtime (see issue5418)
43 43 util.rename(util.mktempcopy(path), path)
44 44 checkandavoid()
45 45
46 46
47 47 class abstractvfs:
48 48 """Abstract base class; cannot be instantiated"""
49 49
50 50 # default directory separator for vfs
51 51 #
52 52 # Other vfs code always uses `/` and this works fine because the python
53 53 # file API abstracts the use of `/` and makes it work transparently. For
54 54 # consistency, vfs will always use `/` when joining. This avoids some
55 55 # confusion in encoded vfs (see issue6546)
56 56 _dir_sep = b'/'
57 57
58 58 def __init__(self, *args, **kwargs):
59 59 '''Prevent instantiation; don't call this from subclasses.'''
60 60 raise NotImplementedError('attempted instantiating ' + str(type(self)))
61 61
62 62 # TODO: type return, which is util.posixfile wrapped by a proxy
63 63 def __call__(self, path: bytes, mode: bytes = b'rb', **kwargs):
64 64 raise NotImplementedError
65 65
66 66 def _auditpath(self, path: bytes, mode: bytes):
67 67 raise NotImplementedError
68 68
69 69 def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
70 70 raise NotImplementedError
71 71
72 72 def tryread(self, path: bytes) -> bytes:
73 73 '''gracefully return an empty string for missing files'''
74 74 try:
75 75 return self.read(path)
76 76 except FileNotFoundError:
77 77 pass
78 78 return b""
79 79
80 80 def tryreadlines(self, path: bytes, mode: bytes = b'rb'):
81 81 '''gracefully return an empty list for missing files'''
82 82 try:
83 83 return self.readlines(path, mode=mode)
84 84 except FileNotFoundError:
85 85 pass
86 86 return []
87 87
88 88 @util.propertycache
89 89 def open(self):
90 90 """Open ``path`` file, which is relative to vfs root.
91 91
92 92 Newly created directories are marked as "not to be indexed by
93 93 the content indexing service", if ``notindexed`` is specified
94 94 for "write" mode access.
95 95 """
96 96 return self.__call__
97 97
98 98 def read(self, path: bytes) -> bytes:
99 99 with self(path, b'rb') as fp:
100 100 return fp.read()
101 101
102 102 def readlines(self, path: bytes, mode: bytes = b'rb'):
103 103 with self(path, mode=mode) as fp:
104 104 return fp.readlines()
105 105
106 106 def write(
107 107 self, path: bytes, data: bytes, backgroundclose=False, **kwargs
108 108 ) -> int:
109 109 with self(path, b'wb', backgroundclose=backgroundclose, **kwargs) as fp:
110 110 return fp.write(data)
111 111
112 112 def writelines(
113 113 self, path: bytes, data: bytes, mode: bytes = b'wb', notindexed=False
114 114 ) -> None:
115 115 with self(path, mode=mode, notindexed=notindexed) as fp:
116 116 return fp.writelines(data)
117 117
118 118 def append(self, path: bytes, data: bytes) -> int:
119 119 with self(path, b'ab') as fp:
120 120 return fp.write(data)
121 121
122 122 def basename(self, path: bytes) -> bytes:
123 123 """return base element of a path (as os.path.basename would do)
124 124
125 125 This exists to allow handling of strange encoding if needed."""
126 126 return os.path.basename(path)
127 127
128 128 def chmod(self, path: bytes, mode: int) -> None:
129 129 return os.chmod(self.join(path), mode)
130 130
131 131 def dirname(self, path: bytes) -> bytes:
132 132 """return dirname element of a path (as os.path.dirname would do)
133 133
134 134 This exists to allow handling of strange encoding if needed."""
135 135 return os.path.dirname(path)
136 136
137 137 def exists(self, path: Optional[bytes] = None) -> bool:
138 138 return os.path.exists(self.join(path))
139 139
140 140 def fstat(self, fp):
141 141 return util.fstat(fp)
142 142
143 143 def isdir(self, path: Optional[bytes] = None) -> bool:
144 144 return os.path.isdir(self.join(path))
145 145
146 146 def isfile(self, path: Optional[bytes] = None) -> bool:
147 147 return os.path.isfile(self.join(path))
148 148
149 149 def islink(self, path: Optional[bytes] = None) -> bool:
150 150 return os.path.islink(self.join(path))
151 151
152 152 def isfileorlink(self, path: Optional[bytes] = None) -> bool:
153 153 """return whether path is a regular file or a symlink
154 154
155 155 Unlike isfile, this doesn't follow symlinks."""
156 156 try:
157 157 st = self.lstat(path)
158 158 except OSError:
159 159 return False
160 160 mode = st.st_mode
161 161 return stat.S_ISREG(mode) or stat.S_ISLNK(mode)
162 162
163 163 def _join(self, *paths: bytes) -> bytes:
164 164 root_idx = 0
165 165 for idx, p in enumerate(paths):
166 166 if os.path.isabs(p) or p.startswith(self._dir_sep):
167 167 root_idx = idx
168 168 if root_idx != 0:
169 169 paths = paths[root_idx:]
170 170 paths = [p for p in paths if p]
171 171 return self._dir_sep.join(paths)
172 172
173 173 def reljoin(self, *paths: bytes) -> bytes:
174 174 """join various elements of a path together (as os.path.join would do)
175 175
176 176 The vfs base is not injected so that paths stay relative. This exists
177 177 to allow handling of strange encoding if needed."""
178 178 return self._join(*paths)
179 179
180 180 def split(self, path: bytes):
181 181 """split top-most element of a path (as os.path.split would do)
182 182
183 183 This exists to allow handling of strange encoding if needed."""
184 184 return os.path.split(path)
185 185
186 186 def lexists(self, path: Optional[bytes] = None) -> bool:
187 187 return os.path.lexists(self.join(path))
188 188
189 189 def lstat(self, path: Optional[bytes] = None):
190 190 return os.lstat(self.join(path))
191 191
192 def is_mmap_safe(self, path: Optional[bytes] = None) -> bool:
193 """return True if it is safe to read a file content as mmap
194
195 This focuses on the file system aspect of such safety; the application
196 logic around that file is not taken into account, so callers need to
197 make sure the file won't be truncated in a way that would trigger SIGBUS
198 on access.
199
200
201 The initial motivation for this logic is that if mmap is used on NFS
202 and somebody deletes the mapped file (e.g. by renaming on top of it),
203 then you get SIGBUS, which can be pretty disruptive: we get core dump
204 reports, and the process terminates without writing to the blackbox.
205
206 In this situation we prefer to read the file normally instead.
207 The risk of ESTALE in the middle of the read remains, but it is
208 smaller because we read sooner, and the error should be reported
209 just like any other error.
210
211 Note that the Python standard library does not offer the necessary
212 function to detect the file system type, so this detection relies on
213 compiled bits and is not available in pure Python.
214 """
215 # XXX Since we already assume a vfs to address a consistent file system
216 # in other locations, we could determine the fstype once for the root
217 # and cache that value.
218 fstype = util.getfstype(self.join(path))
219 return fstype is not None and fstype != b'nfs'
220
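A minimal sketch of how a caller might consume ``is_mmap_safe`` (``opener`` is an assumed vfs instance and the function name is hypothetical; ``util.mmapread`` is the existing mmap-backed reader):

    from mercurial import util

    def read_cache_file(opener, path):
        # Prefer mmap only where the filesystem makes it safe (e.g. not NFS);
        # otherwise fall back to a plain read, as the docstring above suggests.
        if opener.is_mmap_safe(path):
            with opener(path) as fp:
                return util.mmapread(fp)  # bytes-like mmap object
        return opener.read(path)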
192 221 def listdir(self, path: Optional[bytes] = None):
193 222 return os.listdir(self.join(path))
194 223
195 224 def makedir(self, path: Optional[bytes] = None, notindexed=True):
196 225 return util.makedir(self.join(path), notindexed)
197 226
198 227 def makedirs(
199 228 self, path: Optional[bytes] = None, mode: Optional[int] = None
200 229 ):
201 230 return util.makedirs(self.join(path), mode)
202 231
203 232 def makelock(self, info, path: bytes):
204 233 return util.makelock(info, self.join(path))
205 234
206 235 def mkdir(self, path: Optional[bytes] = None):
207 236 return os.mkdir(self.join(path))
208 237
209 238 def mkstemp(
210 239 self,
211 240 suffix: bytes = b'',
212 241 prefix: bytes = b'tmp',
213 242 dir: Optional[bytes] = None,
214 243 ):
215 244 fd, name = pycompat.mkstemp(
216 245 suffix=suffix, prefix=prefix, dir=self.join(dir)
217 246 )
218 247 dname, fname = util.split(name)
219 248 if dir:
220 249 return fd, os.path.join(dir, fname)
221 250 else:
222 251 return fd, fname
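``mkstemp`` hands back a raw file descriptor plus a vfs-relative name. A hedged usage sketch (``opener`` and the file names are illustrative):

    import os

    fd, name = opener.mkstemp(prefix=b'journal-')
    try:
        os.write(fd, b'temporary payload')
    finally:
        os.close(fd)
    opener.rename(name, b'journal')  # or opener.unlink(name) to discard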
223 252
224 253 def readdir(self, path: Optional[bytes] = None, stat=None, skip=None):
225 254 return util.listdir(self.join(path), stat, skip)
226 255
227 256 def readlock(self, path: bytes) -> bytes:
228 257 return util.readlock(self.join(path))
229 258
230 259 def rename(self, src: bytes, dst: bytes, checkambig=False):
231 260 """Rename from src to dst
232 261
233 262 checkambig argument is used with util.filestat, and is useful
234 263 only if destination file is guarded by any lock
235 264 (e.g. repo.lock or repo.wlock).
236 265
237 266 To forcibly avoid file stat ambiguity, checkambig=True involves
238 267 copying the ``src`` file if it is owned by another user. Therefore, use
239 268 checkambig=True only in limited cases (see also issue5418 and
240 269 issue5584 for detail).
241 270 """
242 271 self._auditpath(dst, b'w')
243 272 srcpath = self.join(src)
244 273 dstpath = self.join(dst)
245 274 oldstat = checkambig and util.filestat.frompath(dstpath)
246 275 if oldstat and oldstat.stat:
247 276 ret = util.rename(srcpath, dstpath)
248 277 _avoidambig(dstpath, oldstat)
249 278 return ret
250 279 return util.rename(srcpath, dstpath)
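An illustrative write-then-rename pattern built on ``checkambig`` (names are hypothetical, and the lock that makes checkambig=True meaningful is elided):

    def update_guarded_file(opener, data: bytes) -> None:
        # The caller is expected to hold the relevant lock (e.g. repo.wlock).
        opener.write(b'guarded.new', data)
        opener.rename(b'guarded.new', b'guarded', checkambig=True)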
251 280
252 281 def readlink(self, path: bytes) -> bytes:
253 282 return util.readlink(self.join(path))
254 283
255 284 def removedirs(self, path: Optional[bytes] = None):
256 285 """Remove a leaf directory and all empty intermediate ones"""
257 286 return util.removedirs(self.join(path))
258 287
259 288 def rmdir(self, path: Optional[bytes] = None):
260 289 """Remove an empty directory."""
261 290 return os.rmdir(self.join(path))
262 291
263 292 def rmtree(
264 293 self, path: Optional[bytes] = None, ignore_errors=False, forcibly=False
265 294 ):
266 295 """Remove a directory tree recursively
267 296
268 297 If ``forcibly``, this tries to remove READ-ONLY files, too.
269 298 """
270 299 if forcibly:
271 300
272 301 def onexc(function, path, excinfo):
273 302 if function is not os.remove:
274 303 raise
275 304 # read-only files cannot be unlinked under Windows
276 305 s = os.stat(path)
277 306 if (s.st_mode & stat.S_IWRITE) != 0:
278 307 raise
279 308 os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE)
280 309 os.remove(path)
281 310
282 311 else:
283 312 onexc = None
284 313 try:
285 314 # pytype: disable=wrong-keyword-args
286 315 return shutil.rmtree(
287 316 self.join(path), ignore_errors=ignore_errors, onexc=onexc
288 317 )
289 318 # pytype: enable=wrong-keyword-args
290 319 except TypeError: # onexc was introduced in Python 3.12
291 320 return shutil.rmtree(
292 321 self.join(path), ignore_errors=ignore_errors, onerror=onexc
293 322 )
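The TypeError fallback above is a version probe: shutil.rmtree grew ``onexc`` in Python 3.12, while older versions only accept ``onerror``. The same compatibility dance, sketched standalone:

    import shutil

    def rmtree_compat(path, handler=None):
        # Try the Python >= 3.12 keyword first; fall back on older runtimes.
        # Note the two callbacks differ slightly: ``onexc`` receives the
        # exception instance, ``onerror`` receives sys.exc_info().
        try:
            return shutil.rmtree(path, onexc=handler)
        except TypeError:
            return shutil.rmtree(path, onerror=handler)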
294 323
295 324 def setflags(self, path: bytes, l: bool, x: bool):
296 325 return util.setflags(self.join(path), l, x)
297 326
298 327 def stat(self, path: Optional[bytes] = None):
299 328 return os.stat(self.join(path))
300 329
301 330 def unlink(self, path: Optional[bytes] = None):
302 331 return util.unlink(self.join(path))
303 332
304 333 def tryunlink(self, path: Optional[bytes] = None):
305 334 """Attempt to remove a file, ignoring missing file errors."""
306 335 return util.tryunlink(self.join(path))
307 336
308 337 def unlinkpath(
309 338 self, path: Optional[bytes] = None, ignoremissing=False, rmdir=True
310 339 ):
311 340 return util.unlinkpath(
312 341 self.join(path), ignoremissing=ignoremissing, rmdir=rmdir
313 342 )
314 343
315 344 def utime(self, path: Optional[bytes] = None, t=None):
316 345 return os.utime(self.join(path), t)
317 346
318 347 def walk(self, path: Optional[bytes] = None, onerror=None):
319 348 """Yield (dirpath, dirs, files) tuple for each directories under path
320 349
321 350 ``dirpath`` is a path relative to the root of this vfs. This
322 351 uses ``os.sep`` as the path separator, even if you specify a
323 352 POSIX-style ``path``.
324 353
325 354 "The root of this vfs" is represented as empty ``dirpath``.
326 355 """
327 356 root = os.path.normpath(self.join(None))
328 357 # when dirpath == root, dirpath[prefixlen:] becomes empty
329 358 # because len(dirpath) < prefixlen.
330 359 prefixlen = len(pathutil.normasprefix(root))
331 360 for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
332 361 yield (dirpath[prefixlen:], dirs, files)
333 362
334 363 @contextlib.contextmanager
335 364 def backgroundclosing(self, ui, expectedcount=-1):
336 365 """Allow files to be closed asynchronously.
337 366
338 367 When this context manager is active, ``backgroundclose`` can be passed
339 368 to ``__call__``/``open`` to result in the file possibly being closed
340 369 asynchronously, on a background thread.
341 370 """
342 371 # Sharing backgroundfilecloser between threads is complex, and using
343 372 # multiple instances puts us at risk of running out of file descriptors,
344 373 # so only allow backgroundfilecloser to be used from the main thread.
345 374 if not isinstance(
346 375 threading.current_thread(),
347 376 threading._MainThread, # pytype: disable=module-attr
348 377 ):
349 378 yield
350 379 return
351 380 vfs = getattr(self, 'vfs', self)
352 381 if getattr(vfs, '_backgroundfilecloser', None):
353 382 raise error.Abort(
354 383 _(b'can only have 1 active background file closer')
355 384 )
356 385
357 386 with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
358 387 try:
359 388 vfs._backgroundfilecloser = (
360 389 bfc # pytype: disable=attribute-error
361 390 )
362 391 yield bfc
363 392 finally:
364 393 vfs._backgroundfilecloser = (
365 394 None # pytype: disable=attribute-error
366 395 )
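A usage sketch of the protocol this context manager enables (``opener``, ``ui`` and the file payloads are assumed to come from the caller):

    def write_many(opener, ui, files):
        # files: list of (name, data) pairs; closes happen on background
        # threads when the feature is enabled and the count is large enough.
        with opener.backgroundclosing(ui, expectedcount=len(files)):
            for name, data in files:
                with opener(name, b'wb', backgroundclose=True) as fp:
                    fp.write(data)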
367 396
368 397 def register_file(self, path):
369 398 """generic hook point to lets fncache steer its stew"""
370 399
371 400
372 401 class vfs(abstractvfs):
373 402 """Operate files relative to a base directory
374 403
375 404 This class is used to hide the details of COW semantics and
376 405 remote file access from higher level code.
377 406
378 407 'cacheaudited' should be enabled only if (a) vfs object is short-lived, or
379 408 (b) the base directory is managed by hg and considered sort-of append-only.
380 409 See pathutil.pathauditor() for details.
381 410 """
382 411
383 412 def __init__(
384 413 self,
385 414 base: bytes,
386 415 audit=True,
387 416 cacheaudited=False,
388 417 expandpath=False,
389 418 realpath=False,
390 419 ):
391 420 if expandpath:
392 421 base = util.expandpath(base)
393 422 if realpath:
394 423 base = os.path.realpath(base)
395 424 self.base = base
396 425 self._audit = audit
397 426 if audit:
398 427 self.audit = pathutil.pathauditor(self.base, cached=cacheaudited)
399 428 else:
400 429 self.audit = lambda path, mode=None: True
401 430 self.createmode = None
402 431 self._trustnlink = None
403 432 self.options = {}
404 433
405 434 @util.propertycache
406 435 def _cansymlink(self) -> bool:
407 436 return util.checklink(self.base)
408 437
409 438 @util.propertycache
410 439 def _chmod(self):
411 440 return util.checkexec(self.base)
412 441
413 442 def _fixfilemode(self, name):
414 443 if self.createmode is None or not self._chmod:
415 444 return
416 445 os.chmod(name, self.createmode & 0o666)
417 446
418 447 def _auditpath(self, path, mode) -> None:
419 448 if self._audit:
420 449 if os.path.isabs(path) and path.startswith(self.base):
421 450 path = os.path.relpath(path, self.base)
422 451 r = util.checkosfilename(path)
423 452 if r:
424 453 raise error.Abort(b"%s: %r" % (r, path))
425 454 self.audit(path, mode=mode)
426 455
427 456 def isfileorlink_checkdir(
428 457 self, dircache, path: Optional[bytes] = None
429 458 ) -> bool:
430 459 """return True if the path is a regular file or a symlink and
431 460 the directories along the path are "normal", that is,
432 461 not symlinks or nested hg repositories.
433 462
434 463 Ignores the `_audit` setting, and checks the directories regardless.
435 464 `dircache` is used to cache the directory checks.
436 465 """
437 466 try:
438 467 for prefix in pathutil.finddirs_rev_noroot(util.localpath(path)):
439 468 if prefix in dircache:
440 469 res = dircache[prefix]
441 470 else:
442 471 res = pathutil.pathauditor._checkfs_exists(
443 472 self.base, prefix, path
444 473 )
445 474 dircache[prefix] = res
446 475 if not res:
447 476 return False
448 477 except (OSError, error.Abort):
449 478 return False
450 479 return self.isfileorlink(path)
451 480
452 481 def __call__(
453 482 self,
454 483 path: bytes,
455 484 mode: bytes = b"rb",
456 485 atomictemp=False,
457 486 notindexed=False,
458 487 backgroundclose=False,
459 488 checkambig=False,
460 489 auditpath=True,
461 490 makeparentdirs=True,
462 491 ):
463 492 """Open ``path`` file, which is relative to vfs root.
464 493
465 494 By default, parent directories are created as needed. Newly created
466 495 directories are marked as "not to be indexed by the content indexing
467 496 service", if ``notindexed`` is specified for "write" mode access.
468 497 Set ``makeparentdirs=False`` to not create directories implicitly.
469 498
470 499 If ``backgroundclose`` is passed, the file may be closed asynchronously.
471 500 It can only be used if the ``self.backgroundclosing()`` context manager
472 501 is active. This should only be specified if the following criteria hold:
473 502
474 503 1. There is a potential for writing thousands of files. Unless you
475 504 are writing thousands of files, the performance benefits of
476 505 asynchronously closing files are not realized.
477 506 2. Files are opened exactly once for the ``backgroundclosing``
478 507 active duration and are therefore free of race conditions between
479 508 closing a file on a background thread and reopening it. (If the
480 509 file were opened multiple times, there could be unflushed data
481 510 because the original file handle hasn't been flushed/closed yet.)
482 511
483 512 ``checkambig`` argument is passed to atomictempfile (valid
484 513 only for writing), and is useful only if target file is
485 514 guarded by any lock (e.g. repo.lock or repo.wlock).
486 515
487 516 To forcibly avoid file stat ambiguity, checkambig=True involves
488 517 copying the ``path`` file opened in "append" mode (e.g. for
489 518 truncation), if it is owned by another. Therefore, use
490 519 combination of append mode and checkambig=True only in limited
491 520 cases (see also issue5418 and issue5584 for detail).
492 521 """
493 522 if auditpath:
494 523 self._auditpath(path, mode)
495 524 f = self.join(path)
496 525
497 526 if b"b" not in mode:
498 527 mode += b"b" # for that other OS
499 528
500 529 nlink = -1
501 530 if mode not in (b'r', b'rb'):
502 531 dirname, basename = util.split(f)
503 532 # If basename is empty, then the path is malformed because it points
504 533 # to a directory. Let the posixfile() call below raise IOError.
505 534 if basename:
506 535 if atomictemp:
507 536 if makeparentdirs:
508 537 util.makedirs(dirname, self.createmode, notindexed)
509 538 return util.atomictempfile(
510 539 f, mode, self.createmode, checkambig=checkambig
511 540 )
512 541 try:
513 542 if b'w' in mode:
514 543 util.unlink(f)
515 544 nlink = 0
516 545 else:
517 546 # nlinks() may behave differently for files on Windows
518 547 # shares if the file is open.
519 548 with util.posixfile(f):
520 549 nlink = util.nlinks(f)
521 550 if nlink < 1:
522 551 nlink = 2 # force mktempcopy (issue1922)
523 552 except FileNotFoundError:
524 553 nlink = 0
525 554 if makeparentdirs:
526 555 util.makedirs(dirname, self.createmode, notindexed)
527 556 if nlink > 0:
528 557 if self._trustnlink is None:
529 558 self._trustnlink = nlink > 1 or util.checknlink(f)
530 559 if nlink > 1 or not self._trustnlink:
531 560 util.rename(util.mktempcopy(f), f)
532 561 fp = util.posixfile(f, mode)
533 562 if nlink == 0:
534 563 self._fixfilemode(f)
535 564
536 565 if checkambig:
537 566 if mode in (b'r', b'rb'):
538 567 raise error.Abort(
539 568 _(
540 569 b'implementation error: mode %s is not'
541 570 b' valid for checkambig=True'
542 571 )
543 572 % mode
544 573 )
545 574 fp = checkambigatclosing(fp)
546 575
547 576 if backgroundclose and isinstance(
548 577 threading.current_thread(),
549 578 threading._MainThread, # pytype: disable=module-attr
550 579 ):
551 580 if (
552 581 not self._backgroundfilecloser # pytype: disable=attribute-error
553 582 ):
554 583 raise error.Abort(
555 584 _(
556 585 b'backgroundclose can only be used when a '
557 586 b'backgroundclosing context manager is active'
558 587 )
559 588 )
560 589
561 590 fp = delayclosedfile(
562 591 fp,
563 592 self._backgroundfilecloser, # pytype: disable=attribute-error
564 593 )
565 594
566 595 return fp
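A hedged sketch of the ``atomictemp`` mode described above: writes go to a temporary file that replaces the target on a clean close, so readers never observe partial content (names illustrative):

    def write_atomically(opener, name: bytes, data: bytes) -> None:
        with opener(name, b'wb', atomictemp=True) as fp:
            fp.write(data)
        # On a clean exit the temp file has been renamed over ``name``;
        # if an exception escapes, the target is left untouched.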
567 596
568 597 def symlink(self, src: bytes, dst: bytes) -> None:
569 598 self.audit(dst)
570 599 linkname = self.join(dst)
571 600 util.tryunlink(linkname)
572 601
573 602 util.makedirs(os.path.dirname(linkname), self.createmode)
574 603
575 604 if self._cansymlink:
576 605 try:
577 606 os.symlink(src, linkname)
578 607 except OSError as err:
579 608 raise OSError(
580 609 err.errno,
581 610 _(b'could not symlink to %r: %s')
582 611 % (src, encoding.strtolocal(err.strerror)),
583 612 linkname,
584 613 )
585 614 else:
586 615 self.write(dst, src)
587 616
588 617 def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
589 618 if path:
590 619 parts = [self.base, path]
591 620 parts.extend(insidef)
592 621 return self._join(*parts)
593 622 else:
594 623 return self.base
595 624
596 625
597 626 opener = vfs
598 627
599 628
600 629 class proxyvfs(abstractvfs):
601 630 def __init__(self, vfs: "vfs"):
602 631 self.vfs = vfs
603 632
604 633 @property
605 634 def createmode(self):
606 635 return self.vfs.createmode
607 636
608 637 def _auditpath(self, path, mode):
609 638 return self.vfs._auditpath(path, mode)
610 639
611 640 @property
612 641 def options(self):
613 642 return self.vfs.options
614 643
615 644 @options.setter
616 645 def options(self, value):
617 646 self.vfs.options = value
618 647
619 648 @property
620 649 def audit(self):
621 650 return self.vfs.audit
622 651
623 652
624 653 class filtervfs(proxyvfs, abstractvfs):
625 654 '''Wrapper vfs for filtering filenames with a function.'''
626 655
627 656 def __init__(self, vfs: "vfs", filter):
628 657 proxyvfs.__init__(self, vfs)
629 658 self._filter = filter
630 659
631 660 def __call__(self, path: bytes, *args, **kwargs):
632 661 return self.vfs(self._filter(path), *args, **kwargs)
633 662
634 663 def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
635 664 if path:
636 665 return self.vfs.join(self._filter(self.vfs.reljoin(path, *insidef)))
637 666 else:
638 667 return self.vfs.join(path)
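A small illustration of filtervfs: every path is run through the filter before it reaches the wrapped vfs (the base directory and lowercasing filter are purely illustrative):

    base = vfs(b'/tmp/demo-store')
    lowered = filtervfs(base, lambda p: p.lower())
    lowered.write(b'DATA/Foo', b'payload')  # stored as data/foo under the base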
639 668
640 669
641 670 filteropener = filtervfs
642 671
643 672
644 673 class readonlyvfs(proxyvfs):
645 674 '''Wrapper vfs preventing any writing.'''
646 675
647 676 def __init__(self, vfs: "vfs"):
648 677 proxyvfs.__init__(self, vfs)
649 678
650 679 def __call__(self, path: bytes, mode: bytes = b'rb', *args, **kw):
651 680 if mode not in (b'r', b'rb'):
652 681 raise error.Abort(_(b'this vfs is read only'))
653 682 return self.vfs(path, mode, *args, **kw)
654 683
655 684 def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
656 685 return self.vfs.join(path, *insidef)
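And the read-only wrapper in action (a sketch, reusing the illustrative ``base`` above):

    ro = readonlyvfs(base)
    ro.read(b'data/foo')      # reads pass through to the wrapped vfs
    # ro(b'data/foo', b'wb')  # would raise error.Abort: this vfs is read only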
657 686
658 687
659 688 class closewrapbase:
660 689 """Base class of wrapper, which hooks closing
661 690
662 691 Do not instantiate outside of the vfs layer.
663 692 """
664 693
665 694 def __init__(self, fh):
666 695 object.__setattr__(self, '_origfh', fh)
667 696
668 697 def __getattr__(self, attr):
669 698 return getattr(self._origfh, attr)
670 699
671 700 def __setattr__(self, attr, value):
672 701 return setattr(self._origfh, attr, value)
673 702
674 703 def __delattr__(self, attr):
675 704 return delattr(self._origfh, attr)
676 705
677 706 def __enter__(self):
678 707 self._origfh.__enter__()
679 708 return self
680 709
681 710 def __exit__(self, exc_type, exc_value, exc_tb):
682 711 raise NotImplementedError('attempted instantiating ' + str(type(self)))
683 712
684 713 def close(self):
685 714 raise NotImplementedError('attempted instantiating ' + str(type(self)))
686 715
687 716
688 717 class delayclosedfile(closewrapbase):
689 718 """Proxy for a file object whose close is delayed.
690 719
691 720 Do not instantiate outside of the vfs layer.
692 721 """
693 722
694 723 def __init__(self, fh, closer):
695 724 super(delayclosedfile, self).__init__(fh)
696 725 object.__setattr__(self, '_closer', closer)
697 726
698 727 def __exit__(self, exc_type, exc_value, exc_tb):
699 728 self._closer.close(self._origfh)
700 729
701 730 def close(self):
702 731 self._closer.close(self._origfh)
703 732
704 733
705 734 class backgroundfilecloser:
706 735 """Coordinates background closing of file handles on multiple threads."""
707 736
708 737 def __init__(self, ui, expectedcount=-1):
709 738 self._running = False
710 739 self._entered = False
711 740 self._threads = []
712 741 self._threadexception = None
713 742
714 743 # Only Windows/NTFS has slow file closing. So only enable by default
715 744 # on that platform. But allow to be enabled elsewhere for testing.
716 745 defaultenabled = pycompat.iswindows
717 746 enabled = ui.configbool(b'worker', b'backgroundclose', defaultenabled)
718 747
719 748 if not enabled:
720 749 return
721 750
722 751 # There is overhead to starting and stopping the background threads.
723 752 # Don't do background processing unless the file count is large enough
724 753 # to justify it.
725 754 minfilecount = ui.configint(b'worker', b'backgroundcloseminfilecount')
726 755 # FUTURE dynamically start background threads after minfilecount closes.
727 756 # (We don't currently have any callers that don't know their file count)
728 757 if expectedcount > 0 and expectedcount < minfilecount:
729 758 return
730 759
731 760 maxqueue = ui.configint(b'worker', b'backgroundclosemaxqueue')
732 761 threadcount = ui.configint(b'worker', b'backgroundclosethreadcount')
733 762
734 763 ui.debug(
735 764 b'starting %d threads for background file closing\n' % threadcount
736 765 )
737 766
738 767 self._queue = pycompat.queue.Queue(maxsize=maxqueue)
739 768 self._running = True
740 769
741 770 for i in range(threadcount):
742 771 t = threading.Thread(target=self._worker, name='backgroundcloser')
743 772 self._threads.append(t)
744 773 t.start()
745 774
746 775 def __enter__(self):
747 776 self._entered = True
748 777 return self
749 778
750 779 def __exit__(self, exc_type, exc_value, exc_tb):
751 780 self._running = False
752 781
753 782 # Wait for threads to finish closing so open files don't linger for
754 783 # longer than lifetime of context manager.
755 784 for t in self._threads:
756 785 t.join()
757 786
758 787 def _worker(self):
759 788 """Main routine for worker thread."""
760 789 while True:
761 790 try:
762 791 fh = self._queue.get(block=True, timeout=0.100)
763 792 # Need to catch errors, or the thread will terminate and
764 793 # we could orphan file descriptors.
765 794 try:
766 795 fh.close()
767 796 except Exception as e:
768 797 # Stash so can re-raise from main thread later.
769 798 self._threadexception = e
770 799 except pycompat.queue.Empty:
771 800 if not self._running:
772 801 break
773 802
774 803 def close(self, fh):
775 804 """Schedule a file for closing."""
776 805 if not self._entered:
777 806 raise error.Abort(
778 807 _(b'can only call close() when context manager active')
779 808 )
780 809
781 810 # If a background thread encountered an exception, raise now so we fail
782 811 # fast. Otherwise we may potentially go on for minutes until the error
783 812 # is acted on.
784 813 if self._threadexception:
785 814 e = self._threadexception
786 815 self._threadexception = None
787 816 raise e
788 817
789 818 # If we're not actively running, close synchronously.
790 819 if not self._running:
791 820 fh.close()
792 821 return
793 822
794 823 self._queue.put(fh, block=True, timeout=None)
795 824
796 825
797 826 class checkambigatclosing(closewrapbase):
798 827 """Proxy for a file object, to avoid ambiguity of file stat
799 828
800 829 See also util.filestat for detail about "ambiguity of file stat".
801 830
802 831 This proxy is useful only if the target file is guarded by any
803 832 lock (e.g. repo.lock or repo.wlock)
804 833
805 834 Do not instantiate outside of the vfs layer.
806 835 """
807 836
808 837 def __init__(self, fh):
809 838 super(checkambigatclosing, self).__init__(fh)
810 839 object.__setattr__(self, '_oldstat', util.filestat.frompath(fh.name))
811 840
812 841 def _checkambig(self):
813 842 oldstat = self._oldstat
814 843 if oldstat.stat:
815 844 _avoidambig(self._origfh.name, oldstat)
816 845
817 846 def __exit__(self, exc_type, exc_value, exc_tb):
818 847 self._origfh.__exit__(exc_type, exc_value, exc_tb)
819 848 self._checkambig()
820 849
821 850 def close(self):
822 851 self._origfh.close()
823 852 self._checkambig()